本文从EVM操作码的角度,研究storage、memory、calldata之间的数据拷贝行为。
solidity version = 0.8.15,optimizer = false,evm_version = "london"
在线反汇编 https://ethervm.io/decompile
由于字节码阅读起来较为麻烦,本文主要分析反汇编后的代码。
有兴趣的同学可以自行逐行研究evm操作码。
// Sample for the storage -> storage case: two fixed-size state arrays of the
// same type, where one is assigned to the other.
contract Test {
uint256[3] storage_numbers1;
uint256[3] storage_numbers2;
// Assigns the state array storage_numbers2 to the state array storage_numbers1.
function test() public {
storage_numbers1 = storage_numbers2;
}
}
反汇编代码如下,test函数大致对应func_0035()。func_0035()准备了三个参数:目标起始slot(0x00,即storage_numbers1)、源起始slot(0x03,即storage_numbers2)和元素个数(0x03),然后进入func_0048。
我们主要关注func_0048里的storage[temp5] = storage[temp4]; 这条语句位于一个goto循环里,会把storage_numbers2中的每一个元素逐个复制到storage_numbers1中。
可见,storage之间的赋值是深拷贝。
// Decompiler output for the storage -> storage sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part it calls func_0035 and then stops.
function main() {
......
func_0035();
stop();
}
// Sets up three values and passes them to func_0048.
function func_0035() {
// NOTE(review): 0x45 looks like a bytecode return address kept by the decompiler — confirm.
var var0 = 0x45;
// First argument: becomes the storage index that is written to in func_0048.
var var1 = 0x00;
// Second argument: becomes the storage index that is read from in func_0048.
var var2 = 0x03;
// Third argument: added to the read index in func_0048 to form the loop end.
var var3 = 0x03;
var0 = func_0048(var1, var2, var3);
}
// Copies storage words one at a time.
// arg0: first storage index to write; arg1: first storage index to read;
// arg2: number of words (the loop runs while read index < arg1 + arg2).
function func_0048(var arg0, var arg1, var arg2) returns (var r0) {
// var0 is the write cursor.
var var0 = arg0;
var temp0 = arg1;
// arg1 is overwritten with arg0 + 0x03; it is not read again in the visible part.
arg1 = var0 + 0x03;
var var1 = temp0;
// A zero count takes the elided branch.
if (!arg2) {
......
} else {
var temp2 = arg2;
var temp3 = var1;
// From here on arg2 is the read cursor and var1 is the end index (read start + count).
arg2 = temp3;
var1 = arg2 + temp2;
if (var1 <= arg2) {
label_0070:
goto label_0071;
} else {
label_005F:
var temp4 = arg2;
var temp5 = var0;
// Copy exactly one storage word from the read cursor to the write cursor.
storage[temp5] = storage[temp4];
// Advance both cursors by one slot.
arg2 = temp4 + 0x01;
var1 = var1;
var0 = temp5 + 0x01;
// Leave the loop once the read cursor reaches the end index.
if (var1 <= arg2) { goto label_0070; }
else { goto label_005F; }
}
}
}
}
// Sample for the storage / local storage -> local storage case.
contract Test {
uint256[3] storage_numbers1;
uint256 n;
// Binds two local storage references in a row and then reads one element
// through the second reference.
function test() public {
uint256[3] storage local1 = storage_numbers1;
uint256[3] storage local2 = local1;
// Read one element through local2 so the assignments above are actually used.
n = local2[1];
}
}
反汇编代码如下.
在反汇编代码里看不到storage复制的痕迹。
solidity的两行代码对应func_0035()中的:
var var1 = var0; var var2 = var1;
local storage相当于storage的指针(storage的slot编号),storage/local storage → local storage只是指针的赋值,显然属于浅拷贝。
// Decompiler output for the storage -> local storage sample.
// The "......" / "...." lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part it calls func_0035 and then stops.
function main() {
......
func_0035();
stop();
}
// Contains no loop and no storage-to-storage copy: only plain value
// assignments followed by a single indexed storage read.
function func_0035() {
// var0, var1 and var2 all hold the same value 0x00; each is a plain copy of the previous one.
var var0 = 0x00;
var var1 = var0;
var var2 = var1;
// Index used for the element read below.
var var3 = 0x01;
// Bounds check of the index against 0x03.
if (var3 < 0x03) {
// Read storage at (index + var2) and store it at storage index 0x03.
storage[0x03] = storage[var3 + var2];
return;
} else {
....
}
}
}
// Sample for the local storage -> storage case.
contract Test {
uint256[3] storage_numbers;
// Binds a local storage reference to the state array and then assigns that
// reference back to the state array.
function test() public {
uint256[3] storage local = storage_numbers;
storage_numbers = local;
}
}
反汇编代码如下。
主要的拷贝逻辑在func_004A,我们看到赋值语句storage[temp4] = storage[temp3];在一个goto循环里,说明这个函数会拷贝数组中的每一个值。
显然,local storage→storage是深拷贝,过程和storage→storage类似。
// Decompiler output for the local storage -> storage sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part it calls func_0035 and then stops.
function main() {
......
func_0035();
stop();
}
// Sets up three values and passes them to func_004A.
function func_0035() {
var var0 = 0x00;
// NOTE(review): 0x46 looks like a bytecode return address kept by the decompiler — confirm.
var var1 = 0x46;
// First argument: becomes the storage index that is written to in func_004A.
var var2 = 0x00;
// Second argument: a copy of var0; becomes the storage index that is read from.
var var3 = var0;
// Third argument: added to the read index in func_004A to form the loop end.
var var4 = 0x03;
var1 = func_004A(var2, var3, var4);
}
// Copies storage words one at a time.
// arg0: first storage index to write; arg1: first storage index to read;
// arg2: number of words (the loop runs while read index < arg1 + arg2).
function func_004A(var arg0, var arg1, var arg2) returns (var r0) {
// var0 is the write cursor.
var var0 = arg0;
var var1 = arg1;
// arg1 is overwritten with arg0 + 0x03; it is not read again in the visible part.
arg1 = var0 + 0x03;
// A zero count jumps straight to the elided exit path.
if (!arg2) {
label_0073:
......
} else {
var temp1 = arg2;
var temp2 = var1;
// From here on arg2 is the read cursor and var1 is the end index (read start + count).
arg2 = temp2;
var1 = arg2 + temp1;
if (var1 <= arg2) {
label_0072:
goto label_0073;
} else {
label_0061:
var temp3 = arg2;
var temp4 = var0;
// Copy exactly one storage word from the read cursor to the write cursor.
storage[temp4] = storage[temp3];
// Advance both cursors by one slot.
arg2 = temp3 + 0x01;
var0 = temp4 + 0x01;
var1 = var1;
// Leave the loop once the read cursor reaches the end index.
if (var1 <= arg2) { goto label_0072; }
else { goto label_0061; }
}
}
}
}
// Sample for the storage / local storage -> memory case.
contract Test {
uint256[3] storage_numbers;
uint256 n;
// Binds a local storage reference, assigns it to a memory array, and reads
// one element of the memory array.
function test() public {
uint256[3] storage local = storage_numbers;
uint256[3] memory m1 = local;
// Read one element of m1 so the assignment above is actually used.
n = m1[1];
}
}
反汇编代码如下.
复制对应代码为memory[temp5:temp5 + 0x20] = storage[temp4];这个语句也在一个goto循环中,storage中的每一个元素都会被复制到memory中。
可见storage/local storage→memory是深拷贝。
// Decompiler output for the local storage -> memory sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part it calls func_0035 and then stops.
function main() {
......
func_0035();
stop();
}
// Reserves 0x20 * 0x03 bytes of memory and fills them word by word from storage.
function func_0035() {
var var0 = 0x00;
var var1 = var0;
// Read the word at memory 0x40 (the free memory pointer in Solidity's memory
// layout) and bump it by 0x20 * 0x03 bytes; temp0 is the start of the reserved region.
var temp0 = memory[0x40:0x60];
memory[0x40:0x60] = temp0 + 0x20 * 0x03;
var var2 = temp0;
var var3 = var1;
// var5: memory destination; var6: storage index to read; var7: element count.
var var5 = var2;
var var4 = 0x03;
var var6 = var3;
var var7 = 0x03;
// A zero count jumps straight to the elided exit path.
if (!var7) {
label_006E:
......
} else {
var temp1 = var5;
// temp2 / var5 become the end address of the destination (start + count * 0x20).
var temp2 = temp1 + var7 * 0x20;
var5 = temp2;
var temp3 = var6;
// First iteration, emitted ahead of the loop label: copy one storage word into memory.
memory[temp1:temp1 + 0x20] = storage[temp3];
// var7 is reused as the memory write cursor; var6 is the storage read cursor.
var7 = temp1 + 0x20;
var6 = temp3 + 0x01;
if (var5 <= var7) { goto label_006E; }
label_005B:
var temp4 = var6;
var temp5 = var7;
// Remaining iterations: one storage word per pass, 0x20 bytes of memory per word.
memory[temp5:temp5 + 0x20] = storage[temp4];
var7 = temp5 + 0x20;
var6 = temp4 + 0x01;
// Keep looping while the write cursor is below the end address.
if (var5 > var7) { goto label_005B; }
else { goto label_006E; }
}
}
}
// Sample for the memory -> memory case.
contract Test {
// uint256[3] storage_numbers;
uint256 n;
// Declares one memory array, assigns it to a second memory variable, and
// reads one element through the second variable.
function test() public {
uint256[3] memory m1;
uint256[3] memory m2 = m1;
// Read one element through m2 so the assignment above is actually used.
n = m2[1];
}
}
反汇编代码如下:
func_0060()函数是uint256[3]所对应的空间的申请,返回值表示solidity中的m1,实际上m1就是一个memory的指针。
var var2 = var1;对应solidity中的uint256[3] memory m2 = m1; 可见这里并没有复制数组的每一个元素,而仅仅是赋值了指针本身。
所以memory→memory是浅拷贝。
// Decompiler output for the memory -> memory sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part it calls func_0035 and then stops.
function main() {
......
func_0035();
stop();
}
// Obtains a memory address from func_0060, copies that address between local
// variables, and reads one element through the last copy.
function func_0035() {
// NOTE(review): 0x3b looks like a bytecode return address kept by the decompiler — confirm.
var var0 = 0x3b;
var0 = func_0060();
// Plain value copies of the address returned by func_0060; no memory is copied here.
var var1 = var0;
var var2 = var1;
// Index used for the element read below.
var var3 = 0x01;
// Bounds check of the index against 0x03.
if (var3 < 0x03) {
// Read the 0x20-byte word at (index * 0x20 + var2) and store it at storage index 0x00.
storage[0x00] = memory[var3 * 0x20 + var2:var3 * 0x20 + var2 + 0x20];
return;
} else {
......
}
}
// Reserves 0x60 bytes of memory and returns the start address.
function func_0060() returns (var r0) {
// Read the word at memory 0x40 (the free memory pointer in Solidity's memory
// layout) and bump it by 0x60.
var temp0 = memory[0x40:0x60];
memory[0x40:0x60] = temp0 + 0x60;
// Fill the region from msg.data starting at offset msg.data.length, i.e. past
// the end of calldata. NOTE(review): such reads yield zero bytes in the EVM,
// so this is presumably how the array is zero-initialized — confirm.
memory[temp0:temp0 + 0x03 * 0x20] = msg.data[msg.data.length:msg.data.length + 0x03 * 0x20];
return temp0;
}
}
// Sample for the memory -> storage / local storage case.
contract Test {
uint256[3] storage_numbers;
// Assigns a memory array to the state array. The commented-out line is the
// memory -> local storage variant, which is marked as failing to compile.
function test() public {
uint256[3] storage local = storage_numbers;
uint256[3] memory m;
storage_numbers = m;
//local = m; // compile failed
}
}
memory→local storage的赋值无法通过编译:
Type uint256[3] memory is not implicitly convertible to expected type uint256[3] storage pointer.
我们来看memory→storage的情况。
function func_0051()是为m在memory内申请空间。主要的赋值代码在func_0073。
我们看到storage[temp5] = memory[temp4:temp4 + 0x20];这个赋值语句在一个goto循环里。所以,m的每一个元素都会被复制到storage_numbers之中。
可见memory→storage是深拷贝。
// Decompiler output for the memory -> storage sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part it calls func_0035 and then stops.
function main() {
......
func_0035();
stop();
}
// Gets a memory region from func_0051 and passes it to func_0073 together
// with a storage index and a count.
function func_0035() {
var var0 = 0x00;
// NOTE(review): 0x3d and 0x4c look like bytecode return addresses kept by the decompiler — confirm.
var var1 = 0x3d;
var1 = func_0051();
var var2 = 0x4c;
// First argument: becomes the storage index that is written to in func_0073.
var var3 = 0x00;
// Second argument: the memory address returned by func_0051 (read source).
var var4 = var1;
// Third argument: element count.
var var5 = 0x03;
var2 = func_0073(var3, var4, var5);
}
// Reserves 0x60 bytes of memory and returns the start address.
function func_0051() returns (var r0) {
// Read the word at memory 0x40 (the free memory pointer in Solidity's memory
// layout) and bump it by 0x60.
var temp0 = memory[0x40:0x60];
memory[0x40:0x60] = temp0 + 0x60;
// Fill the region from msg.data starting at offset msg.data.length, i.e. past
// the end of calldata. NOTE(review): such reads yield zero bytes in the EVM,
// so this is presumably how the array is zero-initialized — confirm.
memory[temp0:temp0 + 0x03 * 0x20] = msg.data[msg.data.length:msg.data.length + 0x03 * 0x20];
return temp0;
}
// Copies 0x20-byte memory words into consecutive storage indices.
// arg0: first storage index to write; arg1: memory address to start reading;
// arg2: number of words (end address is arg1 + arg2 * 0x20).
function func_0073(var arg0, var arg1, var arg2) returns (var r0) {
// var0 is the storage write cursor.
var var0 = arg0;
var temp0 = arg1;
// arg1 is overwritten with arg0 + 0x03; it is not read again in the visible part.
arg1 = var0 + 0x03;
var var1 = temp0;
// A zero count jumps straight to the elided exit path.
if (!arg2) {
label_009F:
......
} else {
var temp2 = arg2;
var temp3 = var1;
// From here on arg2 is the memory read cursor and var1 is the end address.
arg2 = temp3;
var1 = arg2 + temp2 * 0x20;
if (var1 <= arg2) {
label_009E:
goto label_009F;
} else {
label_008D:
var temp4 = arg2;
var temp5 = var0;
// Copy exactly one 0x20-byte memory word into one storage index.
storage[temp5] = memory[temp4:temp4 + 0x20];
// Memory cursor advances by 0x20 bytes, storage cursor by one index.
arg2 = temp4 + 0x20;
var1 = var1;
var0 = temp5 + 0x01;
// Leave the loop once the read cursor reaches the end address.
if (var1 <= arg2) { goto label_009E; }
else { goto label_008D; }
}
}
}
}
// Sample for the calldata -> storage / local storage case.
contract Test {
uint256[3] storage_numbers;
// Assigns a calldata array to the state array. The commented-out line is the
// calldata -> local storage variant, which is marked as failing to compile.
function test(uint256[3] calldata data) public {
uint256[3] storage local = storage_numbers;
storage_numbers = data;
//local = data; // compile failed
}
}
calldata→local storage的赋值无法通过编译:
Type uint256[3] calldata is not implicitly convertible to expected type uint256[3] storage pointer.
我们来看calldata→storage的情况。
主要关注func_0064,我们可以看到storage[temp5] = msg.data[temp4:temp4 + 0x20];在一个goto循环之中,说明calldata数组中的每个元素都会被复制到storage中。
可见calldata→storage是深拷贝。
// Decompiler output for the calldata -> storage sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: after the elided part (which is where var2 must come from)
// it calls func_0045(var2) and then stops.
function main() {
......
func_0045(var2);
stop();
}
// Passes a storage index, the incoming argument and a count to func_0064.
function func_0045(var arg0) {
var var0 = 0x00;
// NOTE(review): 0x005f looks like a bytecode return address kept by the decompiler — confirm.
var var1 = 0x005f;
// First argument: becomes the storage index that is written to in func_0064.
var var2 = 0x00;
// Second argument: arg0, used in func_0064 as the msg.data offset to read from.
var var3 = arg0;
// Third argument: element count.
var var4 = 0x03;
var1 = func_0064(var2, var3, var4);
}
// Copies 0x20-byte words of msg.data into consecutive storage indices.
// arg0: first storage index to write; arg1: msg.data offset to start reading;
// arg2: number of words (end offset is arg1 + arg2 * 0x20).
function func_0064(var arg0, var arg1, var arg2) returns (var r0) {
// var0 is the storage write cursor.
var var0 = arg0;
var temp0 = arg1;
// arg1 is overwritten with arg0 + 0x03; it is not read again in the visible part.
arg1 = var0 + 0x03;
var var1 = temp0;
// A zero count jumps straight to the elided exit path.
if (!arg2) {
label_0093:
......
} else {
var temp2 = arg2;
var temp3 = var1;
// From here on arg2 is the msg.data read cursor and var1 is the end offset.
arg2 = temp3;
var1 = arg2 + temp2 * 0x20;
if (var1 <= arg2) {
label_0092:
goto label_0093;
} else {
label_0080:
var temp4 = arg2;
var temp5 = var0;
// Copy exactly one 0x20-byte word of msg.data into one storage index.
storage[temp5] = msg.data[temp4:temp4 + 0x20];
// msg.data cursor advances by 0x20 bytes, storage cursor by one index.
arg2 = temp4 + 0x20;
var0 = temp5 + 0x01;
var1 = var1;
// Leave the loop once the read cursor reaches the end offset.
if (var1 <= arg2) { goto label_0092; }
else { goto label_0080; }
}
}
}
}
// Sample for the calldata -> memory case.
contract Test {
uint256 n;
// Declares a memory array, assigns the calldata array to it, and reads one
// element of the memory array.
function test(uint256[3] calldata data) public {
uint256[3] memory m;
m = data;
// Read one element of m so the assignment above is actually used.
n = m[1];
}
}
反汇编代码如下,
我们看到func_0045中有这样一个语句:
memory[temp0:temp0 + 0x20 * 0x03] = msg.data[arg0:arg0 + 0x20 * 0x03];
0x20是uint256的字节数,0x03表示数组有3个元素。显然这句话是把整个数组都从calldata复制到了memory中。
可见calldata→memory是深拷贝。
// Decompiler output for the calldata -> memory sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: func_00FF is not part of this listing; its result is passed
// to func_0045.
function main() {
......
var2 = func_00FF(var3, var4);
func_0045(var2);
stop();
}
// arg0 is used below as the msg.data offset of the array to copy.
function func_0045(var arg0) {
// NOTE(review): 0x0054 looks like a bytecode return address kept by the decompiler — confirm.
var var0 = 0x0054;
// First reservation (from func_00B1); var0 is overwritten further down.
var0 = func_00B1();
// Second reservation: read the word at memory 0x40 (the free memory pointer
// in Solidity's memory layout) and bump it by 0x20 * 0x03 bytes.
var temp0 = memory[0x40:0x60];
memory[0x40:0x60] = temp0 + 0x20 * 0x03;
// Copy the whole 0x20 * 0x03-byte range from msg.data into memory in one statement.
memory[temp0:temp0 + 0x20 * 0x03] = msg.data[arg0:arg0 + 0x20 * 0x03];
// Write a zero word immediately after the copied range.
memory[temp0 + 0x20 * 0x03:temp0 + 0x20 * 0x03 + 0x20] = 0x00;
// var0 now refers to the freshly copied region instead of the one from func_00B1.
var0 = temp0;
var var1 = var0;
// Index used for the element read below.
var var2 = 0x01;
// Bounds check of the index against 0x03.
if (var2 < 0x03) {
// Read the 0x20-byte word at (index * 0x20 + var1) and store it at storage index 0x00.
storage[0x00] = memory[var2 * 0x20 + var1:var2 * 0x20 + var1 + 0x20];
return;
} else {
// Failure path: build 0x24 bytes of revert data (a 4-byte prefix 0x4e487b71
// followed by the value 0x32) and revert with it.
// NOTE(review): this matches Solidity's Panic(uint256) encoding with the
// out-of-bounds index code — confirm against the Solidity docs.
var var3 = 0x00a1;
memory[0x00:0x20] = 0x4e487b7100000000000000000000000000000000000000000000000000000000;
memory[0x04:0x24] = 0x32;
revert(memory[0x00:0x24]);
}
}
// Reserves 0x60 bytes of memory and returns the start address.
function func_00B1() returns (var r0) {
// Read the word at memory 0x40 and bump it by 0x60.
var temp0 = memory[0x40:0x60];
memory[0x40:0x60] = temp0 + 0x60;
// Fill the region from msg.data starting at offset msg.data.length, i.e. past
// the end of calldata. NOTE(review): such reads yield zero bytes in the EVM,
// so this is presumably how the array is zero-initialized — confirm.
memory[temp0:temp0 + 0x03 * 0x20] = msg.data[msg.data.length:msg.data.length + 0x03 * 0x20];
return temp0;
}
}
// Sample for the storage / local storage / memory -> calldata case.
contract Test {
uint256[3] storage_numbers;
// Only declares the variables; each of the three assignments into the
// calldata parameter is commented out and marked as failing to compile.
function test(uint256[3] calldata data) public {
uint256[3] memory m;
// Declared without an initial value; it is only referenced by the
// commented-out assignment below.
uint256[3] storage local;
//data = storage_numbers; // compile failed
//data = local; // compile failed
//data = m; // compile failed
}
}
storage、local storage、memory→calldata都无法通过编译。
Type uint256[3] storage ref is not implicitly convertible to expected type uint256[3] calldata.
Type uint256[3] storage pointer is not implicitly convertible to expected type uint256[3] calldata.
Type uint256[3] memory is not implicitly convertible to expected type uint256[3] calldata.
在solidity的官方文档中,我们可以看到calldata是不可修改的:
Calldata is a non-modifiable, non-persistent area where function arguments are stored, and behaves mostly like memory.
// Sample for the calldata -> calldata case.
contract Test {
uint256 n;
// Assigns one calldata parameter to the other and reads an element through
// the reassigned one. The commented-out element write is marked as failing
// to compile.
function test(uint256[3] calldata data1, uint256[3] calldata data2) public {
data1 = data2;
// Read one element through data1 so the assignment above is actually used.
n = data1[1];
//data1[1] = 100; // compile failed
}
}
我原以为这段代码会无法通过编译,让我意外的是居然编译成功了。
data1 = data2;对应func_003F中的arg0 = arg1;(紧随其后的var var0 = arg0;只是为读取data1[1]取出这个偏移量)。
这个地方显然是一个浅拷贝,calldata本身也可以看作一个指针。
在data1=data2之后,对data1的读操作都相当于对data2的读操作。
这并没有违背calldata数据不可更改的规则。
像data1[1] = 100; 这样的语句,会真正修改calldata,这是不允许的,会编译报错:
TypeError: Calldata arrays are read-only.
// Decompiler output for the calldata -> calldata sample.
// The "......" lines are parts of the listing that were elided.
contract Contract {
// Entry point: func_0091 yields two values, which are passed to func_003F.
function main() {
......
var2, var3 = func_0091(var3, var4);
func_003F(var2, var3);
stop();
}
// arg0 and arg1 are the two values produced by func_0091; they are used
// below as msg.data offsets.
function func_003F(var arg0, var arg1) {
// Overwrite the first offset with the second one; no array data is copied.
arg0 = arg1;
var var0 = arg0;
// Index used for the element read below.
var var1 = 0x01;
// Bounds check of the index against 0x03.
if (var1 < 0x03) {
// Read the 0x20-byte word of msg.data at (index * 0x20 + var0) and store it
// at storage index 0x00.
storage[0x00] = msg.data[var1 * 0x20 + var0:var1 * 0x20 + var0 + 0x20];
return;
} else {
// Failure path: build 0x24 bytes of revert data (a 4-byte prefix 0x4e487b71
// followed by the value 0x32) and revert with it.
// NOTE(review): this matches Solidity's Panic(uint256) encoding with the
// out-of-bounds index code — confirm against the Solidity docs.
var var2 = 0x58;
memory[0x00:0x20] = 0x4e487b7100000000000000000000000000000000000000000000000000000000;
memory[0x04:0x24] = 0x32;
revert(memory[0x00:0x24]);
}
}
// Returns arg1 unchanged when arg1 + 0x03 * 0x20 does not exceed arg0;
// otherwise reverts with empty data.
function func_0072(var arg0, var arg1) returns (var r0) {
var var0 = arg1;
if (var0 + 0x03 * 0x20 <= arg0) { return var0; }
var var1 = 0x8a;
revert(memory[0x00:0x00]);
}
// Validates two consecutive 0x60-byte ranges starting at arg1 and returns
// their start offsets (arg1 + 0x00 and arg1 + 0x60). arg0 is the upper limit
// handed to func_0072.
function func_0091(var arg0, var arg1) returns (var r0, var arg0) {
var var0 = 0x00;
var var1 = var0;
// Require at least 0xc0 bytes between arg1 and arg0.
// NOTE(review): "i>=" appears to be the decompiler's notation for a signed
// comparison — confirm.
if (arg0 - arg1 i>= 0xc0) {
// First range: offset arg1 + 0x00.
var var2 = 0x00;
var var3 = 0xb1;
var var4 = arg0;
var var5 = arg1 + var2;
var3 = func_0072(var4, var5);
var0 = var3;
// Second range: offset arg1 + 0x60.
var2 = 0x60;
var3 = 0xc0;
var4 = arg0;
var5 = arg1 + var2;
var3 = func_0072(var4, var5);
arg0 = var3;
r0 = var0;
return r0, arg0;
} else {
// Too little data: revert with empty data.
var2 = 0xa4;
revert(memory[0x00:0x00]);
}
}
}