Microsoft PowerPoint – 30_Structure_Computations
O
SU
C
SE
2
42
1
J.E.Jones
O
SU
C
SE
2
42
1
J. E. Jones
leaX Imm(Base, Index, Scale), Dest
◦ Load effective address, a variant of movX
◦ Has form of an instruction that reads from memory to a
register, but it does not reference memory at all.
◦ Instruction is used to generate addresses for later memory
references
◦ Compilers often find other “clever” uses that have nothing to
do with address computations
◦ If %rdx contains the 8-byte value x, then
leaq 3(%rdx, %rdx, 4), %rax
will set register %rax to 5x +3
◦ lea instruction is valid with the ‘q’ suffix and the ‘l’ suffix
only.
O
SU
C
SE
2
42
1
J. E. Jones
◦ If %rdx contains the 8-byte value x, then
leaq 3(%rdx, %rdx, 4), %rax
will set register %rax to 5x +3
◦ If %edx contains the 4-byte value x, then
leal 3(%edx, %edx, 4), %eax
will set register %eax to 5x +3
O
SU
C
SE
2
42
1
J. E. Jones
◦ If %rdx contains the 8-byte value x, then
leaq 3(%rdx, %rdx, 2), %rax
will set register %rax to 3x +3
◦ If %edx contains the 4-byte value x, then
leal 3(%edx, %edx, 4), %eax
will set register %eax to 5x +3
Can’t be used for address
calculations but can be used for
other computations.
O
SU
C
SE
2
42
1
J. E. Jones
We looked at data alignment last class
x86-64 hardware will work correctly regardless of the
alignment of data
However, Intel recommends that data be aligned to
improve memory system performance
In addition, when working with mixed C/x86 language
programs, data alignment must be used because the C
compiler uses it.
Given this (and potential changes in policy as
processors upgrade to 128-bit processors in the future)
data alignment is implemented within the
compilers/assemblers as follows…
O
SU
C
SE
2
42
1
J. E. Jones
struct members are accessed by using a pointer to the
first byte of the struct and a displacement.
This is one of the common uses of displacements.
See the example below for an illustration of how this is
done.
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1;
short m2;
int m3;
int m4;
int m5[12];
} S1_array[200];
How would we access S1_array[i].m4 if %rdi contains
$S1_array and %rsi contains i?
O
SU
C
SE
2
42
1
J. E. Jones
Step 1: determine offset of each member from beginning
of the structure and size of the structure
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2 60 bytes/
int m3; 4 bytes; p+4 structure aligns on
int m4; 4 bytes; p+8 4-byte boundary
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m4 if %rdi contains
$S1_Array and %rsi contains i?
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2 60 bytes/ structure aligns on
int m3; 4 bytes; p+4 4-byte boundary
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m4 if %rdi contains $S1_Array and %rsi contains i?
imulq $60, %rsi # 60 * i
leaq 8(%rdi, %rsi), %rcx # $S1_Array +(60*i) + 8 =&(S1_array[i].m4)
movl (%rcx), %eax # read int from memory to %eax
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2 60 bytes/ structure aligns on
int m3; 4 bytes; p+4 4-byte boundary
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m4 if %rdi contains $S1_Array and %rsi contains i?
imulq $60, %rsi # 60 * i
leaq 8(%rdi, %rsi), %rcx # $S1_Array +(60*i) + 8 = &(S1_array[i].m4)
movl (%rcx), %eax # read int from memory to %eax
movl 8(%rdi, %rsi), %eax
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2 60 bytes/ structure aligns on
int m3; 4 bytes; p+4 4-byte boundary
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m4 if %rdi contains $S1_Array and %rsi contains i?
imulq $60, %rsi # 60 * i
movl 8(%rdi, %rsi), %eax
Can we do it without using imulq?
Option 1: 60 factors to (2 * 2 * 3 * 5). I think we can use that!
leaq (%rsi, %rsi, 2), %rax # 3*%rsi = 3*i
leaq (%rax, %rax, 4), %rax # 5*(3*%rsi) = 15*i
shlq $2, %rax # 4*(5*3*%rsi) = 60*i
movl 8(%rdi, %rax), %eax # read int from (%rdi + (5*3*2*2*%rsi) + 8)
movl 8(%rdi, %rax, 4), %eax # alternative: omit the shlq (so %rax = 15*i) and let the scale factor 4 supply the final *4
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2 60 bytes/ structure aligns on
int m3; 4 bytes; p+4 4-byte boundary
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m4 if %rdi contains $S1_Array and %rsi contains i?
imulq $60, %rsi # 60 * i
movl 8(%rdi, %rsi), %eax
Can we do it without using imulq?
Option 2: 60 = 0x3C = 0b 0011 1100, so n=5, m=2. Remember (x<<(n+1)) – (x<<m): 60*i = (i<<6) – (i<<2) = 64i – 4i
p+16 => m5[1]
p+20 => m5[2]
p+24 => m5[3]…
p+56 -> m5[11]
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2
int m3; 4 bytes; p+4
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
leaq (%rsi, %rsi,2), %rax # 3*%rsi
leaq (%rax, %rax,4), %rax # 5*3*%rsi This calculates
shlq $2, %rax # 4*(5*3*%rsi) S1_array[i].m4
movl 8(%rdi, %rax), %eax # read int from (%rdi + (5*3*2*2*%rsi) + 8)
How would we access S1_array[i].m5[2]?
p+12 => m5[0]
p+16 => m5[1]
p+20 => m5[2]
movl 20(%rdi, %rax), %ecx
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2
int m3; 4 bytes; p+4
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m5[j], if %rdx contains j?
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2
int m3; 4 bytes; p+4
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m5[j], if %rdx contains j? Option 1
leaq (%rsi, %rsi,2), %rax # 3*i
leaq (%rax, %rax,4), %rax # 5*(3*i)
shlq $2, %rax # 4*(5*3*i)
leaq 12(%rdi, %rax,1), %rax # %rax = &S1_array[i].m5[0]
shlq $2, %rdx # j*4
movl (%rax, %rdx), %eax # read from (&S1_array[i].m5[0]+ j*4)
O
SU
C
SE
2
42
1
J. E. Jones
struct S1 {
short m1; 2 byte; p+0
short m2; 2 byte; p+2
int m3; 4 bytes; p+4
int m4; 4 bytes; p+8
int m5[12]; 48 bytes; p+12
} S1_array[200];
How would we access S1_array[i].m5[j], if %rdx contains j? Option 2
leaq (%rsi, %rsi,2), %rax # 3*i
leaq (%rax, %rax,4), %rax # 5*(3*i)
shlq $2, %rax # 4*(5*3*i)
leaq 12(%rdi, %rdx,4), %rdi # &S1_array[0].m5[j]
movl (%rdi, %rax ), %eax # read from (&S1_array[i].m5[j])
O
SU
C
SE
2
42
1
J. E. Jones
35 = 5*7 = 5*(5+2)
leaq (%rsi, %rsi, 4), %rax #i*5
shlq $1, %rsi #i*2
addq %rax, %rsi # 5i+2i =7i
leaq (%rsi,%rsi,4), %rax #7i*5
movb 30(%rdi, %rax), %r8b
O
SU
C
SE
2
42
1
J. E. Jones
19 = (18 + 1) = (2*3*3) +1
leaq (%rsi, %rsi, 2), %rax #i*3
leaq (%rax, %rax, 2), %rax #i*9
shlq $1, %rax #2*(i*9) = i*18
addq %rsi, %rax # i*18 + i = 19i
leaq (%rdi, %rax), %rax
O
SU
C
SE
2
42
1
J. E. Jones
struct S57{
char ar1[50];
char ar2[7];
}S57_ar[300];
57 = 60-3
leaq (%rsi, %rsi,2), %rax # 3*i
movq %rax, %r8 #copy %rax
leaq (%rax, %rax,4), %rax # 5*(3*i)
shlq $2, %rax # 4*(5*3*i)
subq %r8, %rax
O
SU
C
SE
2
42
1
J. E. Jones
struct S43 {
char a[40];
char b[3];
}S43_array[200];
40 + 3? (2*2*2*5) + 3
leaq (%rsi, %rsi, 4), %rax #5*i
shlq $3, %rax # 8*(5*i) = 40*i
leaq (%rsi, %rsi, 2), %rsi #3*i
addq %rax, %rsi # 43*i
leaq (%rdi, %rsi), %rax
O
SU
C
SE
2
42
1
J. E. Jones
23 = 24 -1 = (3*2*2*2) -1
leaq (%rsi, %rsi, 2), %rax #i*3
leaq (, %rax, 8), %rax #8*(i*3) = i*24 (no base register; (%rax, %rax, 8) would give 9*(i*3) = i*27)
subq %rsi, %rax # i*24 – i = 23i
leaq (%rdi, %rax), %rax
O
SU
C
SE
2
42
1
J. E. Jones
47 = 48 – 1 = (2*3*2*2*2) – 1
leaq (%rsi, %rsi, 2), %rax #i*3
shlq $1, %rax #6i
shlq $3,%rax #(8(6*i)) = 48i
subq %rsi, %rax # i*48 – i = 47i
leaq (%rdi, %rax), %rax
O
SU
C
SE
2
42
1
J. E. Jones
50 5*5*2
leaq (%rsi, %rsi, 4), %rax #5*i
leaq (%rax, %rax, 4), %rax #5*5*i
leaq (%rdi, %rax, 2), %rax
movl 8(%rax), %r9d # %rax already holds %rdi + 50*i from the leaq above (or skip that leaq and use 8(%rdi, %rax, 2))