Introduction to Computer Systems 15-213/18-243, spring 2009
CSE 2421
X86-64 Assembly Language: Special Arithmetic Operations
Multiplication
Recall that when multiplying any 2 numbers represented with w bits, we must store the result in 2w bits to avoid overflow.
X86-64 has a way for us to accomplish this even when using 8 byte registers.
Special Arithmetic Instructions
<64 bit value> * <64 bit value> = <64 bit value>
or
<128 bit value>
x86-64 naming conventions:
Word – 2 bytes
Double Word – 4 bytes
Quad Word – 8 bytes
Oct Word – 16 bytes
Multiplying for a 128 bit result
ALU uses two 8-byte registers to accomplish this
One of the 64-bit arguments *MUST* be in %rax for this to work.
Either the one-operand mulq (unsigned) or the one-operand imulq (2’s complement) instruction can be used.
Example:
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y) {
*dest = x * (uint128_t) y;
}
%rdx %rax
High 8 bytes Lower 8 bytes
Code for 128 bit multiply
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y)
# dest in %rdi, x in %rsi, y in %rdx
store_uprod:
movq %rsi, %rax #Copy x to multiplicand
mulq %rdx #Multiply by y (y could have been in any other 8 byte reg)
# Note mulq has only one operand
movq %rax, (%rdi) #Store lower 8 bytes at dest
movq %rdx, 8(%rdi) #Store upper 8 bytes at dest+8
#(Little Endian means high order bytes are stored at the higher address)
ret
%rdx = y %rax = x
3rd Parameter/Multiplier Multiplicand
%rdx = x*y %rax = x*y
Product High 8 bytes Product Lower 8 bytes
Memory at dest, lowest address first ("." = byte not yet written):
Address in %rdi:   .  .  .  .  .  .  .  .
Address in %rdi+8: .  .  .  .  .  .  .  .
Code for 128 bit multiply
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y)
# dest in %rdi, x in %rsi, y in %rdx
store_uprod:
movq %rsi, %rax #Copy x to multiplicand
mulq %rdx #Multiply by y (y could have been in any other 8 byte reg)
# Note mulq has only one operand
movq %rax, (%rdi) #Store lower 8 bytes at dest
movq %rdx, 8(%rdi) #Store upper 8 bytes at dest+8
#(Little Endian means high order bytes are stored at the higher address)
ret
%rdx = y %rax = x
3rd Parameter/Multiplier Multiplicand
%rdx = x*y %rax = x*y
Product High 8 bytes Product Lower 8 bytes
Memory at dest, lowest address first ("." = byte not yet written or not labelled):
Address in %rdi:   %rax (LSB)  .  .  .  .  .  .  %rax (MSB)
Address in %rdi+8: .  .  .  .  .  .  .  .
Code for 128 bit multiply
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y)
# dest in %rdi, x in %rsi, y in %rdx
store_uprod:
movq %rsi, %rax #Copy x to multiplicand
mulq %rdx #Multiply by y (y could have been in any other 8 byte reg)
# Note mulq has only one operand
movq %rax, (%rdi) #Store lower 8 bytes at dest
movq %rdx, 8(%rdi) #Store upper 8 bytes at dest+8
#(Little Endian means high order bytes are stored at the higher address)
ret
%rdx = y %rax = x
3rd Parameter/Multiplier Multiplicand
%rdx = x*y %rax = x*y
Product High 8 bytes Product Lower 8 bytes
Memory at dest, lowest address first ("." = byte not labelled):
Address in %rdi:   %rax (LSB)  .  .  .  .  .  .  %rax (MSB)
Address in %rdi+8: %rdx (LSB)  .  .  .  .  .  .  %rdx (MSB)
Code for 128 bit multiply example
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y)
dest in %rdi, x=0x6261600000000000 in %rsi,
y = 0x10 in %rdx
store_uprod:
movq %rsi, %rax #Copy x to multiplicand
mulq %rdx #Multiply by y (y could have been in any other 8 byte reg)
# Note mulq has only one operand
movq %rax, (%rdi) #Store lower 8 bytes at dest
movq %rdx, 8(%rdi) #Store upper 8 bytes at dest+8
#(Little Endian means high order bytes are stored at the higher address)
ret
NOTE: multiplying by 0x10 would be equivalent to a shlq $4
Stack frame construction omitted from this program
%rdx = 0x10 (16 decimal) %rax = 0x6261600000000000
3rd Parameter/Multiplier Multiplicand
%rdx = 0x0000000000000006 %rax = 0x2616000000000000
Product High 8 bytes Product Lower 8 bytes
Memory at dest, lowest address first ("." = byte not yet written):
Address in %rdi:   .  .  .  .  .  .  .  .
Address in %rdi+8: .  .  .  .  .  .  .  .
Code for 128 bit multiply example
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y)
dest in %rdi, x=0x6261600000000000 in %rsi,
y = 0x10 in %rdx
store_uprod:
movq %rsi, %rax #Copy x to multiplicand
mulq %rdx #Multiply by y (y could have been in any other 8 byte reg)
# Note mulq has only one operand
movq %rax, (%rdi) #Store lower 8 bytes at dest
movq %rdx, 8(%rdi) #Store upper 8 bytes at dest+8
#(Little Endian means high order bytes are stored at the higher address)
ret
NOTE: multiplying by 0x10 would be equivalent to a shlq $4
Stack frame construction omitted from this program
%rdx = 0x10 (16 decimal) %rax = 0x6261600000000000
3rd Parameter/Multiplier Multiplicand
%rdx = 0x0000000000000006 %rax = 0x2616000000000000
Product High 8 bytes Product Lower 8 bytes
Memory at dest, lowest address first ("." = byte not yet written):
Address in %rdi:   0x00 0x00 0x00 0x00 0x00 0x00 0x16 0x26
Address in %rdi+8: .    .    .    .    .    .    .    .
Code for 128 bit multiply example
void store_uprod(uint128_t *dest, uint64_t x, uint64_t y)
dest in %rdi, x=0x6261600000000000 in %rsi,
y = 0x10 in %rdx
store_uprod:
movq %rsi, %rax #Copy x to multiplicand
mulq %rdx #Multiply by y (y could have been in any other 8 byte reg)
# Note mulq has only one operand
movq %rax, (%rdi) #Store lower 8 bytes at dest
movq %rdx, 8(%rdi) #Store upper 8 bytes at dest+8
#(Little Endian means high order bytes are stored at the higher address)
ret
NOTE: multiplying by 0x10 would be equivalent to a shlq $4
Stack frame construction omitted from this program
%rdx = 0x10 (16 decimal) %rax = 0x6261600000000000
3rd Parameter/Multiplier Multiplicand
%rdx = 0x0000000000000006 %rax = 0x2616000000000000
Product High 8 bytes Product Lower 8 bytes
Memory at dest, lowest address first:
Address in %rdi:   0x00 0x00 0x00 0x00 0x00 0x00 0x16 0x26
Address in %rdi+8: 0x06 0x00 0x00 0x00 0x00 0x00 0x00 0x00
128 bit Division
ALU uses 2 8-byte registers to accomplish this
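This means the dividend *MUST* be in the %rdx:%rax pair: a 64-bit dividend goes in %rax and is extended into %rdx; a 128-bit dividend has its high 8 bytes in %rdx and its low 8 bytes in %rax.
Either the divq (unsigned) or the idivq (2’s complement) instruction is used.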
Example:
void remdiv (long x, long y, long *qp, long *rp) {
long q = x/y;
long r = x%y;
*qp = q;
*rp = r;
}
%rdx %rax
Dividend (high) Dividend (low)
%rdx %rax
remainder quotient
Code for 128 bit division
void remdiv(long x, long y, long *qp, long *rp)
x in %rdi, y in %rsi, qp in %rdx, rp in %rcx
remdiv:
movq %rdx, %r8 #Copy qp
movq %rdi, %rax #Move x to lower 8 bytes of dividend
cqto #sign-extend to upper 8 bytes of dividend
idivq %rsi #Divide by y
movq %rax, (%r8) #Store quotient at qp
movq %rdx, (%rcx) #Store remainder at rp
ret
Unsigned division uses the divq instruction. Because the dividend is unsigned, register %rdx is typically set to zero beforehand instead of sign-extending with cqto (convert quadword to octoword): sign-extending an unsigned value whose top bit is set would fill %rdx with 1’s and produce the wrong 128-bit dividend.
%rdx (some address *qp) %rax (x)
3rd parameter dividend
%rdx = all 1’s or 0’s %rax (x)
Dividend (high) Dividend (low)
%rdx (x%y) %rax (x/y)
remainder quotient
Code for 128 bit division
void remdiv(long x, long y, long *qp, long *rp)
x(-5) in %rdi, y(2) in %rsi, qp in %rdx, rp in %rcx
remdiv:
movq %rdx, %r8 #Copy qp
movq %rdi, %rax #Move x to lower 8 bytes of dividend
cqto #sign-extend to upper 8 bytes of dividend
idivq %rsi #Divide by y
movq %rax, (%r8) #Store quotient at qp
movq %rdx, (%rcx) #Store remainder at rp
ret
Unsigned division uses the divq instruction. Because the dividend is unsigned, register %rdx is typically set to zero beforehand instead of sign-extending with cqto: sign-extending an unsigned value whose top bit is set would fill %rdx with 1’s and produce the wrong 128-bit dividend.
%rdx (some address *qp) %rax (x)
3rd parameter 0xfffffffffffffffb (-5)
%rdx = all 1’s or 0’s %rax (x)
0xffffffffffffffff 0xfffffffffffffffb (-5)
%rdx (x%y) %rax (x/y)
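0xffffffffffffffff (-1) 0xfffffffffffffffe (-2)
Note: idivq truncates the quotient toward zero, so the remainder takes the sign of the dividend: -5 = 2*(-2) + (-1).
Memory, lowest address first:
Address in %rdx/%r8 (quotient at qp): 0xfe 0xff 0xff 0xff 0xff 0xff 0xff 0xff
Address in %rcx (remainder at rp):    0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff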
Code for 128 bit division
void remdiv(long x, long y, long *qp, long *rp)
x(5) in %rdi, y(2) in %rsi, qp in %rdx, rp in %rcx
remdiv:
movq %rdx, %r8 #Copy qp
movq %rdi, %rax #Move x to lower 8 bytes of dividend
cqto #sign-extend to upper 8 bytes of dividend
idivq %rsi #Divide by y
movq %rax, (%r8) #Store quotient at qp
movq %rdx, (%rcx) #Store remainder at rp
ret
Unsigned division uses the divq instruction. Because the dividend is unsigned, register %rdx is typically set to zero beforehand instead of sign-extending with cqto: sign-extending an unsigned value whose top bit is set would fill %rdx with 1’s and produce the wrong 128-bit dividend.
%rdx (some address *qp) %rax (x)
3rd parameter 0x0000000000000005
%rdx = all 1’s or 0’s %rax (x)
0x0000000000000000 0x0000000000000005
%rdx (x%y) %rax (x/y)
0x0000000000000001 0x0000000000000002
Memory, lowest address first:
Address in %rdx/%r8 (quotient at qp): 0x02 0x00 0x00 0x00 0x00 0x00 0x00 0x00
Address in %rcx (remainder at rp):    0x01 0x00 0x00 0x00 0x00 0x00 0x00 0x00
Code for 32 bit division
void remdiv(int x, int y, int *qp, int *rp)
x(5) in %edi, y(2) in %esi, qp in %rdx, rp in %rcx
remdiv:
movq %rdx, %r8 #Copy qp
movl %edi, %eax #Move x to lower 4 bytes of dividend
cltd #sign-extend %eax into %edx (upper 4 bytes of dividend); idivl divides %edx:%eax
idivl %esi #Divide by y
movl %eax, (%r8) #Store quotient at qp
movl %edx, (%rcx) #Store remainder at rp
ret
16 bit/8 bit division work similarly
%rdx (some address *qp) %rax (x)
3rd parameter 0x00000005
%edx = all 1’s or 0’s %eax (x)
0x00000000 0x00000005
%edx (x%y) %eax (x/y)
0x00000001 0x00000002
Memory, lowest address first:
Address in %rdx/%r8 (quotient at qp): 0x02 0x00 0x00 0x00
Address in %rcx (remainder at rp):    0x01 0x00 0x00 0x00
In Class Exercise:
void uremdiv(unsigned long x, unsigned long y, unsigned long *qp, unsigned long *rp) {
unsigned long q = x/y;
unsigned long r = x%y;
*qp = q;
*rp = r;
}
Modify the assembly code shown on previous slide for signed division to implement this function.