Introduction to Computer Systems 15-213/18-243, spring 2009
CSE 2421
X86-64 Assembly Language Part 3: Control (Loops)
Today
Control: Condition codes (Review)
Conditional branches (Review)
Loops
Switch Statements
Jumping
jX Instructions
Jump to different part of code depending on condition codes
This is only a partial list
jX Condition Description
jmp 1 Unconditional
je ZF Equal / Zero
jne ~ZF Not Equal / Not Zero
js SF Negative
jns ~SF Nonnegative
jg ~(SF^OF)&~ZF Greater (Signed)
jge ~(SF^OF) Greater or Equal (Signed)
jl (SF^OF) Less (Signed)
jle (SF^OF)|ZF Less or Equal (Signed)
ja ~CF&~ZF Above (unsigned)
jb CF Below (unsigned)
Conditional Moves
cmovX Instructions
Move a value (or not) depending on condition codes
cmovX Condition Description
cmove ZF Equal / Zero
cmovne ~ZF Not Equal / Not Zero
cmovs SF Negative
cmovns ~SF Nonnegative
cmovg ~(SF^OF)&~ZF Greater (Signed)
cmovge ~(SF^OF) Greater or Equal (Signed)
cmovl (SF^OF) Less (Signed)
cmovle (SF^OF)|ZF Less or Equal (Signed)
cmova ~CF&~ZF Above (unsigned)
cmovb CF Below (unsigned)
Conditional Branch Example
C code example with assembly
long absdiff
(long x, long y)
{
long result;
if (x > y)
result = x-y;
else
result = y-x;
return result;
}
absdiff:
cmpq %rsi, %rdi # x:y
jle reverse
movq %rdi, %rax
subq %rsi, %rax
ret
reverse: # x <= y
movq %rsi, %rax
subq %rdi, %rax
ret
Register Use(s)
%rdi Argument x
%rsi Argument y
%rax Return value
Expressing with Goto Code
C allows goto statement
Jump to position designated by label
long absdiff
(long x, long y)
{
long result;
if (x > y)
result = x-y;
else
result = y-x;
return result;
}
long absdiff_j
(long x, long y)
{
long result;
int ntest = x <= y;
if (ntest) goto Else;
result = x-y;
goto Done;
Else:
result = y-x;
Done:
return result;
}
C Code
val = Test ? Then_Expr : Else_Expr;
Goto Version
ntest = !Test;
if (ntest) goto Else;
val = Then_Expr;
goto Done;
Else:
val = Else_Expr;
Done:
. . .
General Conditional Expression Translation (Using Branches)
Create separate code regions for then & else expressions
Execute appropriate one
val = x>y ? x-y : y-x;
C Code
val = Test
? Then_Expr
: Else_Expr;
Goto Version
result = Then_Expr;
eval = Else_Expr;
nt = !Test;
if (nt) result = eval;
return result;
Using Conditional Moves
Conditional Move Instructions
Instruction supports:
if (Test) Dest Src
Supported in post-1995 x86 processors
GCC tries to use them
But, only when known to be safe
Why?
Branches are very disruptive to instruction flow through pipelines
Conditional moves do not require control transfer
Conditional Move Example
absdiff:
movq %rdi, %rax # x
subq %rsi, %rax # result = x-y
movq %rsi, %rdx
subq %rdi, %rdx # eval = y-x
cmpq %rsi, %rdi # x:y
cmovle %rdx, %rax # if <=, result = eval
ret
long absdiff
(long x, long y)
{
long result;
if (x > y)
result = x-y;
else
result = y-x;
return result;
}
Register Use(s)
%rdi Argument x
%rsi Argument y
%rax Return value
Expensive Computations
Bad Cases for Conditional Move
Both values get computed
Only makes sense when computations are very simple
val = Test(x) ? Hard1(x) : Hard2(x);
Risky Computations
Both values get computed
May have undesirable effects
val = p ? *p : 0;
Computations with side effects
Both values get computed
Must be side-effect free
val = x > 0 ? x*=7 : x+=3;
Today
Control: Condition codes (Review)
Conditional branches (Review)
Loops
Switch Statements
C Code
long pcount_do
(unsigned long x) {
long result = 0;
do {
result += x & 0x1;
x >>= 1;
} while (x);
return result;
}
C Code goto Version
long pcount_goto
(unsigned long x) {
long result = 0;
loop:
result += x & 0x1;
x >>= 1;
if(x) goto loop;
return result;
}
“Do-While” Loop Example
Count number of 1’s in argument x (“popcount”)
Use conditional branch to either continue looping or to exit loop
C Code goto Version
“Do-While” Loop Compilation
movq $0, %rax # result = 0
loop: # loop:
movq %rdi, %rdx # t1 = x
andq $1, %rdx # t1 = t1 & 0x1
addq %rdx, %rax # result += t1
shrq %rdi # x >>= 1
jne loop # if (x) goto loop
ret # ret
long pcount_goto
(unsigned long x) {
long result = 0;
loop:
result += x & 0x1;
x >>= 1;
if(x) goto loop;
return result;
}
Register Use(s)
%rdi Argument x
%rax result
C Code
do
Body
while (Test);
Goto Version
loop:
Body
if (Test)
goto loop
General “Do-While” Translation
Body:
{
Statement1;
Statement2;
…
Statementn;
}
While version
while (Test)
Body
General “While” Translation #1
“Jump-to-middle” translation
Used with gcc -Og
Goto Version
goto test;
loop:
Body
test:
if (Test)
goto loop;
done:
C Code
long pcount_while
(unsigned long x) {
long result = 0;
while (x) {
result += x & 0x1;
x >>= 1;
}
return result;
}
Jump to Middle Version
long pcount_goto_jtm
(unsigned long x) {
long result = 0;
goto test;
loop:
result += x & 0x1;
x >>= 1;
test:
if(x) goto loop;
return result;
}
While Loop Example #1
Compare to do-while version of function
Initial goto starts loop at test
While version
while (Test)
Body
Do-While Version
if (!Test)
goto done;
do
Body
while(Test);
done:
General “While” Translation #2
“Do-while” conversion
Used with gcc -O1
Goto Version
if (!Test)
goto done;
loop:
Body
if (Test)
goto loop;
done:
C Code
long pcount_while
(unsigned long x) {
long result = 0;
while (x) {
result += x & 0x1;
x >>= 1;
}
return result;
}
Do-While Version
long pcount_goto_dw
(unsigned long x) {
long result = 0;
if (!x) goto done;
loop:
result += x & 0x1;
x >>= 1;
if(x) goto loop;
done:
return result;
}
While Loop Example #2
Compare to do-while version of function
Initial conditional guards entrance to loop
“For” Loop Form
for (Init; Test; Update )
Body
General Form
#define WSIZE 8*sizeof(int)
long pcount_for
(unsigned long x)
{
size_t i;
long result = 0;
for (i = 0; i < WSIZE; i++)
{
unsigned bit =
(x >> i) & 0x1;
result += bit;
}
return result;
}
i = 0
i < WSIZE
i++
{
unsigned bit =
(x >> i) & 0x1;
result += bit;
}
Init
Test
Update
Body
“For” Loop While Loop
for (Init; Test; Update )
Body
For Version
Init;
while (Test ) {
Body
Update;
}
While Version
For-While Conversion
long pcount_for_while
(unsigned long x)
{
size_t i;
long result = 0;
i = 0;
while (i < WSIZE)
{
unsigned bit =
(x >> i) & 0x1;
result += bit;
i++;
}
return result;
}
i = 0
i < WSIZE
i++
{
unsigned bit =
(x >> i) & 0x1;
result += bit;
}
Init
Test
Update
Body
C Code
“For” Loop Do-While Conversion
Initial test can be optimized away
long pcount_for
(unsigned long x)
{
size_t i;
long result = 0;
for (i = 0; i < WSIZE; i++)
{
unsigned bit =
(x >> i) & 0x1;
result += bit;
}
return result;
}
Goto Version
long pcount_for_goto_dw
(unsigned long x) {
size_t i;
long result = 0;
i = 0;
if (!(i < WSIZE))
goto done;
loop:
{
unsigned bit =
(x >> i) & 0x1;
result += bit;
}
i++;
if (i < WSIZE)
goto loop;
done:
return result;
}
Init
!Test
Body
Update
Test
Today
Control: Condition codes (Review)
Conditional branches (Review)
Loops
Switch Statements
Switch Statement Example
Multiple case labels
Here: 5 & 6
Fall through cases
Here: 2
Missing cases
Here: 4
long switch_eg
(long x, long y, long z)
{
long w = 1;
switch(x) {
case 1:
w = y*z;
break;
case 2:
w = y/z;
/* Fall Through */
case 3:
w += z;
break;
case 5:
case 6:
w -= z;
break;
default:
w = 2;
}
return w;
}
Jump Table Structure
Code Block
0
Targ0:
Code Block
1
Targ1:
Code Block
2
Targ2:
Code Block
n–1
Targn-1:
•
•
•
Targ0
Targ1
Targ2
Targn-1
•
•
•
jtab:
goto *JTab[x];
switch(x) {
case val_0:
Block 0
case val_1:
Block 1
• • •
case val_n-1:
Block n–1
}
Switch Form
Translation (Extended C)
Jump Table
Jump Targets
Switch Statement Example
Setup:
long switch_eg(long x, long y, long z)
{
long w = 1;
switch(x) {
. . .
}
return w;
}
switch_eg:
movq %rdx, %rcx
cmpq $6, %rdi # x:6
ja .L8
jmp *.L4(,%rdi,8)
What range of values takes default?
Note that w not initialized here
Register Use(s)
%rdi Argument x
%rsi Argument y
%rdx Argument z
%rax Return value
Switch Statement Example
long switch_eg(long x, long y, long z)
{
long w = 1;
switch(x) {
. . .
}
return w;
}
Indirect
jump
Jump table
.section .rodata
.align 8
.L4:
.quad .L8 # x = 0
.quad .L3 # x = 1
.quad .L5 # x = 2
.quad .L9 # x = 3
.quad .L8 # x = 4
.quad .L7 # x = 5
.quad .L7 # x = 6
Setup:
switch_eg:
movq %rdx, %rcx
cmpq $6, %rdi # x:6
ja .L8 # Use default
jmp *.L4(,%rdi,8) # goto *JTab[x]
Assembly Setup Explanation
Table Structure
Each target requires 8 bytes
Base address at .L4
Jumping
Direct: jmp .L8
Jump target is denoted by label .L8
Indirect: jmp *.L4(,%rdi,8)
Start of jump table: .L4
Must scale by factor of 8 (addresses are 8 bytes)
Fetch target from effective Address .L4 + x*8
Only for 0 ≤ x ≤ 6
Jump table
.section .rodata
.align 8
.L4:
.quad .L8 # x = 0
.quad .L3 # x = 1
.quad .L5 # x = 2
.quad .L9 # x = 3
.quad .L8 # x = 4
.quad .L7 # x = 5
.quad .L7 # x = 6
.section .rodata
.align 8
.L4:
.quad .L8 # x = 0
.quad .L3 # x = 1
.quad .L5 # x = 2
.quad .L9 # x = 3
.quad .L8 # x = 4
.quad .L7 # x = 5
.quad .L7 # x = 6
Jump Table
Jump table
switch(x) {
case 1: // .L3
w = y*z;
break;
case 2: // .L5
w = y/z;
/* Fall Through */
case 3: // .L9
w += z;
break;
case 5:
case 6: // .L7
w -= z;
break;
default: // .L8
w = 2;
}
Code Blocks (x == 1)
.L3:
movq %rsi, %rax # y
imulq %rdx, %rax # y*z
ret
switch(x) {
case 1: // .L3
w = y*z;
break;
. . .
}
Register Use(s)
%rdi Argument x
%rsi Argument y
%rdx Argument z
%rax Return value
Handling Fall-Through
long w = 1;
. . .
switch(x) {
. . .
case 2:
w = y/z;
/* Fall Through */
case 3:
w += z;
break;
. . .
}
case 3:
w = 1;
case 2:
w = y/z;
goto merge;
merge:
w += z;