CS代考 COMP1411: Introduction to Computer Systems

PowerPoint Presentation

Lecture 12
Optimization

Copyright By PowCoder代写 加微信 powcoder

COMP1411: Introduction to Computer Systems
Dr. Xianjin XIA
Department of Computing,
The Hong Kong Polytechnic University
Spring 2022
These slides are only intended to use internally. Do not publish it anywhere without permission.
Acknowledgement: These slides are based on the textbook (Computer Systems: A Programmer’s Perspective) and its slides.

Levels of optimization for a single program
Executable

Executable

Introducing several program optimization approaches
Code motion
Share common subexpressions
Reducing procedure calls
Reducing memory accesses

Introducing popular concepts of computing

Code motion
Reduce frequency with which computation performed
If it will always produce same result
Especially moving code out of loop

int ni = n*i;
for (j = 0; j < n; j++) a[ni+j] = b[j]; void set_row(double *a, double *b, long i, long n) for (j = 0; j < n; j++) a[n*i+j] = b[j]; Compiler-Generated Code Motion testq %rcx, %rcx # Test n jle .L4 # If 0, goto done movq %rcx, %rax # rax = n imulq %rdx, %rax # rax *= i, %rdx:i leaq (%rdi,%rax,8), %rdx # rowp = a + n*i*8 movl $0, %r8d # j = 0 .L3: # loop: movq (%rsi,%r8,8), %rax # t = b[j] movq %rax, (%rdx) # *rowp = t addq $1, %r8 # j++ addq $8, %rdx # rowp++ cmpq %r8, %rcx # Compare n:j jg .L3 # If >, goto loop
.L4: # done:

long ni = n*i;
double *rowp = a+ni;
for (j = 0; j < n; j++) *rowp++ = b[j]; void set_row(double *a, double *b, long i, long n) for (j = 0; j < n; j++) a[n*i+j] = b[j]; Share Common Subexpressions Reuse portions of expressions /* Sum neighbors of i,j */ up = val[(i-1)*n + j ]; down = val[(i+1)*n + j ]; left = val[i*n + j-1]; right = val[i*n + j+1]; sum = up + down + left + right; long inj = i*n + j; up = val[inj - n]; down = val[inj + n]; left = val[inj - 1]; right = val[inj + 1]; sum = up + down + left + right; 3 multiplications: i*n, (i–1)*n, (i+1)*n 1 multiplication: i*n leaq 1(%rsi), %rax # i+1 leaq -1(%rsi), %r8 # i-1 imulq %rcx, %rsi # i*n imulq %rcx, %rax # (i+1)*n imulq %rcx, %r8 # (i-1)*n addq %rdx, %rsi # i*n+j addq %rdx, %rax # (i+1)*n+j addq %rdx, %r8 # (i-1)*n+j imulq %rcx, %rsi # i*n addq %rdx, %rsi # i*n+j movq %rsi, %rax # i*n+j subq %rcx, %rax # i*n+j-n leaq (%rsi,%rcx), %rcx # i*n+j+n Procedure calls Procedure to Convert String to Lower Case void lower(char *s) for (i = 0; i < strlen(s); i++) if (s[i] >= ‘A’ && s[i] <= 'Z') s[i] -= ('A' - 'a'); lower1 0 20000 40000 60000 80000 100000 120000 140000 160000 180000 200000 220000 240000 260000 280000 300000 320000 340000 360000 380000 400000 420000 440000 460000 480000 500000 0 0.286912 1.1470389999999999 2.5802670000000001 4.5866410000000073 7.1661459999999941 10.318951999999999 14.044786999999999 18.344017000000001 23.216484999999999 28.673535999999999 34.707457000000012 41.304167 48.505589000000001 56.283847000000002 64.623097999999899 73.541931000000005 83.023829999999975 93.129919999999998 103.7657419999999 114.97881099999999 126.765697 139.12814400000019 152.06679399999999 165.5780470000002 179.704319 String length CPU seconds Procedure calls Overall performance, string of length N N calls to strlen Require NxN times Overall O(N2) performance /* My version of strlen */ size_t strlen(const char *s) size_t length = 0; while (*s != '\0') { return length; Procedure calls Improving performance Move call to strlen outside of loop Since result does not change from one iteration to another void lower(char *s) int len = strlen(s); for (i = 0; i < len; i++) if (s[i] >= ‘A’ && s[i] <= 'Z') s[i] -= ('A' - 'a'); Procedure calls lower1 0 20000 40000 60000 80000 100000 120000 140000 160000 180000 200000 220000 240000 260000 280000 300000 320000 340000 360000 380000 400000 420000 440000 460000 480000 500000 0 0.286912 1.1470389999999999 2.5802670000000001 4.5866410000000073 7.1661459999999941 10.318951999999999 14.044786999999999 18.344017000000001 23.216484999999999 28.673535999999999 34.707457000000012 41.304167 48.505589000000001 56.283847000000002 64.623097999999899 73.541931000000005 83.023829999999975 93.129919999999998 103.7657419999999 114.97881099999999 126.765697 139.12814400000019 152.06679399999999 165.5780470000002 179.704319 lower2 0 20000 40000 60000 80000 100000 120000 140000 160000 180000 200000 220000 240000 260000 280000 300000 320000 340000 360000 380000 400000 420000 440000 460000 480000 500000 0 2.9000000000000098E-5 5.7000000000000098E-5 8.6000000000000098E-5 1.15E-4 1.4300000000000001E-4 1.7200000000000001E-4 2.0000000000000001E-4 2.2900000000000001E-4 2.5700000000000001E-4 2.8600000000000001E-4 3.1500000000000001E-4 3.4299999999999999E-4 3.7200000000000102E-4 4.0099999999999999E-4 4.3000000000000102E-4 4.58000000000001E-4 4.87000000000001E-4 5.1599999999999997E-4 5.45000000000001E-4 5.7300000000000103E-4 6.02E-4 6.3100000000000103E-4 6.5900000000000095E-4 6.88000000000001E-4 7.1700000000000095E-4 String length CPU seconds Memory matters /* Sum rows is of n X n matrix a and store in vector b */ void sum_rows1(double *a, double *b, long n) { long i, j; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) b[i] += a[i*n + j]; double A[9] = { 0, 1, 2, 4, 8, 16,} 32, 64, 128 }; double B[3] = A+3; sum_rows1(A, B, 3); i = 0: [3, 8, 16] init: [4, 8, 16] i = 1: [3, 28, 16] i = 2: [3, 28, 224] Value of B: Memory matters # sum_rows1 inner loop addsd (%rcx), %xmm0 # FP add addq $8, %rcx movsd %xmm0, (%rsi,%r8,8) # FP store /* Sum rows is of n X n matrix a and store in vector b */ void sum_rows1(double *a, double *b, long n) { long i, j; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) b[i] += a[i*n + j]; In each inner loop iteration, there is a memory write time consuming Memory matters # sum_rows2 inner loop addsd (%rcx), %xmm0 # FP Add addq $8, %rcx decq %rax /* Sum rows is of n X n matrix a and store in vector b */ void sum_rows2(double *a, double *b, long n) { long i, j; for (i = 0; i < n; i++) { double val = 0; for (j = 0; j < n; j++) val += a[i*n + j]; b[i] = val; Removing memory writes in the inner loop Embedded Computers Computers are becoming smaller and smaller … Internet-of-Things Things are getting smarter Internet-of-Things The vision: connecting everything Internet-of-Things Application domains Big Data and Cloud Knowledge management Turning data into wisdom, learn from data Artificial Intelligence Learning and making decisions from big data Deep Neural Networks Edge Computing /docProps/thumbnail.jpeg 程序代写 CS代考 加微信: powcoder QQ: 1823890830 Email: powcoder@163.com