Code Performance and Caches
Winter 2020
Cache Size
Loop Time
1 int[] A = new int[128 * 1024*1024];
2 double total = 0,start,stop;
3 intN=8;
4 // Loop 1
5 for(intj=0;j
25 System.out.println(“xlabel(‘Step size-> ‘);”);
2/6
Cache Block Size
1 System.out.println(“A=[“);
2 String xticklabels = “{“;
3 int [] A = new int [128 *1024 *1024];
4 long start, stop;
5 intK=1;
6 for(intk=0;k<11;++k){
7 start = System.nanoTime();
8
9 for(int i = 0; i < A.length; i += K)
10 A[i] *= 3;
11
12 stop=System.nanoTime();
13 System.out.println( k + " " + K + " " + (stop - start));
14 xticklabels+="\'2^{"+ k +"}\',";
15 K *=2;
16 }
17 xticklabels += "}";
18 System.out.println("];");
19 System.out.println("plot(A(:,1), A(:,3));");
20 System.out.println("hold on;\nplot(A(:,1), A(:,3),'r*');");
21 System.out.println("xticklabels(" + xticklabels + ");");
22 System.out.println("xticks(A(:,1));");
23 System.out.println("title('Size of cache block');");
24 System.out.println("ylabel('Time ->‘);”);
25 System.out.println(“xlabel(‘Step size-> ‘);”);
1524888723 751189497 391276992 199589491
A=[
0 1
1 2
2 4
3 8
4 16 112349443 5 32 107168869 6 64 92080344
7 128 43472331 8 256 21159764 9 512 13482912 10 1024 7101547 ];
plot(A(: ,1) , A(: ,3));
hold on ;
plot(A(: ,1) , A(: ,3) , ‘r*’); xticklabels ({ ‘2^{0} ‘ , ‘2^{1} ‘… xticks(A(: ,1));
title(‘Size of cache block’); ylabel(‘Time −>’); xlabel(‘Step size−> ‘);
Source: http://igoro.com/archive/gallery-of-processor-cache-effects, Prof. Paul Kry
2/6
Cache Block Size
1 System.out.println(“A=[“);
2 String xticklabels = “{“;
3 int [] A = new int [128 *1024 *1024];
4 long start, stop;
5 intK=1;
6 for(intk=0;k<11;++k){
7 start = System.nanoTime();
8
9 for(int i = 0; i < A.length; i += K)
10 A[i] *= 3;
11
12 stop = System.nanoTime();
13 System.out.println( k + " " + K + " " + (stop - start));
14 xticklabels += "\'2^{"+ k + "}\',";
15 K *=2;
16 }
17 xticklabels += "}";
18 System.out.println("];");
19 System.out.println("plot(A(:,1), A(:,3));");
20 System.out.println("hold on;\nplot(A(:,1), A(:,3),'r*');");
21 System.out.println("xticklabels(" + xticklabels + ");");
22 System.out.println("xticks(A(:,1));");
23 System.out.println("title('Size of cache block');");
24 System.out.println("ylabel('Time ->‘);”);
25 System.out.println(“xlabel(‘Step size-> ‘);”);
2/6
Cache Block Size
1 System.out.println(“A=[“);
2 String xticklabels = “{“;
3 int [] A = new int [128 *1024 *1024];
4 long start, stop;
5 intK=1;
6 for(intk=0;k<11;++k){
7 start = System.nanoTime();
8
9 for(int i = 0; i < A.length; i += K)
10 A[i] *= 3;
11
12 stop = System.nanoTime();
13 System.out.println( k + " " + K + " " + (stop - start));
14 xticklabels += "\'2^{"+ k + "}\',";
15 K *=2;
16 }
17 xticklabels += "}";
18 System.out.println("];");
19 System.out.println("plot(A(:,1), A(:,3));");
20 System.out.println("hold on;\nplot(A(:,1), A(:,3),'r*');");
21 System.out.println("xticklabels(" + xticklabels + ");");
22 System.out.println("xticks(A(:,1));");
23 System.out.println("title('Size of cache block');");
24 System.out.println("ylabel('Time ->‘);”);
25 System.out.println(“xlabel(‘Step size-> ‘);”);
2/6
Cache Size
1 int steps = 64*1024*1024; //Arbitrary large number
2 long start,stop;
3
4 System.out.println(“B = [“);
5 int size = 1024; // initial size = 2^10 * 2^2 = 4KB
6 String xticklabels = “{“;
7 for(intj=0;j<20;++j){
8 int[] A = new int[size];
9 start = System.nanoTime();
10
11 int lengthMod = A.length - 1;
12 for (int i = 0; i < steps; ++i)
13 A[((i*32) & lengthMod)]++;
14
15 stop = System.nanoTime();
16 System.out.println(j + " " + size + " " + (stop - start));
17
18 System.gc(); // garbage collection
19 xticklabels += "\'2^{" + (j+12) + "}\',";
20 size *= 2;
21 }
22 xticklabels += "}";
23 System.out.println("];");
24 System.out.println("plot(B(:,1)+10, B(:,3));");
25 System.out.println("hold on;");
26 System.out.println("plot(B(:,1)+10, B(:,3),'r*');");
27 System.out.println("xticklabels(" + xticklabels + ");");
28 System.out.println("xticks(B(:,1)+10)");
29 System.out.println("title('Size of cache');");
30 System.out.println("ylabel('time ->‘);”);
31 System.out.println(“xlabel(‘Array Size (Bytes) -> ‘);”);
3/6
Cache Size
1 int steps = 64*1024*1024;
2 long start,stop;
3
4 System.out.println(“B = [“);
5 int size = 1024; // initial size = 2^10 * 2^2 = 4KB
6 String xticklabels = “{“;
7 for(intj=0;j<20;++j){
8 int[] A = new int[size];
9 start = System.nanoTime();
10
11 int lengthMod = A.length - 1;
12 for (int i = 0; i < steps; ++i)
13 A[((i*32) & lengthMod)]++;
14
15 stop = System.nanoTime();
16 System.out.println(j + " " + size + " " + (stop - start));
17
18 System.gc(); // garbage collection
19 xticklabels += "\'2^{" + (j+12) + "}\',";
20 size *= 2;
21 }
22 xticklabels += "}";
23 System.out.println("];");
24 System.out.println("plot(B(:,1)+10, B(:,3));");
25 System.out.println("hold on;");
26 System.out.println("plot(B(:,1)+10,B(:,3),'r*');");
27 System.out.println("xticklabels(" + xticklabels + ");");
28 System.out.println("xticks(B(:,1)+10)");
29 System.out.println("title('Size of cache');");
30 System.out.println("ylabel('time ->‘);”);
31 System.out.println(“xlabel(‘Array Size (Bytes) -> ‘);”);
//Arbitrary large number
B=[
0 1024 863180725
1 2048 842193845
2 4096 844589679
3 8192 846904579
4 16384 843970741
5 32768 843925466
6 65536 845388826
7 131072 846607085
8 262144 857787294
9 524288 875579717
10 1048576 873786842
11 2097152 902210755
12 4194304 1288983422
13 8388608 1644425253
14 16777216 1758837615
15 33554432 1783776047
16 67108864 1786640664
17 134217728 1786447414
18 268435456 1787642556
19 536870912 1787765700
];
plot(B(: ,1)+10 , B(: ,3));
hold on;
plot(B(:,1)+10, B(:,3),’r*’); xticklabels ({ ‘2^{12} ‘ , ‘2^{13} ‘ ,… xticks(B(: ,1)+10)
title(‘Size of cache’); ylabel(‘time −>’);
xlabel(‘Array Size (Bytes) −> ‘);
3/6
Source: http://igoro.com/archive/gallery-of-processor-cache-effects, Prof. Paul Kry
Cache Size
1 int steps = 64*1024*1024; //Arbitrary large number
2 long start,stop;
3
4 System.out.println(“B = [“);
5 int size = 1024; // initial size = 2^10 * 2^2 = 4KB
6 String xticklabels = “{“;
7 for(intj=0;j<20;++j){
8 int[] A = new int[size];
9 start = System.nanoTime();
10
11 int lengthMod = A.length - 1;
12 for (int i = 0; i < steps; ++i)
13 A[((i*32) & lengthMod)]++;
14
15 stop = System.nanoTime();
16 System.out.println(j + " " + size + " " + (stop - start));
17
18 System.gc(); // garbage collection
19 xticklabels += "\'2^{" + (j+12) + "}\',";
20 size *= 2;
21 }
22 xticklabels += "}";
23 System.out.println("];");
24 System.out.println("plot(B(:,1)+10, B(:,3));");
25 System.out.println("hold on;");
26 System.out.println("plot(B(:,1)+10, B(:,3),'r*');");
27 System.out.println("xticklabels(" + xticklabels + ");");
28 System.out.println("xticks(B(:,1)+10)");
29 System.out.println("title('Size of cache');");
30 System.out.println("ylabel('time ->‘);”);
31 System.out.println(“xlabel(‘Array Size (Bytes) -> ‘);”);
3/6
Cache Size
1 int steps = 64*1024*1024; //Arbitrary large number
2 long start,stop;
3
4 System.out.println(“B = [“);
5 int size = 1024; // initial size = 2^10 * 2^2 = 4KB
6 String xticklabels = “{“;
7 for(intj=0;j<20;++j){
8 int[] A = new int[size];
9 start = System.nanoTime();
10
11 int lengthMod = A.length - 1;
12 for (int i = 0; i < steps; ++i)
13 A[((i*32) & lengthMod)]++;
14
15 stop = System.nanoTime();
16 System.out.println(j + " " + size + " " + (stop - start));
17
18 System.gc(); // garbage collection
19 xticklabels += "\'2^{" + (j+12) + "}\',";
20 size *= 2;
21 }
22 xticklabels += "}";
23 System.out.println("];");
24 System.out.println("plot(B(:,1)+10, B(:,3));");
25 System.out.println("hold on;");
26 System.out.println("plot(B(:,1)+10, B(:,3),'r*');");
27 System.out.println("xticklabels(" + xticklabels + ");");
28 System.out.println("xticks(B(:,1)+10)");
29 System.out.println("title('Size of cache');");
30 System.out.println("ylabel('time ->‘);”);
31 System.out.println(“xlabel(‘Array Size (Bytes) -> ‘);”);
3/6
Machine Configuration
$ lscpu …
L1d cache : 32K L1i cache: 32K
L2 cache : 1024K
L3 cache : 14080K
…
4/6
Instruction Parallelism
Instruction Level Parallelism
1 int steps = 256 * 1024*1024;
2 int[] A = new int[8];
3 double start,stop;
4 intN=8;
5 // Loop 1
6 start = System.nanoTime();
7 for(inti=0;i