Csapp 3e Solutions PDF
Csapp 3e Solutions PDF
of Contents
Introduction 1.1
1. A Tour of Computer Systems 1.2
1
2.77 2.1.23
2.78 2.1.24
2.79 2.1.25
2.80 2.1.26
2.81 2.1.27
2.82 2.1.28
2.83 2.1.29
2.84 2.1.30
2.85 2.1.31
2.86 2.1.32
2.87 2.1.33
2.88 2.1.34
2.89 2.1.35
2.90 2.1.36
2.91 2.1.37
2.92 2.1.38
2.93 2.1.39
2.94 2.1.40
2.95 2.1.41
2.96 2.1.42
2.97 2.1.43
3. Machine-Level Representation of Programs 2.2
3.58 2.2.1
3.59 2.2.2
3.60 2.2.3
3.61 2.2.4
3.62 2.2.5
3.63 2.2.6
3.64 2.2.7
3.65 2.2.8
2
3.66 2.2.9
3.67 2.2.10
3.68 2.2.11
3.69 2.2.12
3.70 2.2.13
3.71 2.2.14
3.72 2.2.15
3.73 2.2.16
3.74 2.2.17
3.75 2.2.18
4. Processor Architecture 2.3
4.45 2.3.1
4.46 2.3.2
4.47 2.3.3
4.48 2.3.4
4.49 2.3.5
4.50 2.3.6
4.51 2.3.7
4.52 2.3.8
4.53 2.3.9
4.54 2.3.10
4.55 2.3.11
4.56 2.3.12
4.57 2.3.13
4.58 2.3.14
4.59 2.3.15
5. Optimizing Program Performance 2.4
5.13 2.4.1
5.14 2.4.2
5.15 2.4.3
3
5.16 2.4.4
5.17 2.4.5
5.18 2.4.6
5.19 2.4.7
6. The Memory Hierarchy 2.5
6.22 2.5.1
6.23 2.5.2
6.24 2.5.3
6.25 2.5.4
6.26 2.5.5
6.27 2.5.6
6.28 2.5.7
6.29 2.5.8
6.30 2.5.9
6.31 2.5.10
6.32 2.5.11
6.33 2.5.12
6.34 2.5.13
6.35 2.5.14
6.36 2.5.15
6.37 2.5.16
6.38 2.5.17
6.39 2.5.18
6.40 2.5.19
6.41 2.5.20
6.42 2.5.21
6.43 2.5.22
6.44 2.5.23
6.45 2.5.24
6.46 2.5.25
4
Part 2 Running Programs on a System
7. Linking 3.1
7.6 3.1.1
7.7 3.1.2
7.8 3.1.3
7.9 3.1.4
7.10 3.1.5
7.11 3.1.6
7.12 3.1.7
7.13 3.1.8
8. Exceptional Control Flow 3.2
8.9 3.2.1
8.10 3.2.2
8.11 3.2.3
8.12 3.2.4
8.13 3.2.5
8.14 3.2.6
8.15 3.2.7
8.16 3.2.8
8.17 3.2.9
8.18 3.2.10
8.19 3.2.11
8.20 3.2.12
8.21 3.2.13
8.22 3.2.14
8.23 3.2.15
8.24 3.2.16
8.25 3.2.17
8.26 3.2.18
9. Virtual Memory 3.3
5
9.11 3.3.1
9.12 3.3.2
9.13 3.3.3
9.14 3.3.4
9.15 3.3.5
9.16 3.3.6
9.17 3.3.7
9.18 3.3.8
9.19 3.3.9
9.20 3.3.10
6
12.17 4.3.2
12.18 4.3.3
12.19 4.3.4
12.20 4.3.5
12.21 4.3.6
12.22 4.3.7
12.23 4.3.8
12.24 4.3.9
12.25 4.3.10
12.26 4.3.11
12.27 4.3.12
12.28 4.3.13
12.29 4.3.14
12.30 4.3.15
12.31 4.3.16
12.32 4.3.17
12.33 4.3.18
12.34 4.3.19
12.35 4.3.20
12.36 4.3.21
12.37 4.3.22
12.38 4.3.23
12.39 4.3.24
7
Introduction
CSAPP-3e-Solutions
Computer Systems: A Programmer's Perspective Third Edition Solutions
at first
Almost every solution has its own code piece in C/gas/yas, and every code piece
is tested!
Code files are classified by chapter. Please visit the index page of every chapter
to see more info.
8
Introduction
issues
Work done in a hurry is faulty, and work that is never improved is disappointing.
Thanks for every issue and PR; they really make this project better.
build
prerequisite
x64 linux system
docker
code
clone code
compile
make
test
make test
clean
9
Introduction
make clean
gitbook
must install gitbook plugins first before other gitbook actions
make plugin
make serve
make html
generate E-books in ./
make pdf
make mobi
make epub
feedback
If you encounter a problem, you can email me or comment via Disqus on the
specific solution page
license
GPLv3
at last
10
Introduction
I'll be :) if this little book helps you and makes your life more convenient.
11
1. A Tour of Computer Systems
by Edsger Dijkstra
no homework here.
12
2. Representing and Manipulating Information
by Katherine Johnson
test way:
13
2. Representing and Manipulating Information
14
2. Representing and Manipulating Information
15
2.55
2.55
/*
* show-bytes.c
*/
#include <stdio.h>
/* Hand the address and byte count of an int to show_bytes. */
void show_int(int x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
/* Hand the address and byte count of a float to show_bytes. */
void show_float(float x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
/* Hand the address and byte count of a pointer value (not its target) to show_bytes. */
void show_pointer(void* x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
show_int(ival);
show_float(fval);
show_pointer(pval);
16
2.55
test_show_bytes(test_num);
return 0;
}
uname -mr:
4.4.26-gentoo x86_64
compile:
run:
./show-bytes
output:
48 01 00 00
00 00 a4 43
a8 1e 71 ee fc 7f 00 00
17
2.56
2.56
change
run:
./show-bytes
output:
00 04 00 00
00 00 80 44
c8 fe 83 2f fc 7f 00 00
18
2.57
2.57
/*
* show-bytes.c
*/
#include <stdio.h>
/* Hand the address and byte count of an int to show_bytes. */
void show_int(int x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
/* Hand the address and byte count of a float to show_bytes. */
void show_float(float x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
/* Hand the address and byte count of a pointer value (not its target) to show_bytes. */
void show_pointer(void* x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
//=============
// 2.57 changes
//=============
/* Hand the address and byte count of a short to show_bytes. */
void show_short(short x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
/* Hand the address and byte count of a long to show_bytes. */
void show_long(long x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
19
2.57
/* Hand the address and byte count of a double to show_bytes. */
void show_double(double x) {
  show_bytes((byte_pointer) &x, sizeof x);
}
//==================
// 2.57 changes end
//==================
show_int(ival);
show_float(fval);
show_pointer(pval);
//=============
// 2.57 changes
//=============
short sval = (short) ival;
long lval = (long) ival;
double dval = (double) ival;
show_short(sval);
show_long(lval);
show_double(dval);
//==================
// 2.57 changes end
//==================
}
test_show_bytes(test_num);
return 0;
}
uname -rm
20
2.57
4.4.0-21-generic x86_64
run
./show-bytes-more
output
48 01 00 00
00 00 a4 43
18 b7 2e 20 fd 7f 00 00
48 01
48 01 00 00 00 00 00 00
00 00 00 00 00 80 74 40
21
2.58
2.58
/*
* is-little-endian.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Return 1 when the byte at the lowest address of an int holds its
 * least significant byte (little endian), 0 otherwise.
 */
int is_little_endian() {
  int probe = 0xff;
  byte_pointer first = (byte_pointer) &probe;
  /* only the low-order byte of probe is non-zero; see where it landed */
  return first[0] == 0xff ? 1 : 0;
}
22
2.59
2.59
expression
try it
/*
* generate-a-word.c
*/
#include <stdio.h>
#include <assert.h>
return 0;
}
23
2.60
2.60
/*
* replace-byte.c
*/
#include <stdio.h>
#include <assert.h>
assert(rep_0 == 0x123456AB);
assert(rep_3 == 0xAB345678);
return 0;
}
24
2.61
2.61
!~x
!x
!~(x | ~0xff)
test it
/*
* 2.61.c
*/
#include <stdio.h>
#include <assert.h>
/* 2.61 A: 1 iff every bit of x is 1 (its complement is then zero). */
int A(int x) {
  int complement = ~x;
  return !complement;
}
/* 2.61 B: 1 iff every bit of x is 0. */
int B(int x) {
  return x == 0;
}
int C(int x) {
return A(x | ~0xff);
25
2.61
/* 2.61 D: 1 iff every bit in the most significant byte of x is 0. */
int D(int x) {
  int shift = (sizeof(int) - 1) << 3;    /* bits below the top byte */
  int top_byte = (x >> shift) & 0xff;
  return B(top_byte);
}
assert(A(all_bit_one));
assert(!B(all_bit_one));
assert(C(all_bit_one));
assert(!D(all_bit_one));
assert(!A(all_bit_zero));
assert(B(all_bit_zero));
assert(!C(all_bit_zero));
assert(D(all_bit_zero));
return 0;
}
26
2.62
2.62
/*
* int-shifts-are-arithemetic.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Return 1 when >> on a negative int replicates the sign bit:
 * shifting the all-ones pattern right then leaves it unchanged.
 */
int int_shifts_are_arithemetic() {
  int all_ones = -1;
  int shifted = all_ones >> 1;
  return shifted == all_ones;
}
27
2.63
2.63
/*
* srl-sra.c
*/
#include <stdio.h>
#include <assert.h>
test_unsigned = 0x87654321;
test_int = 0x87654321;
28
2.63
return 0;
}
29
2.64
2.64
/*
* any-odd-one.c
*/
#include <stdio.h>
#include <assert.h>
/* Return 1 when any odd-numbered bit (bit 1, 3, 5, ...) of x is 1. */
int any_odd_one(unsigned x) {
  const unsigned odd_positions = 0xAAAAAAAA;
  return (x & odd_positions) != 0;
}
30
2.65
2.65
/*
* odd-ones.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Return 1 when x contains an odd number of 1 bits.
 * Each step XORs the upper half of the remaining window onto the lower
 * half, so bit 0 ends up holding the parity of all 32 bits.
 */
int odd_ones(unsigned x) {
  unsigned fold16 = x ^ (x >> 16);
  unsigned fold8 = fold16 ^ (fold16 >> 8);
  unsigned fold4 = fold8 ^ (fold8 >> 4);
  unsigned fold2 = fold4 ^ (fold4 >> 2);
  unsigned fold1 = fold2 ^ (fold2 >> 1);
  return fold1 & 0x1;
}
31
2.66
2.66
32
2.66
/*
* leftmost-one.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Generate a mask indicating the leftmost 1 in x. Assume w=32.
 * For example, 0xFF00 -> 0x8000, and 0x6000 -> 0x4000.
 * If x = 0, then return 0.
 */
int leftmost_one(unsigned x) {
  /*
   * Step 1: smear the leftmost 1 into every lower position,
   * e.g. 0xFF00 -> 0xFFFF and 0x6000 -> 0x7FFF; 0 stays 0.
   */
  unsigned smear = x;
  smear |= smear >> 1;
  smear |= smear >> 2;
  smear |= smear >> 4;
  smear |= smear >> 8;
  smear |= smear >> 16;
  /*
   * Step 2: smear has the form 0...01...1, so XOR with itself shifted
   * right by one cancels every bit except the leftmost one
   * (and yields 0 when x was 0).
   */
  return smear ^ (smear >> 1);
}
33
2.66
34
2.67
2.67
A.
If the value of the right operand is negative or is greater than or equal to the
width of the promoted left operand, the behavior is undefined.
B.
C.
35
2.67
/*
* int-size-is-32.c
*/
#include <stdio.h>
#include <assert.h>
int int_size_is_32() {
int set_msb = 1 << 31;
int beyond_msb = set_msb << 1;
int int_size_is_32_for_16bit() {
int set_msb = 1 << 15 << 15 << 1;
int beyond_msb = set_msb << 1;
36
2.67
37
2.68
2.68
/*
* lower-one-mask.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Mask with the least significant n bits set to 1.
 * Examples: n = 6 -> 0x3F, n = 17 -> 0x1FFFF
 * Assume 1 <= n <= w
 */
int lower_one_mask(int n) {
  int width = sizeof(int) << 3;
  unsigned all_ones = ~0u;
  /* drop the (width - n) high bits; n == width shifts by 0, never by width */
  return all_ones >> (width - n);
}
38
2.69
2.69.md
/*
* rotate-left.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Rotate x left by n bits. Assume 0 <= n < w.
 * Examples when x = 0x12345678 and w = 32:
 * n = 4 -> 0x23456781, n = 20 -> 0x67812345
 */
unsigned rotate_left(unsigned x, int n) {
  int w = sizeof(unsigned) << 3;
  unsigned high = x << n;
  /*
   * The wrapped-around bits need a right shift by (w - n), which would be
   * a shift by w when n == 0; do it as (w - n - 1) followed by 1 instead.
   */
  unsigned low = x >> (w - n - 1);
  low >>= 1;
  return high | low;
}
39
2.70
2.70
/*
* fits-bits.c
*/
#include <stdio.h>
#include <assert.h>
assert(fits_bits(0b0010, 3));
assert(!fits_bits(0b1010, 3));
assert(!fits_bits(0b0110, 3));
assert(fits_bits(~0b11, 3));
assert(!fits_bits(~0b01000011, 3));
assert(!fits_bits(~0b111, 3));
40
2.70
return 0;
}
41
2.71
2.71
A.
B.
/*
* xbyte.c
*/
#include <stdio.h>
#include <assert.h>
42
2.72
2.72
A.
B.
43
2.72
/*
* copy-int.c
*/
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
val = 0x12345678;
copy_int(val, buf, maxbytes);
assert(*(int*)buf == val);
val = 0xAABBCCDD;
copy_int(val, buf, 0);
assert(*(int*)buf != val);
free(buf);
return 0;
}
44
2.73
2.73
thanks https://zhangjunphy.github.io/csapp/chap2.html :)
/*
* saturating-add.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
return sum;
}
45
2.74
2.74
/*
* tsub-ok.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
int sub = x - y;
int pos_over = x > 0 && y < 0 && sub < 0;
int neg_over = x < 0 && y > 0 && sub > 0;
return res;
}
46
2.75
2.75.md
/*
* unsigned-high-prod.c
*/
#include <stdio.h>
#include <assert.h>
#include <inttypes.h>
assert(another_unsigned_high_prod(x, y) == unsigned_high_prod(
x, y));
return 0;
}
47
2.75
48
2.76
2.76.md
/*
* calloc.c
*/
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
p = another_calloc(SIZE_MAX, 2);
assert(p == NULL);
free(p);
return 0;
}
49
2.76
50
2.77
2.77
/*
* 2.77.c
*/
#include <stdio.h>
#include <assert.h>
/* K = 17: x*17 = x*16 + x */
int A(int x) {
  int times16 = x << 4;
  return times16 + x;
}
/* K = -7: x*(-7) = x - x*8 */
int B(int x) {
  int times8 = x << 3;
  return x - times8;
}
/* K = 60: x*60 = x*64 - x*4 */
int C(int x) {
  int times64 = x << 6;
  int times4 = x << 2;
  return times64 - times4;
}
/* K = -112: x*(-112) = x*16 - x*128 */
int D(int x) {
  int times16 = x << 4;
  int times128 = x << 7;
  return times16 - times128;
}
51
2.77
52
2.78
2.78
/*
* divide-power2.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
/*
 * Divide by a power of 2, -> x/2^k, rounding toward zero.
 * Assume 0 <= k < w-1
 */
int divide_power2(int x, int k) {
  int negative = (x & INT_MIN) != 0;   /* 1 when the sign bit is set */
  int bias = (1 << k) - 1;
  /* -negative is all ones for negative x, so the bias is added only then */
  x += bias & -negative;
  return x >> k;
}
53
2.79
2.79
/*
* mul3div4.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
/*
 * Helper taken from 2.78.
 *
 * Divide by a power of 2, -> x/2^k, rounding toward zero.
 * Assume 0 <= k < w-1
 */
int divide_power2(int x, int k) {
  int negative = (x & INT_MIN) != 0;   /* 1 when the sign bit is set */
  int bias = (1 << k) - 1;
  /* -negative is all ones for negative x, so the bias is added only then */
  x += bias & -negative;
  return x >> k;
}
/*
 * Compute 3*x/4: the multiplication happens first (and may overflow, as
 * the exercise allows), then the quotient is rounded toward zero.
 */
int mul3div4(int x) {
  int tripled = x + (x << 1);
  return divide_power2(tripled, 2);
}
54
2.80
2.80
/*
* threeforths.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
/*
* calculate 3/4x, no overflow, round to zero
*
* no overflow means divide by 4 first, then multiply by 3, different from 2.79 here
*
* rounding to zero is a little complicated.
* every int x equals f (the number formed by its upper 30 bits) plus l (the number formed by its lowest 2 bits)
*
* f = x & ~0x3
* l = x & 0x3
* x = f + l
* threeforths(x) = f/4*3 + l*3/4
*
* f needs no rounding at all (it is a multiple of 4), so we only care about the rounding of l*3/4
*
* lm3 = (l << 1) + l
*
* when x > 0, rounding to zero is easy
*
* lm3d4 = lm3 >> 2
*
* when x < 0, rounding to zero acts like divide_power2 in 2.78
*
* bias = 0x3 // (1 << 2) - 1
* lm3d4 = (lm3 + bias) >> 2
*/
int threeforths(int x) {
int is_neg = x & INT_MIN;
55
2.80
assert(threeforths(-8) == -6);
assert(threeforths(-9) == -6);
assert(threeforths(-10) == -7);
assert(threeforths(-11) == -8);
assert(threeforths(-12) == -9);
return 0;
}
56
2.81
2.81
A.
-1 << k
B.
/*
* 2.81.c
*/
#include <stdio.h>
#include <assert.h>
57
2.82
2.82
A.
B.
right
C.
right
D.
right
E.
right
/*
* 2.82.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include "lib/random.h"
/*
* right
*
* ((x + y) << 4) + y - x
* =>
* x << 4 - x + y << 4 + y
* =>
* x*16 - x + y*16 + y
58
2.82
/*
 * right
 *
 * In two's complement ~v == -v - 1, therefore
 *   ~x + ~y + 1 == (-x - 1) + (-y - 1) + 1
 *               == -(x + y) - 1
 *               == ~(x + y)
 */
int C(int x, int y) {
  int lhs = ~x + ~y + 1;
  int rhs = ~(x + y);
  return lhs == rhs;
}
/*
* right
*
* (ux - uy) == -(unsigned) (y - x)
* =>
* -(ux - uy) == (unsigned) (y - x)
* =>
* (ux - uy) == (unsigned) (x - y)
*/
int D(int x, int y) {
unsigned ux = (unsigned) x;
unsigned uy = (unsigned) y;
59
2.82
/*
 * right
 *
 * (x >> 2) << 2 clears the two low bits of x, i.e. it equals x & ~0x3,
 * which is x minus one of 0, 1, 2 or 3 and so never exceeds x.
 * y is unused; it is kept so the signature matches the other cases.
 */
int E(int x, int y) {
  int low_bits_cleared = (x >> 2) << 2;
  return low_bits_cleared <= x;
}
assert(!A(INT_MIN, 0));
assert(B(x, y));
assert(C(x, y));
assert(D(x, y));
assert(E(x, y));
return 0;
}
60
2.83
2.83
A.
n = 0.yyyyy...
n << k = y.yyyyy... = Y + n
n << k - n = Y
n = Y/(2^k - 1)
B.
(a).
y = 101, Y = 5, k = 3
n = 5/7
(b).
y = 0110, Y = 6, k = 4
n = 2/5
(c).
y = 010011, Y = 19, k = 6
n = 19/63
61
2.84
2.84
thanks czy1996
62
2.84
/*
* float-le.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Return the bit pattern of float x as an unsigned.
 *
 * The bits are read back through a union rather than by dereferencing a
 * casted pointer: *(unsigned*)&x accesses a float object through an
 * incompatible lvalue type, which breaks the strict-aliasing rule and is
 * undefined behavior. Assumes float and unsigned have the same size.
 */
unsigned f2u(float x) {
  union {
    float f;
    unsigned u;
  } pun;
  pun.f = x;
  return pun.u;
}
// ref: https://github.com/DreamAndDead/CSAPP-3e-Solutions/iss
ues/1
return (ux << 1 == 0 && uy << 1 == 0) || /* both zeros */
(sx && !sy) || /* x < 0, y >= 0 or x
<= 0, y > 0 */
(!sx && !sy && ux <= uy) || /* x > 0, y >= 0 or x
>= 0, y > 0 */
(sx && sy && ux >= uy); /* x < 0, y <= 0 or x
<= 0, y < 0 */
}
63
2.84
64
2.85
2.85
bias = 2^(k-1) - 1
V = 2^E * M
A.
7.0 = 0b111.000...
bits
0 10....01 110....
B.
bits
0 bias+n 11111....
C.
V = 2^(1-bias)
reciprocal
V = 2^(bias-1)
bits
65
2.85
0 11...101 00000.....
66
2.86
2.86
bias = 2^(15-1) - 1
67
2.87
2.87
Desc Hex M E V D
-0 0x8000 0 -14 -0 -0.0
>2 least 0x4001 1025/1024 1 1025/512 2.00195312
512 0x6000 1 9 512 512.0
biggest denormalized 0x03FF 1023/1024 -14 1023/(2^24) 6.09755516e-5
-∞ 0xFC00 - - -∞ -∞
ox3BB0 0x3BB0 123/64 -1 123/128 0.9609375
68
2.88
2.88
69
2.89
2.89
/*
* 2.89.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include "lib/random.h"
/*
* most important thing is that all double number come from ints
*/
/*
 * right: dx holds exactly the value of x, so converting either one to
 * float rounds the same number and gives the same result.
 */
int A(int x, double dx) {
  float from_int = (float)x;
  float from_double = (float)dx;
  return from_int == from_double;
}
/*
 * right: every operand comes from a 32-bit int, so each partial sum fits
 * in a double exactly and the grouping cannot change the result.
 */
int C(double dx, double dy, double dz) {
  double left_first = (dx + dy) + dz;
  double right_first = dx + (dy + dz);
  return left_first == right_first;
}
/*
 * wrong
 *
 * The product of two 32-bit ints can need up to 64 bits, more than the
 * 53-bit significand of a double, so each multiplication may round.
 * The two groupings round different intermediate products and can
 * therefore end up with different results.
 */
int D(double dx, double dy, double dz) {
  double left_first = (dx * dy) * dz;
  double right_first = dx * (dy * dz);
  return left_first == right_first;
}
70
2.89
int x = random_int();
int y = random_int();
int z = random_int();
double dx = (double)x;
double dy = (double)y;
double dz = (double)z;
assert(A(x, dx));
assert(!B(0, (double)(int)0, INT_MIN, (double)(int)INT_MIN));
assert(C(dx, dy, dz));
/* magic number, brute force attack */
assert(!D((double)(int)0x64e73387, (double)(int)0xd31cb264, (d
ouble)(int)0xd22f1fcd));
assert(!E(dx, (double)(int)0));
return 0;
}
71
2.90
2.90
/*
* fpwr2.c
*/
#include <stdio.h>
#include <assert.h>
#include <math.h>
/*
 * Reinterpret the bit pattern x as a float.
 *
 * The conversion goes through a union rather than *(float*)&x: reading an
 * unsigned object through a float lvalue breaks the strict-aliasing rule
 * and is undefined behavior. Assumes float and unsigned have the same size.
 */
float u2f(unsigned x) {
  union {
    unsigned u;
    float f;
  } pun;
  pun.u = x;
  return pun.f;
}
/* 2^x */
float fpwr2(int x) {
/* Result exponent and fraction */
unsigned exp, frac;
unsigned u;
if (x < 2-pow(2,7)-23) {
/* too small. return 0.0 */
exp = 0;
frac = 0;
} else if (x < 2-pow(2,7)) {
/* Denormalized result */
exp = 0;
frac = 1 << (unsigned)(x - (2-pow(2,7)-23));
} else if (x < pow(2,7)-1+1) {
/* Normalized result */
exp = pow(2,7)-1+x;
frac = 0;
} else {
/* Too big, return +oo */
exp = 0xFF;
frac = 0;
}
72
2.90
return u2f(u);
}
73
2.91
2.91
A.
0x40490FDB
0 10000000 10010010000111111011011
float number
0b11.0010010000111111011011
B.
ref 2.83
0b11.001001(001)...
C.
9th
74
2.92
2.92
/*
* float-negate.c
*/
#include <stdio.h>
#include <assert.h>
#include "float-negate.h"
float_bits float_negate(float_bits f) {
unsigned sig = f >> 31;
unsigned exp = f >> 23 & 0xFF;
unsigned frac = f & 0x7FFFFF;
75
2.93
2.93
/*
* float-absval.c
*/
#include <stdio.h>
#include <assert.h>
#include "float-absval.h"
float_bits float_absval(float_bits f) {
unsigned sig = f >> 31;
unsigned exp = f >> 23 & 0xFF;
unsigned frac = f & 0x7FFFFF;
76
2.94
2.94
/*
* float-twice.c
*/
#include <stdio.h>
#include <assert.h>
#include "float-twice.h"
float_bits float_twice(float_bits f) {
unsigned sig = f >> 31;
unsigned exp = f >> 23 & 0xFF;
unsigned frac = f & 0x7FFFFF;
if (exp == 0) {
/* Denormalized */
frac <<= 1;
} else if (exp == 0xFF - 1) {
/* twice to oo */
exp = 0xFF;
frac = 0;
} else {
/* Normalized */
exp += 1;
}
77
2.95
2.95
/*
* float-half.c
*/
#include <stdio.h>
#include <assert.h>
#include "float-half.h"
float_bits float_half(float_bits f) {
unsigned sig = f >> 31;
unsigned rest = f & 0x7FFFFFFF;
unsigned exp = f >> 23 & 0xFF;
unsigned frac = f & 0x7FFFFF;
/*
* round to even, we care about last 2 bits of frac
*
* 00 => 0 just >>1
* 01 => 0 (round to even) just >>1
* 10 => 1 just >>1
* 11 => 1 + 1 (round to even) just >>1 and plus 1
*/
int addition = (frac & 0x3) == 0x3;
if (exp == 0) {
/* Denormalized */
frac >>= 1;
frac += addition;
} else if (exp == 1) {
/* Normalized to denormalized */
rest >>= 1;
rest += addition;
exp = rest >> 23 & 0xFF;
frac = rest & 0x7FFFFF;
78
2.95
} else {
/* Normalized */
exp -= 1;
}
79
2.96
2.96
/*
* float-f2i.c
*/
#include <stdio.h>
#include <assert.h>
#include "float-f2i.h"
/*
* Compute (int) f
* If conversion cause overflow or f is NaN, return 0x80000000
*/
int float_f2i(float_bits f) {
unsigned sig = f >> 31;
unsigned exp = f >> 23 & 0xFF;
unsigned frac = f & 0x7FFFFF;
unsigned bias = 0x7F;
int num;
unsigned E;
unsigned M;
/*
* consider positive numbers
*
* 0 00000000 00000000000000000000000
* ===>
* 0 01111111 00000000000000000000000
* 0 <= f < 1
* get integer 0
*
* 0 01111111 00000000000000000000000
* ===>
* 0 (01111111+31) 00000000000000000000000
* 1 <= f < 2^31
* integer round to 0
*
80
2.96
* 0 (01111111+31) 00000000000000000000000
* ===>
* greater
* 2^31 <= f < oo
* return 0x80000000
*/
if (exp >= 0 && exp < 0 + bias) {
/* number less than 1 */
num = 0;
} else if (exp >= 31 + bias) {
/* number overflow */
/* or f < 0 and (int)f == INT_MIN */
num = 0x80000000;
} else {
E = exp - bias;
M = frac | 0x800000;
if (E > 23) {
num = M << (E - 23);
} else {
/* whether sig is 1 or 0, round to zero */
num = M >> (23 - E);
}
}
81
2.97
2.97
/*
* float-i2f.c
*/
#include <stdio.h>
#include <assert.h>
#include <limits.h>
#include "float-i2f.h"
/*
 * Number of bits needed to represent i, counted up to its highest 1 bit.
 * Intended for i > 0; a value with the sign bit set reports 32 and
 * 0 reports 0.
 *
 * e.g.
 * 0x3 => 2
 * 0xFF => 8
 * 0x80 => 8
 *
 * The count is obtained by shifting an unsigned copy right until it is
 * empty. Comparing against (1 << length) instead would evaluate 1 << 31
 * for inputs >= 0x40000000, which overflows a signed int (undefined
 * behavior).
 */
int bits_length(int i) {
  if ((i & INT_MIN) != 0) {
    return 32;
  }
  unsigned u = (unsigned)i;
  int length = 0;
  while (u != 0) {
    length++;
    u >>= 1;
  }
  return length;
}
/*
* generate mask
* 00000...(32-l) 11111....(l)
*
* e.g.
* 3 => 0x00000007
* 16 => 0x0000FFFF
*/
82
2.97
/*
 * Mask with the low l bits set, e.g. 3 => 0x00000007, 16 => 0x0000FFFF.
 *
 * l <= 0 yields 0 and l >= 32 yields all ones. These cases are handled
 * explicitly because shifting a 32-bit value by 32 or by a negative
 * amount is undefined behavior.
 */
unsigned bits_mask(int l) {
  if (l <= 0) {
    return 0;
  }
  if (l >= 32) {
    return (unsigned) -1;
  }
  return (unsigned) -1 >> (32 - l);
}
/*
* Compute (float) i
*/
float_bits float_i2f(int i) {
unsigned sig, exp, frac, rest, exp_sig /* except sig */, round
_part;
unsigned bits, fbits;
unsigned bias = 0x7F;
if (i == 0) {
sig = 0;
exp = 0;
frac = 0;
return sig << 31 | exp << 23 | frac;
}
if (i == INT_MIN) {
sig = 1;
exp = bias + 31;
frac = 0;
return sig << 31 | exp << 23 | frac;
}
sig = 0;
/* 2's complatation */
if (i < 0) {
sig = 1;
i = -i;
}
bits = bits_length(i);
fbits = bits - 1;
exp = bias + fbits;
83
2.97
/* round to even */
if (round_part < round_mid) {
/* nothing */
} else if (round_part > round_mid) {
exp_sig += 1;
} else {
/* round_part == round_mid */
if ((frac & 0x1) == 1) {
/* round to even */
exp_sig += 1;
}
}
}
84
3. Machine-Level Representation of Programs
Machine-Level Representation of
Programs
To understand a program, you must become both the machine and the
program.
by Alan Perlis
test
code directory: ./code
test way:
85
3. Machine-Level Representation of Programs
86
3.58
3.58
/*
* decode.c
*/
/*
 * Return ((y - z) * x) XOR m, where m is all ones when the low bit of
 * (y - z) is 1 and zero otherwise.
 *
 * m is built as -(tmp & 1), which gives the same value as shifting the low
 * bit up to the sign position and arithmetically back down
 * (tmp << 63 >> 63). The shift form is avoided because left-shifting a
 * signed long so that a 1 reaches or passes the sign bit is undefined
 * behavior.
 */
long decode(long x, long y, long z) {
  long tmp = y - z;
  long low_bit_mask = -(tmp & 1);
  return (tmp * x) ^ low_bit_mask;
}
87
3.59
3.59
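assume ($x_{63}$ and $y_{63}$ are the sign bits of $x$ and $y$)
$ux = x + x_{63} \cdot 2^{64}$
$uy = y + y_{63} \cdot 2^{64}$
multiply
$x \cdot y = ux \cdot uy - (x_{63} y + y_{63} x) \cdot 2^{64} \pmod{2^{128}}$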
88
3.59
89
3.60
3.60
A.
val reg
x %rdi
n %esi
result %rax
mask %rdx
B.
result = 0
mask = 1
C.
mask != 0
D.
E.
90
3.60
/*
* loop2.c
*/
91
3.61
3.61
/*
* cread-alt.c
*/
#include <stdio.h>
#include <assert.h>
92
3.61
93
3.62
3.62
/*
* 3.62.c
*/
94
3.62
95
3.63
3.63
/*
* 3.63.c
*/
96
3.64
3.64
A.
3.1 in book
T D[R][C];
&D[i][j] = Xd + L(C*i + j)
similarly, in 3d array
TYPE D[R][S][T]
B.
97
3.64
.section .data
.global A
A:
.fill 3640/8, 8, 121 # fill data 121
.section .text
.global store_ele
# long store_ele(long i, long j, long k, long *dest)
# i in %rdi, j in %rsi, k in %rdx, dest in %rcx
store_ele:
leaq (%rsi, %rsi, 2), %rax # t1 = j*3
leaq (%rsi, %rax, 4), %rax # t1 = j*13
movq %rdi, %rsi # t2 = i
salq $6, %rsi # t2 = i*64
addq %rsi, %rdi # t3 = i*65
addq %rax, %rdi # t3 = i*65 + j*13
addq %rdi, %rdx # t4 = i*65 + j*13 + k
movq A(,%rdx,8), %rax # t1 = *(A + 8*t4)
movq %rax, (%rcx) # *dest = t1
movl $3640, %eax # return 3640
ret
base on comments,
S * T = 65
T = 13
8*R*S*T = 3640
so
R = 7
S = 5
T = 13
98
3.65
3.65
A.
&A[i][j] in %rdx
B.
&A[j][i] in %rax
C.
M = 15
99
3.66
3.66
.section .text
.global sum_col
# long sum_col(long n, long A[NR(n)][NC(n)], long j)
# n in %rdi, A in %rsi, j in %rdx
# Adds up column j of A. The loop below runs n*3 times and advances the
# element pointer by 8*(n*4 + 1) bytes each time, so NR(n) = 3*n and
# NC(n) = 4*n + 1.
sum_col:
leaq 1(,%rdi,4), %r8 # t1 = n*4 + 1
leaq (%rdi,%rdi,2), %rax # t2 = n*3
movq %rax, %rdi # t3 = n*3 (row count)
testq %rax, %rax # test n*3
jle .L4 # n*3 <= 0, jump .L4
salq $3, %r8 # t1 = t1*8 = 8*(n*4 + 1), bytes per row
leaq (%rsi,%rdx,8), %rcx # t4 = A + j*8, address of A[0][j]
movl $0, %eax # t2 = 0 (running sum)
movl $0, %edx # t5 = 0 (row index)
.L3:
addq (%rcx), %rax # t2 += *t4
addq $1, %rdx # t5 = t5+1
addq %r8, %rcx # t4 += t1, step one row: 8*(n*4 + 1) bytes
cmpq %rdi, %rdx # cmp t5 & t3
jne .L3 # if t5 != n*3, loop
rep
ret
.L4:
movl $0, %eax # return 0
ret
100
3.66
thanks gonglinyuan
101
3.67
3.67
/*
* 3.67.c
*/
/* Argument record: two values plus a pointer to a third. */
typedef struct {
  long a[2];
  long *p;
} strA;
/* Result record: two values plus the value the pointer referred to. */
typedef struct {
  long u[2];
  long q;
} strB;
/*
 * Build a strB from s: the two array elements are swapped and q receives
 * the long that s.p points to (s.p must be a valid pointer).
 *
 * The parameter is named s to match the body; it was declared as `a`
 * while the body referred to `s`, which does not compile.
 */
strB process(strA s) {
  strB r;
  r.u[0] = s.a[1];
  r.u[1] = s.a[0];
  r.q = *s.p;
  return r;
}
102
3.67
# strB process(strA s)
# s in %rdi
process:
movq %rdi, %rax
movq 24(%rsp), %rdx
movq (%rdx), %rdx
movq 16(%rsp), %rcx
movq %rcx, (%rdi)
movq 8(%rsp), %rcx
movq %rcx, 8(%rdi)
movq %rdx, 16(%rdi)
ret
A.
103
3.67
104 +------------------+
| |
| |
| |
| |
| |
| |
| |
| |
64 +------------------+ <-- %rdi
| |
| |
| |
| |
| |
| |
32 +------------------+
| z |
24 +------------------+
| &z |
16 +------------------+
| y |
8 +------------------+
| x |
0 +------------------+ <-- %rsp
B.
C.
D.
eval passes the address %rsp+64 to process; process stores the result starting at
that address and finally returns this address
E.
104
3.67
104 +------------------+
| |
| |
| |
| |
| |
| |
88 +------------------+
| z |
80 +------------------+
| x |
72 +------------------+
| y |
64 +------------------+ <-- %rdi(eval pass in)
| | \
| | -- %rax(process pass out)
| |
| |
| |
| |
32 +------------------+
| z |
24 +------------------+
| &z |
16 +------------------+
| y |
8 +------------------+
| x |
0 +------------------+ <-- %rsp in eval
| |
-8 +------------------+ <-- %rsp in process
F.
the caller allocates space and passes its address to the callee; the callee stores the
result in that space and returns the same address
105
3.67
106
3.68
3.68
4 < B <= 8
5 < A <= 10
44 < A*B <= 46
only
A = 9
B = 5
107
3.69
3.69
/*
* 3.69.c
*/
typedef struct {
int first;
a_struct a[CNT];
int last;
} b_struct;
108
3.69
A.
CNT = 7
109
3.69
thanks https://github.com/zagortenay333
B.
/*
 * Answer to 3.69 B: an index followed by four longs.
 * Members are terminated with ';' and the typedef itself ends with ';'
 * so the declaration is valid C.
 */
typedef struct {
  long idx;
  long x[4];
} a_struct;
110
3.70
3.70
A.
val offset
e1.p 0
e1.y 8
e2.x 0
e2.next 8
B.
16
C.
111
3.70
# %rdx = *( *(up->e2.next) )
# %rdx is treated as a pointer
# so %rdx stores *( *(up->e2.next).e1.p )
movq (%rdx), %rdx
base on comments
112
3.70
/*
* 3.70.c
*/
/*
 * Union from problem 3.70: the same storage viewed either as e1 or as e2.
 * Per the answers to parts A and B, both members start at offset 0, their
 * second fields sit at offset 8, and the whole union is 16 bytes.
 */
union ele {
/* view 1: pointer at offset 0, long at offset 8 */
struct {
long *p;
long y;
} e1;
/* view 2: long at offset 0, pointer to another union ele at offset 8 */
struct {
long x;
union ele *next;
} e2;
};
113
3.71
3.71
/*
* good-echo.c
*/
#include <stdio.h>
#include <assert.h>
#define BUF_SIZE 12
/*
 * Copy standard input to standard output in pieces of at most
 * BUF_SIZE - 1 characters. fgets never writes more than BUF_SIZE bytes
 * into buf, so arbitrarily long lines are echoed piecewise without
 * overrunning the buffer. The loop stops when fgets returns NULL.
 */
void good_echo(void) {
  char buf[BUF_SIZE];
  char *chunk;
  while ((chunk = fgets(buf, BUF_SIZE, stdin)) != NULL) {
    printf("%s", chunk);
  }
}
114
3.72
3.72
/*
* 3.72.c
*/
#include <alloca.h>
A.
$s_2 = s_1 - [(n \cdot 8 + 30) \mathbin{\&} \mathtt{0xFFFFFFF0}]$
when n is odd
s2 = s1 − (n ∗ 8 + 24)
115
3.72
when n is even
s2 = s1 − (n ∗ 8 + 16)
B.
$p = (s_2 + 15) \mathbin{\&} \mathtt{0xFFFFFFF0}$
C.
which e1 n s1
least 1 even s1%16==1
greatest 24 odd s1%16==0
least:
greatest:
D.
p is aligned by 16
116
3.73
3.73
117
3.73
/*
* 3.73.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Classify x relative to 0.0 with hand-written x86-64 assembly that uses
 * conditional jumps. The asm leaves its answer in %eax:
 *   0 when x < 0, 1 when x == 0, 2 when x > 0, 3 when the compare is
 *   unordered (x is NaN).
 * NOTE(review): range_t is declared outside this snippet; presumably these
 * numbers are its enumerators in that order - confirm.
 * NOTE(review): there is no return statement and no operand/clobber list;
 * the code assumes x is still in %xmm0 and that the value left in %eax is
 * what the caller receives - confirm against the build flags used.
 */
range_t find_range(float x) {
__asm__(
/* %xmm1 = 0.0 */
"vxorps %xmm1, %xmm1, %xmm1\n\t"
/* compare %xmm0 with 0.0; sets ZF/PF/CF */
"vucomiss %xmm1, %xmm0\n\t"
/* parity flag set: unordered result, must be tested first */
"jp .P\n\t"
"ja .A\n\t"
"jb .B\n\t"
"je .E\n\t"
/* above zero */
".A:\n\t"
"movl $2, %eax\n\t"
"jmp .Done\n\t"
/* below zero */
".B:\n\t"
"movl $0, %eax\n\t"
"jmp .Done\n\t"
/* equal to zero */
".E:\n\t"
"movl $1, %eax\n\t"
"jmp .Done\n\t"
/* unordered */
".P:\n\t"
"movl $3, %eax\n\t"
".Done:\n\t"
);
}
118
3.73
119
3.74
3.74
/*
* 3.74.c
*/
#include <stdio.h>
#include <assert.h>
/*
 * Branch-free variant of find_range: classify x relative to 0.0 using
 * conditional moves only. The asm leaves its answer in %rax:
 *   1 by default (x == 0), 2 when x > 0, 0 when x < 0, 3 when the compare
 *   is unordered (x is NaN).
 * NOTE(review): range_t is declared outside this snippet; presumably these
 * numbers are its enumerators in that order - confirm.
 * NOTE(review): there is no return statement and no operand/clobber list;
 * the code assumes x is still in %xmm0, that %r8-%r10 and %xmm1 are free
 * to overwrite, and that %rax reaches the caller - confirm.
 */
range_t find_range(float x) {
__asm__(
/* %xmm1 = 0.0 */
"vxorps %xmm1, %xmm1, %xmm1\n\t"
/* default answer, kept when none of the conditional moves fires */
"movq $1, %rax\n\t"
/* candidate answers; cmov needs a register (or memory) source */
"movq $2, %r8\n\t"
"movq $0, %r9\n\t"
"movq $3, %r10\n\t"
/* compare %xmm0 with 0.0; sets ZF/PF/CF */
"vucomiss %xmm1, %xmm0\n\t"
"cmovaq %r8, %rax\n\t"
"cmovbq %r9, %rax\n\t"
/* an unordered compare also sets CF, so this move comes last to override cmovb */
"cmovpq %r10, %rax\n\t"
);
}
120
3.75
3.75
A.
B.
121
4. Processor Architecture
Processor Architecture
The speed at which modern CPUs perform computations still blows my mind
daily.
by Markus Persson
yas simulation
This chapter focuses on processor architecture: it designs a small, simple processor
and yas -- an assembly language -- designed to run on it.
you can access the processor simulation code and simulation manual from csapp
official site.
simulation code
simulator manual
Highly recommend you read the manual and README in code to know how it
works and how to test yas code.
test
code directory: ./code
test way:
122
4. Processor Architecture
123
4.45
4.45
A.
No
B.
124
4.46
4.46
A.
No
if REG is %rsp, movq (%rsp), REG pop the right value into %rsp, but addq
$8, %rsp modify it
B.
125
4.47
4.47
A.
/*
* bubble-sort-pointer.c
*/
B.
Compile bubble-sort-pointer.c
126
4.47
.file "bubble-sort-pointer.c"
.text
.globl bubble_p
.type bubble_p, @function
bubble_p:
.LFB0:
.cfi_startproc
leaq -8(%rdi,%rsi,8), %rsi
jmp .L2
.L4:
movq 8(%rax), %rdx
movq (%rax), %rcx
cmpq %rcx, %rdx
jge .L3
movq %rcx, 8(%rax)
movq %rdx, (%rax)
.L3:
addq $8, %rax
jmp .L5
.L6:
movq %rdi, %rax
.L5:
cmpq %rsi, %rax
jb .L4
subq $8, %rsi
.L2:
cmpq %rdi, %rsi
ja .L6
rep ret
.cfi_endproc
.LFE0:
.size bubble_p, .-bubble_p
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.5) 5.4.0 201606
09"
.section .note.GNU-stack,"",@progbits
/* bubble-sort-pointer.ys */
127
4.47
.pos 0
irmovq stack, %rsp
call main
halt
# Array of 4 elements
.align 8
data:
.quad 0x0000000000000004
.quad 0x0000000000000003
.quad 0x0000000000000002
data_end:
.quad 0x0000000000000001
main:
irmovq data,%rdi
irmovq data_end,%rsi
call ysBubbleP
ret
128
4.47
.pos 0x200
stack:
../sim/misc/yas bubble-sort-pointer.ys
../sim/misc/yis bubble-sort-pointer.yo
Stopped in 117 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000020
%rsp: 0x0000000000000000 0x0000000000000200
%rsi: 0x0000000000000000 0x0000000000000018
%rdi: 0x0000000000000000 0x0000000000000018
%r9: 0x0000000000000000 0x0000000000000001
%r10: 0x0000000000000000 0x0000000000000002
Changes to memory:
0x0018: 0x0000000000000004 0x0000000000000001
0x0020: 0x0000000000000003 0x0000000000000002
0x0028: 0x0000000000000002 0x0000000000000003
0x0030: 0x0000000000000001 0x0000000000000004
0x01f0: 0x0000000000000000 0x0000000000000055
0x01f8: 0x0000000000000000 0x0000000000000013
129
4.47
130
4.48
4.48
/* bubble-sort-pointer.ys */
.pos 0
irmovq stack, %rsp
call main
halt
# Array of 4 elements
.align 8
data:
.quad 0x0000000000000004
.quad 0x0000000000000003
.quad 0x0000000000000002
data_end:
.quad 0x0000000000000001
main:
irmovq data,%rdi
irmovq data_end,%rsi
call ysBubbleP
ret
131
4.48
.pos 0x200
stack:
132
4.48
../sim/misc/yas bubble-sort-pointer-3-cmove.ys
../sim/misc/yis bubble-sort-pointer-3-cmove.yo
Stopped in 129 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000020
%rsp: 0x0000000000000000 0x0000000000000200
%rsi: 0x0000000000000000 0x0000000000000018
%rdi: 0x0000000000000000 0x0000000000000018
%r9: 0x0000000000000000 0x0000000000000002
%r10: 0x0000000000000000 0x0000000000000001
%r11: 0x0000000000000000 0x0000000000000001
Changes to memory:
0x0018: 0x0000000000000004 0x0000000000000001
0x0020: 0x0000000000000003 0x0000000000000002
0x0028: 0x0000000000000002 0x0000000000000003
0x0030: 0x0000000000000001 0x0000000000000004
133
4.49
4.49
if x = 9, y = 10
x = x ^ y x:9^10; y:10
y = x ^ y x:9^10; y:9
x = x ^ y x:10; y:9
/* bubble-sort-pointer.ys */
.pos 0
irmovq stack, %rsp
call main
halt
# Array of 4 elements
.align 8
data:
.quad 0x0000000000000004
.quad 0x0000000000000003
.quad 0x0000000000000002
data_end:
.quad 0x0000000000000001
main:
irmovq data,%rdi
irmovq data_end,%rsi
call ysBubbleP
134
4.49
ret
135
4.49
.pos 0x200
stack:
../sim/misc/yas bubble-sort-pointer-1-cmove.ys
../sim/misc/yis bubble-sort-pointer-1-cmove.yo
Stopped in 141 steps at PC = 0x13. Status 'HLT', CC Z=1 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000020
%rsp: 0x0000000000000000 0x0000000000000200
%rsi: 0x0000000000000000 0x0000000000000018
%rdi: 0x0000000000000000 0x0000000000000018
%r9: 0x0000000000000000 0x0000000000000002
%r10: 0x0000000000000000 0x0000000000000001
%r11: 0x0000000000000000 0x0000000000000002
Changes to memory:
0x0018: 0x0000000000000004 0x0000000000000001
0x0020: 0x0000000000000003 0x0000000000000002
0x0028: 0x0000000000000002 0x0000000000000003
0x0030: 0x0000000000000001 0x0000000000000004
136
4.50
4.50
/* switch.ys */
.pos 0
irmovq stack, %rsp
call main
halt
# Array of 4 elements
.align 8
array:
.quad 0x0000000000000000
.quad 0x0000000000000000
.quad 0x0000000000000000
.quad 0x0000000000000000
main:
# test number 1, -1, 3, 5
irmovq array, %r10
irmovq $1,%rdi
call switchv
rmmovq %rax, (%r10)
irmovq $-1,%rdi
call switchv
rmmovq %rax, 8(%r10)
irmovq $3,%rdi
call switchv
rmmovq %rax, 16(%r10)
irmovq $5,%rdi
call switchv
rmmovq %rax, 24(%r10)
ret
table:
.quad LD # default branch
.quad L0 # idx == 0
137
4.50
.quad L1 # idx == 1
.quad L2 # idx == 2
.quad L3 # idx == 3
.quad L4 # idx == 4
.quad L5 # idx == 5
138
4.50
L1:
jmp LD
L2:
jmp L5
L3:
irmovq $0xccc, %rax
ret
L4:
jmp LD
L5:
irmovq $0xbbb, %rax
ret
LD:
irmovq $0xddd, %rax
ret
.pos 0x200
stack:
test function switchv in main, using idx 1,-1,3,5, store result into array
139
4.50
../sim/misc/yas switch.ys
../sim/misc/yis switch.yo
Stopped in 133 steps at PC = 0x13. Status 'HLT', CC Z=0 S=0 O=0
Changes to registers:
%rax: 0x0000000000000000 0x0000000000000bbb
%rcx: 0x0000000000000000 0x00000000000000e7
%rdx: 0x0000000000000000 0xfffffffffffffffd
%rsp: 0x0000000000000000 0x0000000000000200
%rdi: 0x0000000000000000 0x00000000000001a8
%r8: 0x0000000000000000 0x0000000000000008
%r11: 0x0000000000000000 0x0000000000000001
Changes to memory:
0x0000: 0x000000000200f430 0x0000000000000ddd
0x0008: 0x0000000038800000 0x0000000000000ddd
0x0010: 0x0000000000000000 0x0000000000000ccc
0x0018: 0x0000000000000000 0x0000000000000bbb
140
4.51
4.51
141
4.52
4.52
@@ -129,7 +129,7 @@
142
4.52
word srcB = [
- icode in { IOPQ, IRMMOVQ, IMRMOVQ } : rB;
+ icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't need register
];
@@ -137,7 +137,7 @@
## What register should be used as the E destination?
word dstE = [
icode in { IRRMOVQ } && Cnd : rB;
- icode in { IIRMOVQ, IOPQ} : rB;
+ icode in { IIRMOVQ, IOPQ, IIADDQ } : rB;
icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't write any register
];
@@ -153,7 +153,7 @@
## Select input A to ALU
word aluA = [
icode in { IRRMOVQ, IOPQ } : valA;
- icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : valC;
+ icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
icode in { ICALL, IPUSHQ } : -8;
icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
@@ -162,7 +162,7 @@
## Select input B to ALU
word aluB = [
icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
- IPUSHQ, IRET, IPOPQ } : valB;
+ IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
icode in { IRRMOVQ, IIRMOVQ } : 0;
# Other instructions don't need ALU
];
@@ -174,7 +174,7 @@
];
143
4.52
144
4.53
4.53
1. data hazard
Anything that used to be handled by data forwarding must be handled by stalling
once forwarding is removed. So when a data hazard happens, we have to insert a
bubble in phase E and stall phases F and D.
load/use hazard
pay attention
so we get:
145
4.53
situation: data_hazard
bool s_data_hazard =
(
(
d_srcA != RNONE &&
d_srcA in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE
}
) ||
(
d_srcB != RNONE &&
d_srcB in { e_dstE, E_dstM, M_dstM, M_dstE, W_dstM, W_dstE
}
)
)
2. ret situation
keep same
situation: ret
bool s_ret = IRET in { D_icode, E_icode, M_icode };
3. jxx error
keep same
4. hazard composition
X means nothing to do; stall means stall; bubble means inserting bubble
146
4.53
situation 4: just data hazard, stall phase F and D, insert bubble in phase E, M and
W keep same
ret:
one of them
jxx error:
+-----------+
M | |
+-----------+
E | jxx |
+-----------+
D | |
+-----------+
147
4.53
data hazard:
<--+
+-----------+ |
M | |<--+
+-----------+ |
E | |<--+
+-----------+ |
D | xxx +---+
+-----------+
When a data hazard happens, D_icode can be anything; xxx means any instruction.
<--+
+-----------+ |
M | |<--+
+-----------+ |
E | |<--+
+-----------+ |
D | ret +---+
+-----------+
When both happen at the same time, the data hazard takes priority over ret: if ret
does not stall to resolve the data hazard, the result no longer matches the ISA.
148
4.53
<--+
+-----------+ |
M | |<--+
+-----------+ |
E | jxx |<--+
+-----------+ |
D | xxx +---+
+-----------+
When these two happen at the same time, the jxx misprediction takes priority over
the data hazard: the next 2 instructions are canceled on a jxx misprediction, so
xxx is canceled anyway.
<--+
+-----------+ |
M | |<--+
+-----------+ |
E | jxx |<--+
+-----------+ |
D | ret +---+
+-----------+
finally:
149
4.53
F:
D:
E:
stall: 0
bubble: jxx || data
M:
keep same
W:
keep same
finally:
150
4.53
151
4.53
+ )
+ ) &&
+ !(E_icode == IJXX && !e_Cnd);
bool F_bubble = 0;
bool D_bubble =
- # Mispredicted branch
- (E_icode == IJXX && !e_Cnd) ||
- # Stalling at fetch while ret passes through pipeline
- !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_sr
cB }) &&
- # but not condition for a generate/use hazard
- !0 &&
- IRET in { D_icode, E_icode, M_icode };
+ (E_icode == IJXX && !e_Cnd) ||
+ (
+ !(
+ (
152
4.53
153
4.53
154
4.54
4.54
155
4.54
word d_srcB = [
- D_icode in { IOPQ, IRMMOVQ, IMRMOVQ } : D_rB;
+ D_icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ } : D_rB;
D_icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
1 : RNONE; # Don't need register
];
156
4.54
SHLT };
157
4.55
4.55
The problem statement does not match the skeleton code; we follow the code here
and use UNCOND in place of J_YES.
one point is
origin
now
158
4.55
bool D_bubble =
# Mispredicted branch
- (E_icode == IJXX && !e_Cnd) ||
+ (E_icode == IJXX && E_ifun != UNCOND && e_Cnd) ||
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
!(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_sr
cB }) &&
@@ -354,7 +359,7 @@
bool E_stall = 0;
bool E_bubble =
# Mispredicted branch
159
4.55
160
4.56
4.56
The problem statement does not match the skeleton code; we follow the code here
and use UNCOND in place of J_YES.
161
4.56
valC;
+ f_icode == IJXX && f_ifun != UNCOND && f_valC >= f_valP : f
_valP;
f_icode in { IJXX, ICALL } : f_valC;
1 : f_valP;
];
@@ -244,12 +249,15 @@
# way to get valC into pipeline register M, so that
# you can correct for a mispredicted branch.
@@ -258,6 +266,7 @@
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
IPUSHQ, IRET, IPOPQ } : E_valB;
E_icode in { IRRMOVQ, IIRMOVQ } : 0;
+ E_icode in { IJXX } : 0;
# Other instructions don't need ALU
];
@@ -343,7 +352,11 @@
bool D_bubble =
# Mispredicted branch
- (E_icode == IJXX && !e_Cnd) ||
+ # backward taken error or forward not-taken error
+ (
+ (E_icode == IJXX && E_ifun != UNCOND && E_valC < E_valA &&
!e_Cnd) ||
+ (E_icode == IJXX && E_ifun != UNCOND && E_valC >= E_valA &&
162
4.56
e_Cnd)
+ ) ||
# BBTFNT: This condition will change
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
@@ -355,7 +368,11 @@
bool E_stall = 0;
bool E_bubble =
# Mispredicted branch
- (E_icode == IJXX && !e_Cnd) ||
+ # backward taken error or forward not-taken error
+ (
+ (E_icode == IJXX && E_ifun != UNCOND && E_valC < E_valA &&
!e_Cnd) ||
+ (E_icode == IJXX && E_ifun != UNCOND && E_valC >= E_valA &&
e_Cnd)
+ ) ||
# BBTFNT: This condition will change
# Conditions for a load/use hazard
E_icode in { IMRMOVQ, IPOPQ } &&
163
4.57
4.57
A.
situation 1 2 3 4
E_dstM == d_srcA 1 1 0 0
E_dstM == d_srcB 1 0 1 0
situation 4:
situation 1,2,3:
Consider situations 1 and 3: E_dstM == d_srcB, so d_srcB is not RNONE and its value
must be used in phase E; load forwarding cannot work. Load forwarding only works in
situation 2!
It cannot work for ret and popq either, because for them d_srcA == d_srcB == %rsp,
which is not situation 2!
164
4.57
B.
@@ -329,7 +330,13 @@
bool F_stall =
# Conditions for a load/use hazard
## Set this to the new load/use condition
- 0 ||
+ E_icode in { IMRMOVQ, IPOPQ } &&
+ (
+ E_dstM == d_srcB ||
+ (
165
4.57
@@ -338,15 +345,29 @@
bool D_stall =
# Conditions for a load/use hazard
## Set this to the new load/use condition
- 0;
+ E_icode in { IMRMOVQ, IPOPQ } &&
+ (
+ E_dstM == d_srcB ||
+ (
+ E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ })
+ )
+ );
bool D_bubble =
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
- !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_sr
cB }) &&
- IRET in { D_icode, E_icode, M_icode };
+ !(
+ E_icode in { IMRMOVQ, IPOPQ } &&
+ (
+ E_dstM == d_srcB ||
+ (
+ E_dstM == d_srcA && !(D_icode in { IRMMOVQ, IPUSHQ
})
+ )
+ )
+ ) &&
+ IRET in { D_icode, E_icode, M_icode };
166
4.57
167
4.58
4.58
@@ -169,7 +170,7 @@
# Is instruction valid?
bool instr_valid = f_icode in
168
4.58
@@ -204,14 +206,14 @@
## What register should be used as the A source?
word d_srcA = [
D_icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ } : D_rA;
- D_icode in { IPOPQ, IRET } : RRSP;
+ D_icode in { IRET } : RRSP;
1 : RNONE; # Don't need register
];
169
4.58
@@ -224,7 +226,7 @@
@@ -255,7 +257,7 @@
word aluA = [
E_icode in { IRRMOVQ, IOPQ } : E_valA;
E_icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ } : E_valC;
- E_icode in { ICALL, IPUSHQ } : -8;
+ E_icode in { ICALL, IPUSHQ, IPOP2 } : -8;
E_icode in { IRET, IPOPQ } : 8;
# Other instructions don't need ALU
];
@@ -263,7 +265,7 @@
## Select input B to ALU
word aluB = [
E_icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL,
- IPUSHQ, IRET, IPOPQ } : E_valB;
+ IPUSHQ, IRET, IPOPQ, IPOP2 } : E_valB;
E_icode in { IRRMOVQ, IIRMOVQ } : 0;
# Other instructions don't need ALU
];
@@ -292,13 +294,13 @@
170
4.58
bool D_bubble =
@@ -367,7 +369,7 @@
(E_icode == IJXX && !e_Cnd) ||
# Stalling at fetch while ret passes through pipeline
# but not condition for a load/use hazard
- !(E_icode in { IMRMOVQ, IPOPQ } && E_dstM in { d_srcA, d_sr
cB }) &&
+ !(E_icode in { IMRMOVQ, IPOP2 } && E_dstM in { d_srcA, d_sr
cB }) &&
# 1W: This condition will change
IRET in { D_icode, E_icode, M_icode };
@@ -378,7 +380,7 @@
# Mispredicted branch
(E_icode == IJXX && !e_Cnd) ||
# Conditions for a load/use hazard
- E_icode in { IMRMOVQ, IPOPQ } &&
+ E_icode in { IMRMOVQ, IPOP2 } &&
171
4.58
172
4.59
4.59
L4:
mrmovq 8(%rax), %r9
mrmovq (%rax), %r10
rrmovq %r9, %r8
subq %r10, %r8
jge L3
rmmovq %r10, 8(%rax)
rmmovq %r9, (%rax)
50% of the time the jge prediction is right and 5 instructions run; 50% of the time
it is wrong and 7 instructions plus 2 nop bubbles run. So cycles per loop is
50% * 5 + 50% * (7 + 2) = 7
L4:
mrmovq 8(%rax), %r9
mrmovq (%rax), %r10
rrmovq %r9, %r8
subq %r10, %r8
cmovl %r9, %r11
cmovl %r10, %r9
cmovl %r11, %r10
rmmovq %r9, 8(%rax)
rmmovq %r10, (%rax)
173
4.59
L4:
mrmovq 8(%rax), %r9
mrmovq (%rax), %r10
rrmovq %r9, %r8
rrmovq %r10, %r11
xorq %r9, %r10
subq %r11, %r8
cmovge %r11, %r9
xorq %r10, %r9
xorq %r9, %r10
rmmovq %r9, 8(%rax)
rmmovq %r10, (%rax)
174
5. Optimizing Program Performance
by Mae Jemison
test
code directory: ./code
test way:
prof
prerequisite
google gperftools
175
5. Optimizing Program Performance
176
5.13
5.13
A.
+----+----+----+----+-----+-----+
|%rbp|%rcx|%rax|%rbx|%xmm1|%xmm0|
+----+----+----+----+-----+-----++----+
+----|----|----|----------|--->| |
| +----|----|----------|--->|load| vmovad 0(%rbp,%rcx
,8),%xmm1
| | | | +-----|----| |
| | | | | | +----+
| +----|----|----------|--->| |
| | | | | | |load|---+
| | +----|----------|--->| | |
| | | | | | +----+ | vmulsd (%rax,%rcx,
8),%xmm1,%xmm0
| | | | | | | |<--+
| | | | +-----|--->|mul |
| | | | +-----|----| |
| | | | | | +----+
| | | | +-----|--->| |
| | | | | +--->|add | vaddsd %xmm1,%xmm0
,%xmm0
| | | | | +----| |
| | | | | | +----+
| +----|----|----------|--->| |
| | | | | |add | addq $1, %rcx
| +----|----|----------|----| |
| | | | | | +----+
| +----|----|----------|--->| |
| | | | | | |cmp |---+ cmpq %rbx, %rcx
| | | +----|-----|--->| | |
| | | | | | +----+ |
| | | | | | | | |
| | | | | | |jne |<--+ jne .L15
| | | | | | | |
| | | | | | +----+
v v v v v v
+----+----+----+----+-----+-----+
177
5.13
|%rbp|%rcx|%rax|%rbx|%xmm1|%xmm0|
+----+----+----+----+-----+-----+
+----+ +-----+
|%rcx| |%xmm0|
+----+ +-----+
| |
| +----+ | <--------- key path
+---->|load|------+ |
| +----+ | |
| v v
| +----+ +-+--+ +-+--+
+---->|load|--->|mul |--->|add |
| +----+ +----+ +----+
| |
| |
v |
+----+ |
|add | |
+----+ |
| |
v v
+-+--+ +-----+
|%rcx| |%xmm0|
+----+ +-----+
B.
5-12
C.
5-12
D.
178
5.13
/*
* 5.13.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "./lib/vec.h"
set_vec_start(u, arr);
set_vec_start(v, arr);
data_t res;
inner4(u, v, &res);
179
5.13
assert(res == 1+4+9);
return 0;
}
180
5.14
5.14
+----+ +----+
|sum | | i |
+----+ +----+
| |
v |
+----+ +----+ +----+ |
|add |<------|mul |<-|load|<------+
+----+ +----+ +----+ |
| |
v |
+----+ +----+ +----+ |
|add |<------|mul |<-|load|<------+
+----+ +----+ +----+ |
| |
v |
+----+ +----+ +----+ |
|add |<------|mul |<-|load|<------+
+----+ +----+ +----+ |
| |
v |
+----+ +----+ +----+ |
|add |<------|mul |<-|load|<------+
+----+ +----+ +----+ |
| |
v |
+----+ +----+ +----+ |
|add |<------|mul |<-|load|<------+
+----+ +----+ +----+ |
| |
v |
+----+ +----+ +----+ |
|add |<------|mul |<-|load|<------+
+----+ +----+ +----+ |
| |
| v
| +----+
| |add |
| <------- key path +----+
181
5.14
| |
| |
v v
+----+ +-+--+
|sum | | i |
+----+ +----+
A.
so n/6 * 6 = n
B.
same like A
/*
* 5.14.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "./lib/vec.h"
#define LEN 24
182
5.14
set_vec_start(u, arr);
set_vec_start(v, arr);
data_t res;
inner4(u, v, &res);
assert(res == 1+4+9);
return 0;
}
183
5.15
5.15
maybe
/*
* 5.15.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "./lib/vec.h"
#define LEN 24
184
5.15
set_vec_start(u, arr);
set_vec_start(v, arr);
data_t res;
inner4(u, v, &res);
assert(res == 1+4+9);
return 0;
}
185
5.16
5.16
/*
* 5.16.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "./lib/vec.h"
#define LEN 24
186
5.16
vec_ptr v = new_vec(LEN);
set_vec_start(u, arr);
set_vec_start(v, arr);
data_t res;
inner4(u, v, &res);
assert(res == 1+4+9);
return 0;
}
187
5.17
5.17
/*
* 5.17.c
*/
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/*
* K = sizeof(unsigned long)
* cs store K chars for memset
*/
void* effective_memset(void *s, unsigned long cs, size_t n) {
/* align to K */
size_t K = sizeof(unsigned long);
size_t cnt = 0;
unsigned char *schar = s;
while (cnt < n) {
if ((size_t)schar % K == 0) {
break;
}
*schar++ = (unsigned char)cs;
cnt++;
}
188
5.17
free(basic_space);
free(effective_space);
return 0;
}
189
5.17
190
5.18
5.18
uname -p
If your machine has an Intel Core i7 Haswell CPU like the one in the book, you can
try installing Google gperftools for profiling.
/*
* 5.18.c
*/
#include <stdio.h>
#include <assert.h>
/* version 6*3a */
double poly_6_3a(double a[], double x, long degree) {
long i = 1;
double result = a[0];
191
5.18
double result1 = 0;
double result2 = 0;
double xpwr = x;
double xpwr1 = x * x * x;
double xpwr2 = x * x * x * x * x;
double xpwr_step = x * x * x * x * x * x;
for (; i <= degree - 6; i+=6) {
result = result + (a[i]*xpwr + a[i+1]*xpwr*x);
result1 = result1 + (a[i+2]*xpwr1 + a[i+3]*xpwr1*x);
result2 = result2 + (a[i+4]*xpwr2 + a[i+5]*xpwr2*x);
xpwr *= xpwr_step;
xpwr1 *= xpwr_step;
xpwr2 *= xpwr_step;
}
192
5.18
double a[10 + 1] = { 0, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1};
double x = 2;
long degree = 10;
x = 1;
degree = LEN;
double b[LEN + 1];
193
5.19
5.19
If you're interested, you can try installing Google's gperftools for profiling.
/*
* 5.19.c
*/
#include <stdio.h>
#include <assert.h>
/* version 4*1a */
void psum_4_1a(float a[], float p[], long n) {
long i;
float val, last_val;
float tmp, tmp1, tmp2, tmp3;
last_val = p[0] = a[0];
194
5.19
p[i] = tmp;
p[i+1] = tmp1;
p[i+2] = tmp2;
p[i+3] = tmp3;
/* key point */
last_val = last_val + (a[i] + a[i+1] + a[i+2] + a[i+3]);
}
float q[5];
psum_4_1a(a, q, 5);
assert(q[4] == 15);
/* for prof */
for (int i = 0; i < LOOP; i++) {
float s[LEN];
float d[LEN];
psum1a(s, d, LEN);
psum_4_1a(s, d, LEN);
}
return 0;
}
195
5.19
196
6. The Memory Hierarchy
by Alexis Stewart
test
code directory: ./code
test way:
197
6. The Memory Hierarchy
198
6.22
6.22
assume
bpt = x * r * K
Track Count
tc = (1-x) * r * M
M, K are constant
so
Bit Count
bc = K * M * r^2 * (1-x) * x
199
6.23
6.23
T_avg_seek = 4ms
so
T_access = 6.005ms
200
6.24
6.24
A.
Best case: the blocks are mapped sequentially on the same cylinder, so we only need
to seek once.
T_avg_seek = 4ms
T_avg_rotation = 2ms
File size is 2MB and block size is 512B, so block count = 2MB/512B = 4000.
Blocks per track = 1000, so the disk must make 4 full rotations to read all the data.
so
T_access = 22ms
B.
201
6.25
6.25
m c B E S t s b
32 1024 4 4 64 24 6 2
32 1024 4 256 1 30 0 2
32 1024 8 1 128 22 7 3
32 1024 8 128 1 29 0 3
32 1024 32 1 32 22 5 5
32 1024 32 4 8 24 3 5
202
6.26
6.26
m c B E S t s b
32 2048 8 1 256 21 8 3
32 2048 4 4 128 23 7 2
32 1024 2 8 64 25 6 1
32 1024 32 2 16 23 4 5
203
6.27
6.27
A.
address may be
01000101 001 xx
format
0 1000 1010 01xx
t = 0x38
B.
0x1238 - 0x123B
204
6.28
6.28
A.
None
B.
0x18F0 - 0x18F3
0x00B0 - 0x00B3
C.
0x0E34 - 0x0E37
D.
0x1BDC - 0x1BDF
205
6.29
6.29
A.
11 10 9 8 7 6 5 4 3 2 1 0
+----+----+----+----+----+----+----+----+----+----+----+----+
| CT | CT | CT | CT | CT | CT | CT | CT | CI | CI | CO | CO |
+----+----+----+----+----+----+----+----+----+----+----+----+
B.
206
6.30
6.30
A.
C = E * B * S = 128B
B.
12 11 10 9 8 7 6 5 4 3 2 1 0
+----+----+----+----+----+----+----+----+----+----+----+----+---
-+
| CT | CT | CT | CT | CT | CT | CT | CT | CI | CI | CI | CO | CO
|
+----+----+----+----+----+----+----+----+----+----+----+----+---
-+
207
6.31
6.31
A.
12 11 10 9 8 7 6 5 4 3 2 1 0
+----+----+----+----+----+----+----+----+----+----+----+----+---
-+
| 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0
|
+----+----+----+----+----+----+----+----+----+----+----+----+---
-+
B.
param value
CO 0x02
CI 0x06
CT 0x38
hit? Yes
return 0xEB
208
6.32
6.32
A.
12 11 10 9 8 7 6 5 4 3 2 1 0
+----+----+----+----+----+----+----+----+----+----+----+----+---
-+
| 1 | 0 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 0 | 0
|
+----+----+----+----+----+----+----+----+----+----+----+----+---
-+
B.
param value
CO 0x00
CI 0x02
CT 0xB7
hit? No
return --
209
6.33
6.33
0x1788 - 0x178B
0x16C8 - 0x16CB
210
6.34
6.34
src:
c0 c1 c2 c3
r0 m m h m
r1 m h m h
r2 m m h m
r3 m h m h
dst:
c0 c1 c2 c3
r0 m m m m
r1 m m m m
r2 m m m m
r3 m m m m
211
6.35
6.35
src:
c0 c1 c2 c3
r0 m h h h
r1 m h h h
r2 m h h h
r3 m h h h
dst:
c0 c1 c2 c3
r0 m h h h
r1 m h h h
r2 m h h h
r3 m h h h
212
6.36
6.36
int x[2][128];
int i;
int sum = 0;
A.
C = 512, E = 1, B = 16, S = 32
B.
C = 1024, E = 1, B = 16, S = 64
sizeof(x) == 2 * 128 * 4 == C
B = 16, sizeof(int) = 4, so
C.
C = 512, E = 2, B = 16, S = 16
213
6.36
so x[0][i] and x[1][i] are cached into different block in same set.
in second half
x[0][i] is not in cache. according to LRU strategy, cache x[0][i] into the same block
with x[0][i-64], cache x[1][i] into the same block with x[1][i-64]. miss rate is 25%.
D.
No
If B is still 16 and sizeof(int) = 4, a block can hold only 4 ints at a time.
The first read of each block misses and loads it into the cache; the next 3 reads hit.
E.
Yes
214
6.36
215
6.37
6.37
/*
* 6.37.c
*/
/*
 * sumA - add up all N*N elements of a.
 *
 * The row index runs in the outer loop and the column index in the inner
 * loop, so elements are visited as a[0][0], a[0][1], ..., a[N-1][N-1].
 */
int sumA(array_t a) {
    int total = 0;
    int row, col;

    for (row = 0; row < N; row++) {
        for (col = 0; col < N; col++) {
            total += a[row][col];
        }
    }
    return total;
}
/*
 * sumB - add up all N*N elements of a.
 *
 * The column index runs in the outer loop and the row index in the inner
 * loop, so elements are visited as a[0][0], a[1][0], ..., a[N-1][N-1].
 */
int sumB(array_t a) {
    int total = 0;
    int row, col;

    for (col = 0; col < N; col++) {
        for (row = 0; row < N; row++) {
            total += a[row][col];
        }
    }
    return total;
}
/*
 * sumC - add up all N*N elements of a, processing one 2x2 tile per
 * inner-loop iteration.
 *
 * i (stepping by 2) selects the left column of the tile in the outer loop and
 * j (stepping by 2) selects its top row in the inner loop. Each iteration
 * reads a[j][i], a[j][i+1], a[j+1][i] and a[j+1][i+1], so N must be even or
 * the i+1 / j+1 accesses run past the array.
 *
 * Returns the accumulated sum.
 */
int sumC(array_t a) {
    int i, j;
    int sum = 0;

    for (i = 0; i < N; i += 2)
        for (j = 0; j < N; j += 2)
            sum += (a[j][i] + a[j][i+1] + a[j+1][i] + a[j+1][i+1]);

    /* The original fell off the end of a non-void function. */
    return sum;
}
N = 64
216
6.37
sizeof(array_t) == 64 * 64 * sizeof(int) == 4096 * 4 == 4C
memory-cache graph
0 +---------+
| 0 |
16 +---------+
| 1 |
32 +---------+
| 2 |
48 +---------+
| . |
| . |
| . |
| . |
| . |
4096-16 +---------+
| 255 |
4096 +---------+
| 0 |
4096+16 +---------+
| 1 |
4096+32 +---------+
| . |
| . |
| . |
| . |
| . |
| . |
4096*4-16+---------+
| 255 |
4096*4 +---------+
217
6.37
A. sumA
sum += a[i][j];
B. sumB
sum += a[j][i];
0, 16, 32, 48, ... 240,(4 times) 1, 17, 33, ... 241,(4 times) 15, 31, 47, ... 255(4
times)
C. sumC
218
6.37
same like
same like
because of i+=2,
N = 60
A. sumA
sum += a[i][j];
219
6.37
B. sumB
it's interesting.
0, 15, 30, ...., 225, (17 numbers) 255, 14, 29, ....., 224, (17 numbers) 254, 13, 28,
....., 223, (17 numbers) 253, 12, 27, 42, 57, 72, 87, 102, 117 (9 numbers)
next 3 loops: a[0][1] -> a[59][1], a[0][2] -> a[59][2], a[0][3] -> a[59][3]
all hit
C. sumC
25%
220
6.38
6.38
A.
4 * 16 * 16
B.
sizeof(point_color) == 16, B = 32
square[i][j].c = 0
square[i][j].m = 0
square[i][j].y = 0
square[i][j].k = 0
square[i][j+1].c = 0
square[i][j+1].m = 0
square[i][j+1].y = 0
square[i][j+1].k = 0
all hit
C.
1/8
221
6.39
6.39
A.
4 * 16 * 16
B.
sizeof(point_color) == 16, B = 32
square[j][i].c = 0
square[j][i].m = 0
square[j][i].y = 0
square[j][i].k = 0
all hit.
next loop
C.
1/4
222
6.40
6.40
A.
4 * 16 * 16
B.
miss count is
16*16*1/2 + 16*16*3*1/6
C.
1/4
223
6.41
6.41
every loop
buffer[i][j].r = 0;
buffer[i][j].g = 0;
buffer[i][j].b = 0;
buffer[i][j].a = 0;
all hit
224
6.42
6.42
same like
buffer[i][j].r = 0;
buffer[i][j].g = 0;
buffer[i][j].b = 0;
buffer[i][j].a = 0;
all hit
225
6.43
6.43
same like
every loop,
(int*)&buffer[i][j] = 0;
always miss
226
6.44
6.44
run
see result
227
6.44
228
6.45
6.45
function transpose
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
order 0: cache miss, load src[0], src[1]
order 1: cache hit because of order 0
order 2: cache miss, load src[2], src[3]
....
order 15: cache hit
0 4 8 12 1 5 9 13 2 6 10 14 3 7 11 15
229
6.45
code
230
6.45
+--+--+ +--+--+
|0 |1 | |0 |4 |
+--+--+ => +--+--+
|4 |5 | |1 |5 |
+--+--+ +--+--+
dst[0] = src[0];
dst[1] = src[4];
dst[4] = src[1];
dst[5] = src[5];
If element 0 misses, element 1 must hit; if element 4 misses, element 5 must hit.
50% is the highest hit rate achievable with such a small cache block size.
If B and the cache size C are larger, we can split the matrix into 4*4, 8*8 or even
larger blocks; theoretically this achieves the highest hit rate.
finally code:
/*
* transpose.c
*/
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
231
6.45
close(rnd);
}
int offset = i;
void test(void) {
int *d = (int *)malloc(MEM_SIZE);
int *s = (int *)malloc(MEM_SIZE);
randomize((void *)s, MEM_SIZE);
transpose(d, s, MATRIX_N);
232
6.45
memset(d, 0, MEM_SIZE);
effective_transpose(d, s, MATRIX_N);
free((void *)d);
free((void *)s);
}
void prof(void) {
int *d = (int *)malloc(MEM_SIZE);
int *s = (int *)malloc(MEM_SIZE);
free((void *)d);
free((void *)s);
}
void prof_effect(void) {
int *d = (int *)malloc(MEM_SIZE);
int *s = (int *)malloc(MEM_SIZE);
free((void *)d);
free((void *)s);
}
/* prof(); */
/* prof_effect(); */
233
6.45
return 0;
}
In the code, the matrix size is 1024*1024, and we loop 1000 times to measure the program's run time.
BLOCK time(s)
2 9.99
3 7.16
4 5.6
5 5.66
6 5.34
7 5.39
8 5.38
9 5.48
10 6.21
11 7.9
12 10.17
13 11.14
14 11.88
15 12.11
16 11.85
cat /sys/devices/system/cpu/cpu0/cache/*
234
6.46
6.46
/*
* convert.c
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <time.h>
int i, j;
for (i = 0; i <= N - 1; i++)
for (j = 0; j <= N - 1; j++)
arr[i * N + j] = rand() % 2;
}
235
6.46
int offset = i;
void test(void) {
int *s = (int *)malloc(MEM_SIZE);
int *e = (int *)malloc(MEM_SIZE);
randomize(s, MATRIX_N);
memcpy(e, s, MEM_SIZE);
convert(s, MATRIX_N);
236
6.46
effective_convert(e, MATRIX_N);
free((void *)s);
free((void *)e);
}
void prof(void) {
int *s = (int *)malloc(MEM_SIZE);
free((void *)s);
}
void prof_effect(void) {
int *e = (int *)malloc(MEM_SIZE);
free((void *)e);
}
/* prof(); */
/* prof_effect(); */
return 0;
}
237
6.46
BLOCK time(s)
1 14.26
2 12.01
3 7.43
4 6.20
5 6.08
6 5.86
7 5.70
8 5.67
9 6.30
10 6.39
11 6.21
12 6.18
13 5.9
14 6.3
15 5.88
16 5.92
238
7. Linking
Linking
Our ultimate goal is extensible programming. By this, we mean the
construction of hierarchies of modules, each module adding new functionality
to the system.
by Niklaus Wirth
test
code directory: ./code
test way:
239
7.6
7.6
/*
* swap.c
*/
/*
 * swap - exchange the ints that bufp0 and bufp1 point to.
 *
 * incr() is called first, then bufp1 is aimed at buf[1], and only then are
 * the two pointed-to values exchanged through a scratch variable.
 */
void swap() {
    int saved;

    incr();
    bufp1 = &buf[1];

    saved = *bufp0;    /* keep the value behind bufp0 before overwriting it */
    *bufp0 = *bufp1;
    *bufp1 = saved;
}
240
7.6
output:
gcc -c swap.c
objdump -t swap.o
SYMBOL TABLE:
0000000000000000 l O .bss 0000000000000008 bufp1
0000000000000000 l F .text 0000000000000015 incr
0000000000000008 l O .bss 0000000000000004 count.1747
0000000000000000 g O .data 0000000000000008 bufp0
0000000000000000 *UND* 0000000000000000 buf
0000000000000015 g F .text 0000000000000049 swap
241
7.7
7.7
/*
* bar5.c
*/
double x;
/*
 * f - intentionally does nothing: the assignment to x is commented out, so
 * calling f leaves x unmodified.
 */
void f() {
/*x = -0.0;*/
}
delete line
x = -0.0;
242
7.8
7.8
A.
main.1
main.2
B.
unknown
unknown
C.
error
error
243
7.9
7.9
/*
* bar6.c
*/
#include <stdio.h>
/*
 * p2 - print the value of the symbol `main` in hexadecimal with a "0x" prefix,
 * followed by a newline.
 * NOTE(review): the declaration of `main` used by this file is not visible in
 * this excerpt -- confirm how it is declared before relying on the %x conversion.
 */
void p2() {
printf("0x%x\n", main);
}
/*
* foo6.c
*/
void p2(void);
/* offset - takes no arguments, does nothing, and returns immediately. */
void offset(void) {
}
output:
244
7.9
0xe5894855
objdump -d main
000000000040055d <main>:
40055d: 55 push %rbp
40055e: 48 89 e5 mov %rsp,%rbp
400561: 48 83 ec 10 sub $0x10,%rsp
400565: 89 7d fc mov %edi,-0x4(%rbp)
400568: 48 89 75 f0 mov %rsi,-0x10(%rbp)
40056c: e8 07 00 00 00 callq 400578 <p2>
400571: b8 00 00 00 00 mov $0x0,%eax
400576: c9 leaveq
400577: c3 retq
245
7.9
/*
* another-bar6.c
*/
#include <stdio.h>
/*
 * p2 - treat the address of `main` as a pointer to unsigned int, read the
 * unsigned int stored there, and print it in hexadecimal with a "0x" prefix,
 * followed by a newline.
 * NOTE(review): `main` is not declared in the visible part of this file --
 * confirm its declaration.
 */
void p2() {
printf("0x%x\n", * (unsigned int *)main);
}
246
7.10
7.10
A.
B.
C.
247
7.11
7.11
248
7.12
7.12
A.
B.
249
7.13
7.13
A.
libm.a path
whereis libm.a
output:
libm.a files
ar t /usr/lib64/libm.a
output:
250
7.13
s_lib_version.o
s_matherr.o
s_signgam.o
fclrexcpt.o
fgetexcptflg.o
fraiseexcpt.o
fsetexcptflg.o
ftestexcept.o
fegetround.o
fesetround.o
fegetenv.o
feholdexcpt.o
fesetenv.o
feupdateenv.o
t_exp.o
fedisblxcpt.o
feenablxcpt.o
fegetexcept.o
powl_helper.o
e_acos.o
e_acosh.o
e_asin.o
e_atan2.o
e_atanh.o
....
B.
/*
* little.c
*/
/* main - minimal program: ignores its arguments and returns 0. */
int main(int argc, char* argv[]) {
return 0;
}
251
7.13
C.
ldd og-little
output:
linux-vdso.so.1 (0x00007ffef51d3000)
libc.so.6 => /lib64/libc.so.6 (0x00007f27c6b8b000)
/lib64/ld-linux-x86-64.so.2 (0x00007f27c6f24000)
252
8. Exceptional Control Flow
by Margaret Fuller
test
8.1 - 8.8 visit book
test
code directory: ./code
test way:
253
8. Exceptional Control Flow
254
8.9
8.9
255
8.10
8.10
fork
longjmp, execve
setjmp
256
8.11
8.11
4 lines
c
+-------+
| "hello"
|
c | p
+-------+-------+
| fork "hello"
|
| c
| +-------+
| | "hello"
| |
| p | p
+------+-------+-------+
main fork fork "hello"
257
8.12
8.12
8 lines
c
+-------+---------+
| "hello" "hello"
|
c | p
+-------+-------+---------+
| fork "hello" "hello"
|
| c
| +-------+---------+
| | "hello" "hello"
| |
| p | p
+------+-------+-------+---------+
main fork fork "hello" "hello"
258
8.13
8.13
x=4
x=3
x=2
Note that the parent process and the child process don't share the global x; each
has its own private copy of x.
259
8.14
8.14
3 lines
c
+-------+
| "hello"
|
c | p
+-------+-------+
| fork "hello"
|
| p
+------+-------+-------+
main fork return "hello"
260
8.15
8.15
5 lines
c
+-------+---------+
| "hello" "hello"
|
c | p
+-------+-------+---------+
| fork "hello" "hello"
|
|
| p
+------+-------+-------+
main fork return "hello"
261
8.16
8.16
counter = 2
262
8.17
8.17
263
8.18
8.18
c
+-------+---------+
| "0" exit "2"
|
c | p
+-------+-------+---------+
| fork "1" exit "2"
| (atexit)
| c
| +-------+---------+
| | "0" exit
| |
| p | p
+------+-------+-------+---------+
main fork fork "1" exit
B & D is impossible.
264
8.19
8.19
2^n
265
8.20
8.20
/*
* 8.20.c
*/
#include <stdio.h>
#include "csapp.h"
266
8.21
8.21
abc
or
bac
267
8.22
8.22
/*
* mysystem.c
*/
#include <stdio.h>
#include "csapp.h"
if ((pid = Fork()) == 0) {
/* child process */
char* argv[4] = { "", "-c", command, NULL };
execve("/bin/sh", argv, environ);
}
/* exit by signal */
if (WIFSIGNALED(status))
return WTERMSIG(status);
}
}
code = mysystem("./exit-code");
printf("normally exit, code: %d\n", code); fflush(stdout);
code = mysystem("./wait-sig");
printf("exit caused by signal, code: %d\n", code); fflush(stdo
268
8.22
ut);
return 0;
}
/*
* exit-code.c
*/
#include "csapp.h"
should output
/*
* wait-sig.c
*/
#include "csapp.h"
269
8.22
270
8.23
8.23
There can be at most one pending signal of a given type at any time; other signals
of the same type are discarded.
remove sleep
/* sleep(1); */
or
271
8.24
8.24
/* $begin waitpid1 */
#include "csapp.h"
#define N 2
#define LEN 100
int main() {
int status, i;
pid_t pid;
272
8.24
exit(0);
}
/* $end waitpid1 */
273
8.25
8.25
/*
* 8.25.c
*/
#include <stdio.h>
#include "csapp.h"
sigjmp_buf buf;
if (!sigsetjmp(buf, 1)) {
alarm(5);
if (signal(SIGALRM, handler) == SIG_ERR)
unix_error("set alarm handler error");
return fgets(s, size, stream);
} else {
/* run out of time */
return NULL;
}
}
if (input == NULL) {
printf("nothing input: NULL\n");
} else {
printf("%s", input);
}
274
8.25
return 0;
}
275
8.26
8.26
learn
#include <assert.h>
#include "../csapp.h"
#include "shell.h"
#include "job.h"
strcpy(buf, cmdline);
bg = parse_line(buf, argv);
if (argv[0] == NULL)
return; /* Ignore empty lines */
if (!builtin_command(argv)) {
sigset_t mask_one, prev_one;
Sigemptyset(&mask_one);
Sigaddset(&mask_one, SIGCHLD);
276
8.26
if (!bg) {
set_fg_pid(pid);
while(get_fg_pid())
sigsuspend(&prev_one);
}
else
printf("[%d] %d %s \t %s\n", new_jid, pid, "Running", cmdl
ine);
/*
* If first arg is a builtin command, run it and return true;
* else return false.
*/
int builtin_command(char **argv)
{
if (!strcmp(argv[0], "quit")) /* quit command */
exit(0);
277
8.26
} else {
printf("format error, e.g. fg %%12 || fg 1498\n");
}
return 1;
}
// > bg
if (!strcmp(argv[0], "bg")) {
int id;
// right format: bg %ddd or bg ddd
if ((id = parse_id(argv[1])) != -1 && argv[2] == NULL) {
278
8.26
/* parse_line - Parse the command line and build the argv array
*/
int parse_line(char *buf, char **argv)
{
char *delim; /* Points to first space delimiter */
int argc; /* Number of args */
int bg; /* Background job? */
279
8.26
return bg;
}
return 1;
}
/*
 * parse_id - convert an id argument into an int.
 *
 * Two spellings are accepted: "%ddd" (leading percent sign) and "ddd". In
 * both cases the part after the optional '%' must satisfy is_number_str()
 * and is converted with atoi().
 *
 * Returns the converted value, or -1 when s is NULL or the text is rejected
 * by is_number_str().
 */
int parse_id(char* s) {
    const int invalid = -1;
    char *digits;

    if (s == NULL)
        return invalid;

    /* skip the optional leading '%' */
    digits = (s[0] == '%') ? s + 1 : s;

    return is_number_str(digits) ? atoi(digits) : invalid;
}
void test_shell() {
280
8.26
// parse id
assert(-1 == parse_id("ns"));
assert(-1 == parse_id("%%"));
assert(0 == parse_id("%0"));
assert(0 == parse_id("0"));
assert(98 == parse_id("%98"));
assert(98 == parse_id("98"));
}
/*
* job.c
*/
#include <stdio.h>
#include <assert.h>
#include "job.h"
#include "../csapp.h"
/* SIGCONT signal */
void sigchild_handler(int sig) {
int old_errno = errno;
int status;
pid_t pid;
281
8.26
Sigfillset(&mask_all);
/* be stopped */
if (WIFSTOPPED(status)) {
if (is_fg_pid(pid)) {
set_fg_pid(0);
}
// set pid status stopped
Sigprocmask(SIG_BLOCK, &mask_all, &prev_all);
JobPtr jp = find_job_by_pid(pid);
set_job_status(jp, Stopped);
Sigprocmask(SIG_SETMASK, &prev_all, NULL);
/* continue */
if(WIFCONTINUED(status)) {
set_fg_pid(pid);
// set pid status running
Sigprocmask(SIG_BLOCK, &mask_all, &prev_all);
JobPtr jp = find_job_by_pid(pid);
set_job_status(jp, Running);
282
8.26
errno = old_errno;
}
if (is_fg_pid(0)) {
Signal(SIGINT, SIG_DFL);
Kill(getpid(), SIGINT);
} else {
Kill(get_fg_pid(), SIGINT);
}
}
283
8.26
}
/* no such job */
return NULL;
}
return jid;
}
284
8.26
/*
 * print_jobs - walk jobs[0..MAXJOBS-1] and print one line to stdout for every
 * entry whose `using` field is non-zero, in the form
 * "[jid] pid status \t cmdline".
 *
 * The status text is "Running" when the entry's status equals Running and
 * "Stopped" for any other status.
 */
void print_jobs() {
    int slot = 0;

    while (slot < MAXJOBS) {
        Job entry = jobs[slot++];
        const char *state;

        if (!entry.using)
            continue;    /* unused slot: nothing to report */

        state = (entry.status == Running) ? "Running" : "Stopped";
        printf("[%d] %d %s \t %s\n", entry.jid, entry.pid, state, entry.cmdline);
    }
}
/* init_jobs - zero-fill sizeof(jobs) bytes starting at jobs. */
void init_jobs() {
    memset(jobs, 0, sizeof jobs);
}
void test_job() {
test it
P.S.: ./loop is a program that loops forever; ./sleep sleeps for 5 seconds and then exits.
285
8.26
286
9. Virtual Memory
Virtual Memory
I wanted to have virtual memory, at least as it's coupled with file systems.
by Ken Thompson
test
code directory: ./code
test way:
287
9.11
9.11
VA: 0x027c
A.
13 12 11 10 9 8 7 6 5 4 3 2 1 0
+--|--|--|--|--|--|--|--|--|--|--|--|--|--+
| 0| 0| 0| 0| 1| 0| 0| 1| 1| 1| 1| 1| 0| 0|
+--|--|--|--|--|--|--|--|--|--|--|--|--|--+
B.
param value
VPN 0x09
TLBI 0x01
TLBT 0x02
hit? No
page fault? No
PPN 0x17
C.
11 10 9 8 7 6 5 4 3 2 1 0
+--|--|--|--|--|--|--|--|--|--|--|--+
| 0| 1| 0| 1| 1| 1| 1| 1| 1| 1| 0| 0|
+--|--|--|--|--|--|--|--|--|--|--|--+
D.
288
9.11
param value
CO 0x00
CI 0x0F
CT 0x17
hit? No
value -----
289
9.12
9.12
VA: 0x03a9
A.
13 12 11 10 9 8 7 6 5 4 3 2 1 0
+--|--|--|--|--|--|--|--|--|--|--|--|--|--+
| 0| 0| 0| 0| 1| 1| 1| 0| 1| 0| 1| 0| 0| 1|
+--|--|--|--|--|--|--|--|--|--|--|--|--|--+
B.
param value
VPN 0x0E
TLBI 0x02
TLBT 0x03
hit? No
page fault? No
PPN 0x11
C.
11 10 9 8 7 6 5 4 3 2 1 0
+--|--|--|--|--|--|--|--|--|--|--|--+
| 0| 1| 0| 0| 0| 1| 1| 0| 1| 0| 0| 1|
+--|--|--|--|--|--|--|--|--|--|--|--+
D.
290
9.12
param value
CO 0x01
CI 0x0A
CT 0x11
hit? No
value -----
291
9.13
9.13
VA: 0x0040
A.
13 12 11 10 9 8 7 6 5 4 3 2 1 0
+--|--|--|--|--|--|--|--|--|--|--|--|--|--+
| 0| 0| 0| 0| 0| 0| 0| 1| 0| 0| 0| 0| 0| 0|
+--|--|--|--|--|--|--|--|--|--|--|--|--|--+
B.
param value
VPN 0x01
TLBI 0x01
TLBT 0x00
hit? No
page fault? Yes
292
9.14
9.14
/*
* 9.14.c
*/
#include <stdio.h>
#include <assert.h>
#include "vm/csapp.h"
293
9.14
Munmap(bufp, size);
294
9.15
9.15
295
9.16
9.16
296
9.17
9.17
/* Global variables */
static char *heap_listp = 0; /* Pointer to first block */
+static char *rover; /* Next fit rover */
+ rover = heap_listp;
/* $begin mminit */
- /* $begin mmfirstfit */
297
9.17
- /* First-fit search */
- void *bp;
-
- for (bp = heap_listp; GET_SIZE(HDRP(bp)) > 0; bp = NEXT_BLKP(
bp)) {
- if (!GET_ALLOC(HDRP(bp)) && (asize <= GET_SIZE(HDRP(bp)))) {
- return bp;
- }
- }
- return NULL; /* No fit */
+ /* Next fit search */
+ char *oldrover = rover;
+
+ /* Search from the rover to the end of list */
+ for ( ; GET_SIZE(HDRP(rover)) > 0; rover = NEXT_BLKP(rover))
+ if (!GET_ALLOC(HDRP(rover)) && (asize <= GET_SIZE(HDRP(rove
r))))
+ return rover;
+
+ /* search from start of list to old rover */
+ for (rover = heap_listp; rover < oldrover; rover = NEXT_BLKP(
rover))
+ if (!GET_ALLOC(HDRP(rover)) && (asize <= GET_SIZE(HDRP(rove
r))))
+ return rover;
+
+ return NULL; /* no fit found */
}
/* $end mmfirstfit */
298
9.18
9.18
/* Given block ptr bp, compute address of its header and footer
*/
#define HDRP(bp) ((char *)(bp) - WSIZE)
//line:vm:mm:hdrp
@@ -63,9 +64,9 @@
if ((heap_listp = mem_sbrk(4*WSIZE)) == (void *)-1) //line:vm
:mm:begininit
return -1;
PUT(heap_listp, 0); /* Alignment pad
ding */
- PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1)); /* Prologue head
er */
- PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1)); /* Prologue foot
er */
- PUT(heap_listp + (3*WSIZE), PACK(0, 1)); /* Epilogue head
299
9.18
er */
+ PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1, 1)); /* Prologue h
eader */
+ PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1, 1)); /* Prologue f
ooter */
+ PUT(heap_listp + (3*WSIZE), PACK(0, 1, 1)); /* Epilogue h
eader */
heap_listp += (2*WSIZE); //line:vm:mm:end
init
/* $end mminit */
@@ -98,10 +99,10 @@
return NULL;
300
9.18
+ if (GET_ALLOC(HDRP(NEXT_BLKP(bp))))
+ PUT(HDRP(NEXT_BLKP(bp)), PACK(GET_SIZE(HDRP(NEXT_BLKP(bp)))
, 1, 0));
+ else {
+ PUT(HDRP(NEXT_BLKP(bp)), PACK(GET_SIZE(HDRP(NEXT_BLKP(bp)))
, 0, 0));
+ PUT(FTRP(NEXT_BLKP(bp)), PACK(GET_SIZE(HDRP(NEXT_BLKP(bp)))
, 0, 0));
+ }
+
coalesce(bp);
}
@@ -148,7 +157,7 @@
/* $begin mmfree */
static void *coalesce(void *bp)
{
- size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
+ size_t prev_alloc = GET_PREV_ALLOC(HDRP(bp));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
@@ -158,22 +167,22 @@
301
9.18
else { /* Case 4 */
size += GET_SIZE(HDRP(PREV_BLKP(bp))) +
GET_SIZE(FTRP(NEXT_BLKP(bp)));
- PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
- PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
+ PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0, 1));
+ PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0, 1));
bp = PREV_BLKP(bp);
}
/* $end mmfree */
@@ -246,9 +255,9 @@
return NULL; //line:
vm:mm:endextend
302
9.18
bp = NEXT_BLKP(bp);
- PUT(HDRP(bp), PACK(csize-asize, 0));
- PUT(FTRP(bp), PACK(csize-asize, 0));
+ PUT(HDRP(bp), PACK(csize-asize, 0, 1));
+ PUT(FTRP(bp), PACK(csize-asize, 0, 1));
}
else {
- PUT(HDRP(bp), PACK(csize, 1));
- PUT(FTRP(bp), PACK(csize, 1));
+ PUT(HDRP(bp), PACK(csize, 1, 1));
+ PUT(HDRP(NEXT_BLKP(bp)), PACK(csize, 1, 1));
}
}
/* $end mmplace */
303
9.19
9.19
1)
b: wrong.
2)
d: Right.
3)
ref 9.10.3
304
9.20
9.20
mm.h
memlib.c
305
9.20
/*
* mem_init - Initialize the memory system model
*/
void mem_init(void)
{
- mem_heap = (char *)Malloc(MAX_HEAP);
+ mem_heap = (char *)sbrk(0);
mem_brk = (char *)mem_heap;
- mem_max_addr = (char *)(mem_heap + MAX_HEAP);
}
/*
@@ -43,7 +38,7 @@
{
char *old_brk = mem_brk;
306
9.20
/*
* main.c
*/
#include <stdio.h>
#include "csapp.h"
#ifdef CUS_MALLOC
#include "mm.h"
#include "memlib.h"
#else
#include <stdlib.h>
#endif
int i;
for (i = 0; i < LOOP; i+=2) {
void* ptr_f = malloc(i);
void* ptr = malloc(i+1);
free(ptr_f);
malloc_size += i+1;
}
return 0;
}
307
9.20
Run make to build both the original main executable and the custom version (compiled with
-DCUS_MALLOC).
CC = gcc
CFLAGS = -m64 -pthread -DCUS_MALLOC
SRCS = mm.c memlib.c csapp.c
measure:
time ./origin.main
time ./custom.main
origin:
$(CC) -m64 main.c -o origin.main
custom:
$(CC) $(CFLAGS) $(SRCS) main.c -o custom.main
diff:
(diff -u ../vm/mm.h mm.h > mm.h.diff; cd .)
(diff -u ../vm/memlib.c memlib.c > memlib.c.diff; cd .)
test:
.PHONY: clean
clean:
find . -type f -executable -print0 | xargs -0 rm -f --
measurement
308
9.20
time ./origin.main
malloc size: 25000000, heap_size: 28311552
0.00user 0.01system 0:00.01elapsed 100%CPU (0avgtext+0avgdata 19
256maxresident)k
0inputs+0outputs (0major+4547minor)pagefaults 0swaps
time ./custom.main
malloc size: 25000000, heap_size: 31327104
0.58user 0.00system 0:00.58elapsed 99%CPU (0avgtext+0avgdata 305
92maxresident)k
0inputs+0outputs (0major+7339minor)pagefaults 0swaps
309
10. System-Level I/O
System-Level I/O
I think the major good idea in Unix was its clean and simple interface: open,
close, read, and write.
by Ken Thompson
test
code directory: ./code
test way:
310
10.6
10.6
fd = 4
311
10.7
10.7
/*
* 10.7.c
*/
#include <stdio.h>
#include "csapp.h"
return 0;
}
312
10.8
10.8
/*
* 10.8.c
*/
#include <stdio.h>
#include "csapp.h"
int fd;
if (argc <= 1)
fd = 0; // stdin
else
fd = atoi(argv[1]);
Fstat(fd, &stat);
if (S_ISREG(stat.st_mode))
type = "regular";
else if (S_ISDIR(stat.st_mode))
type = "dir";
else
type = "other";
if ((stat.st_mode & S_IRUSR))
readok = "yes";
else
readok = "no";
return 0;
}
313
10.8
314
10.9
10.9
if (Fork() == 0) {
Dup2(0, 3);
Execve("fstatcheck", argv, envp);
}
315
10.10
10.10
/*
* 10.10.c
*/
#include <stdio.h>
#include "csapp.h"
if (argc == 2) {
int fd = Open(argv[1], O_RDONLY, 0);
while ((n = Rio_readn(fd, buf, MAXBUF)) != 0)
Rio_writen(STDOUT_FILENO, buf, n);
exit(0);
}
Rio_readinitb(&rio, STDIN_FILENO);
while ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0)
Rio_writen(STDOUT_FILENO, buf, n);
return 0;
}
316
11. Network Programming
Network Programming
640k is enough for anyone, and by the way, what’s a network?
test
code directory: ./code
test way:
browser means: start the server, visit it with a browser, and watch the result
317
11.6
11.6
A.
318
11.6
+}
+
/*
* doit - handle one HTTP request/response transaction
*/
B.
GET / HTTP/1.1
Host: localhost:5000
User-Agent: Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gec
ko/20100101 Firefox/53.0
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/
*;q=0.8
Accept-Language: en-US,en;q=0.5
Accept-Encoding: gzip, deflate
Connection: keep-alive
C.
HTTP 1.1
D.
Accept: 14.1
The Accept request-header field can be used to specify certain media types which
are acceptable for the response. Accept headers can be used to indicate that the
request is specifically limited to a small set of desired types, as in the case of a
request for an in-line image.
Accept-Encoding: 14.3
Accept-Language: 14.4
319
11.6
Connection: 14.10
The Connection general-header field allows the sender to specify options that are
desired for that particular connection and MUST NOT be communicated by
proxies over further connections.
Host: 14.23
The Host request-header field specifies the Internet host and port number of the
resource being requested, as obtained from the original URI given by the user or
referring resource. The Host field value MUST represent the naming authority of
the origin server or gateway given by the original URL. This allows the origin
server or gateway to differentiate between internally-ambiguous URLs, such as
the root “/” URL of a server for multiple host names on a single IP address.
User-Agent: 14.43
The User-Agent request-header field contains information about the user agent
originating the request. This is for statistical purposes, the tracing of protocol
violations, and automated recognition of user agents for the sake of tailoring
responses to avoid particular user agent limitations. User agents SHOULD include
this field with requests. The field can contain multiple product tokens (section 3.8)
and comments identifying the agent and any subproducts which form a significant
part of the user agent. By convention, the product tokens are listed in order of
their significance for identifying the application.
320
11.7
11.7
run server
browser visit
http://localhost:5000/ghost-in-shell.mpeg
321
11.8
11.8
322
11.8
nt */ //line:netp:servedynamic:dup2
Execve(filename, emptylist, environ); /* Run CGI program */
//line:netp:servedynamic:execve
}
- Wait(NULL); /* Parent waits for and reaps child */ //line:net
p:servedynamic:wait
}
/*
323
11.9
11.9
/*
324
11.10
11.10
A.
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>Tiny Server</title>
</head>
<body>
<form action="/cgi-bin/form-adder" method="GET">
<p>first number: <input type="text" name="first"/></p>
<p>second number: <input type="text" name="second"/></p>
<input type="submit" value="Submit"/>
</form>
</body>
</html>
B.
325
11.10
@@ -12,10 +12,8 @@
if ((buf = getenv("QUERY_STRING")) != NULL) {
p = strchr(buf, '&');
*p = '\0';
- strcpy(arg1, buf);
- strcpy(arg2, p+1);
- n1 = atoi(arg1);
- n2 = atoi(arg2);
+ sscanf(buf, "first=%d", &n1);
+ sscanf(p+1, "second=%d", &n2);
}
326
11.11
11.11
The HEAD method is identical to GET except that the server MUST NOT return a
message-body in the response.
tiny.c changes
@@ -55,7 +55,7 @@
return;
printf("%s", buf);
sscanf(buf, "%s %s %s", method, uri, version); //line:n
etp:doit:parserequest
- if (strcasecmp(method, "GET")) { //line:n
etp:doit:beginrequesterr
+ if (!(strcasecmp(method, "GET") == 0 || strcasecmp(method, "H
EAD") == 0)) {
clienterror(fd, method, "501", "Not Implemented",
"Tiny does not implement this method");
return;
@@ -76,7 +76,7 @@
"Tiny couldn't read the file");
return;
}
327
11.11
@@ -136,7 +136,7 @@
/*
* serve_static - copy a file back to the client
*/
-void serve_static(int fd, char *filename, int filesize)
+void serve_static(int fd, char *filename, int filesize, char *m
ethod)
{
int srcfd;
char *srcp, filetype[MAXLINE], buf[MAXBUF];
@@ -152,6 +152,9 @@
printf("Response headers:\n");
printf("%s", buf);
+ if (strcasecmp(method, "HEAD") == 0)
+ return;
+
/* Send response body to client */
srcfd = Open(filename, O_RDONLY, 0); //line:netp:servestat
ic:open
srcp = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, srcfd, 0);//
328
11.11
line:netp:servestatic:mmap
@@ -180,7 +183,7 @@
/*
* serve_dynamic - run a CGI program on behalf of the client
*/
-void serve_dynamic(int fd, char *filename, char *cgiargs)
+void serve_dynamic(int fd, char *filename, char *cgiargs, char
*method)
{
char buf[MAXLINE], *emptylist[] = { NULL };
@@ -193,6 +196,7 @@
if (Fork() == 0) { /* Child */ //line:netp:servedynamic:fork
/* Real server would set all CGI vars here */
setenv("QUERY_STRING", cgiargs, 1); //line:netp:servedynami
c:setenv
+ setenv("REQUEST_METHOD", method, 1);
Dup2(fd, STDOUT_FILENO); /* Redirect stdout to clie
nt */ //line:netp:servedynamic:dup2
Execve(filename, emptylist, environ); /* Run CGI program */
//line:netp:servedynamic:execve
}
adder.c changes
int main(void) {
- char *buf, *p;
+ char *buf, *p, *method;
329
11.11
@@ -18,6 +18,8 @@
n2 = atoi(arg2);
}
+ method = getenv("REQUEST_METHOD");
+
/* Make the response body */
sprintf(content, "Welcome to add.com: ");
sprintf(content, "%sTHE Internet addition portal.\r\n<p>", co
ntent);
@@ -29,7 +31,10 @@
printf("Connection: close\r\n");
printf("Content-length: %d\r\n", (int)strlen(content));
printf("Content-type: text/html\r\n\r\n");
- printf("%s", content);
+
+ if (strcasecmp(method, "HEAD") != 0)
+ printf("%s", content);
+
fflush(stdout);
exit(0);
330
11.12
11.12
run server
331
11.12
/*
332
11.12
post-adder code
333
11.12
/*
* post-adder.c - a minimal CGI program that adds two numbers to
gether
*/
#include "../csapp.h"
int main(void) {
char *buf, *p;
char arg1[MAXLINE], arg2[MAXLINE], content[MAXLINE];
int n1=0, n2=0;
exit(0);
}
334
11.12
335
11.13
11.13
336
11.13
printf("Response headers:\n");
printf("%s", buf);
@@ -156,7 +170,7 @@
srcfd = Open(filename, O_RDONLY, 0); //line:netp:servestat
ic:open
srcp = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, srcfd, 0);//
line:netp:servestatic:mmap
Close(srcfd); //line:netp:servestat
ic:close
- Rio_writen(fd, srcp, filesize); //line:netp:servestat
ic:write
+ Im_rio_writen(fd, srcp, filesize); //line:netp:serves
tatic:write
Munmap(srcp, filesize); //line:netp:servestat
ic:munmap
}
@@ -186,11 +200,13 @@
337
11.13
338
12. Concurrent Programming
Concurrent Programming
Multi-tasking arises out of distraction itself.
test
prerequisite
test way:
339
12. Concurrent Programming
340
12.16
12.16
/*
* 12.16.c
*/
#include <stdio.h>
#include "csapp.h"
#define DEFAULT 4
int i;
pthread_t tid;
for (i = 0; i < N; i++) {
Pthread_create(&tid, NULL, thread, NULL);
}
Pthread_exit(NULL);
}
341
12.17
12.17
A.
B.
pthread_exit
/*
* 12.17.c
*/
#include "csapp.h"
void *thread(void *vargp);
int main()
{
pthread_t tid;
/*
 * thread - peer thread routine
 *
 * Sleeps for one second, prints a greeting on stdout, and then
 * terminates by returning NULL. The argument is not used.
 */
void *thread(void *vargp)
{
  (void)vargp; /* unused */

  Sleep(1);
  fputs("Hello, world!\n", stdout);

  return NULL;
}
342
12.18
12.18
A unsafe
B safe
C unsafe
343
12.19
12.19
/*
* 12.19.c
*/
#include <stdio.h>
#include "csapp.h"
/* Critical section */
readtimes++;
reader_first = 0;
/* Critical section */
P(&mutex);
readcnt--;
if (readcnt == 0)
V(&w);
V(&mutex);
}
}
344
12.19
P(&w);
/* Critical section */
writetimes++;
if (writetimes == WRITE_LIMIT) {
printf("read/write: %d/%d\n", readtimes, writetimes);
exit(0);
}
/* Critical section */
/*
 * init - put the shared reader/writer state into its starting condition
 *
 * Both semaphores start at 1, and every counter and the reader_first
 * flag start at 0.
 */
void init(void) {
  /* semaphores: mutex is taken around readcnt updates, w around writes */
  Sem_init(&mutex, 0, 1);
  Sem_init(&w, 0, 1);

  /* counters and flag */
  reader_first = 0;
  writetimes = 0;
  readtimes = 0;
  readcnt = 0;
}
init();
345
12.19
if (i%2 == 0)
Pthread_create(&tid, NULL, reader, NULL);
else
Pthread_create(&tid, NULL, writer, NULL);
Pthread_exit(NULL);
exit(0);
}
346
12.20
12.20
/*
* 12.20.c
*/
#include <stdio.h>
#include "csapp.h"
readtimes++;
V(&mutex);
V(&readercnt);
}
}
writetimes++;
if (writetimes == WRITE_LIMIT) {
printf("read/write: %d/%d\n", readtimes, writetimes);
exit(0);
}
347
12.20
V(&mutex);
}
}
/*
 * init - put the shared reader/writer state into its starting condition
 *
 * mutex starts at 1, readercnt starts at N, and both statistics
 * counters start at 0.
 */
void init(void) {
  /* semaphores */
  Sem_init(&readercnt, 0, N);
  Sem_init(&mutex, 0, 1);

  /* statistics reported once WRITE_LIMIT writes have happened */
  writetimes = 0;
  readtimes = 0;
}
init();
Pthread_exit(NULL);
exit(0);
}
348
12.21
12.21
/*
* 12.21.c
*/
#include <stdio.h>
#include "csapp.h"
P(&w);
/* Critical section */
readtimes++;
/* Critical section */
V(&w);
}
}
349
12.21
writecnt++;
V(&mutex);
P(&w);
/* Critical section */
writetimes++;
if (writetimes == WRITE_LIMIT) {
printf("read/write: %d/%d\n", readtimes, writetimes);
exit(0);
}
/* Critical section */
V(&w);
P(&mutex);
// writer has written
writecnt--;
V(&mutex);
}
}
/*
 * init - put the shared reader/writer state into its starting condition
 *
 * Both semaphores start at 1; the writer count and both statistics
 * counters start at 0.
 */
void init(void) {
  /* semaphores: mutex is taken around writecnt updates, w around the critical sections */
  Sem_init(&mutex, 0, 1);
  Sem_init(&w, 0, 1);

  /* counters */
  writetimes = 0;
  readtimes = 0;
  writecnt = 0;
}
init();
350
12.21
else
Pthread_create(&tid, NULL, writer, NULL);
}
Pthread_exit(NULL);
exit(0);
}
351
12.22
12.22
#include "csapp.h"
void command(void);
if (argc != 2) {
fprintf(stderr, "usage: %s <port>\nuse port 5000 here\n", ar
gv[0]);
// default port 5000
listenfd = Open_listenfd("5000");
} else {
listenfd = Open_listenfd(argv[1]); //line:conc:select:openl
istenfd
}
while (1) {
ready_set = read_set;
Select(n, &ready_set, NULL, NULL, NULL); //line:conc:select:
select
352
12.22
/*
 * command - handle one line typed on standard input
 *
 * Reads a single line from stdin and echoes it to stdout. When Fgets
 * yields no line (treated as EOF) the whole process exits with status 0.
 */
void command(void) {
  char line[MAXLINE];

  if (Fgets(line, MAXLINE, stdin) == NULL)
    exit(0); /* EOF */

  /* Process the input command: here it is simply echoed back */
  printf("%s", line);
}
353
12.22
char buf[1];
run server
354
12.23
12.23
run server
let's figure out when server will fail, see code 12.23.client.c
/*
* 12.23.client.c - An echo client
*/
#include "csapp.h"
host = "127.0.0.1";
port = "5000";
Rio_readinitb(&rio, clientfd);
Rio_writen(clientfd, buf, strlen(buf));
/*Close(clientfd);*/
exit(0);
}
355
12.23
how to fix:
@@ -105,15 +103,21 @@
/* If the descriptor is ready, echo a text line from it */
if ((connfd > 0) && (FD_ISSET(connfd, &p->ready_set))) {
p->nready--;
- if ((n = Rio_readlineb(&rio, buf, MAXLINE)) != 0) {
+ if ((n = rio_readlineb(&rio, buf, MAXLINE)) > 0) {
byte_cnt += n; //line:conc:echoservers:beginecho
printf("Server received %d (%d total) bytes on fd %d\n"
,
n, byte_cnt, connfd);
Rio_writen(connfd, buf, n); //line:conc:echoservers:end
echo
}
-
/* EOF detected, remove descriptor from pool */
+ else if (n == 0) {
+ Close(connfd); //line:conc:echoservers:closeconnfd
+ FD_CLR(connfd, &p->read_set); //line:conc:echoservers:b
eginremove
+ p->clientfd[i] = -1; //line:conc:echoservers:e
ndremove
+ }
+ /* n == -1, it's an error */
356
12.23
else {
+ fprintf(stderr, "error in fd %d, close fd %d connection
\n", connfd, connfd);
Close(connfd); //line:conc:echoservers:closeconnfd
FD_CLR(connfd, &p->read_set); //line:conc:echoservers:b
eginremove
p->clientfd[i] = -1; //line:conc:echoservers:e
ndremove
357
12.24
12.24
if don't pass pointer param which points to same data block, functions
rio_readn
rio_writen
rio_readinitb
rio_readlineb
rio_readnb
358
12.25
12.25
/*
* A thread-safe version of echo that counts the total number
* of bytes received from clients.
*/
/* $begin echo_cnt */
#include "csapp.h"
359
12.25
thread safe?
reentrant?
360
12.26
12.26
struct hostent {
char *h_name;
char **h_aliases;
int h_addrtype;
int h_length;
char **h_addr_list;
}
Copying the int, char * and char ** members of struct hostent requires three different approaches.
/*
* 12.26.c
*/
#include <stdio.h>
#include "csapp.h"
/*
* struct hostent *gethostbyname(const char *name)
*
* struct hostent {
* char *h_name;
* char **h_aliases;
* int h_addrtype;
* int h_length;
* char **h_addr_list;
* }
*/
static sem_t mutex;
361
12.26
P(&mutex);
sharehost = gethostbyname(name);
// copy int
host->h_addrtype = sharehost->h_addrtype;
host->h_length = sharehost->h_length;
// copy char *
// strlen() does not count the terminating '\0' that strcpy() writes,
// so every string buffer needs one extra byte
host->h_name = (char*)Malloc(strlen(sharehost->h_name) + 1);
strcpy(host->h_name, sharehost->h_name);
// copy char **
// first pass: count the aliases so the pointer array (plus its NULL
// terminator) can be sized
int i;
for (i = 0; sharehost->h_aliases[i] != NULL; i++) {}
host->h_aliases = (char**)Malloc(sizeof(char*) * (i+1));
// second pass: deep-copy every alias string
for (i = 0; sharehost->h_aliases[i] != NULL; i++) {
// copy every char * (again +1 for the terminating '\0')
host->h_aliases[i] = (char*)Malloc(strlen(sharehost->h_aliases[i]) + 1);
strcpy(host->h_aliases[i], sharehost->h_aliases[i]);
}
// keep the copied list NULL-terminated like the original
host->h_aliases[i] = NULL;
return host;
}
362
12.26
gethostbyname_ts("127.0.0.1", &host);
// result in &host
return 0;
}
363
12.27
12.27
fclose(fpin);
fclose(fpout);
ref: 10.11
fdopen opens 2 streams on the same sockfd, and fclose closes the sockfd underneath
a stream. If you call fclose on both streams of the same sockfd, the second fclose
will fail.
Imagine one thread executes the code and opens 2 streams on fd N. After the line
fclose(fpin); has executed, the program creates another thread that executes the same code.
but
364
12.28
12.28
No effect on deadlock
thread 1 thread 2
P(s) P(s)
P(t) P(t)
V(s) V(s)
V(t) V(t)
365
12.28
+
|
|
+ +--------------------+
V(t)| | |
| | |
+ | unsafe region t |
| | |
| | |
+ +---------+---------+ |
V(s)| | | | |
| | | | |
+ | | | |
| | | | |
| | | | |
+ | +---------+----------+
P(t)| | |
| | unsafe region s |
+ | |
| | |
| | |
+ +-------------------+
P(s)|
|
+
|
|
+----+----+----+----+----+----+----+----+----+ thread 1
P(s) P(t) V(s) V(t)
thread 1 thread 2
P(s) P(s)
P(t) P(t)
V(s) V(t)
V(t) V(s)
366
12.28
+
|
|
+ +-------------------+
V(s)| | |
| | |
+ | |
| | |
| | |
+ | +---------+----------+
V(t)| | | | |
| | | | unsafe |
+ | | | region |
| | | | t |
| | | | |
+ | +---------+----------+
P(t)| | |
| | |
+ | unsafe region s |
| | |
| | |
+ +-------------------+
P(s)|
|
+
|
|
+----+----+----+----+----+----+----+----+----+ thread 1
P(s) P(t) V(s) V(t)
thread 1 thread 2
P(s) P(s)
P(t) P(t)
V(t) V(s)
V(s) V(t)
367
12.28
+
|
|
+ +---------+
V(t)| | unsafe |
| | region |
+ | t |
| | |
| | |
+ +---------+---------+----------+
V(s)| | | | |
| | | | |
+ | | | |
| | | | |
| | | | |
+ | +---------+ |
P(t)| | |
| | |
+ | unsafe region s |
| | |
| | |
+ +------------------------------+
P(s)|
|
+
|
|
+----+----+----+----+----+----+----+----+----+ thread 1
P(s) P(t) V(t) V(s)
thread 1 thread 2
P(s) P(s)
P(t) P(t)
V(t) V(t)
V(s) V(s)
368
12.28
+
|
|
+ +------------------------------+
V(s)| | |
| | |
+ | |
| | |
| | |
+ | +----------+ |
V(t)| | | unsafe | |
| | | region | |
+ | | t | |
| | | | |
| | | | |
+ | +----------+ |
P(t)| | |
| | |
+ | unsafe region s |
| | |
| | |
+ +------------------------------+
P(s)|
|
+
|
|
+----+----+----+----+----+----+----+----+----+ thread 1
P(s) P(t) V(t) V(s)
369
12.29
12.29
no deadlock
initial: a = 1, b = 1, c = 1
thread 1 thread 2
P(a) P(c)
P(b) P(b)
V(b) V(b)
P(c) V(c)
V(c) ----
V(a) ----
thread 2 doesn't manipulate mutex a and initial a is 1, so P(a), V(a) don't affect
deadlock status.
thread 1 thread 2
P(b) P(c)
V(b) P(b)
P(c) V(b)
V(c) V(c)
370
12.29
+
|
|
+ +----------+
V(c)| | |
| | |
+ | |
| | |
| | |
+ +----------+ | |
V(b)| | | | |
| | | | |
+ | | | |
| | | | |
| | | | |
+ +----------+ | |
P(b)| | |
| | |
+ | |
| | |
| | |
+ +----------+
P(c)|
|
+
|
|
+----+----+----+----+----+----+----+----+----+ thread 1
P(b) V(b) P(c) V(c)
371
12.30
12.30
initial: a = 1, b = 1, c = 1
A.
thread 2: b&c
thread 3: a&b
B.
C.
372
12.31
12.31
/*
* 12.31.c
*/
#include <stdio.h>
#include "csapp.h"
sigjmp_buf buf;
return 0;
}
373
12.31
374
12.32
12.32
/*
* 12.32.c
*/
#include <stdio.h>
#include "csapp.h"
return 0;
}
375
12.33
12.33
/*
* 12.33.c
*/
#include <stdio.h>
#include "csapp.h"
/*
 * Argument bundle for the reader thread: the three values are packed into
 * one struct whose address is handed to thread_read through the single
 * void * argument of Pthread_create.
 */
struct pack {
char *s; /* copied from the caller's s; presumably the destination buffer -- TODO confirm */
int size; /* copied from the caller's size; presumably the capacity of s -- TODO confirm */
FILE *stream; /* copied from the caller's stream; presumably the stream to read from -- TODO confirm */
};
p.s = s;
p.size = size;
p.stream = stream;
Pthread_create(&tid_read, NULL, thread_read, (void*)&p);
376
12.33
if (timeout == 1) {
Pthread_cancel(tid_read);
return NULL;
} else {
Pthread_cancel(tid_sleep);
return ptr;
}
}
return 0;
}
377
12.34
12.34
/*
* 12.34.h
*/
#define N 640
#define M 640
378
12.34
/*
* 12.34.non.concurrent.c
*/
#include <stdio.h>
#include "csapp.h"
#include "12.34.h"
int M1[N][M];
int M2[N][M];
int MUL12[N][M];
/*
 * non_concurrent_mul - single-threaded matrix product MUL12 = M1 * M2
 *
 * Every result cell is the dot product of one row of M1 with one column
 * of M2, accumulated in a local int before being stored.
 *
 * NOTE(review): the result is indexed [0,N) x [0,N) and M2 is indexed
 * [k][col] with k < M and col < N, while all three arrays are declared
 * [N][M]; this stays in bounds only because N == M -- confirm if the
 * sizes are ever changed.
 */
void non_concurrent_mul(void) {
  int row, col, t;

  for (row = 0; row < N; row++) {
    for (col = 0; col < N; col++) {
      int acc = 0;

      t = 0;
      while (t < M) {
        acc += M1[row][t] * M2[t][col];
        t++;
      }

      MUL12[row][col] = acc;
    }
  }
}
concurrent version
/*
* 12.34.concurrent.c
*/
#include <stdio.h>
#include "csapp.h"
#include "12.34.h"
int M1[N][M];
379
12.34
int M2[N][M];
int MUL12[N][M];
void concurrent_mul(void) {
pthread_t tid[THREAD];
int param[THREAD];
int i;
measure performance
380
12.34
output
(time ./12.34.non.concurrent)
0.90user 0.00system 0:00.90elapsed 99%CPU (0avgtext+0avgdata 370
4maxresident)k
0inputs+0outputs (0major+756minor)pagefaults 0swaps
(time ./12.34.concurrent)
2.20user 0.00system 0:00.64elapsed 341%CPU (0avgtext+0avgdata 38
96maxresident)k
0inputs+0outputs (0major+1462minor)pagefaults 0swaps
more detailed
thread(t) 1 2 4 8 16
core(p) 1 2 4 4 4
time(Tp) 0.86 0.466 0.626 0.627 0.628
speedup(Sp) 1 1.84 1.37 1.37 1.37
efficiency(Ep) 100% 92.2% 34.3% 34.3% 34.3%
381
12.35
12.35
The key is to call close(connfd) in both the parent and the child process, so that the fd is
really closed and can be reused; otherwise the server fails with: Accept error: too many open files
382
12.35
@@ -35,8 +37,14 @@
Getnameinfo((SA *) &clientaddr, clientlen, hostname, MAXLIN
E,
port, MAXLINE, 0);
printf("Accepted connection from (%s, %s)\n", hostname, por
t);
- doit(connfd); /
/line:netp:tiny:doit
- Close(connfd); /
/line:netp:tiny:close
+
+ if (Fork() == 0) {
+ Close(listenfd);
+ doit(connfd);
//line:netp:tiny:doit
+ Close(connfd);
//line:netp:tiny:close
+ exit(0);
+ }
+ Close(connfd);
}
}
run server
383
12.35
output
384
12.36
12.36
key points:
benchmark it
385
12.37
12.37
386
12.37
run server
output
387
12.38
12.38
files:
/*
 * sbuf_empty - is the shared buffer empty?
 *
 * Returns nonzero when front and rear are equal, 0 otherwise. The two
 * indices are compared while holding the buffer mutex.
 */
int sbuf_empty(sbuf_t *sp) {
  int is_empty;

  P(&sp->mutex); /* Lock the buffer */
  is_empty = (sp->rear == sp->front);
  V(&sp->mutex); /* Unlock the buffer */

  return is_empty;
}
/*
 * sbuf_full - is the shared buffer full?
 *
 * Returns nonzero when the distance between rear and front equals the
 * buffer's n field, 0 otherwise. The fields are read while holding the
 * buffer mutex.
 */
int sbuf_full(sbuf_t *sp) {
  int is_full;

  P(&sp->mutex); /* Lock the buffer */
  is_full = (sp->n == (sp->rear - sp->front));
  V(&sp->mutex); /* Unlock the buffer */

  return is_full;
}
main.c
/*
* main.c
*/
#include <stdio.h>
#include "../csapp.h"
388
12.38
#include "tiny.h"
#include "sbuf.h"
#define SBUFSIZE 4
#define INIT_THREAD_N 1
#define THREAD_LIMIT 4096
// thread info: bookkeeping kept for a server thread (stored in the threads[] array)
typedef struct {
pthread_t tid; // thread id; the target of Pthread_cancel when the pool is shrunk
sem_t mutex; // taken by the server thread before it works and by the adjuster around
             // Pthread_cancel, so a thread is not cancelled while it holds the lock
} ithread;
// init work
void init(void);
// function for create server thread
void *serve_thread(void *vargp);
/*
* thread routine that adjusts the total thread count according to
* the sbuf situation
*
* if sbuf is full, double the threads
* if sbuf is empty, halve the threads
*/
void *adjust_threads(void *);
// from start to end, create (end - start) new server threads
void create_threads(int start, int end);
if (argc != 2) {
389
12.38
init();
while (1) {
clientlen = sizeof(struct sockaddr_storage);
connfd = Accept(listenfd, (SA *) &clientaddr, &clientlen);
sbuf_insert(&sbuf, connfd); /* Insert connfd in buffer */
}
}
void init(void) {
nthreads = INIT_THREAD_N;
sbuf_init(&sbuf, SBUFSIZE);
while (1) {
// get lock first
// thread can't be kill now
P(&(threads[idx].mutex));
390
12.38
while (1) {
// if sbuf is full, double threads
if (sbuf_full(sp)) {
if (nthreads == THREAD_LIMIT) {
fprintf(stderr, "too many threads, can't double\n");
continue;
}
// double n
int dn = 2 * nthreads;
create_threads(nthreads, dn);
nthreads = dn;
continue;
}
// half threads
if (sbuf_empty(sp)) {
if (nthreads == 1)
continue;
391
12.38
// half n
int hn = nthreads / 2;
/*
* all server threads are divided into 2 parts
*
* keep threads [0, hn) running
* kill threads [hn, nthreads)
*
* if you want to kill a thread, you must get its lock first, so you
* won't kill a thread which is offering service.
*/
int i;
for (i = hn; i < nthreads; i++) {
P(&(threads[i].mutex));
Pthread_cancel(threads[i].tid);
V(&(threads[i].mutex));
}
nthreads = hn;
continue;
}
}
}
run server
output
392
12.38
393
12.39
12.39
+------+ +------+
| |----->| |
|client| |server|
| |<-----| |
+------+ +------+
GET / HTTP/1.1
Host: address:port
A.
/*
* proxy.c
*
* visited url log to file log.list
394
12.39
socklen_t clientlen;
struct sockaddr_storage clientaddr;
char block_list[MAXENTRY][MAXLINE];
int logfd;
char log_buf[MAXLINE];
if (argc != 2) {
fprintf(stderr, "usage: %s <port>\n", argv[0]);
fprintf(stderr, "use default port 5000\n");
listenfd = Open_listenfd("5000");
} else {
listenfd = Open_listenfd(argv[1]);
}
395
12.39
while (1) {
// wait for connection as a server
clientlen = sizeof(struct sockaddr_storage);
connfd = Accept(listenfd, (SA *) &clientaddr, &clientlen);
Rio_readinitb(&server_rio, connfd);
/*
* if uri is a full-path url like http://localhost:8000/server.c
* remove the host part http://localhost:8000
* and only pass /server.c to the server
*/
// parse HTTP request first line
if (!Rio_readlineb(&server_rio, s_buf, MAXLINE)) {
Close(connfd);
continue;
}
sscanf(s_buf, "%s %s %s", method, uri, version);
// if uri is blocked?
if (blocked_uri(uri, block_list)) {
printf("%s is blocked\n", uri);
Close(connfd);
continue;
}
// log visit
sprintf(log_buf, "visit url: %s\n", uri);
Write(logfd, log_buf, strlen(log_buf));
396
12.39
} else if (res == 0) {
fprintf(stderr, "not a abslute request path\n");
Close(connfd);
continue;
}
/*
* browser --> proxy --> server
*
* send requests
*/
// write first request line
sprintf(s_buf, "%s %s %s\n", method, path, version);
Rio_writen(clientfd, s_buf, strlen(s_buf));
printf("%s", s_buf);
do {
// pass next http requests
sn = Rio_readlineb(&server_rio, s_buf, MAXLINE);
printf("%s", s_buf);
Rio_writen(clientfd, s_buf, sn);
} while(strcmp(s_buf, "\r\n"));
/*
* server --> proxy --> browser
*
* server send response back
*/
while ((cn = Rio_readlineb(&client_rio, c_buf, MAXLINE)) != 0
)
Rio_writen(connfd, c_buf, cn);
Close(connfd);
Close(clientfd);
}
Close(logfd);
397
12.39
/*
 * separate_uri - split an absolute http URI into host, port and path
 *
 * if uri is an absolute url like
 *   http://localhost:8888/something
 * or
 *   http://localhost/something (port defaults to 80)
 * store the three parts as NUL-terminated strings in host, port and path
 * and return 1. A url without any path (http://localhost) yields "/".
 *
 * if uri is a relative path like /something
 * do nothing and return 0
 *
 * if uri is absolute but not the http protocol (https/ftp/etc)
 * do nothing, return -1, it's an error
 *
 * host, port and path must be large enough for the parts of uri; a buffer
 * as large as uri itself is always sufficient.
 */
int separate_uri(char *uri, char *host, char *port, char *path)
{
  static const char prefix[] = "http://";
  const size_t prelen = sizeof(prefix) - 1;
  const char *start, *end;
  size_t len;

  // relative path
  if (uri[0] == '/')
    return 0;

  // absolute path: if not the http protocol, error
  if (strncmp(uri, prefix, prelen) != 0)
    return -1;

  // copy host: it ends at ':', '/' or the end of the string
  start = uri + prelen;
  end = start;
  while (*end != '\0' && *end != ':' && *end != '/')
    end++;
  len = (size_t)(end - start);
  memcpy(host, start, len);
  host[len] = '\0'; // the result must be a terminated string

  // optional port
  len = 0;
  if (*end == ':') {
    // skip ':'
    start = ++end;
    while (*end != '\0' && *end != '/')
      end++;
    len = (size_t)(end - start);
    memcpy(port, start, len);
    port[len] = '\0';
  }
  // port is not provided (or empty after ':'), default 80
  if (len == 0)
    strcpy(port, "80");

  // copy path; a url that stops after the authority means the root
  strcpy(path, (*end == '\0') ? "/" : end);

  return 1;
}
/*
* read block file, parse all the entries and save into list
* entries count no more than limit
*/
void parse_block_file(char *filename, char list[MAXENTRY][MAXLIN
E], int limit) {
int blockfd;
char block_buf[MAXLINE];
rio_t block_rio;
ssize_t block_n;
// right entry
399
12.39
if (strncmp(block_buf, "http://", 7) == 0) {
strcpy(list[num], block_buf);
num++;
}
Close(blockfd);
}
/*
* if uri is in list, return true
* if not, return false
*/
int blocked_uri(char *uri, char list[MAXENTRY][MAXLINE]) {
int i;
for (i = 0; list[i][0] != '\0'; i++)
if (strncmp(uri, list[i], strlen(uri)) == 0)
return 1;
return 0;
}
B.
/*
* proxy-thread.c multi thread deal with concurrent
*
* visited url log to file log.list
* block url base on entry from file block.list
*/
#include <stdio.h>
#include "../csapp.h"
400
12.39
if (argc != 2) {
fprintf(stderr, "usage: %s <port>\n", argv[0]);
fprintf(stderr, "use default port 5000\n");
listenfd = Open_listenfd("5000");
} else {
listenfd = Open_listenfd(argv[1]);
}
while (1) {
// wait for connection as a server
clientlen = sizeof(struct sockaddr_storage);
connfdp = Malloc(sizeof(int));
*connfdp = Accept(listenfd, (SA *) &clientaddr, &clientlen);
// new thread
Pthread_create(&tid, NULL, proxy_thread, connfdp);
}
401
12.39
Close(logfd);
}
char log_buf[MAXLINE];
int clientfd;
Rio_readinitb(&server_rio, connfd);
/*
* if uri is a full-path url like http://localhost:8000/server.c
* remove the host part http://localhost:8000
* and only pass /server.c to the server
*/
// parse HTTP request first line
if (!Rio_readlineb(&server_rio, s_buf, MAXLINE)) {
Close(connfd);
return NULL;
}
sscanf(s_buf, "%s %s %s", method, uri, version);
// if uri is blocked?
if (blocked_uri(uri, block_list)) {
printf("thread %ld: %s is blocked\n", tid, uri);
Close(connfd);
return NULL;
}
// log visit
402
12.39
/*
* browser --> proxy --> server
*
* send requests
*/
// write first request line
sprintf(s_buf, "%s %s %s\n", method, path, version);
Rio_writen(clientfd, s_buf, strlen(s_buf));
printf("tid %ld: %s", tid, s_buf);
do {
// pass next http requests
sn = Rio_readlineb(&server_rio, s_buf, MAXLINE);
printf("tid %ld: %s", tid, s_buf);
Rio_writen(clientfd, s_buf, sn);
} while(strcmp(s_buf, "\r\n"));
/*
* server --> proxy --> browser
403
12.39
*
* server send response back
*/
while ((cn = Rio_readlineb(&client_rio, c_buf, MAXLINE)) != 0)
Rio_writen(connfd, c_buf, cn);
Close(connfd);
Close(clientfd);
}
/*
 * separate_uri - split an absolute http URI into host, port and path
 *
 * if uri is an absolute url like
 *   http://localhost:8888/something
 * or
 *   http://localhost/something (port defaults to 80)
 * store the three parts as NUL-terminated strings in host, port and path
 * and return 1. A url without any path (http://localhost) yields "/".
 *
 * if uri is a relative path like /something
 * do nothing and return 0
 *
 * if uri is absolute but not the http protocol (https/ftp/etc)
 * do nothing, return -1, it's an error
 *
 * host, port and path must be large enough for the parts of uri; a buffer
 * as large as uri itself is always sufficient.
 */
int separate_uri(char *uri, char *host, char *port, char *path)
{
  static const char prefix[] = "http://";
  const size_t prelen = sizeof(prefix) - 1;
  const char *start, *end;
  size_t len;

  // relative path
  if (uri[0] == '/')
    return 0;

  // absolute path: if not the http protocol, error
  if (strncmp(uri, prefix, prelen) != 0)
    return -1;

  // copy host: it ends at ':', '/' or the end of the string
  start = uri + prelen;
  end = start;
  while (*end != '\0' && *end != ':' && *end != '/')
    end++;
  len = (size_t)(end - start);
  memcpy(host, start, len);
  host[len] = '\0'; // the result must be a terminated string

  // optional port
  len = 0;
  if (*end == ':') {
    // skip ':'
    start = ++end;
    while (*end != '\0' && *end != '/')
      end++;
    len = (size_t)(end - start);
    memcpy(port, start, len);
    port[len] = '\0';
  }
  // port is not provided (or empty after ':'), default 80
  if (len == 0)
    strcpy(port, "80");

  // copy path; a url that stops after the authority means the root
  strcpy(path, (*end == '\0') ? "/" : end);

  return 1;
}
405
12.39
if (num == limit)
break;
// right entry
if (strncmp(block_buf, "http://", 7) == 0) {
strcpy(list[num], block_buf);
num++;
}
Close(blockfd);
}
/*
* if uri is in list, return true
* if not, return false
*/
int blocked_uri(char *uri, char list[MAXENTRY][MAXLINE]) {
int i;
for (i = 0; list[i][0] != '\0'; i++)
if (strncmp(uri, list[i], strlen(uri)) == 0)
return 1;
return 0;
}
how to benchmark:
run proxy
406
12.39
407