diff --git a/labs/malloclab_lab/mdriver b/labs/malloclab_lab/mdriver index 2c4aefc..1ca2a70 100644 Binary files a/labs/malloclab_lab/mdriver and b/labs/malloclab_lab/mdriver differ diff --git a/labs/malloclab_lab/mm.c b/labs/malloclab_lab/mm.c index 5e8a590..dbadeba 100644 --- a/labs/malloclab_lab/mm.c +++ b/labs/malloclab_lab/mm.c @@ -1,27 +1,11 @@ /* - * mm-naive.c - The fastest, least memory-efficient malloc package. - * - * In this naive approach, a block is allocated by simply incrementing - * the brk pointer. A block is pure payload. There are no headers or - * footers. Blocks are never coalesced or reused. Realloc is - * implemented directly using mm_malloc and mm_free. - * - * NOTE TO STUDENTS: Replace this header comment with your own header - * comment that gives a high level description of your solution. + * mm-naive.c - The clear list, first fit malloc package. */ -#include -#include -#include -#include #include #include "mm.h" #include "memlib.h" -/********************************************************* - * NOTE TO STUDENTS: Before you do anything else, please - * provide your team information in the following struct. - ********************************************************/ team_t team = { /* Team name */ "team", @@ -44,7 +28,9 @@ team_t team = { #define WSIZE 4 #define DSIZE 8 #define FSIZE 16 -#define CHUNK 1 << 10 +#define ADDRESS (sizeof(unsigned long)) +#define CHUNK 1 << 12 +#define MIN_BLOCK (2 * ADDRESS + DSIZE) #define MAX(a, b) ((a) > (b) ? (a) : (b)) @@ -62,14 +48,24 @@ team_t team = { #define NEXT(bp) (FOOT(bp) + DSIZE) #define PREV(bp) ((byte *)(bp) - PARSE(GET((bp) - DSIZE)) - DSIZE) -typedef unsigned int word; +#define POS_PRED(bp) ((byte *)(bp)) +#define POS_SUCC(bp) (((byte *)(bp) + ADDRESS)) +#define GET_PRED(bp) (*(word *)POS_PRED(bp)) +#define GET_SUCC(bp) (*(word *)POS_SUCC(bp)) + +typedef unsigned long word; typedef char byte; // mark the front and tail pos void *front_p = NULL; void *tail_p = NULL; -// used for next fit, updated by mm_init, mm_malloc, _coalesce + +/** + * used for next fit, updated by mm_init, mm_malloc, _coalesce + * @deprecated useless for clear list + */ void *fitted_p = NULL; +void *list_p = NULL; // My func /** @@ -89,6 +85,7 @@ static void *_coalesce(void *bp); static void *__coalesce_prev(void *bp); static void *__coalesce_next(void *bp); static void *__coalesce_all(void *bp); +static void *__coalesce_none(void *bp); /** * traverse and find first fit, then place in @@ -100,6 +97,7 @@ static void *_first_fit(size_t size); /** * find next fit, then place in + * @deprecated I'll use clear list * @param size align by 8, excluding head and foot * @return */ @@ -114,10 +112,30 @@ static void *_next_fit(size_t size); static void *_next_best_fit(size_t size); /** + * traverse blank block only and find first fit, then place in + * @param size align by 8, excluding head and foot + * @return + */ +static void *_first_fit_of_clear(size_t size); + +/** * allocate the block and cut sometimes * @param size align by 8, excluding head and foot */ static void _place(void *ptr, size_t size); + +/** + * just replace in of out in list + * @param in the block that in the list + * @param out the block that out the list + */ +static void _fix_list(void *in, void *out); + +/** + * check the number of blank list nums and real blank nums and print + * @deprecated + */ +static void _check(); // end /** @@ -127,7 +145,7 @@ static void _place(void *ptr, size_t size); int mm_init(void) { if ((front_p = mem_sbrk(WSIZE)) == (void *) - 1) return -1; // blank front_p += DSIZE; // first chunk - fitted_p = front_p; // init fitted_p +// fitted_p = front_p; // init fitted_p if (!_extend(CHUNK)) return -1; return 0; } @@ -139,8 +157,8 @@ void *mm_malloc(size_t size) { size_t adjust_size = ALIGN(size); size_t extend_size; void *bp; - if ((bp = _next_fit(adjust_size)) != NULL) { - fitted_p = bp; + if ((bp = _first_fit_of_clear(adjust_size)) != NULL) { +// fitted_p = bp; return bp; } else { extend_size = adjust_size; @@ -150,7 +168,7 @@ void *mm_malloc(size_t size) { bp = _extend(MAX(extend_size, CHUNK)); if (bp == NULL) return bp; _place(bp, adjust_size); - fitted_p = bp; +// fitted_p = bp; return bp; } } @@ -159,6 +177,11 @@ void *mm_malloc(size_t size) { * free a block and coalesce immediately */ void mm_free(void *ptr) { +#ifdef DEBUG + printf("---free---\n"); + _check(); + printf("----------\n"); +#endif size_t size = SIZE(ptr); SET(HEAD(ptr), PACK(size, 0)); SET(FOOT(ptr), PACK(size, 0)); @@ -183,22 +206,24 @@ void *mm_realloc(void *ptr, size_t size) { size_t next_size = (ptr != tail_p && !ALLOC(NEXT(ptr))) ? SIZE(NEXT(ptr)) + DSIZE : 0; size_t total_size = old_size + next_size; if (adjust_size <= total_size) { - __coalesce_next(ptr); - _place(ptr, adjust_size); // just cut + void *next = NEXT(ptr); + // remove + SET(POS_SUCC(GET_PRED(next)), GET_SUCC(next)); + SET(POS_PRED(GET_SUCC(next)), GET_PRED(next)); + if (next == list_p) { + if (GET_SUCC(next) == (word)next) list_p = NULL; + else list_p = (void *)GET_SUCC(next); + } + SET(HEAD(ptr), PACK(total_size, 1)); + SET(FOOT(ptr), PACK(total_size, 1)); + if (next == tail_p) tail_p = ptr; return ptr; - } - size_t prev_size = (ptr != front_p && !ALLOC(PREV(ptr))) ? SIZE(PREV(ptr)) + DSIZE : 0; - total_size += prev_size; - if (adjust_size <= total_size) { // coalesce prev or all - new_ptr = _coalesce(ptr); - memmove(new_ptr, ptr, old_size); - _place(new_ptr, adjust_size); } else { if ((new_ptr = mm_malloc(size)) == NULL) return NULL; memmove(new_ptr, ptr, old_size); mm_free(ptr); + return new_ptr; } - return new_ptr; } // my func @@ -209,6 +234,13 @@ static void *_extend(size_t size) { SET(bp, PACK(size, 0)); bp += WSIZE; SET(FOOT(bp), PACK(size, 0)); +#ifdef DEBUG + if (tail_p) { + printf("----extend----\n"); + _check(); + printf("--------------\n"); + } +#endif // init mark point tail_p = bp; return _coalesce(bp); @@ -216,9 +248,9 @@ static void *_extend(size_t size) { static void *_coalesce(void *bp) { // one chunk - if (bp == front_p && bp == tail_p) return bp; + if (bp == front_p && bp == tail_p) return __coalesce_none(bp); if (bp == front_p || ALLOC(PREV(bp))) { - if (bp == tail_p || ALLOC(NEXT(bp))) return bp; + if (bp == tail_p || ALLOC(NEXT(bp))) return __coalesce_none(bp); return __coalesce_next(bp); } else if (bp == tail_p || ALLOC(NEXT(bp))) { return __coalesce_prev(bp); @@ -233,31 +265,74 @@ static void *__coalesce_prev(void *bp) { SET(HEAD(prev), PACK(new_size, 0)); SET(FOOT(bp), PACK(new_size, 0)); if (bp == tail_p) tail_p = prev; - if (bp == fitted_p) fitted_p = prev; +// if (bp == fitted_p) fitted_p = prev; + list_p = prev; return prev; } static void *__coalesce_next(void *bp) { void *next = NEXT(bp); + // tweak list + _fix_list(next, bp); + // coalesce size_t new_size = SIZE(next) + SIZE(bp) + DSIZE; SET(HEAD(bp), PACK(new_size, 0)); SET(FOOT(next), PACK(new_size, 0)); if (next == tail_p) tail_p = bp; // should also change - if (next == fitted_p) fitted_p = bp; +// if (next == fitted_p) fitted_p = bp; + list_p = bp; + +#ifdef DEBUG + printf("----coalesce next----\n"); + _check(); + printf("---------------------\n"); +#endif return bp; } static void *__coalesce_all(void *bp) { void *prev = PREV(bp); void *next = NEXT(bp); + // tweak list + SET(POS_SUCC(GET_PRED(next)), GET_SUCC(next)); + SET(POS_PRED(GET_SUCC(next)), GET_PRED(next)); + // coalesce size_t new_size = SIZE(prev) + SIZE(bp) + SIZE(next) + FSIZE; SET(HEAD(prev), PACK(new_size, 0)); SET(FOOT(next), PACK(new_size, 0)); if (next == tail_p) tail_p = prev; - if (next == fitted_p || bp == fitted_p) fitted_p = prev; +// if (next == fitted_p || bp == fitted_p) fitted_p = prev; + list_p = prev; +#ifdef DEBUG + printf("---coalesce all---\n"); + _check(); + printf("-------------------\n"); +#endif return prev; } +static void *__coalesce_none(void *bp) { + // tweak list + if (list_p == NULL) { + list_p = bp; + SET(POS_SUCC(list_p), (word)list_p); + SET(POS_PRED(list_p), (word)list_p); + } else { + // add to list + SET(POS_SUCC(bp), GET_SUCC(list_p)); + SET(POS_PRED(bp), (word)list_p); + SET(POS_PRED(GET_SUCC(list_p)), (word)bp); + SET(POS_SUCC(list_p), (word)bp); + list_p = bp; + } +#ifdef DEBUG + printf("---coalesce none---\n"); + _check(); + printf("-------------------\n"); +#endif + return bp; +} + static void *_first_fit(size_t size) { void *bp = front_p; void *after_p = NEXT(tail_p); @@ -319,18 +394,93 @@ static void *_next_best_fit(size_t size) { return min_p; } +static void *_first_fit_of_clear(size_t size) { + void *bp = list_p; + if (bp == NULL) return NULL; + do { + if (SIZE(bp) >= size) { + _place(bp, size); + return bp; + } + bp = (void *)GET_SUCC(bp); + } while (bp != list_p); + return NULL; +} + static void _place(void *ptr, size_t size) { size_t p_size = SIZE(ptr); - if (p_size - size >= FSIZE) { + if (p_size - size >= MIN_BLOCK) { SET(HEAD(ptr), PACK(size, 1)); SET(FOOT(ptr), PACK(size, 1)); // DSIZE adjust size_t adjust_size = p_size - size - DSIZE; - SET(HEAD(NEXT(ptr)), PACK(adjust_size, 0)); - SET(FOOT(NEXT(ptr)), PACK(adjust_size, 0)); - if (ptr == tail_p) tail_p = NEXT(ptr); + void *new_ptr = NEXT(ptr); + SET(HEAD(new_ptr), PACK(adjust_size, 0)); + SET(FOOT(new_ptr), PACK(adjust_size, 0)); + if (ptr == tail_p) tail_p = new_ptr; + // tweak list + _fix_list(ptr, new_ptr); + if (ptr == list_p) list_p = new_ptr; } else { SET(HEAD(ptr), PACK(p_size, 1)); SET(FOOT(ptr), PACK(p_size, 1)); + // tweak list + // remove + SET(POS_SUCC(GET_PRED(ptr)), GET_SUCC(ptr)); + SET(POS_PRED(GET_SUCC(ptr)), GET_PRED(ptr)); + if (ptr == list_p) { + if (GET_SUCC(ptr) == (word)ptr) list_p = NULL; + else list_p = (void *)GET_SUCC(ptr); + } + } +//#ifdef DEBUG +// printf("----place----\n"); +// _check(); +// printf("-------------\n"); +//#endif +} + +static void _fix_list(void *in, void *out) { + if (GET_SUCC(in) == (word)in) { + SET(POS_SUCC(out), (word)out); + SET(POS_PRED(out), (word)out); + } else { + SET(POS_SUCC(out), GET_SUCC(in)); + SET(POS_PRED(out), GET_PRED(in)); + SET(POS_SUCC(GET_PRED(in)), (word)out); + SET(POS_PRED(GET_SUCC(in)), (word)out); } } + +static void _check() { + int num1 = 0; + int num2 = 0; + int num3 = 0; + void *bp = front_p; + void *after_p = NEXT(tail_p); + void *blank_p = NULL; + while (bp != after_p) { + if (!ALLOC(bp)) { + if (blank_p == NULL) blank_p = bp; + num1++; + } + bp = NEXT(bp); + } + bp = blank_p; + do { + if (bp != NULL) { + bp = (void *)GET_SUCC(bp); + num2++; + } + } while (bp != blank_p); + + bp = list_p; + do { + if (bp != NULL) { + bp = (void *)GET_SUCC(bp); + num3++; + } + } while (bp != list_p); + + printf("expect: %d, actual: %d, list_p: %d\n", num1, num2, num3); +} diff --git a/labs/malloclab_lab/mm.o b/labs/malloclab_lab/mm.o index 44d0f11..362b601 100644 Binary files a/labs/malloclab_lab/mm.o and b/labs/malloclab_lab/mm.o differ diff --git a/labs/malloclab_lab/solve_note.md b/labs/malloclab_lab/solve_note.md index 9df1022..b10c2ad 100644 --- a/labs/malloclab_lab/solve_note.md +++ b/labs/malloclab_lab/solve_note.md @@ -13,7 +13,7 @@ 理解逻辑后,接下来将实现自己的版本 * 为方便理解,定义了两个类型别名 ```c - typedef unsigned int word; + typedef unsigned long word; typedef char byte; ``` * 不同于示例代码用序言块和尾块标记,本人仅用两个指针标记头尾,来提高内存利用率 @@ -31,7 +31,7 @@ * `#debug`对于 `segmentation fault` 使用 `gdb` 获取头尾块的 `size` 发现尾部异常值 `0xcdcdcd`,在代码中使用 `print` 跟踪 `trail_p` 变量,发现在`__coalesce_next`处没有及时更新 * `#bug1` 若记录的 `size` 是有效载荷的 `size`,合并和分割时应注意增减 `DSIZE` * `#bug2` 每次合并都需要判断 `tail_p` 是否改变,特别是 `__coalesce_next` 的情况 -#### 方法与得分 +#### 得分 * 隐式空闲链表,首次适配,立即合并 ```c Results for mm malloc: @@ -64,6 +64,45 @@ Perf index = 47 (util) + 13 (thru) = 60/100 #### 针对 `realloc` 的优化 v3 * 最终选择的是逐步的过程,因为从时间开销上来看,直接返回优于仅合并后部分优于合并前后部分,但同时合并前后部分与再分配一段内存的优劣不好比较 * 现在的问题是在仅合并后部分和重新分配之间要不要插一段合并前后部分的条件,两者分数相同,个人认为插入这个条件通用性更好 +* 最终的 `realloc` + ```c + /** + * implemented simply in terms of mm_malloc and mm_free + * compare adjust_size and total_size step by step + */ + void *mm_realloc(void *ptr, size_t size) { + if (ptr == NULL) return mm_malloc(size); + if (size == 0) return NULL; + + void *new_ptr; + size_t adjust_size = ALIGN(size); + size_t old_size = SIZE(ptr); + if (adjust_size <= old_size) { + // just return, for the memory lost is little + return ptr; + } + size_t next_size = (ptr != tail_p && !ALLOC(NEXT(ptr))) ? SIZE(NEXT(ptr)) + DSIZE : 0; + size_t total_size = old_size + next_size; + if (adjust_size <= total_size) { + __coalesce_next(ptr); + _place(ptr, adjust_size); // just cut + return ptr; + } + size_t prev_size = (ptr != front_p && !ALLOC(PREV(ptr))) ? SIZE(PREV(ptr)) + DSIZE : 0; + total_size += prev_size; + if (adjust_size <= total_size) { // coalesce prev or all + new_ptr = _coalesce(ptr); + memmove(new_ptr, ptr, old_size); + _place(new_ptr, adjust_size); + } else { + if ((new_ptr = mm_malloc(size)) == NULL) return NULL; + memmove(new_ptr, ptr, old_size); + mm_free(ptr); + } + return new_ptr; + } + ``` +* 然而此版不适合之后的显示链表 ~~pity because it's elegant~~ #### 得分 ```c Results for mm malloc: @@ -110,5 +149,43 @@ Total 80% 112372 0.160025 702 Perf index = 48 (util) + 40 (thru) = 88/100 ``` +## Version 2 +#### 显示 LIFO 空闲链表 +* 经过多次尝试,88 分基本上是隐式链表的极限了,为进一步提高吞吐率,我们维护一个双向链表将空闲块串起来,这里选择的则是 LIFO 方式 +* 首先由于 32/64 位的地址所需位数不同,为了保持 64 位干净,我们需要定义地址的位数为 `sizeof(unsigned long)`,同时还需定义最小的空闲块大小 +* 需要在合并、切割处着重维护显示链表,同时为了减少不必要的判断,本人将选择双向循环列表,并由 `list_p` 标记头部 + * 首先,每次调用 `_coalesce` 之前都会产生一个未入链表的空闲块,我们只需要修改 `__coalesce_next, __coalesce_all`,以及没有合并的情况,并将 `list_p` 指向新块 + * 初始化时记得将第一个空闲块的头尾指向自己 + * 注意插入链表时的顺序,应该先改前后指向自己再改自己指向前后 + * 其次,`_place` 时都需要维护链表,存在没有空闲链表的情况,这时让 `list_p` 为空 + * 对于 `realloc` 的优化,由于会调用 `_coalesce, _place`,可能会把原始信息破坏,或覆盖链表信息,需要修改为仅合并后部分空闲链表,并且合并部分需重写 + * 并入链表的过程~~几乎一样~~不一样,部分可以包装成函数 `_fix_list`实现替换的逻辑,还需要注意加入链表(`__coalesce_none`),移除链表(`_place`)的情况 + * 多写了一个 `_check` 函数用以查看遍历空闲链表获得的空闲块数和遍历整个链表获取的空闲块数是否相同,以此判断链表是否正确,并用 `debug` 宏标注,编译选项中加入 `-DDEBUG` 才会输出信息 +* 多亏 `_check` 的信息,我们将错误定位在 `__coalesce_none` 处,发现在空闲块存在而没有合并时应写插入的逻辑,而 `_fix_list` 写的是替换的逻辑 +* 在解决完一系列 `bug` 后,我们的链表总算是维护好了,但是分数也降到了 80 分,接下来让我们实现显示链表的适配版本 `_first_fit_of_clear` +#### 得分 +* 可以看到虽然内存利用率有略微下降,但吞吐率却有了一个数量级的提升 ~~那说明辛苦的 `debug` 没有白费~~ +* 然而不知道测试分数具体的计算方法,时间下降了一个数量级但是吞吐率分数没变... +* 可能原因也在于,少占用内存比少几毫秒时间价值更高吧 +```c +Results for mm malloc: +trace valid util ops secs Kops + 0 yes 90% 5694 0.000211 26960 + 1 yes 91% 5848 0.000128 45581 + 2 yes 95% 6648 0.000217 30636 + 3 yes 98% 5380 0.000135 39911 + 4 yes 100% 14400 0.000090159645 + 5 yes 90% 4800 0.000457 10503 + 6 yes 87% 4800 0.000453 10587 + 7 yes 55% 12000 0.003927 3056 + 8 yes 51% 24000 0.002370 10127 + 9 yes 41% 14401 0.000422 34126 +10 yes 86% 14401 0.000087166486 +Total 80% 112372 0.008497 13225 + +Perf index = 48 (util) + 40 (thru) = 88/100 +``` +## 最后 +* 所有用到的辅助函数均在开头声明并注释,被淘汰的函数用 `@deprecated` 标记 *** -2022.12.29 ~ 2022.12.30 \ No newline at end of file +2022.12.29 ~ 2022.12.31 \ No newline at end of file