2 коммитов

Автор SHA1 Сообщение Дата
  朱文韬 ab825e5935 mm malloc v2.1 - 90 1 год назад
  朱文韬 234964fd87 mm malloc v2 - 88 1 год назад
4 измененных файлов: 344 добавлений и 45 удалений
  1. Двоичные данные
      labs/malloclab_lab/mdriver
  2. +240
    -43
      labs/malloclab_lab/mm.c
  3. Двоичные данные
      labs/malloclab_lab/mm.o
  4. +104
    -2
      labs/malloclab_lab/solve_note.md

Двоичные данные
labs/malloclab_lab/mdriver Просмотреть файл


+ 240
- 43
labs/malloclab_lab/mm.c Просмотреть файл

@ -1,27 +1,11 @@
/*
* mm-naive.c - The fastest, least memory-efficient malloc package.
*
* In this naive approach, a block is allocated by simply incrementing
* the brk pointer. A block is pure payload. There are no headers or
* footers. Blocks are never coalesced or reused. Realloc is
* implemented directly using mm_malloc and mm_free.
*
* NOTE TO STUDENTS: Replace this header comment with your own header
* comment that gives a high level description of your solution.
* mm.c - Explicit free list (LIFO insertion), best fit, immediate coalescing malloc package.
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
#include <string.h>
#include "mm.h"
#include "memlib.h"
/*********************************************************
* NOTE TO STUDENTS: Before you do anything else, please
* provide your team information in the following struct.
********************************************************/
team_t team = {
/* Team name */
"team",
@ -35,7 +19,7 @@ team_t team = {
""
};
/* single word (4) or double word (8) alignment */
/* single size_t (4) or double size_t (8) alignment */
#define ALIGNMENT 8
/* rounds up to the nearest multiple of ALIGNMENT */
#define ALIGN(size) (((size) + (ALIGNMENT - 1)) & ~0x7)
@ -44,7 +28,9 @@ team_t team = {
#define WSIZE 4
#define DSIZE 8
#define FSIZE 16
#define CHUNK 1 << 10
#define ADDRESS (sizeof(size_t))
/* Minimum heap growth handed to _extend. Parenthesised so the shift keeps
 * its value inside a larger expression such as CHUNK + 1. */
#define CHUNK (1 << 12)
#define MIN_BLOCK (2 * ADDRESS + DSIZE)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
@ -62,14 +48,24 @@ team_t team = {
#define NEXT(bp) (FOOT(bp) + DSIZE)
#define PREV(bp) ((byte *)(bp) - PARSE(GET((bp) - DSIZE)) - DSIZE)
#define POS_PRED(bp) ((byte *)(bp))
#define POS_SUCC(bp) (((byte *)(bp) + ADDRESS))
#define GET_PRED(bp) (*(size_t *)POS_PRED(bp))
#define GET_SUCC(bp) (*(size_t *)POS_SUCC(bp))
typedef unsigned int word;
typedef char byte;
// mark the front and tail pos
void *front_p = NULL;
void *tail_p = NULL;
// used for next fit, updated by mm_init, mm_malloc, _coalesce
/**
* used for next fit, updated by mm_init, mm_malloc, _coalesce
* @deprecated useless for clear list
*/
void *fitted_p = NULL;
void *list_p = NULL;
// My func
/**
@ -89,6 +85,7 @@ static void *_coalesce(void *bp);
static void *__coalesce_prev(void *bp);
static void *__coalesce_next(void *bp);
static void *__coalesce_all(void *bp);
static void *__coalesce_none(void *bp);
/**
* traverse and find first fit, then place in
@ -100,6 +97,7 @@ static void *_first_fit(size_t size);
/**
* find next fit, then place in
* @deprecated I'll use clear list
* @param size align by 8, excluding head and foot
* @return
*/
@ -114,10 +112,38 @@ static void *_next_fit(size_t size);
static void *_next_best_fit(size_t size);
/**
* traverse the free blocks only and take the first fit, then place the request in it
* @deprecated for the memory loss
* @param size align by 8, excluding head and foot
* @return
*/
static void *_first_fit_of_clear(size_t size);
/**
* best fit over the explicit free list
* @param size align by 8, excluding head and foot
* @return
*/
static void *_best_fit_of_clear(size_t size);
/**
* mark the block allocated, splitting off the remainder when it is large enough
* @param size align by 8, excluding head and foot
*/
static void _place(void *ptr, size_t size);
/**
* replace `in` with `out` in the free list
* @param in the block currently in the list, which leaves it
* @param out the block that takes over its position
*/
static void _fix_list(void *in, void *out);
/**
* count the free blocks found by walking the heap and by walking the free list, then print the counts
* @deprecated
*/
static void _check();
// end
/**
@ -127,7 +153,7 @@ static void _place(void *ptr, size_t size);
int mm_init(void) {
if ((front_p = mem_sbrk(WSIZE)) == (void *) - 1) return -1; // blank
front_p += DSIZE; // first chunk
fitted_p = front_p; // init fitted_p
// fitted_p = front_p; // init fitted_p
if (!_extend(CHUNK)) return -1;
return 0;
}
@ -139,8 +165,8 @@ void *mm_malloc(size_t size) {
size_t adjust_size = ALIGN(size);
size_t extend_size;
void *bp;
if ((bp = _next_fit(adjust_size)) != NULL) {
fitted_p = bp;
if ((bp = _best_fit_of_clear(adjust_size)) != NULL) {
// fitted_p = bp;
return bp;
} else {
extend_size = adjust_size;
@ -150,7 +176,7 @@ void *mm_malloc(size_t size) {
bp = _extend(MAX(extend_size, CHUNK));
if (bp == NULL) return bp;
_place(bp, adjust_size);
fitted_p = bp;
// fitted_p = bp;
return bp;
}
}
@ -159,6 +185,11 @@ void *mm_malloc(size_t size) {
* free a block and coalesce immediately
*/
void mm_free(void *ptr) {
#ifdef DEBUG
printf("---free---\n");
_check();
printf("----------\n");
#endif
size_t size = SIZE(ptr);
SET(HEAD(ptr), PACK(size, 0));
SET(FOOT(ptr), PACK(size, 0));
@ -183,22 +214,44 @@ void *mm_realloc(void *ptr, size_t size) {
size_t next_size = (ptr != tail_p && !ALLOC(NEXT(ptr))) ? SIZE(NEXT(ptr)) + DSIZE : 0;
size_t total_size = old_size + next_size;
if (adjust_size <= total_size) {
__coalesce_next(ptr);
_place(ptr, adjust_size); // just cut
void *next = NEXT(ptr);
// remove
if (total_size - adjust_size >= MIN_BLOCK) {
SET(POS_SUCC(GET_PRED(next)), GET_SUCC(next));
SET(POS_PRED(GET_SUCC(next)), GET_PRED(next));
SET(HEAD(ptr), PACK(total_size, 1));
SET(FOOT(ptr), PACK(total_size, 1));
if (next == list_p) {
if (GET_SUCC(next) == (size_t) next) list_p = NULL;
else list_p = (void *) GET_SUCC(next);
}
if (next == tail_p) tail_p = ptr;
} else { // replace
void *pred = (void *)GET_PRED(next);
void *succ = (void *) GET_SUCC(next);
SET(HEAD(ptr), PACK(adjust_size, 1));
SET(FOOT(ptr), PACK(adjust_size, 1));
size_t new_size = total_size - adjust_size - DSIZE;
void *new = NEXT(ptr);
SET(HEAD(new), PACK(new_size, 0));
SET(FOOT(new), PACK(new_size, 0));
if (pred == next) {
SET(POS_PRED(new), (size_t)new);
SET(POS_SUCC(new), (size_t)new);
} else {
SET(POS_PRED(succ), (size_t)new);
SET(POS_SUCC(pred), (size_t)new);
}
if (list_p == next) list_p = new;
if (next == tail_p) tail_p = new;
}
return ptr;
}
size_t prev_size = (ptr != front_p && !ALLOC(PREV(ptr))) ? SIZE(PREV(ptr)) + DSIZE : 0;
total_size += prev_size;
if (adjust_size <= total_size) { // coalesce prev or all
new_ptr = _coalesce(ptr);
memmove(new_ptr, ptr, old_size);
_place(new_ptr, adjust_size);
} else {
if ((new_ptr = mm_malloc(size)) == NULL) return NULL;
memmove(new_ptr, ptr, old_size);
mm_free(ptr);
return new_ptr;
}
return new_ptr;
}
// my func
@ -209,6 +262,13 @@ static void *_extend(size_t size) {
SET(bp, PACK(size, 0));
bp += WSIZE;
SET(FOOT(bp), PACK(size, 0));
#ifdef DEBUG
if (tail_p) {
printf("----extend----\n");
_check();
printf("--------------\n");
}
#endif
// init mark point
tail_p = bp;
return _coalesce(bp);
@ -216,9 +276,9 @@ static void *_extend(size_t size) {
static void *_coalesce(void *bp) {
// one chunk
if (bp == front_p && bp == tail_p) return bp;
if (bp == front_p && bp == tail_p) return __coalesce_none(bp);
if (bp == front_p || ALLOC(PREV(bp))) {
if (bp == tail_p || ALLOC(NEXT(bp))) return bp;
if (bp == tail_p || ALLOC(NEXT(bp))) return __coalesce_none(bp);
return __coalesce_next(bp);
} else if (bp == tail_p || ALLOC(NEXT(bp))) {
return __coalesce_prev(bp);
@ -233,31 +293,74 @@ static void *__coalesce_prev(void *bp) {
SET(HEAD(prev), PACK(new_size, 0));
SET(FOOT(bp), PACK(new_size, 0));
if (bp == tail_p) tail_p = prev;
if (bp == fitted_p) fitted_p = prev;
// if (bp == fitted_p) fitted_p = prev;
list_p = prev;
return prev;
}
/**
 * Merge the free block bp with the block that follows it.
 * The following block's position in the free list is handed over to bp,
 * the combined size is written to bp's header and to the following block's
 * footer, and list_p is pointed at the merged block.
 * @param bp block being freed; NEXT(bp) is read as a member of the free list
 * @return bp
 */
static void *__coalesce_next(void *bp) {
void *next = NEXT(bp);
// bp takes over next's place in the free list
_fix_list(next, bp);
// merge: both payload sizes plus DSIZE for the tags that sat between them
size_t new_size = SIZE(next) + SIZE(bp) + DSIZE;
SET(HEAD(bp), PACK(new_size, 0));
SET(FOOT(next), PACK(new_size, 0));
if (next == tail_p) tail_p = bp; // the absorbed block was the tail, so bp is now
if (next == fitted_p) fitted_p = bp;
// if (next == fitted_p) fitted_p = bp;
list_p = bp;
#ifdef DEBUG
printf("----coalesce next----\n");
_check();
printf("---------------------\n");
#endif
return bp;
}
/**
 * Merge bp with both the preceding and the following block.
 * The following block is unlinked from the free list, the combined size is
 * written to the preceding block's header and the following block's footer,
 * and list_p is pointed at the preceding block.
 * @param bp block being freed
 * @return the preceding block, which now spans all three
 */
static void *__coalesce_all(void *bp) {
void *prev = PREV(bp);
void *next = NEXT(bp);
// unlink next; prev's own links are left untouched
// NOTE(review): presumably prev is already a member of the free list - confirm
SET(POS_SUCC(GET_PRED(next)), GET_SUCC(next));
SET(POS_PRED(GET_SUCC(next)), GET_PRED(next));
// merge: three payload sizes plus FSIZE for the tags between them
size_t new_size = SIZE(prev) + SIZE(bp) + SIZE(next) + FSIZE;
SET(HEAD(prev), PACK(new_size, 0));
SET(FOOT(next), PACK(new_size, 0));
if (next == tail_p) tail_p = prev;
if (next == fitted_p || bp == fitted_p) fitted_p = prev;
// if (next == fitted_p || bp == fitted_p) fitted_p = prev;
list_p = prev;
#ifdef DEBUG
printf("---coalesce all---\n");
_check();
printf("-------------------\n");
#endif
return prev;
}
/**
 * Free-list bookkeeping for a freed block that is not merged with either
 * neighbour: link bp into the circular list and make it the list head.
 * @param bp free block to add to the list
 * @return bp
 */
static void *__coalesce_none(void *bp) {
    void *head = list_p;
    size_t self = (size_t)bp;

    if (head != NULL) {
        // splice bp in directly behind the current head
        SET(POS_SUCC(bp), GET_SUCC(head));
        SET(POS_PRED(bp), (size_t)head);
        SET(POS_PRED(GET_SUCC(head)), self);
        SET(POS_SUCC(head), self);
    } else {
        // empty list: bp is its own predecessor and successor
        SET(POS_SUCC(bp), self);
        SET(POS_PRED(bp), self);
    }
    list_p = bp;

#ifdef DEBUG
    printf("---coalesce none---\n");
    _check();
    printf("-------------------\n");
#endif
    return bp;
}
static void *_first_fit(size_t size) {
void *bp = front_p;
void *after_p = NEXT(tail_p);
@ -319,18 +422,112 @@ static void *_next_best_fit(size_t size) {
return min_p;
}
/**
 * Walk the circular free list from list_p and allocate from the first block
 * whose size is at least the requested one.
 * @param size aligned request, excluding head and foot
 * @return the placed block, or NULL when the list is empty or nothing fits
 */
static void *_first_fit_of_clear(size_t size) {
    if (list_p == NULL) return NULL;

    void *cur = list_p;
    for (;;) {
        if (SIZE(cur) >= size) {
            _place(cur, size);
            return cur;
        }
        cur = (void *)GET_SUCC(cur);
        if (cur == list_p) break; // back at the head: every block was visited
    }
    return NULL;
}
/**
 * Walk the circular free list from list_p and allocate from the smallest
 * block whose size is at least the requested one. Among equally small
 * candidates the one met first wins.
 * @param size aligned request, excluding head and foot
 * @return the placed block, or NULL when the list is empty or nothing fits
 */
static void *_best_fit_of_clear(size_t size) {
    void *bp = list_p;
    if (bp == NULL) return NULL;

    size_t best = 0;
    void *best_p = NULL;
    do {
        size_t cur = SIZE(bp);
        if (cur >= size && (best_p == NULL || cur < best)) {
            best = cur;
            best_p = bp;
            // An exact fit cannot be beaten, and it is the block the full
            // scan would have chosen anyway, so stop walking the list here.
            if (cur == size) break;
        }
        bp = (void *)GET_SUCC(bp);
    } while (bp != list_p);

    if (best_p == NULL) return NULL;
    _place(best_p, size);
    return best_p;
}
/**
 * Mark the free block ptr as allocated for a request of the given size.
 * When enough room is left over, the block is cut: ptr keeps `size` bytes and
 * the remainder becomes a new free block that takes ptr's place in the free
 * list. Otherwise the whole block is handed out and unlinked from the list.
 * NOTE(review): this listing appears to carry lines from the previous
 * revision next to their replacements (two split tests, the remainder set up
 * twice) - confirm against the checked-in file.
 * @param ptr free block to allocate from
 * @param size aligned request, excluding head and foot
 */
static void _place(void *ptr, size_t size) {
size_t p_size = SIZE(ptr);
if (p_size - size >= FSIZE) {
if (p_size - size >= MIN_BLOCK) {
// split: shrink ptr to the requested size
SET(HEAD(ptr), PACK(size, 1));
SET(FOOT(ptr), PACK(size, 1));
// the remainder loses DSIZE to its own head and foot
size_t adjust_size = p_size - size - DSIZE;
SET(HEAD(NEXT(ptr)), PACK(adjust_size, 0));
SET(FOOT(NEXT(ptr)), PACK(adjust_size, 0));
if (ptr == tail_p) tail_p = NEXT(ptr);
void *new_ptr = NEXT(ptr);
SET(HEAD(new_ptr), PACK(adjust_size, 0));
SET(FOOT(new_ptr), PACK(adjust_size, 0));
if (ptr == tail_p) tail_p = new_ptr;
// the remainder replaces ptr in the free list
_fix_list(ptr, new_ptr);
if (ptr == list_p) list_p = new_ptr;
} else {
// no split: hand out the whole block
SET(HEAD(ptr), PACK(p_size, 1));
SET(FOOT(ptr), PACK(p_size, 1));
// unlink ptr from the free list
// by joining its predecessor and successor
SET(POS_SUCC(GET_PRED(ptr)), GET_SUCC(ptr));
SET(POS_PRED(GET_SUCC(ptr)), GET_PRED(ptr));
if (ptr == list_p) {
// ptr was the head: the list is now empty if ptr pointed at itself
if (GET_SUCC(ptr) == (size_t)ptr) list_p = NULL;
else list_p = (void *)GET_SUCC(ptr);
}
}
//#ifdef DEBUG
// printf("----place----\n");
// _check();
// printf("-------------\n");
//#endif
}
/**
 * Substitute `out` for `in` in the circular free list.
 * @param in  block currently linked into the list
 * @param out block that takes over in's position
 */
static void _fix_list(void *in, void *out) {
    size_t self = (size_t)out;

    if (GET_SUCC(in) != (size_t)in) {
        // in has neighbours: copy its links, then repoint both neighbours
        SET(POS_SUCC(out), GET_SUCC(in));
        SET(POS_PRED(out), GET_PRED(in));
        SET(POS_SUCC(GET_PRED(in)), self);
        SET(POS_PRED(GET_SUCC(in)), self);
        return;
    }

    // in was the only member, so out becomes a one-element ring
    SET(POS_SUCC(out), self);
    SET(POS_PRED(out), self);
}
/**
 * Debug aid: print three free-block counts that should agree.
 *  - expect: unallocated blocks seen while walking the heap from front_p
 *            to the block after tail_p
 *  - actual: length of the ring reached from the first unallocated block
 *  - list_p: length of the ring reached from list_p
 */
static void _check() {
    int heap_free = 0;
    int ring_from_first = 0;
    int ring_from_head = 0;
    void *first_free = NULL;
    void *end = NEXT(tail_p);

    for (void *cur = front_p; cur != end; cur = NEXT(cur)) {
        if (ALLOC(cur)) continue;
        if (first_free == NULL) first_free = cur;
        heap_free++;
    }

    void *node = first_free;
    do {
        if (node == NULL) continue; // falls through to the loop condition
        node = (void *)GET_SUCC(node);
        ring_from_first++;
    } while (node != first_free);

    node = list_p;
    do {
        if (node == NULL) continue; // falls through to the loop condition
        node = (void *)GET_SUCC(node);
        ring_from_head++;
    } while (node != list_p);

    printf("expect: %d, actual: %d, list_p: %d\n", heap_free, ring_from_first, ring_from_head);
}

Двоичные данные
labs/malloclab_lab/mm.o Просмотреть файл


+ 104
- 2
labs/malloclab_lab/solve_note.md Просмотреть файл

@ -31,7 +31,7 @@
* `#debug`对于 `segmentation fault` 使用 `gdb` 获取头尾块的 `size` 发现尾部异常值 `0xcdcdcd`,在代码中使用 `print` 跟踪 `tail_p` 变量,发现在`__coalesce_next`处没有及时更新
* `#bug1` 若记录的 `size` 是有效载荷的 `size`,合并和分割时应注意增减 `DSIZE`
* `#bug2` 每次合并都需要判断 `tail_p` 是否改变,特别是 `__coalesce_next` 的情况
#### 方法与得分
#### 得分
* 隐式空闲链表,首次适配,立即合并
```c
Results for mm malloc:
@ -64,6 +64,45 @@ Perf index = 47 (util) + 13 (thru) = 60/100
#### 针对 `realloc` 的优化 v3
* 最终选择的是逐步的过程,因为从时间开销上来看,直接返回优于仅合并后部分优于合并前后部分,但同时合并前后部分与再分配一段内存的优劣不好比较
* 现在的问题是在仅合并后部分和重新分配之间要不要插一段合并前后部分的条件,两者分数相同,个人认为插入这个条件通用性更好
* 最终的 `realloc`
```c
/**
 * Resize a block, comparing adjust_size with the space available step by
 * step: keep the block, absorb the free successor, absorb the free
 * neighbours, and only then fall back to mm_malloc + copy + mm_free.
 * @param ptr  block to resize; NULL behaves like mm_malloc(size)
 * @param size requested payload size; 0 frees ptr and returns NULL
 * @return the block holding the data, or NULL when size is 0 or no memory
 *         could be obtained
 */
void *mm_realloc(void *ptr, size_t size) {
    if (ptr == NULL) return mm_malloc(size);
    if (size == 0) {
        // a zero-size request releases the block instead of leaking it
        mm_free(ptr);
        return NULL;
    }
    void *new_ptr;
    size_t adjust_size = ALIGN(size);
    size_t old_size = SIZE(ptr);
    if (adjust_size <= old_size) {
        // just return, for the memory lost is little
        return ptr;
    }
    // space gained by absorbing a free successor, including its head and foot
    size_t next_size = (ptr != tail_p && !ALLOC(NEXT(ptr))) ? SIZE(NEXT(ptr)) + DSIZE : 0;
    size_t total_size = old_size + next_size;
    if (adjust_size <= total_size) {
        __coalesce_next(ptr);
        _place(ptr, adjust_size); // just cut
        return ptr;
    }
    // space gained by additionally absorbing a free predecessor
    size_t prev_size = (ptr != front_p && !ALLOC(PREV(ptr))) ? SIZE(PREV(ptr)) + DSIZE : 0;
    total_size += prev_size;
    if (adjust_size <= total_size) { // coalesce prev or all
        new_ptr = _coalesce(ptr);
        memmove(new_ptr, ptr, old_size); // regions may overlap after merging backwards
        _place(new_ptr, adjust_size);
    } else {
        if ((new_ptr = mm_malloc(size)) == NULL) return NULL;
        memmove(new_ptr, ptr, old_size);
        mm_free(ptr);
    }
    return new_ptr;
}
```
* 然而此版不适合之后的显式链表 ~~pity because it's elegant~~
#### 得分
```c
Results for mm malloc:
@ -110,5 +149,68 @@ Total 80% 112372 0.160025 702
Perf index = 48 (util) + 40 (thru) = 88/100
```
## Version 2
#### 显式 LIFO 空闲链表
* 经过多次尝试,88 分基本上是隐式链表的极限了,为进一步提高吞吐率,我们维护一个双向链表将空闲块串起来,这里选择的则是 LIFO 方式
* 首先由于 32/64 位的地址所需位数不同,为了保持 64 位干净,我们需要定义地址的位数为 `sizeof(size_t)`,同时还需定义最小的空闲块大小
* 需要在合并、切割处着重维护显式链表,同时为了减少不必要的判断,本人将选择双向循环链表,并由 `list_p` 标记头部
* 首先,每次调用 `_coalesce` 之前都会产生一个未入链表的空闲块,我们只需要修改 `__coalesce_next, __coalesce_all`,以及没有合并的情况,并将 `list_p` 指向新块
* 初始化时记得将第一个空闲块的头尾指向自己
* 注意插入链表时的顺序,应该先改前后指向自己再改自己指向前后
* 其次,`_place` 时都需要维护链表,存在没有空闲链表的情况,这时让 `list_p` 为空
* 对于 `realloc` 的优化,由于会调用 `_coalesce, _place`,可能会把原始信息破坏,或覆盖链表信息,需要修改为仅合并后部分空闲链表,并且合并部分需重写
* 并入链表的过程~~几乎一样~~不一样,部分可以包装成函数 `_fix_list`实现替换的逻辑,还需要注意加入链表(`__coalesce_none`),移除链表(`_place`)的情况
* 多写了一个 `_check` 函数用以查看遍历空闲链表获得的空闲块数和遍历整个链表获取的空闲块数是否相同,以此判断链表是否正确,并用 `DEBUG` 宏标注,编译选项中加入 `-DDEBUG` 才会输出信息
* 多亏 `_check` 的信息,我们将错误定位在 `__coalesce_none` 处,发现在空闲块存在而没有合并时应写插入的逻辑,而 `_fix_list` 写的是替换的逻辑
* 在解决完一系列 `bug` 后,我们的链表总算是维护好了,但是分数也降到了 80 分,接下来让我们实现显式链表的适配版本 `_first_fit_of_clear`
#### 得分
* 可以看到虽然内存利用率有略微下降,但吞吐率却有了一个数量级的提升 ~~那说明辛苦的 `debug` 没有白费~~
* 然而不知道测试分数具体的计算方法,时间下降了一个数量级但是吞吐率分数没变...
* 可能原因也在于,少占用内存比少几毫秒时间价值更高吧
```c
Results for mm malloc:
trace valid util ops secs Kops
0 yes 90% 5694 0.000211 26960
1 yes 91% 5848 0.000128 45581
2 yes 95% 6648 0.000217 30636
3 yes 98% 5380 0.000135 39911
4 yes 100% 14400 0.000090 159645
5 yes 90% 4800 0.000457 10503
6 yes 87% 4800 0.000453 10587
7 yes 55% 12000 0.003927 3056
8 yes 51% 24000 0.002370 10127
9 yes 41% 14401 0.000422 34126
10 yes 86% 14401 0.000087 166486
Total 80% 112372 0.008497 13225
Perf index = 48 (util) + 40 (thru) = 88/100
```
## Version 2.1
#### 最佳适配
* 可能也是为了鼓励提升内存利用率,而我们的吞吐率已经极大提高了,所以可以用最佳适配来换取内存利用率
* `realloc` 也添加了一些情况的判断
#### 得分
* 我们得到了最终的得分,可以看到牺牲了很多吞吐率才换来了内存利用率百分之三的提升
```c
Results for mm malloc:
trace valid util ops secs Kops
0 yes 99% 5694 0.000145 39405
1 yes 100% 5848 0.000166 35314
2 yes 100% 6648 0.000207 32131
3 yes 100% 5380 0.000140 38374
4 yes 100% 14400 0.000089 162528
5 yes 95% 4800 0.002331 2059
6 yes 95% 4800 0.002358 2035
7 yes 55% 12000 0.027697 433
8 yes 51% 24000 0.063180 380
9 yes 40% 14401 0.000257 55970
10 yes 73% 14401 0.000077 187513
Total 83% 112372 0.096647 1163
Perf index = 50 (util) + 40 (thru) = 90/100
```
## 最后
* 最终的版本是显式空闲链表(LIFO),最佳适配,立即合并
* 所有用到的辅助函数均在开头声明并注释,被淘汰的函数用 `@deprecated` 标记
***
2022.12.29 ~ 2022.12.30
2022.12.29 ~ 2022.12.31

Загрузка…
Отмена
Сохранить