Procházet zdrojové kódy

mm malloc v2.1 - 90

master
朱文韬 před 1 rokem
rodič
revize
ab825e5935
4 změnil soubory, kde provedl 100 přidání a 28 odebrání
  1. binární
      labs/malloclab_lab/mdriver
  2. +73
    -26
      labs/malloclab_lab/mm.c
  3. binární
      labs/malloclab_lab/mm.o
  4. +27
    -2
      labs/malloclab_lab/solve_note.md

binární
labs/malloclab_lab/mdriver Zobrazit soubor


+ 73
- 26
labs/malloclab_lab/mm.c Zobrazit soubor

@ -1,5 +1,5 @@
/*
* mm-naive.c - The clear list, first fit malloc package.
* mm.c - Explicit ("clear") free list (LIFO), best fit, immediate coalescing malloc package.
*/
#include <string.h>
@ -19,7 +19,7 @@ team_t team = {
""
};
/* single word (4) or double word (8) alignment */
/* single size_t (4) or double size_t (8) alignment */
#define ALIGNMENT 8
/* rounds up to the nearest multiple of ALIGNMENT (ALIGNMENT must be a power of two);
 * the mask is derived from ALIGNMENT so the two definitions cannot drift apart */
#define ALIGN(size) (((size) + (ALIGNMENT - 1)) & ~(ALIGNMENT - 1))
@ -28,7 +28,7 @@ team_t team = {
#define WSIZE 4
#define DSIZE 8
#define FSIZE 16
#define ADDRESS (sizeof(unsigned long))
#define ADDRESS (sizeof(size_t))
/* 4096; parenthesized so the shift stays one value when CHUNK is used inside a larger expression */
#define CHUNK (1 << 12)
#define MIN_BLOCK (2 * ADDRESS + DSIZE)
@ -50,10 +50,10 @@ team_t team = {
#define POS_PRED(bp) ((byte *)(bp))
#define POS_SUCC(bp) (((byte *)(bp) + ADDRESS))
#define GET_PRED(bp) (*(word *)POS_PRED(bp))
#define GET_SUCC(bp) (*(word *)POS_SUCC(bp))
#define GET_PRED(bp) (*(size_t *)POS_PRED(bp))
#define GET_SUCC(bp) (*(size_t *)POS_SUCC(bp))
typedef unsigned long word;
typedef unsigned int word;
typedef char byte;
// mark the front and tail pos
@ -113,12 +113,20 @@ static void *_next_best_fit(size_t size);
/**
* First-fit search: walks only the free ("clear") blocks, takes the first one
* that fits, and places the request in it.
* @deprecated mm_malloc calls _best_fit_of_clear instead, because first fit wastes more memory
* @param size requested size, aligned to 8 and excluding head and foot
* @return NULL when no free block fits; otherwise presumably the placed block
*         (NOTE(review): definition is not fully visible here — confirm)
*/
static void *_first_fit_of_clear(size_t size);
/**
* Best-fit search over the free ("clear") list: picks the smallest free block
* whose SIZE is at least `size`, hands it to _place, and returns it.
* @param size requested size, aligned to 8 and excluding head and foot
* @return the chosen block, or NULL when the list is empty or no block is large enough
*/
static void *_best_fit_of_clear(size_t size);
/**
* allocate the block and cut sometimes
* @param size align by 8, excluding head and foot
*/
@ -157,7 +165,7 @@ void *mm_malloc(size_t size) {
size_t adjust_size = ALIGN(size);
size_t extend_size;
void *bp;
if ((bp = _first_fit_of_clear(adjust_size)) != NULL) {
if ((bp = _best_fit_of_clear(adjust_size)) != NULL) {
// fitted_p = bp;
return bp;
} else {
@ -208,15 +216,35 @@ void *mm_realloc(void *ptr, size_t size) {
if (adjust_size <= total_size) {
void *next = NEXT(ptr);
// remove
SET(POS_SUCC(GET_PRED(next)), GET_SUCC(next));
SET(POS_PRED(GET_SUCC(next)), GET_PRED(next));
if (next == list_p) {
if (GET_SUCC(next) == (word)next) list_p = NULL;
else list_p = (void *)GET_SUCC(next);
if (total_size - adjust_size >= MIN_BLOCK) {
SET(POS_SUCC(GET_PRED(next)), GET_SUCC(next));
SET(POS_PRED(GET_SUCC(next)), GET_PRED(next));
SET(HEAD(ptr), PACK(total_size, 1));
SET(FOOT(ptr), PACK(total_size, 1));
if (next == list_p) {
if (GET_SUCC(next) == (size_t) next) list_p = NULL;
else list_p = (void *) GET_SUCC(next);
}
if (next == tail_p) tail_p = ptr;
} else { // replace
void *pred = (void *)GET_PRED(next);
void *succ = (void *) GET_SUCC(next);
SET(HEAD(ptr), PACK(adjust_size, 1));
SET(FOOT(ptr), PACK(adjust_size, 1));
size_t new_size = total_size - adjust_size - DSIZE;
void *new = NEXT(ptr);
SET(HEAD(new), PACK(new_size, 0));
SET(FOOT(new), PACK(new_size, 0));
if (pred == next) {
SET(POS_PRED(new), (size_t)new);
SET(POS_SUCC(new), (size_t)new);
} else {
SET(POS_PRED(succ), (size_t)new);
SET(POS_SUCC(pred), (size_t)new);
}
if (list_p == next) list_p = new;
if (next == tail_p) tail_p = new;
}
SET(HEAD(ptr), PACK(total_size, 1));
SET(FOOT(ptr), PACK(total_size, 1));
if (next == tail_p) tail_p = ptr;
return ptr;
} else {
if ((new_ptr = mm_malloc(size)) == NULL) return NULL;
@ -315,14 +343,14 @@ static void *__coalesce_none(void *bp) {
// tweak list
if (list_p == NULL) {
list_p = bp;
SET(POS_SUCC(list_p), (word)list_p);
SET(POS_PRED(list_p), (word)list_p);
SET(POS_SUCC(list_p), (size_t)list_p);
SET(POS_PRED(list_p), (size_t)list_p);
} else {
// add to list
SET(POS_SUCC(bp), GET_SUCC(list_p));
SET(POS_PRED(bp), (word)list_p);
SET(POS_PRED(GET_SUCC(list_p)), (word)bp);
SET(POS_SUCC(list_p), (word)bp);
SET(POS_PRED(bp), (size_t)list_p);
SET(POS_PRED(GET_SUCC(list_p)), (size_t)bp);
SET(POS_SUCC(list_p), (size_t)bp);
list_p = bp;
}
#ifdef DEBUG
@ -407,6 +435,25 @@ static void *_first_fit_of_clear(size_t size) {
return NULL;
}
/**
 * Best-fit search over the circular free list headed by list_p.
 *
 * Walks the list once, starting at list_p, and remembers the first block whose
 * SIZE is the smallest value still >= size. An exact fit cannot be beaten, so
 * the walk stops as soon as one is seen; this selects the same block a full
 * scan would, because only a strictly smaller block replaces the candidate.
 * The winner is handed to _place() before it is returned.
 *
 * @param size requested size, aligned to 8 and excluding head and foot
 * @return the chosen block, or NULL when the list is empty or no block is
 *         large enough
 */
static void *_best_fit_of_clear(size_t size) {
    if (list_p == NULL) return NULL;

    void *best = NULL;
    size_t best_size = 0;
    void *bp = list_p;
    do {
        size_t cur = SIZE(bp); /* read the header once per node */
        if (cur >= size && (best == NULL || cur < best_size)) {
            best = bp;
            best_size = cur;
            if (cur == size) break; /* exact fit: nothing later can be better */
        }
        bp = (void *)GET_SUCC(bp);
    } while (bp != list_p);

    if (best == NULL) return NULL;
    _place(best, size);
    return best;
}
static void _place(void *ptr, size_t size) {
size_t p_size = SIZE(ptr);
if (p_size - size >= MIN_BLOCK) {
@ -429,7 +476,7 @@ static void _place(void *ptr, size_t size) {
SET(POS_SUCC(GET_PRED(ptr)), GET_SUCC(ptr));
SET(POS_PRED(GET_SUCC(ptr)), GET_PRED(ptr));
if (ptr == list_p) {
if (GET_SUCC(ptr) == (word)ptr) list_p = NULL;
if (GET_SUCC(ptr) == (size_t)ptr) list_p = NULL;
else list_p = (void *)GET_SUCC(ptr);
}
}
@ -441,14 +488,14 @@ static void _place(void *ptr, size_t size) {
}
/*
 * Puts `out` into the circular doubly linked free list in the position held by `in`.
 * If `in` is the only node (its successor is itself), `out` becomes a self-linked
 * single-node list; otherwise `out` takes over in's predecessor and successor, and
 * both neighbours are repointed at `out`. list_p is not modified here.
 */
static void _fix_list(void *in, void *out) {
if (GET_SUCC(in) == (word)in) {
SET(POS_SUCC(out), (word)out);
SET(POS_PRED(out), (word)out);
if (GET_SUCC(in) == (size_t)in) {
SET(POS_SUCC(out), (size_t)out);
SET(POS_PRED(out), (size_t)out);
} else {
SET(POS_SUCC(out), GET_SUCC(in));
SET(POS_PRED(out), GET_PRED(in));
SET(POS_SUCC(GET_PRED(in)), (word)out);
SET(POS_PRED(GET_SUCC(in)), (word)out);
SET(POS_SUCC(GET_PRED(in)), (size_t)out);
SET(POS_PRED(GET_SUCC(in)), (size_t)out);
}
}

binární
labs/malloclab_lab/mm.o Zobrazit soubor


+ 27
- 2
labs/malloclab_lab/solve_note.md Zobrazit soubor

@ -13,7 +13,7 @@
理解逻辑后,接下来将实现自己的版本
* 为方便理解,定义了两个类型别名
```c
typedef unsigned long word;
typedef unsigned int word;
typedef char byte;
```
* 不同于示例代码用序言块和尾块标记,本人仅用两个指针标记头尾,来提高内存利用率
@ -152,7 +152,7 @@ Perf index = 48 (util) + 40 (thru) = 88/100
## Version 2
#### 显式 LIFO 空闲链表
* 经过多次尝试,88 分基本上是隐式链表的极限了,为进一步提高吞吐率,我们维护一个双向链表将空闲块串起来,这里选择的则是 LIFO 方式
* 首先由于 32/64 位的地址所需位数不同,为了保持 64 位干净,我们需要定义地址的位数为 `sizeof(unsigned long)`,同时还需定义最小的空闲块大小
* 首先由于 32/64 位的地址所需位数不同,为了保持 64 位干净,我们需要定义地址的位数为 `sizeof(size_t)`,同时还需定义最小的空闲块大小
* 需要在合并、切割处着重维护显式链表,同时为了减少不必要的判断,本人将选择双向循环链表,并由 `list_p` 标记头部
* 首先,每次调用 `_coalesce` 之前都会产生一个未入链表的空闲块,我们只需要修改 `__coalesce_next, __coalesce_all`,以及没有合并的情况,并将 `list_p` 指向新块
* 初始化时记得将第一个空闲块的头尾指向自己
@ -185,7 +185,32 @@ Total 80% 112372 0.008497 13225
Perf index = 48 (util) + 40 (thru) = 88/100
```
## Version 2.1
#### 最佳适配
* 可能也是为了鼓励提升内存利用率,而我们的吞吐率已经极大提高了,所以可以用最佳适配来换取内存利用率
* `realloc` 也添加了一些情况的判断
#### 得分
* 我们得到了最终的得分,可以看到牺牲了很多吞吐率才换来了内存利用率百分之三的提升
```c
Results for mm malloc:
trace valid util ops secs Kops
0 yes 99% 5694 0.000145 39405
1 yes 100% 5848 0.000166 35314
2 yes 100% 6648 0.000207 32131
3 yes 100% 5380 0.000140 38374
4 yes 100% 14400 0.000089 162528
5 yes 95% 4800 0.002331 2059
6 yes 95% 4800 0.002358 2035
7 yes 55% 12000 0.027697 433
8 yes 51% 24000 0.063180 380
9 yes 40% 14401 0.000257 55970
10 yes 73% 14401 0.000077 187513
Total 83% 112372 0.096647 1163
Perf index = 50 (util) + 40 (thru) = 90/100
```
## 最后
* 最终的版本是显式空闲链表(LIFO),最佳适配,立即合并
* 所有用到的辅助函数均在开头声明并注释,被淘汰的函数用 `@deprecated` 标记
***
2022.12.29 ~ 2022.12.31

Načítá se…
Zrušit
Uložit