/* Lab code for the "Operating Systems" course (ucore). */
#include <defs.h>
#include <list.h>
#include <memlayout.h>
#include <assert.h>
#include <kmalloc.h>
#include <sync.h>
#include <pmm.h>
#include <stdio.h>
#include <rb_tree.h>
/* The slab allocator used in ucore is based on an algorithm first introduced by
   Jeff Bonwick for the SunOS operating system. The paper can be downloaded from
   http://citeseer.ist.psu.edu/bonwick94slab.html
   An implementation of the Slab Allocator as described in outline in:
     UNIX Internals: The New Frontiers by Uresh Vahalia
     Pub: Prentice Hall   ISBN 0-13-101908-2

   Within a kernel, a considerable amount of memory is allocated for a finite set
   of objects such as file descriptors and other common structures. Jeff found that
   the amount of time required to initialize a regular object in the kernel exceeded
   the amount of time required to allocate and deallocate it. His conclusion was
   that instead of freeing the memory back to a global pool, he would have the memory
   remain initialized for its intended purpose.

   In our simple slab implementation, the high-level organization of the slab
   structures is simplified. At the highest level is an array slab_cache[SLAB_CACHE_NUM],
   and each array element is a slab_cache which has slab chains. Each slab_cache has
   two lists: one list chains the fully allocated slabs, and the other chains the
   not-full (possibly empty) slabs. Each slab consists of a fixed number (2^n) of
   pages. Within a slab there are many objects of the same fixed size (32B ~ 128KB).

   +----------------------------------+
   | slab_cache[0] for 0~32B obj      |
   +----------------------------------+
   | slab_cache[1] for 33B~64B obj    |-->lists for slabs
   +----------------------------------+            |
   | slab_cache[2] for 65B~128B obj   |            |
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~            |
   +----------------------------------+            |
   | slab_cache[12]for 64KB~128KB obj |            |
   +----------------------------------+            |
                                                   |
     slabs_full/slabs_notfull  +-------------------+
    -<-----------<----------<-+
    |           |             |
  slab1       slab2        slab3...
    |
    |-------|-------|
  pages1  pages2  pages3...
    |
    |
    |
  slab_t+n*bufctl_t+obj1-obj2-obj3...objn   (the size of obj is small)
    |
   OR
    |
  obj1-obj2-obj3...objn  WITH  slab_t+n*bufctl_t in another slab (the size of obj is BIG)

   The important functions are:
     kmem_cache_grow(kmem_cache_t *cachep)
     kmem_slab_destroy(kmem_cache_t *cachep, slab_t *slabp)
     kmalloc(size_t size): used by outside functions that need to dynamically allocate memory
     kfree(void *objp): used by outside functions that need to dynamically release memory
*/
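
/*
 * Illustrative usage sketch (not part of the original ucore source): how kernel
 * code is expected to call the two public entry points listed above. The struct
 * name example_node is hypothetical and only makes the call shapes concrete.
 *
 *     struct example_node {
 *         list_entry_t link;
 *         int value;
 *     };
 *
 *     struct example_node *n = kmalloc(sizeof(struct example_node));
 *     if (n != NULL) {            // kmalloc returns NULL if no slab can be grown
 *         n->value = 42;
 *         // ... use n ...
 *         kfree(n);               // the owning cache is recovered from n's page
 *     }
 */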
#define BUFCTL_END      0xFFFFFFFFL // the signature of the last bufctl
#define SLAB_LIMIT      0xFFFFFFFEL // the maximum number of objs in a slab

typedef size_t kmem_bufctl_t;       // the index of an obj in a slab

typedef struct slab_s {
    list_entry_t slab_link;         // the list entry linking this slab into a kmem_cache list
    void *s_mem;                    // the kernel virtual address of the first obj in the slab
    size_t inuse;                   // the number of allocated objs
    size_t offset;                  // the first obj's offset in the slab
    kmem_bufctl_t free;             // the index of the first free obj in the slab
} slab_t;

// get the slab address according to the link element (see list.h)
#define le2slab(le, member)                 \
    to_struct((le), slab_t, member)

typedef struct kmem_cache_s kmem_cache_t;

struct kmem_cache_s {
    list_entry_t slabs_full;        // list of fully allocated slabs
    list_entry_t slabs_notfull;     // list of not-fully allocated slabs

    size_t objsize;                 // the fixed size of each obj
    size_t num;                     // number of objs per slab
    size_t offset;                  // the first obj's offset in the slab
    bool off_slab;                  // whether the slab control area lives outside the slab

    /* order of pages per slab (2^n) */
    size_t page_order;

    kmem_cache_t *slab_cachep;      // the cache that holds the off-slab control areas
};

#define MIN_SIZE_ORDER          5   // 32
#define MAX_SIZE_ORDER          17  // 128k
#define SLAB_CACHE_NUM          (MAX_SIZE_ORDER - MIN_SIZE_ORDER + 1)

static kmem_cache_t slab_cache[SLAB_CACHE_NUM];
static void init_kmem_cache(kmem_cache_t *cachep, size_t objsize, size_t align);
static void check_slab(void);

//slab_init - call the init_kmem_cache function to reset the slab_cache array
static void
slab_init(void) {
    size_t i;
    //the alignment for objs in a slab; a power of 2 is better for performance
    size_t align = 16;
    for (i = 0; i < SLAB_CACHE_NUM; i ++) {
        init_kmem_cache(slab_cache + i, 1 << (i + MIN_SIZE_ORDER), align);
    }
    check_slab();
}

inline void
kmalloc_init(void) {
    slab_init();
    cprintf("kmalloc_init() succeeded!\n");
}
//slab_allocated - sum up the total size of allocated objs
static size_t
slab_allocated(void) {
    size_t total = 0;
    int i;
    bool intr_flag;
    local_intr_save(intr_flag);
    {
        for (i = 0; i < SLAB_CACHE_NUM; i ++) {
            kmem_cache_t *cachep = slab_cache + i;
            list_entry_t *list, *le;
            list = le = &(cachep->slabs_full);
            while ((le = list_next(le)) != list) {
                total += cachep->num * cachep->objsize;
            }
            list = le = &(cachep->slabs_notfull);
            while ((le = list_next(le)) != list) {
                slab_t *slabp = le2slab(le, slab_link);
                total += slabp->inuse * cachep->objsize;
            }
        }
    }
    local_intr_restore(intr_flag);
    return total;
}

inline size_t
kallocated(void) {
    return slab_allocated();
}
// slab_mgmt_size - get the size of the slab control area (slab_t + num * kmem_bufctl_t)
static size_t
slab_mgmt_size(size_t num, size_t align) {
    return ROUNDUP(sizeof(slab_t) + num * sizeof(kmem_bufctl_t), align);
}

// cache_estimate - estimate the number of objs in a slab
static void
cache_estimate(size_t order, size_t objsize, size_t align, bool off_slab, size_t *remainder, size_t *num) {
    size_t nr_objs, mgmt_size;
    size_t slab_size = (PGSIZE << order);

    if (off_slab) {
        mgmt_size = 0;
        nr_objs = slab_size / objsize;
        if (nr_objs > SLAB_LIMIT) {
            nr_objs = SLAB_LIMIT;
        }
    }
    else {
        nr_objs = (slab_size - sizeof(slab_t)) / (objsize + sizeof(kmem_bufctl_t));
        while (slab_mgmt_size(nr_objs, align) + nr_objs * objsize > slab_size) {
            nr_objs --;
        }
        if (nr_objs > SLAB_LIMIT) {
            nr_objs = SLAB_LIMIT;
        }
        mgmt_size = slab_mgmt_size(nr_objs, align);
    }
    *num = nr_objs;
    *remainder = slab_size - nr_objs * objsize - mgmt_size;
}
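
/*
 * Worked example (added for illustration; assumes 32-bit ucore, i.e. 4-byte
 * pointers, sizeof(size_t) == 4, PGSIZE == 4096, align == 16): for the 32-byte
 * class with on-slab management and order == 0,
 *
 *     slab_size      = 4096
 *     sizeof(slab_t) = 24, sizeof(kmem_bufctl_t) = 4
 *     nr_objs        = (4096 - 24) / (32 + 4) = 113
 *     mgmt_size      = ROUNDUP(24 + 113 * 4, 16) = 480
 *     480 + 113 * 32 = 4096 <= 4096, so nr_objs stays 113
 *     *num = 113, *remainder = 4096 - 113 * 32 - 480 = 0
 *
 * The exact numbers shift with the architecture's type sizes; only the shape of
 * the computation matters here.
 */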
// calculate_slab_order - estimate the size (4K~4M) of a slab
// parameters:
//   cachep:    the slab_cache
//   objsize:   the size of an obj
//   align:     the alignment for objs
//   off_slab:  whether the slab control area lives outside the slab
//   left_over: the size of the unusable (left-over) area in the slab
static void
calculate_slab_order(kmem_cache_t *cachep, size_t objsize, size_t align, bool off_slab, size_t *left_over) {
    size_t order;
    for (order = 0; order <= KMALLOC_MAX_ORDER; order ++) {
        size_t num, remainder;
        cache_estimate(order, objsize, align, off_slab, &remainder, &num);
        if (num != 0) {
            if (off_slab) {
                size_t off_slab_limit = objsize - sizeof(slab_t);
                off_slab_limit /= sizeof(kmem_bufctl_t);
                if (num > off_slab_limit) {
                    panic("off_slab: objsize = %d, num = %d.", objsize, num);
                }
            }
            if (remainder * 8 <= (PGSIZE << order)) {
                cachep->num = num;
                cachep->page_order = order;
                if (left_over != NULL) {
                    *left_over = remainder;
                }
                return ;
            }
        }
    }
    panic("calculate_slab_order: failed.");
}
// getorder - find the smallest order in [MIN_SIZE_ORDER, MAX_SIZE_ORDER] such that n <= 2^order
static inline size_t
getorder(size_t n) {
    size_t order = MIN_SIZE_ORDER, order_size = (1 << order);
    for (; order <= MAX_SIZE_ORDER; order ++, order_size <<= 1) {
        if (n <= order_size) {
            return order;
        }
    }
    panic("getorder failed. %d\n", n);
}
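
/*
 * Example (added for illustration): getorder(100) returns 7, because
 * 2^6 = 64 < 100 <= 128 = 2^7. A 100-byte kmalloc() request is therefore served
 * from the 128-byte class, slab_cache[7 - MIN_SIZE_ORDER] == slab_cache[2].
 * Requests larger than 2^MAX_SIZE_ORDER (128KB) make getorder panic.
 */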
// init_kmem_cache - initialize a slab_cache cachep for objs of size objsize
static void
init_kmem_cache(kmem_cache_t *cachep, size_t objsize, size_t align) {
    list_init(&(cachep->slabs_full));
    list_init(&(cachep->slabs_notfull));

    objsize = ROUNDUP(objsize, align);
    cachep->objsize = objsize;
    cachep->off_slab = (objsize >= (PGSIZE >> 3));

    size_t left_over;
    calculate_slab_order(cachep, objsize, align, cachep->off_slab, &left_over);

    assert(cachep->num > 0);

    size_t mgmt_size = slab_mgmt_size(cachep->num, align);

    if (cachep->off_slab && left_over >= mgmt_size) {
        cachep->off_slab = 0;
    }

    if (cachep->off_slab) {
        cachep->offset = 0;
        cachep->slab_cachep = slab_cache + (getorder(mgmt_size) - MIN_SIZE_ORDER);
    }
    else {
        cachep->offset = mgmt_size;
    }
}
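
/*
 * Example of the off-slab decision above (added for illustration; assumes
 * PGSIZE == 4096): objsize >= PGSIZE >> 3 == 512 marks a cache as a candidate for
 * off-slab management. For the 512-byte class on one 4KB page, 8 objects fill the
 * page exactly, so left_over == 0 cannot hold the control area; the slab_t plus
 * bufctl array (ROUNDUP(24 + 8*4, 16) == 64 bytes on a 32-bit build) are instead
 * allocated from a smaller cache via cachep->slab_cachep (here the 64-byte class).
 * For small classes such as 32B, the control area simply sits at the start of the
 * slab and cachep->offset == mgmt_size.
 */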
static void *kmem_cache_alloc(kmem_cache_t *cachep);

#define slab_bufctl(slabp)                  \
    ((kmem_bufctl_t*)(((slab_t *)(slabp)) + 1))

// kmem_cache_slabmgmt - get the address of a slab according to page
//                     - and initialize the slab according to cachep
static slab_t *
kmem_cache_slabmgmt(kmem_cache_t *cachep, struct Page *page) {
    void *objp = page2kva(page);
    slab_t *slabp;
    if (cachep->off_slab) {
        if ((slabp = kmem_cache_alloc(cachep->slab_cachep)) == NULL) {
            return NULL;
        }
    }
    else {
        slabp = page2kva(page);
    }
    slabp->inuse = 0;
    slabp->offset = cachep->offset;
    slabp->s_mem = objp + cachep->offset;
    return slabp;
}

#define SET_PAGE_CACHE(page, cachep)                                                \
    do {                                                                            \
        struct Page *__page = (struct Page *)(page);                                \
        kmem_cache_t **__cachepp = (kmem_cache_t **)&(__page->page_link.next);      \
        *__cachepp = (kmem_cache_t *)(cachep);                                      \
    } while (0)

#define SET_PAGE_SLAB(page, slabp)                                                  \
    do {                                                                            \
        struct Page *__page = (struct Page *)(page);                                \
        slab_t **__cachepp = (slab_t **)&(__page->page_link.prev);                  \
        *__cachepp = (slab_t *)(slabp);                                             \
    } while (0)
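
/*
 * Note (added for clarity, not in the original source): while a page belongs to a
 * slab it is never on the free list, so its page_link.next / page_link.prev fields
 * are reused to remember which cache and which slab own it. GET_PAGE_CACHE and
 * GET_PAGE_SLAB further below read them back, which is how kfree(objp) recovers
 * the owning cache from nothing but the object's address:
 *
 *     struct Page *page = kva2page(objp);
 *     kmem_cache_t *cachep = GET_PAGE_CACHE(page);   // stored by SET_PAGE_CACHE
 *     slab_t *slabp = GET_PAGE_SLAB(page);           // stored by SET_PAGE_SLAB
 */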
// kmem_cache_grow - allocate a new slab by calling alloc_pages
//                 - set up the control area in the new slab
static bool
kmem_cache_grow(kmem_cache_t *cachep) {
    struct Page *page = alloc_pages(1 << cachep->page_order);
    if (page == NULL) {
        goto failed;
    }

    slab_t *slabp;
    if ((slabp = kmem_cache_slabmgmt(cachep, page)) == NULL) {
        goto oops;
    }

    size_t order_size = (1 << cachep->page_order);
    do {
        //record the owning cache and slab in each page (see memlayout.h: struct Page)
        SET_PAGE_CACHE(page, cachep);
        SET_PAGE_SLAB(page, slabp);
        //this page is used for slab
        SetPageSlab(page);
        page ++;
    } while (-- order_size);

    int i;
    for (i = 0; i < cachep->num; i ++) {
        slab_bufctl(slabp)[i] = i + 1;
    }
    slab_bufctl(slabp)[cachep->num - 1] = BUFCTL_END;
    slabp->free = 0;

    bool intr_flag;
    local_intr_save(intr_flag);
    {
        list_add(&(cachep->slabs_notfull), &(slabp->slab_link));
    }
    local_intr_restore(intr_flag);
    return 1;

oops:
    free_pages(page, 1 << cachep->page_order);
failed:
    return 0;
}
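
/*
 * Illustrative state (added for clarity) of a freshly grown slab with
 * cachep->num == 4: the loop above links every object index to the next one, so
 *
 *     slab_bufctl(slabp): [1, 2, 3, BUFCTL_END]
 *     slabp->free = 0, slabp->inuse = 0
 *
 * i.e. the free objects form a singly linked chain of indices 0 -> 1 -> 2 -> 3,
 * terminated by BUFCTL_END. kmem_cache_alloc_one() below pops from the head of
 * this chain; kmem_cache_free_one() pushes a freed index back onto it.
 */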
// kmem_cache_alloc_one - allocate an obj in a slab
static void *
kmem_cache_alloc_one(kmem_cache_t *cachep, slab_t *slabp) {
    slabp->inuse ++;
    void *objp = slabp->s_mem + slabp->free * cachep->objsize;
    slabp->free = slab_bufctl(slabp)[slabp->free];

    if (slabp->free == BUFCTL_END) {
        list_del(&(slabp->slab_link));
        list_add(&(cachep->slabs_full), &(slabp->slab_link));
    }
    return objp;
}

// kmem_cache_alloc - call the kmem_cache_alloc_one function to allocate an obj
//                  - if there is no free obj, try to allocate a new slab
static void *
kmem_cache_alloc(kmem_cache_t *cachep) {
    void *objp;
    bool intr_flag;

try_again:
    local_intr_save(intr_flag);
    if (list_empty(&(cachep->slabs_notfull))) {
        goto alloc_new_slab;
    }
    slab_t *slabp = le2slab(list_next(&(cachep->slabs_notfull)), slab_link);
    objp = kmem_cache_alloc_one(cachep, slabp);
    local_intr_restore(intr_flag);
    return objp;

alloc_new_slab:
    local_intr_restore(intr_flag);
    if (kmem_cache_grow(cachep)) {
        goto try_again;
    }
    return NULL;
}
// kmalloc - simple interface used by outside functions
//         - to allocate a memory block using the kmem_cache_alloc function
void *
kmalloc(size_t size) {
    assert(size > 0);
    size_t order = getorder(size);
    if (order > MAX_SIZE_ORDER) {
        return NULL;
    }
    return kmem_cache_alloc(slab_cache + (order - MIN_SIZE_ORDER));
}
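
/*
 * Note (added for illustration): every request is rounded up to the next
 * power-of-two size class, so kmalloc(33) consumes a 64-byte object and
 * kmalloc(100) a 128-byte one; internal fragmentation is bounded by roughly 2x
 * in the worst case. A minimal usage sketch (assuming a 32-bit build, so the
 * 40-byte request below lands in the 64-byte class):
 *
 *     int *buf = kmalloc(10 * sizeof(int));
 *     if (buf != NULL) {
 *         buf[0] = 42;
 *         kfree(buf);     // returns the obj to its slab
 *     }
 */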
static void kmem_cache_free(kmem_cache_t *cachep, void *obj);

// kmem_slab_destroy - call free_pages & kmem_cache_free to free a slab
static void
kmem_slab_destroy(kmem_cache_t *cachep, slab_t *slabp) {
    struct Page *page = kva2page(slabp->s_mem - slabp->offset);

    struct Page *p = page;
    size_t order_size = (1 << cachep->page_order);
    do {
        assert(PageSlab(p));
        ClearPageSlab(p);
        p ++;
    } while (-- order_size);

    free_pages(page, 1 << cachep->page_order);

    if (cachep->off_slab) {
        kmem_cache_free(cachep->slab_cachep, slabp);
    }
}
// kmem_cache_free_one - free an obj in a slab
//                     - if slabp->inuse == 0, then free the whole slab
static void
kmem_cache_free_one(kmem_cache_t *cachep, slab_t *slabp, void *objp) {
    //note: the object's index could be computed without the divide operator; a division keeps this simple
    size_t objnr = (objp - slabp->s_mem) / cachep->objsize;
    slab_bufctl(slabp)[objnr] = slabp->free;
    slabp->free = objnr;

    slabp->inuse --;

    if (slabp->inuse == 0) {
        list_del(&(slabp->slab_link));
        kmem_slab_destroy(cachep, slabp);
    }
    else if (slabp->inuse == cachep->num - 1) {
        list_del(&(slabp->slab_link));
        list_add(&(cachep->slabs_notfull), &(slabp->slab_link));
    }
}
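
/*
 * Illustrative trace (added for clarity), continuing the 4-object slab sketched
 * after kmem_cache_grow: once all 4 objs are allocated the chain is empty
 * (free == BUFCTL_END) and the slab sits on slabs_full. Freeing obj #2 then does
 *
 *     slab_bufctl(slabp)[2] = BUFCTL_END;   // old head of the chain
 *     slabp->free = 2;                      // index 2 becomes the new head
 *     slabp->inuse = 3;                     // == num - 1, so the slab moves back
 *                                           //    from slabs_full to slabs_notfull
 */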
#define GET_PAGE_CACHE(page)                                \
    (kmem_cache_t *)((page)->page_link.next)

#define GET_PAGE_SLAB(page)                                 \
    (slab_t *)((page)->page_link.prev)
// kmem_cache_free - call the kmem_cache_free_one function to free an obj
static void
kmem_cache_free(kmem_cache_t *cachep, void *objp) {
    bool intr_flag;
    struct Page *page = kva2page(objp);

    if (!PageSlab(page)) {
        panic("not a slab page %08x\n", objp);
    }
    local_intr_save(intr_flag);
    {
        kmem_cache_free_one(cachep, GET_PAGE_SLAB(page), objp);
    }
    local_intr_restore(intr_flag);
}

// kfree - simple interface used by outside functions to free an obj
void
kfree(void *objp) {
    kmem_cache_free(GET_PAGE_CACHE(kva2page(objp)), objp);
}
static inline void
check_slab_empty(void) {
    int i;
    for (i = 0; i < SLAB_CACHE_NUM; i ++) {
        kmem_cache_t *cachep = slab_cache + i;
        assert(list_empty(&(cachep->slabs_full)));
        assert(list_empty(&(cachep->slabs_notfull)));
    }
}
void
check_slab(void) {
    int i;
    void *v0, *v1;

    size_t nr_free_pages_store = nr_free_pages();
    size_t kernel_allocated_store = slab_allocated();

    /* slab must be empty now */
    check_slab_empty();
    assert(slab_allocated() == 0);

    kmem_cache_t *cachep0, *cachep1;

    cachep0 = slab_cache;
    assert(cachep0->objsize == 32 && cachep0->num > 1 && !cachep0->off_slab);

    assert((v0 = kmalloc(16)) != NULL);

    slab_t *slabp0, *slabp1;

    assert(!list_empty(&(cachep0->slabs_notfull)));
    slabp0 = le2slab(list_next(&(cachep0->slabs_notfull)), slab_link);
    assert(slabp0->inuse == 1 && list_next(&(slabp0->slab_link)) == &(cachep0->slabs_notfull));

    struct Page *p0, *p1;
    size_t order_size;

    p0 = kva2page(slabp0->s_mem - slabp0->offset), p1 = p0;
    order_size = (1 << cachep0->page_order);
    for (i = 0; i < cachep0->page_order; i ++, p1 ++) {
        assert(PageSlab(p1));
        assert(GET_PAGE_CACHE(p1) == cachep0 && GET_PAGE_SLAB(p1) == slabp0);
    }

    assert(v0 == slabp0->s_mem);
    assert((v1 = kmalloc(16)) != NULL && v1 == v0 + 32);

    kfree(v0);
    assert(slabp0->free == 0);
    kfree(v1);
    assert(list_empty(&(cachep0->slabs_notfull)));

    for (i = 0; i < cachep0->page_order; i ++, p0 ++) {
        assert(!PageSlab(p0));
    }

    v0 = kmalloc(16);
    assert(!list_empty(&(cachep0->slabs_notfull)));
    slabp0 = le2slab(list_next(&(cachep0->slabs_notfull)), slab_link);

    for (i = 0; i < cachep0->num - 1; i ++) {
        kmalloc(16);
    }

    assert(slabp0->inuse == cachep0->num);
    assert(list_next(&(cachep0->slabs_full)) == &(slabp0->slab_link));
    assert(list_empty(&(cachep0->slabs_notfull)));

    v1 = kmalloc(16);
    assert(!list_empty(&(cachep0->slabs_notfull)));
    slabp1 = le2slab(list_next(&(cachep0->slabs_notfull)), slab_link);

    kfree(v0);
    assert(list_empty(&(cachep0->slabs_full)));
    assert(list_next(&(slabp0->slab_link)) == &(slabp1->slab_link)
            || list_next(&(slabp1->slab_link)) == &(slabp0->slab_link));

    kfree(v1);
    assert(!list_empty(&(cachep0->slabs_notfull)));
    assert(list_next(&(cachep0->slabs_notfull)) == &(slabp0->slab_link));
    assert(list_next(&(slabp0->slab_link)) == &(cachep0->slabs_notfull));

    v1 = kmalloc(16);
    assert(v1 == v0);
    assert(list_next(&(cachep0->slabs_full)) == &(slabp0->slab_link));
    assert(list_empty(&(cachep0->slabs_notfull)));

    for (i = 0; i < cachep0->num; i ++) {
        kfree(v1 + i * cachep0->objsize);
    }

    assert(list_empty(&(cachep0->slabs_full)));
    assert(list_empty(&(cachep0->slabs_notfull)));
    cachep0 = slab_cache;

    bool has_off_slab = 0;
    for (i = 0; i < SLAB_CACHE_NUM; i ++, cachep0 ++) {
        if (cachep0->off_slab) {
            has_off_slab = 1;
            cachep1 = cachep0->slab_cachep;
            if (!cachep1->off_slab) {
                break;
            }
        }
    }

    if (!has_off_slab) {
        goto check_pass;
    }

    assert(cachep0->off_slab && !cachep1->off_slab);
    assert(cachep1 < cachep0);

    assert(list_empty(&(cachep0->slabs_full)));
    assert(list_empty(&(cachep0->slabs_notfull)));

    assert(list_empty(&(cachep1->slabs_full)));
    assert(list_empty(&(cachep1->slabs_notfull)));

    v0 = kmalloc(cachep0->objsize);
    p0 = kva2page(v0);
    assert(page2kva(p0) == v0);

    if (cachep0->num == 1) {
        assert(!list_empty(&(cachep0->slabs_full)));
        slabp0 = le2slab(list_next(&(cachep0->slabs_full)), slab_link);
    }
    else {
        assert(!list_empty(&(cachep0->slabs_notfull)));
        slabp0 = le2slab(list_next(&(cachep0->slabs_notfull)), slab_link);
    }
    assert(slabp0 != NULL);

    if (cachep1->num == 1) {
        assert(!list_empty(&(cachep1->slabs_full)));
        slabp1 = le2slab(list_next(&(cachep1->slabs_full)), slab_link);
    }
    else {
        assert(!list_empty(&(cachep1->slabs_notfull)));
        slabp1 = le2slab(list_next(&(cachep1->slabs_notfull)), slab_link);
    }
    assert(slabp1 != NULL);

    order_size = (1 << cachep0->page_order);
    for (i = 0; i < order_size; i ++, p0 ++) {
        assert(PageSlab(p0));
        assert(GET_PAGE_CACHE(p0) == cachep0 && GET_PAGE_SLAB(p0) == slabp0);
    }

    kfree(v0);

check_pass:

    check_rb_tree();
    check_slab_empty();
    assert(slab_allocated() == 0);
    assert(nr_free_pages_store == nr_free_pages());
    assert(kernel_allocated_store == slab_allocated());

    cprintf("check_slab() succeeded!\n");
}