/* Copyright (C) 2021,2022 fef.  All rights reserved. */

/* exact paths of the kernel-internal headers below are assumed */
#include <arch/atom.h>
#include <arch/cpufunc.h>
#include <arch/page.h>

#include <gay/clist.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/mm.h>
#include <gay/mutex.h>
#include <gay/systm.h>
#include <gay/types.h>
#include <gay/vm/page.h>

#include <limits.h>
#include <string.h>
#include <strings.h>

#if DMAP_OFFSET % PAGE_SIZE != 0
#error "DMAP_OFFSET must be an integral multiple of PAGE_SIZE"
#endif

#if PAGE_SIZE % LONG_BIT != 0
#error "PAGE_SIZE must be an integral multiple of LONG_BIT"
#endif

#if __SIZEOF_POINTER__ != __SIZEOF_LONG__
#error "long must be as wide as a pointer"
#endif

#if CFG_DEBUG_PAGE_ALLOCS
# define PAGE_ASSERT(x)		KASSERT(x)
# define page_debug(msg, ...)	kprintf("[page] " msg, ##__VA_ARGS__)
# define PAGE_DEBUG_BLOCK	if (1)
# if CFG_DEBUG_PAGE_ALLOCS_NOISY
#  define page_debug_noisy(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
# else
#  define page_debug_noisy(msg, ...) ({})
# endif
#else
# define PAGE_ASSERT(x)		({})
# define PAGE_DEBUG_BLOCK	if (0)
# define page_debug(msg, ...)	({})
# define page_debug_noisy(msg, ...) ({})
#endif

#define ORDER_SHIFT(order)	(PAGE_SHIFT + (order))
#define ORDER_SIZE(order)	(1 << ORDER_SHIFT(order))

/* this should be the same as LONG_BIT because latom_t is really just a
 * long wrapped in a struct, but my trust in compilers is exactly zero */
#define LATOM_BIT		(sizeof(latom_t) * CHAR_BIT)

struct mm_zone mm_zones[MM_NR_ZONES];

/** @brief Get the highest order that a block starting at @p addr can be naturally aligned to. */
static inline u_int paddr_find_order(vm_paddr_t addr)
{
	int bit = ffsll((long long)addr) - 1;
	if (bit == -1 || bit > ORDER_SHIFT(MM_MAX_ORDER))
		bit = ORDER_SHIFT(MM_MAX_ORDER);

	KASSERT(bit >= PAGE_SHIFT);
	return bit - PAGE_SHIFT;
}

/** @brief Claim all free pages in one of the memory areas from the boot allocator. */
static inline void claim_bmem_area(struct mm_zone *zone, const struct _bmem_area *area)
{
	u_int order = paddr_find_order(area->start);
	while (area->start + ORDER_SIZE(order) > area->end)
		order--;

	struct vm_page *const start = paddr2pg(area->start);
	struct vm_page *const end = paddr2pg(area->end);
	struct vm_page *pos = start;

	const vm_size_t nr_pages = end->pfn - start->pfn;
	latom_add(&zone->free_count, (long)nr_pages);

	/*
	 * We want to insert pages at the highest possible order.  However, the
	 * start and end pointers of the area are only guaranteed to be page
	 * aligned.  Therefore, we start with the highest possible order based
	 * on the start address, and then increment the order in every loop
	 * iteration (up to MM_MAX_ORDER).  We do this until we have reached
	 * the end which, again, is only guaranteed to be page aligned, and
	 * subsequently lower the order again.
	 */
	while (pos < end) {
		struct mm_pool *const pool = &zone->pools[order];
		clist_add(&pool->freelist, &pos->link);
		pool->free_entries++;

		/* only the first page in the order group is inserted into
		 * the freelist, but all of them need to be initialized */
		for (u_int i = 0; i < (1u << order); i++) {
			if (pos >= end)
				panic("page %p out of range", pos);
			if (atom_read(&pos->count) != 420)
				panic("page %p double initialized\n", pos);

			atom_init(&pos->count, 0);
			atom_init(&pos->attr, 0);
			pos++;
		}

		/*
		 *                 order
		 *                   ^
		 *                   |     ._____._____.  < MM_MAX_ORDER
		 *                   |  .__|           |
		 *   start order >  ._|                |_.
		 *                  .|                   |.  < end order
		 *                   |-------------------|----> pos
		 *                 start                end
		 */
		if (order < MM_MAX_ORDER && pos + (1 << (order + 1)) <= end) {
			/* this makes the rising part of the graph */
			order++;
		} else if (order > 0 && pos + (1 << order) > end) {
			/* this makes the abrupt downwards jump at the end of the graph */
			while (--order) {
				if (pos + (1 << order) <= end)
					break;
			}
		}
	}
}
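
/*
 * Illustration of the walk above (made-up example values):  for an area
 * covering pfn 1 (inclusive) up to pfn 23 (exclusive) and MM_MAX_ORDER >= 3,
 * claim_bmem_area() inserts the blocks
 *
 *	pfn  1, order 0
 *	pfn  2, order 1
 *	pfn  4, order 2
 *	pfn  8, order 3
 *	pfn 16, order 2
 *	pfn 20, order 1
 *	pfn 22, order 0
 *
 * i.e. the order rises as far as alignment and the remaining length allow
 * and falls off again towards the end, and every block ends up naturally
 * aligned to its own order, which the buddy bitmap arithmetic relies on.
 */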
void paging_init(vm_paddr_t phys_end)
{
	/* Sizes of the individual bitmaps per order, rounded up to the
	 * next full longword.  We use the same bitmaps in all zones. */
	usize bitmap_sizes[MM_NR_ORDERS];
	/* size of all bitmaps combined */
	usize bitmap_total_size = 0;

	for (int order = 0; order < MM_NR_ORDERS; order++) {
		usize pages = phys_end >> ORDER_SHIFT(order);
		pages = align_ceil(pages, LATOM_BIT * 2);
		usize bytes = pages / (CHAR_BIT * 2);
		bitmap_sizes[order] = bytes;
		bitmap_total_size += bytes;
	}

	page_debug("Reserving %zu bytes for page bitmaps\n", bitmap_total_size);
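
	/*
	 * Example of the sizing above (illustrative values only, assuming
	 * PAGE_SHIFT == 12, 64-bit longs and phys_end == 4 GiB):  order 0 has
	 * 4 GiB / 4 KiB == 1048576 blocks, i.e. 524288 buddy pairs, which at
	 * one bit per pair comes out at 65536 bytes of bitmap.  Every further
	 * order roughly halves that, so all orders together need about 128 KiB.
	 */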
	/*
	 * allocate memory for the bitmaps and zero them out
	 */
	u_int bitmap_size_log2 = flsl((long)bitmap_total_size);
	KASSERT(bitmap_size_log2 != 0);
	bitmap_size_log2--; /* the bit index returned by flsl starts at 1 */
	if (bitmap_total_size ^ (1ul << bitmap_size_log2))
		bitmap_size_log2++; /* bitmap_total_size is not a power of 2, round up */

	vm_paddr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL);
	panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR,
		 "cannot allocate memory for the page bitmaps");
	memset(__v(bitmap_start_phys), 0, bitmap_total_size);

	/*
	 * initialize the pools
	 */
	for (int zone_index = 0; zone_index < ARRAY_SIZE(mm_zones); zone_index++) {
		struct mm_zone *zone = &mm_zones[zone_index];
		latom_init(&zone->free_count, 0);

		/* we use the same bitmaps for all zones */
		latom_t *bitmap_pos = __v(bitmap_start_phys);
		for (int order = 0; order < MM_NR_ORDERS; order++) {
			struct mm_pool *pool = &zone->pools[order];
			pool->bitmap = bitmap_pos;
			pool->free_entries = 0;
			clist_init(&pool->freelist);
			spin_init(&pool->lock);

			/* bitmap_sizes[] is in bytes, but bitmap_pos is a latom_t pointer */
			bitmap_pos += bitmap_sizes[order] / sizeof(latom_t);
		}
	}

	/*
	 * mark *all* pages as reserved first
	 *
	 * XXX this is totally unnecessary and i'm only doing it because i'm
	 *     too tired to work out an algorithm that finds all pages that
	 *     are not in the _bmem_areas lists of the mm_zones
	 *
	 * if the reserved bit is set, all other fields in the page are invalid.
	 */
	for (u_long pfn = 0; pfn < phys_end >> PAGE_SHIFT; pfn++) {
		/* This is merely an optimization to simplify checking whether
		 * two buddies can be coalesced into one.  In reality, the
		 * reference count is invalid because the page is reserved. */
		atom_init(&vm_page_array[pfn].count, 420);
		atom_init(&vm_page_array[pfn].attr, _PGA_RSVD_MASK);
		vm_page_array[pfn].pfn = pfn;
	}

	/*
	 * populate the freelists
	 */
	for (int i = 0; i < ARRAY_SIZE(mm_zones); i++) {
		struct mm_zone *zone = &mm_zones[i];

		struct _bmem_area *area, *tmp;
		clist_foreach_entry_safe(&zone->_bmem_areas, area, tmp, link) {
			/* make sure the boot memory allocator cannot under any
			 * circumstances hand out pages from this area anymore,
			 * even though that should be unnecessary */
			clist_del(&area->link);
			claim_bmem_area(zone, area);
		}

		zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
		if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
			zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
	}
}
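
/*
 * Each pool's bitmap stores one bit per pair of buddies at that order
 * (hence the pfn >> (order + 1) below), and the bit is flipped on every
 * allocation and deallocation affecting either buddy.  page_free() relies
 * on this parity to decide cheaply whether the buddy of the block being
 * freed can possibly be free as well, before it takes the pool lock and
 * checks for real in can_merge().
 */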
static inline bool pg_flip_bit(struct mm_zone *zone, u_long pfn, u_int order)
{
	usize bit = pfn >> (order + 1);
	latom_t *bitmap = &zone->pools[order].bitmap[bit / LATOM_BIT];
	return latom_flip_bit(bitmap, (int)(bit % LATOM_BIT));
}

vm_page_t page_alloc(u_int order, enum mflags flags)
{
	if (order > MM_MAX_ORDER) {
		page_debug("page_alloc(%u, %#08x): Order too high!\n", order, flags);
		return nil;
	}

	struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)];
	long count_after;
try_next_zone:
	count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order);
	if (count_after < zone->thrsh.emerg) {
		if (count_after < 0 || !(flags & _M_EMERG)) {
			latom_add(&zone->free_count, (1 << order));
			/* if we can't allocate from ZONE_NORMAL, fall back to ZONE_DMA */
			if (zone > &mm_zones[0]) {
				zone--;
				goto try_next_zone;
			} else {
				return nil;
			}
		}
	}

	register_t cpuflags = read_flags();

	/*
	 * Search for a free page.  Start looking at the freelist for the
	 * requested order, and if it's empty, go over to the next higher order.
	 * Repeat until we have found a page, or we've reached the highest order.
	 */
	vm_page_t page = nil;
	u_int page_order = order;
	while (page == nil && page_order < MM_NR_ORDERS) {
		struct mm_pool *pool = &zone->pools[page_order];

		disable_intr();
		spin_lock(&pool->lock);
		if (pool->free_entries > 0) {
			page = clist_del_first_entry(&pool->freelist, typeof(*page), link);
			/* increment the reference count while we hold the lock on the pool,
			 * so that no other processor can try to coalesce this block if its
			 * buddy is being freed (coalescing is only possible if the buddy
			 * has a reference count of zero, and while holding the pool lock) */
			page_get(page);
			pool->free_entries--;
		} else {
			page_order++;
		}
		spin_unlock(&pool->lock);
		intr_restore(cpuflags);
	}

	if (page == nil) {
		/* hand the pages we reserved above back to this zone's free count */
		latom_add(&zone->free_count, (1 << order));

		if (zone > &mm_zones[0]) {
			/*
			 * If we reach this, the current zone technically had enough free
			 * pages for the allocation, but those pages were split up into
			 * smaller chunks rather than one contiguous area.  However, we
			 * don't give up quite yet:  If possible, we fall back to a lower
			 * memory zone (ZONE_NORMAL -> ZONE_DMA) and start over from the top.
			 */
			zone--;
			goto try_next_zone;
		} else {
			return nil;
		}
	}

	/*
	 * if we found a page, check if we need to split it up
	 * (which is the case if we took one from a higher order freelist)
	 */
	usize pfn = pg2pfn(page);
	page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n",
			 order, pfn, page_order);
	pg_flip_bit(zone, pfn, page_order);

	/* split the page and insert the upper halves into the
	 * respective freelist until we reach the requested order */
	while (page_order-- > order) {
		page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order);

		struct mm_pool *pool = &zone->pools[page_order];
		vm_page_t buddy = page + (1 << page_order);
		pga_set_order(buddy, page_order);
		pg_flip_bit(zone, pfn + (1 << page_order), page_order);

		disable_intr();
		spin_lock(&pool->lock);
		clist_add_first(&pool->freelist, &buddy->link);
		pool->free_entries++;
		spin_unlock(&pool->lock);
		intr_restore(cpuflags);
	}

	for (u_int i = 0; i < (1 << order); i++)
		pga_set_order(&page[i], order);
	page_clear(page);
	return page;
}

/*
 * XXX get_page() and get_pages() shouldn't depend on the direct map
 *
 * XXX Do we need these at all?  I don't think so.
 */

void *get_pages(u_int order, enum mflags flags)
{
	vm_page_t page = page_alloc(order, flags);
	if (page)
		return pfn2vaddr(pg2pfn(page));
	else
		return nil;
}

void *get_page(enum mflags flags)
{
	vm_page_t page = page_alloc(0, flags);
	if (page)
		return pfn2vaddr(pg2pfn(page));
	else
		return nil;
}

/*
 * Two buddies can be merged if:
 * - you currently hold the lock for the pool
 * - they both have a reference count of zero
 * - they are in the same zone
 * - neither of them is reserved
 *
 * This is only called from within the critical section of page_free(),
 * so execution speed is prioritized over anything else.
 */
static __always_inline bool can_merge(vm_page_t page, vm_page_t buddy)
{
	bool merge = (atom_read(&buddy->count) == 0);

	/* we know that `page' is not reserved, because we check
	 * that flag before we even attempt to coalesce */
	const unsigned mask = _PGA_RSVD_MASK | _PGA_ZONE_MASK;
	merge &= (atom_read(&page->attr) & mask) == (atom_read(&buddy->attr) & mask);

	return merge;
}
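
/*
 * Example of the coalescing walk in page_free() below (illustrative pfn
 * values only):  freeing pfn 12 at order 0 first looks at its buddy,
 * pfn 12 ^ 1 == 13.  If that page turns out to be free, the two become one
 * order 1 block starting at pfn 12 & ~1 == 12, whose order 1 buddy is
 * pfn 12 ^ 2 == 14, and so on, until either a buddy is still in use or
 * MM_MAX_ORDER is reached; only then is the resulting block inserted into
 * its freelist.
 */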
void page_free(vm_page_t page)
{
	register_t cpuflags = read_flags();

	u_int order = pga_order(page);
	u_long pfn = pg2pfn(page);
	PAGE_ASSERT(pfn % (1ul << order) == 0);

	PAGE_DEBUG_BLOCK {
		int old_count = atom_sub(&page->count, 1);
		if (old_count != 1) {
			if (old_count == 0)
				page_debug("double free of page %p\n", page);
			else
				page_debug("attempted to free page %p with references\n", page);
			return;
		}
	} else {
		atom_dec(&page->count);
	}

	struct mm_zone *zone = &mm_zones[pga_zone(page)];
	latom_add(&zone->free_count, (1 << order));

	/* try to coalesce free buddy blocks until we've reached the highest order */
	while (order < MM_MAX_ORDER) {
		if (pg_flip_bit(zone, pfn, order))
			break;

		page_debug_noisy("join %p (order = %u)\n", pfn2vaddr(pfn), order);

		/* precompute all values we need inside the critical section
		 * to avoid blocking other CPUs for longer than necessary */
		vm_page_t buddy = &vm_page_array[pfn ^ (1ul << order)];
		vm_page_t low = &vm_page_array[pfn & ~(1ul << order)];
		struct mm_pool *pool = &zone->pools[order];

		bool merged = false;
		disable_intr();
		spin_lock(&pool->lock);
		if (can_merge(page, buddy)) {
			/* take the buddy off its freelist and melt the two
			 * blocks together into one of the next higher order */
			clist_del(&buddy->link);
			pool->free_entries--;
			pga_set_order(buddy, order + 1);
			pga_set_order(page, order + 1);
			merged = true;
		}
		spin_unlock(&pool->lock);
		intr_restore(cpuflags);

		if (!merged)
			break;

		/* continue coalescing from the lower end of the merged block */
		page = low;
		pfn &= ~(1ul << order);
		order++;
	}

	/* finally, we need to insert the page at its freelist */
	struct mm_pool *pool = &zone->pools[order];
	disable_intr();
	spin_lock(&pool->lock);
	clist_add(&pool->freelist, &page->link);
	pool->free_entries++;
	spin_unlock(&pool->lock);
	intr_restore(cpuflags);
}
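
/*
 * Usage sketch (illustrative only; M_KERN stands in for whatever mflags
 * value selects MM_ZONE_NORMAL without _M_EMERG in this kernel):
 *
 *	vm_page_t pg = page_alloc(2, M_KERN);		// 1 << 2 == 4 contiguous pages
 *	if (pg != nil) {
 *		void *p = pfn2vaddr(pg2pfn(pg));	// direct map address, as in get_pages()
 *		memset(p, 0, ORDER_SIZE(2));
 *		page_free(pg);				// releases the whole order 2 block
 *	}
 */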