You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
457 lines
13 KiB
C
457 lines
13 KiB
C
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
|
|
|
|
#include <arch/cpufunc.h>
|
|
#include <arch/page.h>
|
|
|
|
#include <gay/clist.h>
|
|
#include <gay/config.h>
|
|
#include <gay/kprintf.h>
|
|
#include <gay/mm.h>
|
|
#include <gay/mutex.h>
|
|
#include <gay/poison.h>
|
|
#include <gay/systm.h>
|
|
#include <gay/types.h>
|
|
#include <gay/util.h>
|
|
#include <gay/vm/page.h>
|
|
|
|
#include <limits.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
|
|
#if DMAP_OFFSET % PAGE_SIZE != 0
|
|
#error "DMAP_OFFSET must be an integral multiple of PAGE_SIZE"
|
|
#endif
|
|
|
|
#if PAGE_SIZE % LONG_BIT != 0
|
|
#error "PAGE_SIZE must be an integral multiple of LONG_BIT"
|
|
#endif
|
|
|
|
#if __SIZEOF_POINTER__ != __SIZEOF_LONG__
|
|
#error "long must be as wide as a pointer"
|
|
#endif
|
|
|
|
#if CFG_DEBUG_PAGE_ALLOCS
|
|
# define PAGE_ASSERT(x) KASSERT(x)
|
|
# define page_debug(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
|
|
# define PAGE_DEBUG_BLOCK
|
|
# if CFG_DEBUG_PAGE_ALLOCS_NOISY
|
|
# define page_debug_noisy(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
|
|
# else
|
|
# define page_debug_noisy(msg, ...) ({})
|
|
# endif
|
|
#else
|
|
# define PAGE_ASSERT(x) ({})
|
|
# define PAGE_DEBUG_BLOCK if (0)
|
|
# define page_debug(msg, ...) ({})
|
|
# define page_debug_noisy(msg, ...) ({})
|
|
#endif
|
|
|
|
#define ORDER_SHIFT(order) (PAGE_SHIFT + (order))
|
|
#define ORDER_SIZE(order) (1 << ORDER_SHIFT(order))
|
|
|
|
/* this should be the same as LONG_BIT because latom_t is really just a
|
|
* long wrapped in a struct, but my trust in compilers is exactly zero */
|
|
#define LATOM_BIT (sizeof(latom_t) * CHAR_BIT)
|
|
|
|
struct mm_zone mm_zones[MM_NR_ZONES];
|
|
|
|
/** @brief Find the highest buddy order that a physical address is aligned to (clamped to MM_MAX_ORDER). */
static inline u_int paddr_find_order(vm_paddr_t addr)
{
	/* index of the lowest set bit, or -1 if addr is zero */
	int lowest_bit = ffsll((long long)addr) - 1;
	if (lowest_bit < 0 || lowest_bit > ORDER_SHIFT(MM_MAX_ORDER))
		lowest_bit = ORDER_SHIFT(MM_MAX_ORDER);

	KASSERT(lowest_bit >= PAGE_SHIFT);
	return lowest_bit - PAGE_SHIFT;
}
|
|
|
|
/** @brief Claim all free pages in one of the memory areas from the boot allocator. */
|
|
static inline void claim_bmem_area(struct mm_zone *zone, struct _bmem_area *area)
|
|
{
|
|
vm_paddr_t start = area->start;
|
|
vm_paddr_t end = area->end;
|
|
vm_paddr_t pos = start;
|
|
vm_size_t nr_pages = end - start / PAGE_SIZE;
|
|
latom_add(&zone->free_count, (long)nr_pages);
|
|
|
|
struct vm_page *page = &vm_page_array[start >> PAGE_SHIFT];
|
|
u_int order = paddr_find_order(start);
|
|
/* make sure the boot memory allocator cannot under any circumstances hand
|
|
* out pages from this area anymore, even though that should be unnecessary */
|
|
clist_del(&area->link);
|
|
|
|
/*
|
|
* We want to insert pages at the highest possible order. However, the
|
|
* start and end pointers of the area are only guaranteed to be page
|
|
* aligned. Therefore, we start with the highest possible order based
|
|
* on the start address, and then increment the order in every loop
|
|
* iteration (up to MM_MAX_ORDER). We do this until we have reached
|
|
* the end which, again, is only guaranteed to be page aligned, and
|
|
* subsequently lower the order again.
|
|
*/
|
|
while (pos < end) {
|
|
struct mm_pool *pool = &zone->pools[order];
|
|
clist_add(&pool->freelist, &page->link);
|
|
pool->free_entries++;
|
|
|
|
/* only the first page in the order group is inserted into
|
|
* the freelist, but all of them need to be initialized */
|
|
for (u_int i = 0; i < (1 << order); i++) {
|
|
atom_init(&page[i].count, 0);
|
|
atom_init(&page[i].attr, 0);
|
|
}
|
|
|
|
/*
|
|
* order
|
|
* ^
|
|
* | ._____._____. < MM_MAX_ORDER
|
|
* | .___| |
|
|
* start |._| |_.
|
|
* order > .| |. < end order
|
|
* |---------------------|----> pos
|
|
* start end
|
|
*/
|
|
pos += ORDER_SIZE(order);
|
|
page += (1 << order);
|
|
if (order < MM_MAX_ORDER && pos + ORDER_SIZE(order) <= end) {
|
|
/* this makes the rising part of the graph */
|
|
order++;
|
|
} else if (order > 0 && pos > end) {
|
|
/* we have overshot, lower the order */
|
|
pos -= ORDER_SIZE(order);
|
|
page -= (1 << order);
|
|
/* this makes the abrupt downwards jump at the end of the graph */
|
|
while (--order) {
|
|
if (pos + ORDER_SIZE(order) <= end) {
|
|
pos += ORDER_SIZE(order);
|
|
page += (1 << order);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * @brief Initialize the buddy page frame allocator.
 *
 * Computes and allocates the per-order buddy bitmaps, initializes every
 * pool in every zone, marks all page frames reserved, and then hands the
 * boot allocator's free areas over to the zone freelists.
 *
 * @param phys_end End of usable physical memory (exclusive, page aligned)
 */
void paging_init(vm_paddr_t phys_end)
{
	/* Sizes of the individual bitmaps per order in *bytes*, rounded up
	 * to the next full longword.  We use the same bitmaps in all zones. */
	usize bitmap_sizes[MM_NR_ORDERS];
	/* size of all bitmaps combined */
	usize bitmap_total_size = 0;

	for (int order = 0; order < MM_NR_ORDERS; order++) {
		/* one bit per buddy *pair*, hence the shift by one extra
		 * order; `bits' therefore already is the bit count */
		usize bits = phys_end >> ORDER_SHIFT(order + 1);
		bits = align_ceil(bits, LATOM_BIT);
		/* NOTE(review): the old code divided by (CHAR_BIT * 2) even
		 * though the pair count had already been halved by shifting
		 * with ORDER_SHIFT(order + 1), undersizing every bitmap by
		 * a factor of two */
		usize bytes = bits / CHAR_BIT;
		bitmap_sizes[order] = bytes;
		bitmap_total_size += bytes;
	}

	page_debug("Reserving %zu bytes for page bitmaps\n", bitmap_total_size);

	/*
	 * allocate memory for the bitmaps and zero them out
	 * (__boot_pmalloc takes a log2 size, so round up to a power of two)
	 */
	u_int bitmap_size_log2 = flsl((long)bitmap_total_size);
	KASSERT(bitmap_size_log2 != 0);
	bitmap_size_log2--; /* the bit index returned by flsl starts at 1 */
	if (bitmap_total_size ^ (1ul << bitmap_size_log2))
		bitmap_size_log2++; /* bitmap_total_size is not a power of 2, round up */
	uintptr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL);
	panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR,
		 "cannot allocate memory for the page bitmaps");
	memset(__v(bitmap_start_phys), 0, bitmap_total_size);

	/*
	 * initialize the pools (all zones deliberately share one set of bitmaps)
	 */
	for (int zone_index = 0; zone_index < ARRAY_SIZE(mm_zones); zone_index++) {
		struct mm_zone *zone = &mm_zones[zone_index];
		latom_t *bitmap_pos = __v(bitmap_start_phys);
		/* free_count is per zone, not per order -- initialize it once
		 * here instead of MM_NR_ORDERS times inside the loop below */
		latom_init(&zone->free_count, 0);
		for (int order = 0; order < MM_NR_ORDERS; order++) {
			zone->pools[order].bitmap = bitmap_pos;
			clist_init(&zone->pools[order].freelist);
			zone->pools[order].free_entries = 0;

			/* bitmap_sizes[] is in bytes, so scale to latom_t
			 * units for the pointer arithmetic (the old code
			 * added raw bytes to a latom_t pointer, advancing
			 * sizeof(latom_t) times too far); the sizes are
			 * divisible because of the LATOM_BIT rounding above */
			bitmap_pos += bitmap_sizes[order] / sizeof(latom_t);
		}
	}

	/*
	 * mark *all* pages as reserved first
	 *
	 * XXX this is totally unnecessary and i'm only doing it because i'm
	 * too tired to work out an algorithm that finds all pages that are
	 * not in the _bmem_areas lists of the mm_zones
	 *
	 * if the reserved bit is set, all other fields in the page are invalid.
	 */
	for (usize i = 0; i < phys_end >> PAGE_SHIFT; i++) {
		/* This is merely an optimization to simplify checking whether
		 * two buddies can be coalesced into one. In reality, the
		 * reference count is invalid because the page is reserved. */
		atom_init(&vm_page_array[i].count, 1);
		atom_init(&vm_page_array[i].attr, _PGA_RSVD_MASK);
	}

	/*
	 * populate the freelists from the boot allocator's areas, then derive
	 * the per-zone emergency threshold from the resulting free count
	 */
	for (int i = 0; i < ARRAY_SIZE(mm_zones); i++) {
		struct mm_zone *zone = &mm_zones[i];
		struct _bmem_area *area, *tmp;
		clist_foreach_entry_safe(&zone->_bmem_areas, area, tmp, link) {
			claim_bmem_area(zone, area);
		}
		zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
		if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
			zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
	}
}
|
|
|
|
/**
 * @brief Toggle the buddy-pair bitmap bit for @p pfn at @p order.
 * @returns whatever latom_flip_bit() reports for the flipped bit
 */
static inline bool pg_flip_bit(struct mm_zone *zone, u_long pfn, u_int order)
{
	/* one bit covers a whole buddy pair, so drop (order + 1) low bits */
	usize pair_index = pfn >> (order + 1);
	latom_t *word = &zone->pools[order].bitmap[pair_index / LATOM_BIT];
	int bit_in_word = (int)(pair_index % LATOM_BIT);
	return latom_flip_bit(word, bit_in_word);
}
|
|
|
|
__malloc_like
|
|
static void *__get_pages(u_int order, enum mflags flags)
|
|
{
|
|
PAGE_ASSERT(order >= 0);
|
|
struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)];
|
|
|
|
if (order > MM_MAX_ORDER) {
|
|
page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags);
|
|
return nil;
|
|
}
|
|
|
|
u_long count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order);
|
|
if (count_after < zone->thrsh.emerg) {
|
|
if (count_after < 0 || !(flags & _M_EMERG)) {
|
|
latom_add(&zone->free_count, (1 << order));
|
|
return nil;
|
|
}
|
|
}
|
|
|
|
register_t cpuflags = read_flags();
|
|
|
|
/*
|
|
* Search for a free page. Start looking at the freelist for the
|
|
* requested order, and if it's empty, go over to the next higher order.
|
|
* Repeat until we found a page, or we've reached the highest order.
|
|
*/
|
|
vm_page_t page = nil;
|
|
u_int page_order = order;
|
|
while (page == nil && page_order < MM_NR_ORDERS) {
|
|
struct mm_pool *pool = &zone->pools[page_order];
|
|
|
|
disable_intr();
|
|
spin_lock(&pool->lock);
|
|
if (pool->free_entries > 0) {
|
|
page = clist_del_first_entry(&pool->freelist, typeof(*page), link);
|
|
/* increment the reference count while we hold the lock on the pool,
|
|
* so that no other processor can try to coalesce this block if its
|
|
* buddy is being freed (coalition is only possible if the buddy
|
|
* has a reference count of zero, and while holding the pool lock) */
|
|
page_get(page);
|
|
pool->free_entries--;
|
|
} else {
|
|
page_order++;
|
|
}
|
|
spin_unlock(&pool->lock);
|
|
intr_restore(cpuflags);
|
|
}
|
|
|
|
/*
|
|
* if we found a page, check if we need to split it up
|
|
* (which is the case if we took one from a higher order freelist)
|
|
*/
|
|
if (page != nil) {
|
|
usize pfn = pg2pfn(page);
|
|
page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n",
|
|
order, pfn, page_order);
|
|
pg_flip_bit(zone, pfn, page_order);
|
|
|
|
/* split the page and insert the upper halves into the
|
|
* respective freelist until we reach the requested order */
|
|
while (page_order-- > order) {
|
|
page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order);
|
|
struct mm_pool *pool = &zone->pools[page_order];
|
|
vm_page_t buddy = page + (1 << page_order);
|
|
pga_set_order(buddy, page_order);
|
|
pg_flip_bit(zone, pfn + (1 << page_order), page_order);
|
|
|
|
disable_intr();
|
|
spin_lock(&pool->lock);
|
|
clist_add_first(&pool->freelist, &buddy->link);
|
|
pool->free_entries++;
|
|
spin_unlock(&pool->lock);
|
|
intr_restore(cpuflags);
|
|
}
|
|
|
|
pga_set_order(page, order);
|
|
void *vaddr = pfn2vaddr(pfn);
|
|
|
|
return vaddr;
|
|
} else {
|
|
return nil;
|
|
}
|
|
}
|
|
|
|
/* faster memset for whole pages */
|
|
static inline void init_pages(u_long *start, u_long val, u_int order)
|
|
{
|
|
u_long *end = start + (ORDER_SIZE(order) / sizeof(*start));
|
|
do {
|
|
*start++ = val;
|
|
} while (start != end);
|
|
}
|
|
|
|
/** @brief Allocate 2^@p order contiguous pages, poisoning them in debug builds. */
void *get_pages(u_int order, enum mflags flags)
{
	void *block = __get_pages(order, flags);

#if CFG_POISON_PAGES
	if (block != nil)
		init_pages(block, PAGE_POISON_ALLOC, order);
#endif

	return block;
}
|
|
|
|
/** @brief Allocate a single page (exactly equivalent to get_pages(0, flags)). */
void *get_page(enum mflags flags)
{
	/* delegate instead of duplicating get_pages()'s poisoning logic */
	return get_pages(0, flags);
}
|
|
|
|
/** @brief Allocate 2^@p order contiguous pages and fill them with zeroes. */
void *get_zero_pages(u_int order, enum mflags flags)
{
	void *block = __get_pages(order, flags);
	if (block == nil)
		return nil;

	init_pages(block, 0, order);
	return block;
}
|
|
|
|
/** @brief Allocate a single zeroed page (exactly equivalent to get_zero_pages(0, flags)). */
void *get_zero_page(enum mflags flags)
{
	/* delegate instead of duplicating get_zero_pages()'s zeroing logic */
	return get_zero_pages(0, flags);
}
|
|
|
|
/*
|
|
* Two buddies can be merged if:
|
|
* - you currently hold the lock for the pool
|
|
* - they both have a reference count of zero
|
|
* - they are in the same zone
|
|
* - neither of them is reserved
|
|
*
|
|
* This is only called from within the critical section of free_pages(),
|
|
* so execution speed is prioritized over anything else.
|
|
*/
|
|
static __always_inline bool can_merge(vm_page_t page, vm_page_t buddy)
|
|
{
|
|
bool merge = (atom_read(&buddy->count) == 0);
|
|
|
|
/* we know that `page' is not reserved, because we
|
|
* check that flag before we even attempt coalition */
|
|
const unsigned mask = _PGA_RSVD_MASK | _PGA_ZONE_MASK;
|
|
merge &= (atom_read(&page->attr) & mask) == (atom_read(&buddy->attr) & mask);
|
|
|
|
return merge;
|
|
}
|
|
|
|
void free_pages(void *ptr)
|
|
{
|
|
PAGE_DEBUG_BLOCK {
|
|
if (ptr < DMAP_START || ptr >= DMAP_END) {
|
|
panic("free_pages(%p): not in DMAP region\n", ptr);
|
|
}
|
|
}
|
|
|
|
register_t cpuflags = read_flags();
|
|
|
|
vm_page_t page = vaddr2pg(ptr);
|
|
panic_if(pga_rsvd(page), "tried to free reserved page %p", ptr);
|
|
|
|
u_int order = pga_order(page);
|
|
PAGE_ASSERT((uintptr_t)ptr % ORDER_SIZE(order) == 0);
|
|
u_long pfn = vaddr2pfn(ptr);
|
|
|
|
#if CFG_POISON_PAGES
|
|
init_pages(ptr, PAGE_POISON_FREE, order);
|
|
#endif
|
|
|
|
PAGE_DEBUG_BLOCK {
|
|
int old_count = atom_sub(&page->count, 1);
|
|
if (old_count != 1) {
|
|
if (old_count == 0)
|
|
page_debug("double free of %p", ptr);
|
|
else
|
|
page_debug("attempted to free %p with references", ptr);
|
|
return;
|
|
}
|
|
}
|
|
|
|
struct mm_zone *zone = &mm_zones[pga_zone(page)];
|
|
latom_add(&zone->free_count, (1 << order));
|
|
|
|
/* try to coalesce free buddy blocks until we're reached the highest order */
|
|
while (order < MM_MAX_ORDER) {
|
|
if (pg_flip_bit(zone, pfn, order))
|
|
break;
|
|
|
|
page_debug_noisy("join %p (order = %u)\n", pfn2vaddr(pfn), order);
|
|
|
|
/* precompute all values we need inside the critical section
|
|
* to avoid blocking other CPUs for longer than necessary */
|
|
vm_page_t buddy = &vm_page_array[pfn ^ (1ul << order)];
|
|
vm_page_t low = &vm_page_array[pfn & ~(1ul << order)];
|
|
struct mm_pool *current_order_pool = &zone->pools[order];
|
|
struct mm_pool *next_order_pool = &zone->pools[order + 1];
|
|
|
|
disable_intr();
|
|
spin_lock(&zone->pools[order].lock);
|
|
if (can_merge(page, buddy)) {
|
|
clist_del(&buddy->link);
|
|
current_order_pool->free_entries--;
|
|
pga_set_order(buddy, order + 1);
|
|
pga_set_order(page, order + 1);
|
|
clist_add(&next_order_pool->freelist, &low->link);
|
|
next_order_pool->free_entries++;
|
|
} else {
|
|
order = MM_MAX_ORDER; /* break out of the loop */
|
|
}
|
|
spin_unlock(&zone->pools[order].lock);
|
|
intr_restore(cpuflags);
|
|
|
|
page = low;
|
|
order++;
|
|
}
|
|
|
|
/* finally, we need to insert the page at its freelist */
|
|
struct mm_pool *pool = &zone->pools[order];
|
|
disable_intr();
|
|
spin_lock(&pool->lock);
|
|
clist_add(&pool->freelist, &page->link);
|
|
pool->free_entries++;
|
|
spin_unlock(&zone->pools[order].lock);
|
|
intr_restore(cpuflags);
|
|
}
|