/* Copyright (C) 2021,2022 fef <owo@fef.moe>. All rights reserved. */
#include <arch/cpufunc.h>
#include <arch/page.h>
#include <gay/clist.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/mm.h>
#include <gay/mutex.h>
#include <gay/poison.h>
#include <gay/systm.h>
#include <gay/types.h>
#include <gay/util.h>
#include <gay/vm/page.h>
#include <limits.h>
#include <string.h>
#include <strings.h>
#if DMAP_OFFSET % PAGE_SIZE != 0
#error "DMAP_OFFSET must be an integral multiple of PAGE_SIZE"
#endif
#if PAGE_SIZE % LONG_BIT != 0
#error "PAGE_SIZE must be an integral multiple of LONG_BIT"
#endif
#if __SIZEOF_POINTER__ != __SIZEOF_LONG__
#error "long must be as wide as a pointer"
#endif
#if CFG_DEBUG_PAGE_ALLOCS
# define PAGE_ASSERT(x) KASSERT(x)
# define page_debug(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
# define PAGE_DEBUG_BLOCK if (1)
# if CFG_DEBUG_PAGE_ALLOCS_NOISY
# define page_debug_noisy(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
# else
# define page_debug_noisy(msg, ...) ({})
# endif
#else
# define PAGE_ASSERT(x) ({})
# define PAGE_DEBUG_BLOCK if (0)
# define page_debug(msg, ...) ({})
# define page_debug_noisy(msg, ...) ({})
#endif
#define ORDER_SHIFT(order) (PAGE_SHIFT + (order))
#define ORDER_SIZE(order) (1 << ORDER_SHIFT(order))
/* this should be the same as LONG_BIT because latom_t is really just a
* long wrapped in a struct, but my trust in compilers is exactly zero */
#define LATOM_BIT (sizeof(latom_t) * CHAR_BIT)
struct mm_zone mm_zones[MM_NR_ZONES];
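/** @brief Get the highest buddy order that a physical address is aligned to, capped at MM_MAX_ORDER. */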
static inline u_int paddr_find_order(vm_paddr_t addr)
{
int bit = ffsll((long long)addr) - 1;
if (bit == -1 || bit > ORDER_SHIFT(MM_MAX_ORDER))
bit = ORDER_SHIFT(MM_MAX_ORDER);
KASSERT(bit >= PAGE_SHIFT);
return bit - PAGE_SHIFT;
}
/** @brief Claim all free pages in one of the memory areas from the boot allocator. */
static inline void claim_bmem_area(struct mm_zone *zone, const struct _bmem_area *area)
{
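	/* start with the largest order that is naturally aligned to the
	 * area's start address and still fits between start and end */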
u_int order = paddr_find_order(area->start);
while (area->start + ORDER_SIZE(order) > area->end)
order--;
struct vm_page *const start = paddr2pg(area->start);
struct vm_page *const end = paddr2pg(area->end);
struct vm_page *pos = start;
const vm_size_t nr_pages = end->pfn - start->pfn;
latom_add(&zone->free_count, (long)nr_pages);
/*
* We want to insert pages at the highest possible order. However, the
* start and end pointers of the area are only guaranteed to be page
* aligned. Therefore, we start with the highest possible order based
* on the start address, and then increment the order in every loop
* iteration (up to MM_MAX_ORDER). We do this until we have reached
* the end which, again, is only guaranteed to be page aligned, and
* subsequently lower the order again.
*/
while (pos < end) {
struct mm_pool *const pool = &zone->pools[order];
clist_add(&pool->freelist, &pos->link);
pool->free_entries++;
/* only the first page in the order group is inserted into
* the freelist, but all of them need to be initialized */
for (u_int i = 0; i < (1u << order); i++) {
if (pos >= end)
panic("page %p out of range", pos);
if (atom_read(&pos->count) != 420)
panic("page %p double initialized\n", pos);
atom_init(&pos->count, 0);
atom_init(&pos->attr, 0);
pos++;
}
/*
* order
* ^
* | ._____._____. < MM_MAX_ORDER
* | .___| |
* start |._| |_.
* order > .| |. < end order
* |---------------------|----> pos
* start end
*/
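		/* for example, claiming [0x3000, 0x20000) with 4 KiB pages and
		 * MM_MAX_ORDER >= 3 yields blocks of order 0, 1, 2, 3, 3, 2, 1 */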
if (order < MM_MAX_ORDER && pos + (1 << (order + 1)) <= end) {
/* this makes the rising part of the graph */
order++;
} else if (order > 0 && pos + (1 << order) > end) {
/* this makes the abrupt downwards jump at the end of the graph */
while (--order) {
if (pos + (1 << order) <= end)
break;
}
}
}
}
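/**
 * @brief Initialize the buddy page frame allocator.
 * This sets up the per order bitmaps, marks every page as reserved, and
 * then claims all free areas from the boot memory allocator.
 */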
void paging_init(vm_paddr_t phys_end)
{
/* Sizes of the individual bitmaps per order, rounded up to the
* next full longword. We use the same bitmaps in all zones. */
usize bitmap_sizes[MM_NR_ORDERS];
/* size of all bitmaps combined */
usize bitmap_total_size = 0;
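	/* each order needs one bit per pair of buddies, i.e.
	 * (phys_end >> ORDER_SHIFT(order)) / 2 bits; rounding the block count
	 * up to LATOM_BIT * 2 keeps every bitmap a whole number of latom_t's */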
for (int order = 0; order < MM_NR_ORDERS; order++) {
usize pages = phys_end >> ORDER_SHIFT(order);
pages = align_ceil(pages, LATOM_BIT * 2);
usize bytes = pages / (CHAR_BIT * 2);
bitmap_sizes[order] = bytes;
bitmap_total_size += bytes;
}
page_debug("Reserving %zu bytes for page bitmaps\n", bitmap_total_size);
/*
* allocate memory for the bitmaps and zero them out
*/
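	/* __boot_pmalloc() apparently takes a log2 of the allocation size,
	 * so round the total bitmap size up to the next power of two */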
u_int bitmap_size_log2 = flsl((long)bitmap_total_size);
KASSERT(bitmap_size_log2 != 0);
bitmap_size_log2--; /* the bit index returned by flsl starts at 1 */
	if (bitmap_total_size != (1ul << bitmap_size_log2))
		bitmap_size_log2++; /* bitmap_total_size is not a power of 2, round up */
vm_paddr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL);
panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR,
"cannot allocate memory for the page bitmaps");
memset(__v(bitmap_start_phys), 0, bitmap_total_size);
/*
* initialize the pools
*/
for (int zone_index = 0; zone_index < ARRAY_SIZE(mm_zones); zone_index++) {
struct mm_zone *zone = &mm_zones[zone_index];
latom_init(&zone->free_count, 0);
/* we use the same bitmaps for all zones */
latom_t *bitmap_pos = __v(bitmap_start_phys);
for (int order = 0; order < MM_NR_ORDERS; order++) {
struct mm_pool *pool = &zone->pools[order];
pool->bitmap = bitmap_pos;
pool->free_entries = 0;
clist_init(&pool->freelist);
spin_init(&pool->lock);
			/* bitmap_sizes[] is in bytes, but bitmap_pos is a latom_t pointer */
			bitmap_pos += bitmap_sizes[order] / sizeof(latom_t);
}
}
/*
* mark *all* pages as reserved first
*
* XXX this is totally unnecessary and i'm only doing it because i'm
* too tired to work out an algorithm that finds all pages that are
* not in the _bmem_areas lists of the mm_zones
*
* if the reserved bit is set, all other fields in the page are invalid.
*/
for (u_long pfn = 0; pfn < phys_end >> PAGE_SHIFT; pfn++) {
/* This is merely an optimization to simplify checking whether
* two buddies can be coalesced into one. In reality, the
* reference count is invalid because the page is reserved. */
atom_init(&vm_page_array[pfn].count, 420);
atom_init(&vm_page_array[pfn].attr, _PGA_RSVD_MASK);
vm_page_array[pfn].pfn = pfn;
}
/*
* populate the freelists
*/
for (int i = 0; i < ARRAY_SIZE(mm_zones); i++) {
struct mm_zone *zone = &mm_zones[i];
struct _bmem_area *area, *tmp;
clist_foreach_entry_safe(&zone->_bmem_areas, area, tmp, link) {
/* make sure the boot memory allocator cannot under any circumstances hand
* out pages from this area anymore, even though that should be unnecessary */
clist_del(&area->link);
claim_bmem_area(zone, area);
zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
}
}
}
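/*
 * Toggle the buddy pair bit for pfn at the given order.  Each pair of
 * order-n buddies shares a single bit in the order-n bitmap; the return
 * value is presumably the bit's previous state (that is how page_free()
 * uses it to decide whether the buddy is still allocated).
 */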
static inline bool pg_flip_bit(struct mm_zone *zone, u_long pfn, u_int order)
{
usize bit = pfn >> (order + 1);
latom_t *bitmap = &zone->pools[order].bitmap[bit / LATOM_BIT];
return latom_flip_bit(bitmap, (int)(bit % LATOM_BIT));
}
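/**
 * @brief Allocate a block of (1 << order) contiguous pages.
 * Falls back to the next lower memory zone if the requested one is
 * exhausted, and returns nil if that fails as well.
 */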
vm_page_t page_alloc(u_int order, enum mflags flags)
{
if (order > MM_MAX_ORDER) {
page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags);
return nil;
}
struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)];
long count_after;
try_next_zone:
count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order);
if (count_after < zone->thrsh.emerg) {
if (count_after < 0 || !(flags & _M_EMERG)) {
latom_add(&zone->free_count, (1 << order));
/* if we can't allocate from ZONE_NORMAL, fall back to ZONE_DMA */
if (zone > &mm_zones[0]) {
zone--;
goto try_next_zone;
} else {
return nil;
}
}
}
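	/* interrupt delivery stays off while a pool lock is held, presumably so
	 * the pools can also be used from interrupt context without deadlocking */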
register_t cpuflags = read_flags();
/*
* Search for a free page. Start looking at the freelist for the
* requested order, and if it's empty, go over to the next higher order.
	 * Repeat until we find a page, or we've reached the highest order.
*/
vm_page_t page = nil;
u_int page_order = order;
while (page == nil && page_order < MM_NR_ORDERS) {
struct mm_pool *pool = &zone->pools[page_order];
disable_intr();
spin_lock(&pool->lock);
if (pool->free_entries > 0) {
page = clist_del_first_entry(&pool->freelist, typeof(*page), link);
/* increment the reference count while we hold the lock on the pool,
* so that no other processor can try to coalesce this block if its
			 * buddy is being freed (coalescing is only possible if the buddy
* has a reference count of zero, and while holding the pool lock) */
page_get(page);
pool->free_entries--;
} else {
page_order++;
}
spin_unlock(&pool->lock);
intr_restore(cpuflags);
}
if (page == nil) {
if (zone > &mm_zones[0]) {
/*
* If we reach this, the current zone technically had enough free
* pages for the allocation, but those pages were split up into
* smaller chunks rather than a contiguous area. However, we don't
* give up quite yet: If possible, we fall back to a lower memory
* zone (ZONE_NORMAL -> ZONE_DMA) and start over from the top.
*/
zone--;
goto try_next_zone;
} else {
return nil;
}
}
/*
* if we found a page, check if we need to split it up
* (which is the case if we took one from a higher order freelist)
*/
usize pfn = pg2pfn(page);
page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n",
order, pfn, page_order);
pg_flip_bit(zone, pfn, page_order);
/* split the page and insert the upper halves into the
* respective freelist until we reach the requested order */
while (page_order-- > order) {
page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order);
struct mm_pool *pool = &zone->pools[page_order];
vm_page_t buddy = page + (1 << page_order);
pga_set_order(buddy, page_order);
pg_flip_bit(zone, pfn + (1 << page_order), page_order);
disable_intr();
spin_lock(&pool->lock);
clist_add_first(&pool->freelist, &buddy->link);
pool->free_entries++;
spin_unlock(&pool->lock);
intr_restore(cpuflags);
}
for (u_int i = 0; i < (1 << order); i++)
pga_set_order(&page[i], order);
page_clear(page);
return page;
}
/*
* XXX get_page() and get_pages() shouldn't depend on the direct map
*
* XXX Do we need these at all? I don't think so.
*/
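/** @brief Allocate (1 << order) contiguous pages and return their direct map address, or nil on failure. */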
void *get_pages(u_int order, enum mflags flags)
{
vm_page_t page = page_alloc(order, flags);
if (page)
return pfn2vaddr(pg2pfn(page));
else
return nil;
}
void *get_page(enum mflags flags)
{
vm_page_t page = page_alloc(0, flags);
if (page)
return pfn2vaddr(pg2pfn(page));
else
return nil;
}
/*
* Two buddies can be merged if:
* - you currently hold the lock for the pool
* - they both have a reference count of zero
* - they are in the same zone
* - neither of them is reserved
*
* This is only called from within the critical section of free_pages(),
* so execution speed is prioritized over anything else.
*/
static __always_inline bool can_merge(vm_page_t page, vm_page_t buddy)
{
bool merge = (atom_read(&buddy->count) == 0);
/* we know that `page' is not reserved, because we
	 * check that flag before we even attempt to coalesce */
const unsigned mask = _PGA_RSVD_MASK | _PGA_ZONE_MASK;
merge &= (atom_read(&page->attr) & mask) == (atom_read(&buddy->attr) & mask);
return merge;
}
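/**
 * @brief Give a block of pages back to its zone, coalescing it with free
 * buddy blocks to the highest possible order before reinserting it.
 */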
void page_free(vm_page_t page)
{
register_t cpuflags = read_flags();
	u_int order = pga_order(page);
	u_long pfn = pg2pfn(page);
	PAGE_ASSERT(pfn % (1ul << order) == 0);
	PAGE_DEBUG_BLOCK {
		int old_count = atom_sub(&page->count, 1);
		if (old_count != 1) {
			if (old_count == 0)
				page_debug("double free of page %p\n", page);
			else
				page_debug("attempted to free page %p with references\n", page);
return;
}
} else {
atom_dec(&page->count);
}
struct mm_zone *zone = &mm_zones[pga_zone(page)];
latom_add(&zone->free_count, (1 << order));
	/* try to coalesce free buddy blocks until we've reached the highest order */
	while (order < MM_MAX_ORDER) {
		if (pg_flip_bit(zone, pfn, order))
			break;
		page_debug_noisy("join %p (order = %u)\n", pfn2vaddr(pfn), order);
		/* precompute all values we need inside the critical section
		 * to avoid blocking other CPUs for longer than necessary */
		vm_page_t buddy = &vm_page_array[pfn ^ (1ul << order)];
		vm_page_t low = &vm_page_array[pfn & ~(1ul << order)];
		struct mm_pool *current_order_pool = &zone->pools[order];
		disable_intr();
		spin_lock(&current_order_pool->lock);
		bool merged = can_merge(page, buddy);
		if (merged) {
			/* the buddy leaves its freelist; the merged block is
			 * inserted only once, after this loop has finished */
			clist_del(&buddy->link);
			current_order_pool->free_entries--;
			pga_set_order(buddy, order + 1);
			pga_set_order(page, order + 1);
		}
		spin_unlock(&current_order_pool->lock);
		intr_restore(cpuflags);
		if (!merged)
			break;
		/* continue coalescing from the lower buddy, which is the
		 * head of the merged block */
		page = low;
		pfn &= ~(1ul << order);
		order++;
	}
	/* finally, insert the (possibly coalesced) block into its freelist */
	struct mm_pool *pool = &zone->pools[order];
	disable_intr();
	spin_lock(&pool->lock);
	clist_add(&pool->freelist, &page->link);
	pool->free_entries++;
	spin_unlock(&pool->lock);
	intr_restore(cpuflags);
}