/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <arch/cpufunc.h>
#include <arch/page.h>
#include <gay/clist.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/mm.h>
#include <gay/mutex.h>
#include <gay/poison.h>
#include <gay/systm.h>
#include <gay/types.h>
#include <gay/util.h>
#include <gay/vm/page.h>
#include <limits.h>
#include <string.h>
#include <strings.h>
#if DMAP_OFFSET % PAGE_SIZE != 0
#error "DMAP_OFFSET must be an integral multiple of PAGE_SIZE"
#endif
#if PAGE_SIZE % LONG_BIT != 0
#error "PAGE_SIZE must be an integral multiple of LONG_BIT"
#endif
#if __SIZEOF_POINTER__ != __SIZEOF_LONG__
#error "long must be as wide as a pointer"
#endif
#if CFG_DEBUG_PAGE_ALLOCS
# define PAGE_ASSERT(x) KASSERT(x)
# define page_debug(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
# define PAGE_DEBUG_BLOCK
# if CFG_DEBUG_PAGE_ALLOCS_NOISY
# define page_debug_noisy(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
# else
# define page_debug_noisy(msg, ...) ({})
# endif
#else
# define PAGE_ASSERT(x) ({})
# define PAGE_DEBUG_BLOCK if (0)
# define page_debug(msg, ...) ({})
# define page_debug_noisy(msg, ...) ({})
#endif
#define ORDER_SHIFT(order) (PAGE_SHIFT + (order))
#define ORDER_SIZE(order) (1 << ORDER_SHIFT(order))
/* this should be the same as LONG_BIT because latom_t is really just a
* long wrapped in a struct, but my trust in compilers is exactly zero */
#define LATOM_BIT (sizeof(latom_t) * CHAR_BIT)
struct mm_zone mm_zones[MM_NR_ZONES];
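/**
 * @brief Get the highest buddy order that a physical address is aligned to.
 * The returned order is capped at MM_MAX_ORDER; `addr` must be page aligned.
 */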
static inline u_int paddr_find_order(vm_paddr_t addr)
{
int bit = ffsll((long long)addr) - 1;
if (bit == -1 || bit > ORDER_SHIFT(MM_MAX_ORDER))
bit = ORDER_SHIFT(MM_MAX_ORDER);
KASSERT(bit >= PAGE_SHIFT);
return bit - PAGE_SHIFT;
}
/** @brief Claim all free pages in one of the memory areas from the boot allocator. */
static inline void claim_bmem_area(struct mm_zone *zone, struct _bmem_area *area)
{
vm_paddr_t start = area->start;
vm_paddr_t end = area->end;
vm_paddr_t pos = start;
	vm_size_t nr_pages = (end - start) / PAGE_SIZE;
latom_add(&zone->free_count, (long)nr_pages);
struct vm_page *page = &vm_page_array[start >> PAGE_SHIFT];
u_int order = paddr_find_order(start);
/* make sure the boot memory allocator cannot under any circumstances hand
* out pages from this area anymore, even though that should be unnecessary */
clist_del(&area->link);
/*
* We want to insert pages at the highest possible order. However, the
* start and end pointers of the area are only guaranteed to be page
* aligned. Therefore, we start with the highest possible order based
* on the start address, and then increment the order in every loop
* iteration (up to MM_MAX_ORDER). We do this until we have reached
* the end which, again, is only guaranteed to be page aligned, and
* subsequently lower the order again.
*/
	while (pos < end) {
		/* lower the order again when we approach the end of the area, so
		 * that the block does not extend beyond it (this makes the abrupt
		 * downwards jump at the end of the graph below) */
		while (order > 0 && pos + ORDER_SIZE(order) > end)
			order--;

		struct mm_pool *pool = &zone->pools[order];
		clist_add(&pool->freelist, &page->link);
		pool->free_entries++;
		/* only the first page in the order group is inserted into
		 * the freelist, but all of them need to be initialized */
		for (u_int i = 0; i < (1 << order); i++) {
			atom_init(&page[i].count, 0);
			atom_init(&page[i].attr, 0);
		}
		/*
		 * order
		 *   ^
		 *   |       ._____._____. < MM_MAX_ORDER
		 *   |   .___|           |
		 * start |._|            |_.
		 * order > .|            |. < end order
		 *   |---------------------|----> pos
		 *   start                end
		 */
		pos += ORDER_SIZE(order);
		page += (1 << order);
		/* this makes the rising part of the graph */
		if (order < MM_MAX_ORDER)
			order++;
	}
}
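/*
 * Bring up the buddy page frame allocator: reserve and zero the order
 * bitmaps, initialize the per-zone pools, mark every page as reserved,
 * and then claim the free areas handed over by the boot page allocator.
 */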
void paging_init(vm_paddr_t phys_end)
{
/* Sizes of the individual bitmaps per order, rounded up to the
* next full longword. We use the same bitmaps in all zones. */
usize bitmap_sizes[MM_NR_ORDERS];
/* size of all bitmaps combined */
usize bitmap_total_size = 0;
	for (int order = 0; order < MM_NR_ORDERS; order++) {
		/* one bit per buddy *pair*, i.e. per two blocks of this order */
		usize bits = phys_end >> ORDER_SHIFT(order + 1);
		bits = align_ceil(bits, LATOM_BIT);
		usize bytes = bits / CHAR_BIT;
		bitmap_sizes[order] = bytes;
		bitmap_total_size += bytes;
	}
page_debug("Reserving %zu bytes for page bitmaps\n", bitmap_total_size);
/*
* allocate memory for the bitmaps and zero them out
*/
u_int bitmap_size_log2 = flsl((long)bitmap_total_size);
KASSERT(bitmap_size_log2 != 0);
bitmap_size_log2--; /* the bit index returned by flsl starts at 1 */
if (bitmap_total_size ^ (1ul << bitmap_size_log2))
bitmap_size_log2++; /* bitmap_total_size is not a power of 2, round up */
uintptr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL);
panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR,
"cannot allocate memory for the page bitmaps");
memset(__v(bitmap_start_phys), 0, bitmap_total_size);
/*
* initialize the pools
*/
for (int zone_index = 0; zone_index < ARRAY_SIZE(mm_zones); zone_index++) {
struct mm_zone *zone = &mm_zones[zone_index];
latom_t *bitmap_pos = __v(bitmap_start_phys);
		latom_init(&zone->free_count, 0);
		for (int order = 0; order < MM_NR_ORDERS; order++) {
			zone->pools[order].bitmap = bitmap_pos;
			clist_init(&zone->pools[order].freelist);
			zone->pools[order].free_entries = 0;
			/* bitmap_sizes[] is in bytes, but bitmap_pos is a latom_t pointer */
			bitmap_pos += bitmap_sizes[order] / sizeof(latom_t);
		}
}
/*
* mark *all* pages as reserved first
*
* XXX this is totally unnecessary and i'm only doing it because i'm
* too tired to work out an algorithm that finds all pages that are
* not in the _bmem_areas lists of the mm_zones
*
* if the reserved bit is set, all other fields in the page are invalid.
*/
for (usize i = 0; i < phys_end >> PAGE_SHIFT; i++) {
/* This is merely an optimization to simplify checking whether
* two buddies can be coalesced into one. In reality, the
* reference count is invalid because the page is reserved. */
atom_init(&vm_page_array[i].count, 1);
atom_init(&vm_page_array[i].attr, _PGA_RSVD_MASK);
}
/*
* populate the freelists
*/
for (int i = 0; i < ARRAY_SIZE(mm_zones); i++) {
struct mm_zone *zone = &mm_zones[i];
struct _bmem_area *area, *tmp;
clist_foreach_entry_safe(&zone->_bmem_areas, area, tmp, link) {
claim_bmem_area(zone, area);
}
zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
}
}
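/**
 * @brief Toggle the buddy bitmap bit for the block at `pfn` of the given order.
 * There is one bit per buddy *pair*, which gets flipped every time a block of
 * that order is allocated or freed.  The bit is therefore clear exactly when
 * both buddies are in the same state (both free or both allocated).
 */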
static inline bool pg_flip_bit(struct mm_zone *zone, u_long pfn, u_int order)
{
usize bit = pfn >> (order + 1);
latom_t *bitmap = &zone->pools[order].bitmap[bit / LATOM_BIT];
return latom_flip_bit(bitmap, (int)(bit % LATOM_BIT));
}
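/**
 * @brief Allocate a contiguous block of pages (backend for get_pages() and friends).
 * Reserves the pages in the zone's free count, then takes the first block from
 * the lowest freelist that can satisfy the request and splits it back down to
 * the requested order, returning the buddies created by each split to their
 * respective freelists.
 */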
__malloc_like
static void *__get_pages(u_int order, enum mflags flags)
{
PAGE_ASSERT(order >= 0);
struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)];
if (order > MM_MAX_ORDER) {
page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags);
return nil;
}
	/* the free count can drop below zero here, so it must be signed */
	long count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order);
	if (count_after < (long)zone->thrsh.emerg) {
if (count_after < 0 || !(flags & _M_EMERG)) {
latom_add(&zone->free_count, (1 << order));
return nil;
}
}
register_t cpuflags = read_flags();
/*
* Search for a free page. Start looking at the freelist for the
* requested order, and if it's empty, go over to the next higher order.
	 * Repeat until we find a page, or until we've reached the highest order.
*/
vm_page_t page = nil;
u_int page_order = order;
while (page == nil && page_order < MM_NR_ORDERS) {
struct mm_pool *pool = &zone->pools[page_order];
disable_intr();
spin_lock(&pool->lock);
if (pool->free_entries > 0) {
page = clist_del_first_entry(&pool->freelist, typeof(*page), link);
			/* increment the reference count while we hold the lock on the pool,
			 * so that no other processor can try to coalesce this block if its
			 * buddy is being freed (coalescing is only possible if the buddy
			 * has a reference count of zero, and while holding the pool lock) */
page_get(page);
pool->free_entries--;
} else {
page_order++;
}
spin_unlock(&pool->lock);
intr_restore(cpuflags);
}
/*
* if we found a page, check if we need to split it up
* (which is the case if we took one from a higher order freelist)
*/
if (page != nil) {
usize pfn = pg2pfn(page);
page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n",
order, pfn, page_order);
pg_flip_bit(zone, pfn, page_order);
/* split the page and insert the upper halves into the
* respective freelist until we reach the requested order */
while (page_order-- > order) {
page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order);
struct mm_pool *pool = &zone->pools[page_order];
vm_page_t buddy = page + (1 << page_order);
pga_set_order(buddy, page_order);
pg_flip_bit(zone, pfn + (1 << page_order), page_order);
disable_intr();
spin_lock(&pool->lock);
clist_add_first(&pool->freelist, &buddy->link);
pool->free_entries++;
spin_unlock(&pool->lock);
intr_restore(cpuflags);
}
pga_set_order(page, order);
void *vaddr = pfn2vaddr(pfn);
return vaddr;
	} else {
		/* hand the reservation on the free count back, we didn't use it */
		latom_add(&zone->free_count, (1 << order));
		return nil;
	}
}
/* faster memset for whole pages */
static inline void init_pages(u_long *start, u_long val, u_int order)
{
u_long *end = start + (ORDER_SIZE(order) / sizeof(*start));
do {
*start++ = val;
} while (start != end);
}
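/*
 * The public entry points below only differ in how the fresh pages are
 * initialized: the regular variants fill them with the allocation poison
 * pattern (if CFG_POISON_PAGES is enabled), the zero variants clear them.
 */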
void *get_pages(u_int order, enum mflags flags)
{
void *pages = __get_pages(order, flags);
#if CFG_POISON_PAGES
if (pages != nil)
init_pages(pages, PAGE_POISON_ALLOC, order);
#endif
return pages;
}
void *get_page(enum mflags flags)
{
void *pages = __get_pages(0, flags);
#if CFG_POISON_PAGES
if (pages != nil)
init_pages(pages, PAGE_POISON_ALLOC, 0);
#endif
return pages;
}
void *get_zero_pages(u_int order, enum mflags flags)
{
void *pages = __get_pages(order, flags);
if (pages != nil)
init_pages(pages, 0, order);
return pages;
}
void *get_zero_page(enum mflags flags)
{
void *page = __get_pages(0, flags);
if (page != nil)
init_pages(page, 0, 0);
return page;
}
/*
* Two buddies can be merged if:
* - you currently hold the lock for the pool
* - they both have a reference count of zero
* - they are in the same zone
* - neither of them is reserved
*
* This is only called from within the critical section of free_pages(),
* so execution speed is prioritized over anything else.
*/
static __always_inline bool can_merge(vm_page_t page, vm_page_t buddy)
{
bool merge = (atom_read(&buddy->count) == 0);
/* we know that `page' is not reserved, because we
	 * check that flag before we even attempt to coalesce */
const unsigned mask = _PGA_RSVD_MASK | _PGA_ZONE_MASK;
merge &= (atom_read(&page->attr) & mask) == (atom_read(&buddy->attr) & mask);
return merge;
}
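/*
 * Give a block of pages back to its zone: poison it, drop the reference
 * count, coalesce it with its buddy as long as the buddy is free as well,
 * and finally insert the (possibly merged) block into the freelist of the
 * resulting order.
 */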
void free_pages(void *ptr)
{
PAGE_DEBUG_BLOCK {
if (ptr < DMAP_START || ptr >= DMAP_END) {
panic("free_pages(%p): not in DMAP region\n", ptr);
}
}
register_t cpuflags = read_flags();
vm_page_t page = vaddr2pg(ptr);
panic_if(pga_rsvd(page), "tried to free reserved page %p", ptr);
u_int order = pga_order(page);
PAGE_ASSERT((uintptr_t)ptr % ORDER_SIZE(order) == 0);
u_long pfn = vaddr2pfn(ptr);
#if CFG_POISON_PAGES
init_pages(ptr, PAGE_POISON_FREE, order);
#endif
	/* the reference count must be dropped unconditionally because coalescing
	 * relies on free pages having a count of zero; only the sanity checks
	 * are limited to debug builds */
	int old_count = atom_sub(&page->count, 1);
	PAGE_DEBUG_BLOCK {
		if (old_count != 1) {
			if (old_count == 0)
				page_debug("double free of %p\n", ptr);
			else
				page_debug("attempted to free %p with references\n", ptr);
			return;
		}
	}
struct mm_zone *zone = &mm_zones[pga_zone(page)];
latom_add(&zone->free_count, (1 << order));
	/* try to coalesce free buddy blocks until we've reached the highest order */
	while (order < MM_MAX_ORDER) {
		if (pg_flip_bit(zone, pfn, order))
			break;
		page_debug_noisy("join %p (order = %u)\n", pfn2vaddr(pfn), order);
		/* precompute all values we need inside the critical section
		 * to avoid blocking other CPUs for longer than necessary */
		vm_page_t buddy = &vm_page_array[pfn ^ (1ul << order)];
		vm_page_t low = &vm_page_array[pfn & ~(1ul << order)];
		struct mm_pool *current_order_pool = &zone->pools[order];
		disable_intr();
		spin_lock(&current_order_pool->lock);
		bool merged = can_merge(page, buddy);
		if (merged) {
			/* only the buddy is removed from its freelist here; the
			 * merged block is inserted after the loop has finished */
			clist_del(&buddy->link);
			current_order_pool->free_entries--;
			pga_set_order(buddy, order + 1);
			pga_set_order(page, order + 1);
		}
		spin_unlock(&current_order_pool->lock);
		intr_restore(cpuflags);
		if (!merged)
			break;
		/* continue coalescing with the head page of the merged block */
		page = low;
		pfn &= ~(1ul << order);
		order++;
	}
	/* finally, insert the page into the freelist of its (possibly new) order */
	struct mm_pool *pool = &zone->pools[order];
	disable_intr();
	spin_lock(&pool->lock);
	clist_add(&pool->freelist, &page->link);
	pool->free_entries++;
	spin_unlock(&pool->lock);
	intr_restore(cpuflags);
}