@@ -1,32 +1,25 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <arch/cpufunc.h>
#include <arch/page.h>
#include <gay/bits.h>
#include <gay/clist.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/mm.h>
#include <gay/mutex.h>
#include <gay/poison.h>
#include <gay/systm.h>
#include <gay/types.h>
#include <gay/util.h>
#include <gay/vm/page.h>
#include <limits.h>
#include <string.h>
#include <strings.h>
#ifndef __HAVE_HUGEPAGES
#error "Systems without huge pages are currently unsupported because i'm a dumb bitch"
#endif
#if DMAP_OFFSET % HUGEPAGE_SIZE != 0
#error "DMAP_OFFSET must be an integral multiple of HUGEPAGE_SIZE"
#endif
/* this should be impossible because arch/page.h must also define PAGE_SHIFT
 * and HUGEPAGE_SHIFT, meaning the two are by definition powers of 2 */
#if HUGEPAGE_SIZE % PAGE_SIZE != 0
#error "HUGEPAGE_SIZE must be an integral multiple of PAGE_SIZE"
#if DMAP_OFFSET % PAGE_SIZE != 0
#error "DMAP_OFFSET must be an integral multiple of PAGE_SIZE"
#endif
#if PAGE_SIZE % LONG_BIT != 0
@@ -40,6 +33,7 @@
#if CFG_DEBUG_PAGE_ALLOCS
#define PAGE_ASSERT(x)		KASSERT(x)
#define page_debug(msg, ...)	kprintf("[page] " msg, ##__VA_ARGS__)
#define PAGE_DEBUG_BLOCK
#if CFG_DEBUG_PAGE_ALLOCS_NOISY
#define page_debug_noisy(msg, ...) kprintf("[page] " msg, ##__VA_ARGS__)
#else
@@ -47,359 +41,419 @@
#endif
#else
#define PAGE_ASSERT(x)			({})
#define PAGE_DEBUG_BLOCK		if (0)
#define page_debug(msg, ...)		({})
#define page_debug_noisy(msg, ...)	({})
#endif
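/*
 * Illustrative usage sketch (hypothetical `ptr`): with CFG_DEBUG_PAGE_ALLOCS
 * enabled, a check wrapped in PAGE_DEBUG_BLOCK is compiled in and page_debug()
 * prints to the kernel log with a "[page] " prefix; with it disabled,
 * PAGE_DEBUG_BLOCK becomes `if (0)` and page_debug() expands to an empty
 * statement, so the entire block is dead code:
 *
 *	PAGE_DEBUG_BLOCK {
 *		if ((uintptr_t)ptr % PAGE_SIZE)
 *			page_debug("unaligned pointer %p\n", ptr);
 *	}
 */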
/**
 * We have cache levels for areas ranging from a single page up to a huge page
 * on a logarithmic scale.  Every level covers twice as many pages per entry
 * as the one below it, starting at one page per entry.  The effective result
 * is that a single entry in the cache on level L covers `(1 << L)` pages.
 */
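/*
 * For example (assuming 4 KiB base pages): a level 0 entry covers one page
 * (4 KiB), a level 1 entry covers 2 pages (8 KiB), a level 2 entry covers
 * 4 pages (16 KiB), and so on up to the huge page size.
 */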
#define CACHE_ORDERS		GET_PAGE_ORDERS
#define ORDER_SHIFT(order)	(PAGE_SHIFT + (order))
#define ORDER_SIZE(order)	(1 << ORDER_SHIFT(order))
/** @brief There is one of these for every cache order. */
struct cache_pool {
	/**
	 * @brief List of free blocks on this order of granularity.
	 * The individual entries sit right at the beginning of each free block,
	 * and are always aligned to `entry_size` bytes.
	 */
	struct clist freelist;
	/**
	 * @brief Bitmap that stores the allocated status of each entry.
	 * 1 means allocated, 0 means not.
	 */
	unsigned long *bitmap;
	/** @brief Number of items in `freelist`. */
	usize free_entries;
};
static struct cache_pool caches[CACHE_ORDERS];
static MTX(caches_lock);
/* these get set in kmalloc_init() */
uintptr_t phys_start;
uintptr_t phys_end;
uintptr_t __early_get_page(void)
{
	phys_end -= PAGE_SIZE;
	return phys_end;
}
/* this should be the same as LONG_BIT because latom_t is really just a
 * long wrapped in a struct, but my trust in compilers is exactly zero */
#define LATOM_BIT (sizeof(latom_t) * CHAR_BIT)
struct mm_zone mm_zones[MM_NR_ZONES];
static int sanity_check(void)
static inline u_int paddr_find_order(vm_paddr_t addr)
{
	KASSERT(phys_start < phys_end);
	KASSERT(phys_start == HUGEPAGE_ALIGN(phys_start));
	/* phys_end is only page aligned, see kmalloc_init() */
	KASSERT(phys_end == PAGE_ALIGN(phys_end));
	if ((phys_end - phys_start) < (32 * 1024 * 1024)) {
		kprintf("Less than 32 MB of usable RAM, this wouldn't go well\n");
		return 1;
	}
	int bit = ffsll((long long)addr) - 1;
	if (bit == -1 || bit > ORDER_SHIFT(MM_MAX_ORDER))
		bit = ORDER_SHIFT(MM_MAX_ORDER);
	return 0;
	KASSERT(bit >= PAGE_SHIFT);
	return bit - PAGE_SHIFT;
}
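/*
 * Worked example (assuming PAGE_SHIFT == 12 and a hypothetical address):
 * paddr_find_order(0x230000) -> the lowest set bit is bit 16, so the address
 * is 64 KiB aligned and the function returns 16 - 12 = 4, i.e. order 4.
 * paddr_find_order(0) -> ffsll() returns 0, so the result is clamped to
 * MM_MAX_ORDER.
 */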
/*
 * Map the entire physical memory into the direct contiguous area.
 * __early_map_page() might call __early_get_page() in order to allocate
 * new page table structures, which in turn shrinks the physical memory
 * size (see above).
 */
static inline void map_direct_area(void)
/** @brief Claim all free pages in one of the memory areas from the boot allocator. */
static inline void claim_bmem_pages(struct mm_zone *zone, struct _bmem_area *area)
{
#ifdef __HAVE_HUGEPAGES
	const usize step = HUGEPAGE_SIZE;
	const enum pflags flags = P_PRESENT | P_RW | P_HUGE;
#else
	const usize step = PAGE_SIZE;
	const enum pflags flags = P_PRESENT | P_RW;
#endif
	vm_paddr_t start = area->start;
	vm_paddr_t end = area->end;
	vm_paddr_t pos = start;
	vm_size_t nr_pages = (end - start) / PAGE_SIZE;
	latom_add(&zone->free_count, (long)nr_pages);
	struct vm_page *page = &vm_page_array[start >> PAGE_SHIFT];
	u_int order = paddr_find_order(start);
	/* make sure the boot memory allocator cannot under any circumstances hand
	 * out pages from this area anymore, even though that should be unnecessary */
	clist_del(&area->link);
	/*
	 * It might be necessary to use a volatile pointer to phys_end for this
	 * loop in case clang does The Optimization and caches its value for
	 * whatever reason, even though at least for x86 this is not the case
	 * (and i don't even think the C standard allows it when calling
	 * external functions in between, but still, Never Trust The Compiler).
	 * We want to insert pages at the highest possible order.  However, the
	 * start and end pointers of the area are only guaranteed to be page
	 * aligned.  Therefore, we start with the highest possible order based
	 * on the start address, and then increment the order in every loop
	 * iteration (up to MM_MAX_ORDER).  We do this until we have reached
	 * the end which, again, is only guaranteed to be page aligned, and
	 * subsequently lower the order again.
	 */
	for (uintptr_t pos = phys_start; pos <= phys_end - step; pos += step)
		__early_map_page(pos, __v(pos), flags);
	while (pos < end) {
		struct mm_pool *pool = &zone->pools[order];
		clist_add(&pool->freelist, &page->link);
		pool->free_entries++;
		/* only the first page in the order group is inserted into
		 * the freelist, but all of them need to be initialized */
		for (u_int i = 0; i < (1 << order); i++) {
			atom_init(&page[i].count, 0);
			page[i].flags = 0;
			page[i].order = 0;
		}
	vm_flush();
		/*
		 * order
		 *   ^
		 *   |           _________ < MM_MAX_ORDER
		 *   |          /         |
		 * start       /          \ < end order
		 * order > ___/           |
		 *   +--------|-----------|----> pos
		 *          start        end
		 */
		pos += ORDER_SIZE(order);
		page += (1 << order);
		if (order < MM_MAX_ORDER && pos + ORDER_SIZE(order) <= end) {
			/* this makes the rising part of the graph */
			order++;
		} else if (order > 0 && pos > end) {
			/* we have overshot, lower the order */
			pos -= ORDER_SIZE(order);
			page -= (1 << order);
			/* this makes the abrupt downwards jump at the end of the graph */
			while (--order) {
				if (pos + ORDER_SIZE(order) <= end) {
					pos += ORDER_SIZE(order);
					page += (1 << order);
					break;
				}
			}
		}
	}
}
/*
 * This function maps the entire physical memory into the direct region
 * (DMAP_START - DMAP_END) and sets up the caches.
 * The bitmaps are stored one after another at the end of physical memory, and
 *
 */
int pages_init(void)
void paging_init(vm_paddr_t phys_end)
{
	if (sanity_check() != 0)
		return 1;
	map_direct_area();
	/* Sizes of the individual bitmaps per order, rounded up to the
	 * next full longword.  We use the same bitmaps in all zones. */
	usize bitmap_sizes[MM_NR_ORDERS];
	/* size of all bitmaps combined */
	usize bitmap_total_size = 0;
	for (int order = 0; order < MM_NR_ORDERS; order++) {
		usize pages = phys_end >> ORDER_SHIFT(order + 1);
		pages = align_ceil(pages, LATOM_BIT * 2);
		usize bytes = pages / (CHAR_BIT * 2);
		bitmap_sizes[order] = bytes;
		bitmap_total_size += bytes;
	}
	/* phys_end gets aligned, as promised by the comment in kmalloc_init() */
	phys_end = align_floor(phys_end, HUGEPAGE_SIZE);
	usize phys_size = phys_end - phys_start;
	page_debug("Reserving %zu bytes for page bitmaps\n", bitmap_total_size);
	/*
	 * calculate the size of each bitmap, as well as their combined size
	 * allocate memory for the bitmaps and zero them out
	 */
	usize bitmap_bytes = 0;
	for (int i = 0; i < CACHE_ORDERS; i++) {
		usize bits = phys_size >> ORDER_SHIFT(i);
		bits = align_ceil(bits, LONG_BIT);
		bitmap_bytes += bits / 8;
	}
	page_debug("Page frame overhead = %zu bytes, %zu bytes total\n", bitmap_bytes, phys_size);
	u_int bitmap_size_log2 = flsl((long)bitmap_total_size);
	KASSERT(bitmap_size_log2 != 0);
	bitmap_size_log2--;	/* the bit index returned by flsl starts at 1 */
	if (bitmap_total_size ^ (1ul << bitmap_size_log2))
		bitmap_size_log2++;	/* bitmap_total_size is not a power of 2, round up */
	uintptr_t bitmap_start_phys = __boot_pmalloc(bitmap_size_log2, MM_ZONE_NORMAL);
	panic_if(bitmap_start_phys == BOOT_PMALLOC_ERR,
		 "cannot allocate memory for the page bitmaps");
	memset(__v(bitmap_start_phys), 0, bitmap_total_size);
	/*
	 * zero out all bitmaps
	 * initialize the pools
	 */
	uintptr_t bitmap_start_phys = phys_end - bitmap_bytes;
	unsigned long *bitmap_start = __v(bitmap_start_phys);
	memset(bitmap_start, 0, bitmap_bytes);
	for (int zone_index = 0; zone_index < ARRAY_SIZE(mm_zones); zone_index++) {
		struct mm_zone *zone = &mm_zones[zone_index];
		latom_t *bitmap_pos = __v(bitmap_start_phys);
		for (int order = 0; order < MM_NR_ORDERS; order++) {
			zone->pools[order].bitmap = bitmap_pos;
			clist_init(&zone->pools[order].freelist);
			zone->pools[order].free_entries = 0;
			latom_init(&zone->free_count, 0);
			bitmap_pos += bitmap_sizes[order];
		}
	}
	/*
	 * populate the remaining members of the cache_pool structures and
	 * preallocate entries that can't be handed out (i.e. the cache bitmaps)
	 * mark *all* pages as reserved first
	 *
	 * XXX this is totally unnecessary and i'm only doing it because i'm
	 * too tired to work out an algorithm that finds all pages that are
	 * not in the _bmem_areas lists of the mm_zones
	 *
	 * if the reserved bit is set, all other fields in the page are invalid.
	 */
	unsigned long *bitmap_pos = bitmap_start;
	for (int i = 0; i < CACHE_ORDERS; i++) {
		/* total amount of entries on this level */
		usize total_bits = phys_size >> ORDER_SHIFT(i);
		/* number of entries on this level that the bitmap itself takes up */
		usize wasted_bits = bitmap_bytes >> ORDER_SHIFT(i);
		if (wasted_bits == 0)
			wasted_bits = 1;
		bit_set_range(bitmap_pos, total_bits - wasted_bits, wasted_bits);
		caches[i].bitmap = bitmap_pos;
		bitmap_pos += total_bits / LONG_BIT;
		clist_init(&caches[i].freelist);
		caches[i].free_entries = 0;
	for (usize i = 0; i < phys_end >> PAGE_SHIFT; i++) {
		/* This is merely an optimization to simplify checking whether
		 * two buddies can be coalesced into one.  In reality, the
		 * reference count is invalid because the page is reserved. */
		atom_init(&vm_page_array[i].count, 1);
		vm_page_array[i].flags = PG_RESERVED;
	}
	/* kheap_start and kheap_end are globals */
	kheap_start = __v(phys_start);
	kheap_end = align_floor(bitmap_start, HUGEPAGE_SIZE);
	/*
	 * populate the freelist on the highest order, all orders beneath it
	 * stay empty until one of the large blocks gets split up
	 * populate the freelists
	 */
	struct cache_pool *high_pool = &caches[CACHE_ORDERS - 1];
	usize step = 1 << ORDER_SHIFT(CACHE_ORDERS - 1);
	for (void *pos = kheap_start; pos < kheap_end; pos += step) {
		struct clist *entry = pos;
		clist_add(&high_pool->freelist, entry);
		high_pool->free_entries++;
	for (int i = 0; i < ARRAY_SIZE(mm_zones); i++) {
		struct mm_zone *zone = &mm_zones[i];
		struct _bmem_area *area, *tmp;
		clist_foreach_entry_safe(&zone->_bmem_areas, area, tmp, link) {
			claim_bmem_pages(zone, area);
		}
		zone->thrsh.emerg = latom_read(&zone->free_count) / CFG_PAGE_EMERG_DENOM;
		if (zone->thrsh.emerg > CFG_PAGE_EMERG_MAX)
			zone->thrsh.emerg = CFG_PAGE_EMERG_MAX;
	}
	return 0;
}
/**
 * @brief Split a block and return the lower half.
 * The block is assumed to already have been removed from its freelist.
 * The high half (i.e. the block that is *not* returned) is inserted into the
 * freelist one level below `level`.
 *
 * @param ptr Pointer to the block
 * @param level Current level of the block
 *	(`ptr` must be aligned to `1 << level` pages)
 */
static void *split_buddy(void *ptr, int level);
/**
 * @brief Attempt to coalesce a block with its buddy.
 * If coalescing is possible, the buddy is removed from its freelist at `order`.
 *
 * @param ptr Pointer to the block
 * @param order Cache order, must be less than `CACHE_ORDERS - 1` (because you
 *	can't join blocks at the highest cache order)
 * @return The joined block, or `nil` if coalescing was not possible
 */
static void *try_join_buddy(void *ptr, int order);
static inline usize get_bit_number(void *ptr, int order)
static inline bool pg_flip_bit(struct mm_zone *zone, u_long pfn, u_int order)
{
	return ((uintptr_t)ptr - (uintptr_t)kheap_start) >> ORDER_SHIFT(order);
	usize bit = pfn >> (order + 1);
	latom_t *bitmap = &zone->pools[order].bitmap[bit / LATOM_BIT];
	return latom_flip_bit(bitmap, (int)(bit % LATOM_BIT));
}
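/*
 * Index arithmetic sketch with hypothetical values: the bit index is
 * `pfn >> (order + 1)`, so a block and its buddy share a single bit.
 * Assuming LATOM_BIT == 64, pfn 0x150 at order 2 gives bit 0x150 >> 3 = 42,
 * which lands in word 42 / 64 = 0 of the order-2 bitmap at bit position
 * 42 % 64 = 42; its buddy (pfn 0x154) maps to the exact same bit.
 */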
void *get_pages(int order, enum mflags flags)
__malloc_like
static void *__get_pages(u_int order, enum mflags flags)
{
	PAGE_ASSERT(order >= 0);
	struct mm_zone *zone = &mm_zones[_M_ZONE_INDEX(flags)];
	if (order >= GET_PAGE_ORDERS) {
	if (order > MM_MAX_ORDER) {
		page_debug("get_pages(%d, %#08x): Order too high!\n", order, flags);
		return nil;
	}
	if (flags & M_NOWAIT) {
		kprintf("get_pages(): M_NOWAIT requested, this is not implemented yet :(\n");
		return nil;
	u_long count_after = latom_sub(&zone->free_count, (1 << order)) - (1 << order);
	if (count_after < zone->thrsh.emerg) {
		if (count_after < 0 || !(flags & _M_EMERG)) {
			latom_add(&zone->free_count, (1 << order));
			return nil;
		}
	}
	mtx_lock(&caches_lock);
	struct clist *entry = nil;
	int entry_order;
	for (entry_order = order; entry_order < CACHE_ORDERS; entry_order++) {
		if (caches[entry_order].free_entries > 0) {
			entry = caches[entry_order].freelist.next;
			break;
	register_t cpuflags = read_flags();
	/*
	 * Search for a free page.  Start looking at the freelist for the
	 * requested order, and if it's empty, go over to the next higher order.
	 * Repeat until we find a page, or we've reached the highest order.
	 */
	vm_page_t page = nil;
	u_int page_order = order;
	while (page == nil && page_order < MM_NR_ORDERS) {
		struct mm_pool *pool = &zone->pools[page_order];
		disable_intr();
		spin_lock(&pool->lock);
		if (pool->free_entries > 0) {
			page = clist_del_first_entry(&pool->freelist, typeof(*page), link);
			/* increment the reference count while we hold the lock on the pool,
			 * so that no other processor can try to coalesce this block if its
			 * buddy is being freed (coalescing is only possible if the buddy
			 * has a reference count of zero, and while holding the pool lock) */
			page_get(page);
			pool->free_entries--;
		} else {
			page_order++;
		}
		spin_unlock(&pool->lock);
		intr_restore(cpuflags);
	}
	if (entry_order != CACHE_ORDERS) {
		clist_del(entry);
		caches[entry_order].free_entries--;
		usize bit_number = get_bit_number(entry, entry_order);
		while (entry_order > order) {
			entry = split_buddy(entry, entry_order);
			bit_set(caches[entry_order].bitmap, bit_number);
			entry_order--;
			bit_number <<= 1;
	/*
	 * if we found a page, check if we need to split it up
	 * (which is the case if we took one from a higher order freelist)
	 */
	if (page != nil) {
		usize pfn = pg2pfn(page);
		page_debug_noisy("alloc order %u, split pfn %#lx from order %u\n",
				 order, pfn, page_order);
		pg_flip_bit(zone, pfn, page_order);
		/* split the page and insert the upper halves into the
		 * respective freelist until we reach the requested order */
		while (page_order-- > order) {
			page_debug_noisy("split %p (order = %u)\n", pfn2vaddr(pfn), page_order);
			struct mm_pool *pool = &zone->pools[page_order];
			vm_page_t buddy = page + (1 << page_order);
			buddy->order = page_order;
			pg_flip_bit(zone, pfn + (1 << page_order), page_order);
			disable_intr();
			spin_lock(&pool->lock);
			clist_add_first(&pool->freelist, &buddy->link);
			pool->free_entries++;
			spin_unlock(&pool->lock);
			intr_restore(cpuflags);
		}
		bit_set(caches[order].bitmap, bit_number);
#if CFG_POISON_PAGES
		memset(entry, 'a', 1 << ORDER_SHIFT(order));
#endif
		page->order = order;
		void *vaddr = pfn2vaddr(pfn);
		return vaddr;
	} else {
		return nil;
	}
}
	mtx_unlock(&caches_lock);
	return (void *)entry;
/* faster memset for whole pages */
static inline void init_pages(u_long *start, u_long val, u_int order)
{
	u_long *end = start + (ORDER_SIZE(order) / sizeof(*start));
	do {
		*start++ = val;
	} while (start != end);
}
void free_pages(void *ptr)
void *get_pages(u_int order, enum mflags flags)
{
#if CFG_DEBUG_PAGE_ALLOCS
	if ((uintptr_t)ptr % PAGE_SIZE) {
		kprintf("free_pages(%p): unaligned ptr!\n", ptr);
		return;
	}
#endif
	void *pages = __get_pages(order, flags);
	if (sus_nil(ptr)) {
		page_debug("free_pages(%p): tried to free NULL!\n", ptr);
		return;
	}
#if CFG_POISON_PAGES
	if (pages != nil)
		init_pages(pages, PAGE_POISON_ALLOC, order);
#endif
	int order = 0;
	usize bit_number = get_bit_number(ptr, order);
	for (; order < CACHE_ORDERS; order++) {
		if (bit_tst(caches[order].bitmap, bit_number))
			break;
		bit_number >>= 1;
	}
	return pages;
}
	if (order == CACHE_ORDERS) {
		page_debug("free_pages(%p): double free!\n", ptr);
		return;
	}
	int original_order = order;
void *get_page(enum mflags flags)
{
	void *pages = __get_pages(0, flags);
	mtx_lock(&caches_lock);
#if CFG_POISON_PAGES
	if (pages != nil)
		init_pages(pages, PAGE_POISON_ALLOC, 0);
#endif
	while (order < CACHE_ORDERS - 1) {
		bit_clr(caches[order].bitmap, bit_number);
	return pages;
}
		void *tmp = try_join_buddy(ptr, order);
		if (tmp == nil)
			break;
void *get_zero_pages(u_int order, enum mflags flags)
{
	void *pages = __get_pages(order, flags);
		ptr = tmp;
		order++;
		bit_number >>= 1;
	}
	if (pages != nil)
		init_pages(pages, 0, order);
	if (order == CACHE_ORDERS - 1 && original_order != CACHE_ORDERS - 1)
		set_pflags(HUGEPAGE_ALIGN(ptr), P_HUGE | P_RW);
	return pages;
}
#if CFG_POISON_PAGES
	memset(ptr, 'A', 1 << ORDER_SHIFT(order));
#endif
void *get_zero_page(enum mflags flags)
{
	void *page = __get_pages(0, flags);
	clist_add(&caches[order].freelist, (struct clist *)ptr);
	caches[order].free_entries++;
	if (page != nil)
		init_pages(page, 0, 0);
	mtx_unlock(&caches_lock);
	return page;
}
static inline void *split_buddy(void *ptr, int level)
/*
 * Two buddies can be merged if:
 *  - you currently hold the lock for the pool
 *  - they both have a reference count of zero
 *  - they are in the same zone
 *  - neither of them is reserved
 *
 * This is only called from within the critical section of free_pages(),
 * so execution speed is prioritized over anything else.
 */
static __always_inline bool can_merge(vm_page_t page, vm_page_t buddy)
{
#if CFG_DEBUG_PAGE_ALLOCS
	if ((uintptr_t)ptr % (1 << ORDER_SHIFT(level))) {
		kprintf("split_buddy(ptr = %p, level = %d): unaligned ptr!\n", ptr, level);
		return nil;
	}
	if (level < 1 || level >= CACHE_ORDERS) {
		kprintf("split_buddy(ptr = %p, level = %d): invalid level!\n", ptr, level);
		return nil;
	}
#endif
	bool merge = (atom_read(&buddy->count) == 0);
	struct clist *high_buddy = ptr + (1 << ORDER_SHIFT(level - 1));
	clist_add(&caches[level - 1].freelist, high_buddy);
	caches[level - 1].free_entries++;
	/* we know that `page` doesn't have PG_RESERVED set,
	 * because we check that flag before anything else */
	const unsigned mask = PG_RESERVED | PG_DMA;
	merge &= (page->flags & mask) == (buddy->flags & mask);
	page_debug_noisy("split (%p:%p), lvl=%d\n", ptr, (void *)high_buddy, level);
	return ptr;
	return merge;
}
static void *try_join_buddy(void *ptr, int order)
void free_pages(void *ptr)
{
	const usize entry_size = 1 << ORDER_SHIFT(order);
#if CFG_DEBUG_PAGE_ALLOCS
	if ((uintptr_t)ptr % entry_size) {
		kprintf("try_join_buddy(%p, %d): unaligned ptr!\n", ptr, order);
		return nil;
	PAGE_DEBUG_BLOCK {
		if (ptr < DMAP_START || ptr >= DMAP_END) {
			panic("free_pages(%p): not in DMAP region\n", ptr);
		}
	/* order must be < CACHE_ORDERS - 1 because you
	 * can't join blocks on the topmost order */
	if (order >= CACHE_ORDERS - 1) {
		kprintf("try_join_buddy(%p, %d): order >= CACHE_ORDERS - 1!\n", ptr, order);
		return nil;
	}
#endif
	}
	/*
	 * Test whether the buddy block is allocated and return nil if it is.
	 * entry_size is a power of 2, so we can quickly get to the buddy block
	 * with a cheap XOR of the address and the entry size without the need
	 * for any if branches.
	 */
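	/*
	 * Worked example with hypothetical numbers (assuming 4 KiB pages):
	 * at order 1, entry_size is 0x2000, so a block at offset 0x4000 has
	 * its buddy at 0x4000 ^ 0x2000 = 0x6000, and the block at 0x6000 has
	 * its buddy back at 0x6000 ^ 0x2000 = 0x4000.
	 */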
	uintptr_t buddy = (uintptr_t)ptr ^ entry_size;
	usize buddy_bitnum = get_bit_number((void *)buddy, order);
	if (bit_tst(caches[order].bitmap, buddy_bitnum))
		return nil;
	register_t cpuflags = read_flags();
	page_debug_noisy("join (%p:%p), order=%d\n", ptr, (void *)buddy, order);
	vm_page_t page = vaddr2pg(ptr);
	panic_if(page->flags & PG_RESERVED, "tried to free reserved page %p", ptr);
	/* If the buddy is free, we remove it from the freelist ... */
	clist_del((struct clist *)buddy);
	caches[order].free_entries--;
	u_int order = page->order;
	PAGE_ASSERT((uintptr_t)ptr % ORDER_SIZE(order) == 0);
	u_long pfn = vaddr2pfn(ptr);
	/*
	 * ... and return a pointer to the coalesced block.
	 * We use the same trick as above to get to the even (lower) block, just
	 * that this time we're zeroing the bit out rather than flipping it.
	 */
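	/*
	 * Continuing the hypothetical order-1 example from above: clearing the
	 * entry_size bit with `& ~0x2000` maps both 0x4000 and 0x6000 to
	 * 0x4000, the start of the coalesced 0x4000-byte block.
	 */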
	uintptr_t even = (uintptr_t)ptr & ~entry_size;
	return (void *)even;
#if CFG_POISON_PAGES
	init_pages(ptr, PAGE_POISON_FREE, order);
#endif
	int old_count = atom_sub(&page->count, 1);
	if (old_count != 1) {
		if (old_count == 0)
			panic("double free of page %p", ptr);
		else
			panic("attempted to free page %p with references", ptr);
	}
	struct mm_zone *zone;
	if (page->flags & PG_DMA)
		zone = &mm_zones[MM_ZONE_DMA];
	else
		zone = &mm_zones[MM_ZONE_NORMAL];
	latom_add(&zone->free_count, (1 << order));
	/* try to coalesce free buddy blocks until we've reached the highest order */
	while (order < MM_MAX_ORDER) {
		if (pg_flip_bit(zone, pfn, order))
			break;
		page_debug_noisy("join %p (order = %u)\n", pfn2vaddr(pfn), order);
		/* precompute all values we need inside the critical section
		 * to avoid blocking other CPUs for longer than necessary */
		vm_page_t buddy = &vm_page_array[pfn ^ (1ul << order)];
		vm_page_t low = &vm_page_array[pfn & ~(1ul << order)];
		struct mm_pool *current_order_pool = &zone->pools[order];
		struct mm_pool *next_order_pool = &zone->pools[order + 1];
		disable_intr();
		spin_lock(&current_order_pool->lock);
		if (can_merge(page, buddy)) {
			clist_del(&buddy->link);
			current_order_pool->free_entries--;
			buddy->order = order + 1;
			page->order = order + 1;
			clist_add(&next_order_pool->freelist, &low->link);
			next_order_pool->free_entries++;
		} else {
			order = MM_MAX_ORDER; /* break out of the loop */
		}
		spin_unlock(&current_order_pool->lock);
		intr_restore(cpuflags);
		page = low;
		order++;
	}
	/* finally, we need to insert the page at its freelist */
	struct mm_pool *pool = &zone->pools[order];
	disable_intr();
	spin_lock(&pool->lock);
	clist_add(&pool->freelist, &page->link);
	pool->free_entries++;
	spin_unlock(&zone->pools[order].lock);
	intr_restore(cpuflags);
}