/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */

#include <arch/atom.h>
#include <arch/cpufunc.h>
#include <arch/page.h>

#include <gay/cdefs.h>
#include <gay/clist.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/ktrace.h>
#include <gay/mm.h>
#include <gay/poison.h>
#include <gay/systm.h>
#include <gay/types.h>
#include <gay/vm/page.h>

#include <string.h>

/*
 * XXX this implementation is still missing object caches
 */
#if CFG_POISON_SLABS
struct slab_poison {
	void *_pad;		/**< @brief That's where the freelist pointer is stored */
	void *alloc_source;	/**< @brief Code address that made the alloc call */
	u_long exact_size;
	u_long low_poison;
	u8 data[0];
	u_long high_poison[1];
};
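
/*
 * A poisoned entry is laid out as
 *
 *	_pad | alloc_source | exact_size | low_poison | data ... | high_poison
 *
 * where the high poison word is the first long after `data[exact_size]`
 * (rounded up to whole longs).  poison_after_alloc() fills everything from
 * low_poison through the high poison word with SLAB_POISON_ALLOC and, on the
 * next allocation, checks the freed area for SLAB_POISON_FREE to detect
 * use-after-free; poison_after_free() checks the two guard words for
 * out-of-bounds writes and refills the whole area with SLAB_POISON_FREE.
 */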

static void poison_after_alloc(struct slab_poison *poison, u_int exact_size, void *alloc_source);
static void poison_after_free(struct slab_poison *poison);
#endif

/**
 * @brief This header sits at the beginning of each slab.
 * The individual entries follow immediately after the struct itself.
 */
struct slab {
	struct clist link;	/* -> struct slab_pool::{empty,partial,full}_list */
	void **freelist;
	struct slab_pool *pool;
	/** @brief For `link` */
	spin_t lock;
	/**
	 * @brief Number of free entries.
	 * The slabs are sorted within their pool by this value, so that we
	 * always hand out entries from the most-used slabs first.  It can
	 * never exceed the number of entries per slab (which is bounded by a
	 * small multiple of `PAGE_SIZE`), so this saves a couple of bytes on
	 * systems where `int` is smaller than `usize`.
	 */
	u_int free_entries;
};

#define SLAB_OVERHEAD (sizeof(struct slab))

#if CFG_DEBUG_SLAB_ALLOCS
#define slab_debug(msg, ...) kprintf("[slab] " msg, ##__VA_ARGS__)
#define SLAB_DEBUG_BLOCK
#define SLAB_ASSERT KASSERT
#if CFG_DEBUG_SLAB_ALLOCS_NOISY
#define slab_debug_noisy(msg, ...) kprintf("[slab] " msg, ##__VA_ARGS__)
#else
#define slab_debug_noisy(msg, ...) ({})
#endif
#else
#define SLAB_DEBUG_BLOCK if (0)
#define SLAB_ASSERT(x) ({})
#define slab_debug(msg, ...) ({})
#define slab_debug_noisy(msg, ...) ({})
#endif

struct slab_pool {
	const u_int entry_size;		/**< @brief Size of one entry in bytes */
	const int entries_per_slab;	/**< @brief Max number of entries per slab */
	atom_t total_used;		/**< @brief Total allocated entries */
	const u_int page_order;		/**< @brief Order passed to `get_pages()` */
	struct clist empty_list;	/* -> struct slab::link */
	struct clist partial_list;	/* -> struct slab::link */
	struct clist full_list;		/* -> struct slab::link */
	spin_t empty_lock;		/**< @brief Lock for `empty_list` */
	spin_t partial_lock;		/**< @brief Lock for `partial_list` */
	spin_t full_lock;		/**< @brief Lock for `full_list` */
	atom_t empty_count;		/**< @brief Number of empty slabs */
	atom_t partial_count;		/**< @brief Number of partially empty slabs */
	atom_t full_count;		/**< @brief Number of full slabs */
};
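
/*
 * Apart from the brief window in which kmalloc() holds a slab privately,
 * every slab sits on exactly one of the three lists of its pool:
 * kmalloc() takes entries from slabs on partial_list first, then from
 * empty_list, and only calls slab_create() when both are empty.  A slab whose
 * last free entry was just handed out moves to full_list, and kfree() moves a
 * slab back to empty_list once all of its entries have been returned.
 */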

/*
 * Fun size calculations, because the slab header takes up some overhead at
 * the beginning of each page.  Ideally we would cram all the info we need
 * into struct vm_page, because then the individual slab entry sizes could be
 * even powers of two and perfectly aligned.
 * (A worked example of what these macros produce follows POOL_DEFINE() below.)
 */
#define _MIN1(x) ((x) < 1 ? 1 : (x))
#define POOL_ENTRY_SIZE(sz) (((sz) - (SLAB_OVERHEAD / _MIN1(PAGE_SIZE / (sz)))) & ~0xfu)
#define POOL_ENTRIES_PER_TABLE(sz) \
	_MIN1((PAGE_SIZE - SLAB_OVERHEAD) / POOL_ENTRY_SIZE(sz))

#define POOL_DEFINE(sz) {					\
	.entry_size		= POOL_ENTRY_SIZE(sz),		\
	.entries_per_slab	= POOL_ENTRIES_PER_TABLE(sz),	\
	.total_used		= ATOM_DEFINE(0),		\
	.page_order		= ((sz) - 1) / PAGE_SIZE,	\
	.empty_lock		= SPIN_DEFINE,			\
	.partial_lock		= SPIN_DEFINE,			\
	.full_lock		= SPIN_DEFINE,			\
	.empty_count		= ATOM_DEFINE(0),		\
	.partial_count		= ATOM_DEFINE(0),		\
	.full_count		= ATOM_DEFINE(0),		\
}
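
/*
 * A worked example of what POOL_DEFINE() evaluates to, assuming 4 KiB pages
 * and a 40-byte slab header (the real SLAB_OVERHEAD depends on the target):
 *
 *	POOL_DEFINE(256):  entry_size       = (256 - 40/16) & ~0xf = 240
 *	                   entries_per_slab = (4096 - 40) / 240    = 16
 *	                   page_order       = 255 / 4096           = 0 (1 page)
 *	POOL_DEFINE(8192): entry_size       = (8192 - 40/1) & ~0xf = 8144
 *	                   entries_per_slab = _MIN1(0)             = 1
 *	                   page_order       = 8191 / 4096          = 1 (2 pages)
 */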

static struct slab_pool slab_pools_normal[] = {
	POOL_DEFINE(32),
	POOL_DEFINE(64),
	POOL_DEFINE(128),
	POOL_DEFINE(256),
	POOL_DEFINE(512),
	POOL_DEFINE(1024),
	POOL_DEFINE(2048),
	POOL_DEFINE(4096),
	POOL_DEFINE(8192),
	POOL_DEFINE(16384),
	POOL_DEFINE(32768),
	{ .entry_size = 0 } /* terminator */
};
static struct slab_pool slab_pools_dma[] = {
	POOL_DEFINE(32),
	POOL_DEFINE(64),
	POOL_DEFINE(128),
	POOL_DEFINE(256),
	POOL_DEFINE(512),
	POOL_DEFINE(1024),
	{ .entry_size = 0 } /* terminator */
};
#undef _MIN1 /* we don't wanna end up using this in actual code, do we? */

static struct slab_pool *slab_zone_pools[MM_NR_ZONES] = {
	[_M_ZONE_NORMAL]	= slab_pools_normal,
	[_M_ZONE_DMA]		= slab_pools_dma,
};

static struct slab *slab_create(struct slab_pool *pool, enum mflags flags);

void kmalloc_init(void)
{
	for (int i = 0; i < MM_NR_ZONES; i++) {
		struct slab_pool *pool = slab_zone_pools[i];

		while (pool->entry_size != 0) {
			clist_init(&pool->empty_list);
			clist_init(&pool->partial_list);
			clist_init(&pool->full_list);
			pool++;
		}
	}
}

void *kmalloc(usize size, enum mflags flags)
{
	if (size == 0)
		return nil;

#if CFG_POISON_SLABS
	size += sizeof(struct slab_poison);
#endif

	SLAB_DEBUG_BLOCK {
		if (!(flags & _M_NOWAIT) && in_irq()) {
			slab_debug("kmalloc() called from irq without M_NOWAIT "
				   "(caller: %p)\n", ktrace_return_addr());
			flags |= _M_NOWAIT;
		}
	}

	SLAB_ASSERT(_M_ZONE_INDEX(flags) < ARRAY_SIZE(slab_zone_pools));
	struct slab_pool *pool = slab_zone_pools[_M_ZONE_INDEX(flags)];
	while (pool->entry_size != 0) {
		if (pool->entry_size >= size)
			break;
		pool++;
	}

	if (pool->entry_size == 0) {
		slab_debug("Refusing to allocate %zu bytes in zone %d (limit is %u)\n",
			   size, _M_ZONE_INDEX(flags), pool[-1].entry_size);
		return nil;
	}

	slab_debug_noisy("alloc %zu bytes from zone %d, pool size %u\n",
			 size, _M_ZONE_INDEX(flags), pool->entry_size);

	/*
	 * Before locking a slab, we always remove it from its pool.
	 * This is far from optimal, because if multiple CPUs allocate from the
	 * same pool at the same time, we could end up creating several slabs
	 * with one used entry each (not to mention the overhead of the mostly
	 * unnecessary list deletions/insertions).  However, it allows me to be
	 * lazier when freeing unused slabs from a background thread since that
	 * thread knows for sure that once it has removed a slab from free_list,
	 * it can't possibly be used for allocations anymore.
	 * This is probably not worth the overhead, though.
	 */
	struct slab *slab = nil;

	/* try to use a slab that is already partially used first */
	register_t cpuflags = intr_disable();

	spin_lock(&pool->partial_lock);
	if (!clist_is_empty(&pool->partial_list)) {
		atom_dec(&pool->partial_count);
		slab = clist_del_first_entry(&pool->partial_list, typeof(*slab), link);
	}
	spin_unlock(&pool->partial_lock);

	if (slab == nil) {
		/* no partially used slab available, see if we have a completely free one */
		spin_lock(&pool->empty_lock);
		if (!clist_is_empty(&pool->empty_list)) {
			atom_dec(&pool->empty_count);
			slab = clist_del_first_entry(&pool->empty_list, typeof(*slab), link);
		}
		spin_unlock(&pool->empty_lock);

		if (slab == nil) {
			/* we're completely out of usable slabs, allocate a new one */
			intr_restore(cpuflags);
			slab = slab_create(pool, flags);
			if (slab == nil) {
				slab_debug("kernel OOM\n");
				return nil;
			}

			intr_disable();
		}
	}

	/* if we've made it to here, slab != nil and interrupts are disabled */
	spin_lock(&slab->lock);
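	/*
	 * Pop the first entry off the slab's in-band freelist: the first
	 * machine word of every free entry holds the address of the next free
	 * entry (or nil if it is the last one).
	 */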
	void *ret = slab->freelist;
	slab->freelist = *slab->freelist;
	if (--slab->free_entries == 0) {
		spin_lock(&pool->full_lock);
		clist_add(&pool->full_list, &slab->link);
		spin_unlock(&pool->full_lock);
		atom_inc(&pool->full_count);
	} else {
		spin_lock(&pool->partial_lock);
		clist_add(&pool->partial_list, &slab->link);
		spin_unlock(&pool->partial_lock);
		atom_inc(&pool->partial_count);
	}
	spin_unlock(&slab->lock);
	intr_restore(cpuflags);
	atom_inc(&pool->total_used);

#if CFG_POISON_SLABS
	struct slab_poison *poison = ret;
	poison_after_alloc(poison, size - sizeof(*poison), ktrace_return_addr());
	ret = poison->data;
#endif
	return ret;
}

void kfree(void *ptr)
{
	if (ptr == nil)
		return;

	SLAB_ASSERT(ptr >= DMAP_START && ptr < DMAP_END);
	vm_page_t page = vaddr2pg(ptr);
	SLAB_ASSERT(pga_slab(page));
	struct slab *slab = page->extra;
	struct slab_pool *pool = slab->pool;

#if CFG_POISON_SLABS
	struct slab_poison *poison = container_of(ptr, typeof(*poison), data);
	poison_after_free(poison);
	ptr = poison;
#endif

	register_t cpuflags = intr_disable();
	spin_lock(&slab->lock);

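	/*
	 * Put the entry back at the head of the slab's in-band freelist and,
	 * if the slab is now completely unused, move it from the partial list
	 * back to the empty list.
	 * XXX slabs sitting on the full list are not moved back to the partial
	 * list here, so their entries only become reusable once the entire
	 * slab has been freed.
	 */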
	*(void **)ptr = slab->freelist;
	slab->freelist = (void **)ptr;
	if (++slab->free_entries == pool->entries_per_slab) {
		spin_lock(&pool->partial_lock);
		clist_del(&slab->link);
		spin_unlock(&pool->partial_lock);
		atom_dec(&pool->partial_count);

		spin_lock(&pool->empty_lock);
		clist_add(&pool->empty_list, &slab->link);
		spin_unlock(&pool->empty_lock);
		atom_inc(&pool->empty_count);
	}

	spin_unlock(&slab->lock);
	atom_dec(&pool->total_used);
	intr_restore(cpuflags);
}

static struct slab *slab_create(struct slab_pool *pool, enum mflags flags)
{
	slab_debug_noisy("Creating new cache for entry_size %u\n", pool->entry_size);
	struct slab *slab = get_zero_pages(pool->page_order, flags);

	if (slab != nil) {
		vm_page_t page = vaddr2pg(slab);
		/* XXX it's probably sufficient to only do this for the lowest page */
		vm_page_foreach_in_order(page) {
			pga_set_slab(page, true);
			page->extra = slab;
		}

		spin_init(&slab->lock);
		slab->pool = pool;
		slab->free_entries = pool->entries_per_slab;

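		/*
		 * Build the in-band freelist back to front: the first word of
		 * every entry stores the address of the entry after it (nil
		 * for the last one), so the list needs no memory besides the
		 * free entries themselves.
		 */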
		void *prev = nil;
		void *end = (void *)slab + (1 << (pool->page_order + PAGE_SHIFT));
		void *pos = end;
		do {
			pos -= pool->entry_size;
			*(void **)pos = prev;
			prev = pos;
		} while (pos >= (void *)&slab[1] + pool->entry_size);
		slab->freelist = pos;
	}

	return slab;
}

#if CFG_POISON_SLABS
static inline void poison_after_alloc(struct slab_poison *poison, u_int exact_size,
				      void *alloc_source)
{
	u_int offset = align_ceil(poison->exact_size, sizeof(long)) / sizeof(long);
	u_long *poison_start = &poison->low_poison;

	/* slabs are zeroed out when they are newly allocated */
	if (poison->exact_size != 0) {
		for (u_long *pos = poison_start; pos < &poison->high_poison[offset]; pos++) {
			if (*pos != SLAB_POISON_FREE) {
				kprintf("Use-after-free in %p (alloc by %p)\n",
					poison->data, poison->alloc_source);
				break;
			}
		}
	}

	/* update offset to the new size */
	offset = align_ceil(exact_size, sizeof(long)) / sizeof(long);

	poison->alloc_source = alloc_source;
	poison->exact_size = exact_size;
	for (u_long *pos = &poison->low_poison; pos <= &poison->high_poison[offset]; pos++)
		*pos = SLAB_POISON_ALLOC;
}

static inline void poison_after_free(struct slab_poison *poison)
{
	u_int offset = align_ceil(poison->exact_size, sizeof(long)) / sizeof(long);

	if (poison->low_poison != SLAB_POISON_ALLOC) {
		kprintf("Low out-of-bounds write to %p (alloc by %p)\n",
			poison->data, poison->alloc_source);
	}

	if (poison->high_poison[offset] != SLAB_POISON_ALLOC) {
		kprintf("High out-of-bounds write to %p (alloc by %p)\n",
			poison->data, poison->alloc_source);
	}

	for (u_long *pos = &poison->low_poison; pos <= &poison->high_poison[offset]; pos++)
		*pos = SLAB_POISON_FREE;
}
#endif

__weak void *malloc(usize size)
{
	return kmalloc(size, M_KERN);
}

__weak void free(void *ptr)
{
	kfree(ptr);
}