This was the last major obstacle on the way to managing virtual memory maps. Object caches are custom allocators that allow for more fine-grained allocation policies, including handing out memory straight from the DMAP region.
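To make that a bit more concrete, here is a rough usage sketch based on the interfaces in the listing below. The `struct foo` type, the cache name and the wrapper functions are made up purely for illustration; only kmem_cache_register(), kmem_cache_alloc() and kfree() are real, and since there is no dedicated kmem_cache_free() in this file, kfree() is assumed to be the return path as well:

#include <gay/mm.h>

/* hypothetical object type, purely for illustration */
struct foo {
        int bar;
        int baz;
};

static kmem_cache_t foo_cache;

void foo_init(void)
{
        /* one dedicated cache for struct foo, no constructor/destructor */
        foo_cache = kmem_cache_register("foo", sizeof(struct foo),
                                        _M_ZONE_NORMAL, nil, nil);
}

struct foo *foo_alloc(void)
{
        /* hands out one fixed-size slab from the cache */
        return kmem_cache_alloc(foo_cache, M_KERN);
}

void foo_free(struct foo *foo)
{
        /* kfree() finds the owning slab through the page the pointer lives in */
        kfree(foo);
}

The point of registering a dedicated cache instead of going through kmalloc() is that every allocation has exactly the right size and can be given a constructor/destructor, which is also what makes policies like allocating straight out of the DMAP region possible.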
/* Copyright (C) 2021,2022 fef <owo@fef.moe>. All rights reserved. */

/*
 * slabbing slabs onto the slab for slabs slab slab slahsdf ashklfghdsla
 */

#include <arch/atom.h>
#include <arch/cpufunc.h>
#include <arch/page.h>

#include <gay/bits.h>
#include <gay/cdefs.h>
#include <gay/clist.h>
#include <gay/config.h>
#include <gay/kprintf.h>
#include <gay/ktrace.h>
#include <gay/mm.h>
#include <gay/poison.h>
#include <gay/systm.h>
#include <gay/types.h>
#include <gay/vm/page.h>

#include <strings.h>

#if CFG_POISON_SLABS
struct slab_poison {
        void *_pad __unused;    /**< @brief That's where the freelist pointer is stored */
        void *alloc_source;     /**< @brief Code address that made the alloc call */
        u_long exact_size;
        u_long low_poison;
        u8 data[0];
        u_long high_poison[1];
};

static void poison_on_alloc(struct slab_poison *poison, u_long exact_size, void *alloc_source);
static void poison_on_free(struct slab_poison *poison);
#endif

#if CFG_DEBUG_SLAB_ALLOCS
# define slab_debug(msg, ...) kprintf("[slab] " msg, ##__VA_ARGS__)
# define SLAB_DEBUG_BLOCK
# define SLAB_ASSERT KASSERT
# if CFG_DEBUG_SLAB_ALLOCS_NOISY
#  define slab_debug_noisy(msg, ...) kprintf("[slab] " msg, ##__VA_ARGS__)
# else
#  define slab_debug_noisy(msg, ...) ({})
# endif
#else
# define SLAB_DEBUG_BLOCK if (0)
# define SLAB_ASSERT(x) ({})
# define slab_debug(msg, ...) ({})
# define slab_debug_noisy(msg, ...) ({})
#endif

/**
 * @brief Single node in the object cache system.
 * Each node owns a page.
 */
struct kmem_cache_node {
        struct clist link;        /* -> struct kmem_cache_pool::list */
        void **freelist;          /**< @brief Stack of free objects */
        struct kmem_cache *cache; /**< @brief Object cache this node belongs to */
        spin_t lock;              /**< @brief Lock for `freelist` */
        u_int free_count;
        vm_page_t page;           /**< @brief Physical page this node manages */
};

struct kmem_cache_pool {
        struct clist list;        /* -> struct kmem_cache_node::link */
        spin_t lock;
        atom_t count;
};

/**
 * @brief Cache for one particular object type.
 * A pool holds multiple nodes, each of which holds the same number of slabs.
 */
struct kmem_cache {
        u_int object_size;      /**< @brief Object size in bytes */
        u_int page_order;       /**< @brief Order passed to `get_pages()` */
        enum slab_flags flags;  /**< @brief Flags for how to allocate */
        u_int slabs_per_node;   /**< @brief Max number of slabs per cache node */
        latom_t total_used;     /**< @brief Total allocated entries */
        const char *name;       /**< @brief Unique name for this object type */
        void (*ctor)(void *ptr, kmem_cache_t cache);
        void (*dtor)(void *ptr, kmem_cache_t cache);
        struct kmem_cache_pool empty;
        struct kmem_cache_pool partial;
        struct kmem_cache_pool full;
        struct clist link;      /**< @brief List of all kmem caches */
};

/* values for struct kmem_cache::flags */

/** @brief Zone to request pages from (using `page_alloc()`) */
#define SLAB_ZONE(flags) ((flags) & 3)

/** @brief List of all currently registered `struct kmem_cache`s. */
static CLIST(kmem_cache_list);

#define _MIN1(x) ((x) < 1 ? 1 : (x))
#define SLABS_PER_NODE(sz) _MIN1(PAGE_SIZE / (sz))

#define CACHE_DEFINE(sz, _name, _flags) { \
        .object_size    = (sz), \
        .page_order     = ((sz) - 1) / PAGE_SIZE, \
        .flags          = (_flags), \
        .slabs_per_node = SLABS_PER_NODE(sz), \
        .total_used     = ATOM_DEFINE(0), \
        .name           = (_name), \
}

static struct kmem_cache kmem_caches[] = {
        CACHE_DEFINE(32, "kmem_32", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(64, "kmem_64", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(128, "kmem_128", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(256, "kmem_256", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(512, "kmem_512", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(1024, "kmem_1024", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(2048, "kmem_2048", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(4096, "kmem_4096", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(8192, "kmem_8192", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(16384, "kmem_16384", _M_ZONE_NORMAL | SLAB_POISON),
        CACHE_DEFINE(32768, "kmem_32768", _M_ZONE_NORMAL | SLAB_POISON),
        { /* terminator */ }
};
static struct kmem_cache kmem_dma_caches[] = {
        CACHE_DEFINE(32, "kmem_dma_32", _M_ZONE_DMA | SLAB_POISON),
        CACHE_DEFINE(64, "kmem_dma_64", _M_ZONE_DMA | SLAB_POISON),
        CACHE_DEFINE(128, "kmem_dma_128", _M_ZONE_DMA | SLAB_POISON),
        CACHE_DEFINE(256, "kmem_dma_256", _M_ZONE_DMA | SLAB_POISON),
        CACHE_DEFINE(512, "kmem_dma_512", _M_ZONE_DMA | SLAB_POISON),
        CACHE_DEFINE(1024, "kmem_dma_1024", _M_ZONE_DMA | SLAB_POISON),
        { /* terminator */ }
};

/**
 * This is a little fucked.
 *
 * So, every `vm_page_t` in use by the slab allocator gets a corresponding
 * `struct kmem_cache_node` that keeps track of everything we need to know to
 * make allocations. However, the memory for those structs themselves doesn't
 * magically grow on trees. In other words, we need to allocate memory in
 * order to be able to allocate memory.
 *
 * So what we have here is a separate object cache for `struct kmem_cache_node`
 * that works slightly differently than all the other ones: Instead of making
 * an extra allocation for the cache node, that node sits at the beginning of
 * the page that we allocate from itself. Other caches don't do this because
 * it destroys the perfect page alignment of the allocated area itself, but that
 * doesn't matter here.
 */
static struct kmem_cache kmem_cache_node_caches =
        CACHE_DEFINE(sizeof(struct kmem_cache_node), "kmem_cache_node", _M_ZONE_NORMAL | SLAB_DMAP);

#undef _MIN1 /* we don't wanna end up using this in actual code, do we? */

static struct kmem_cache *kmem_cache_zones[MM_NR_ZONES] = {
        [_M_ZONE_DMA]    = kmem_dma_caches,
        [_M_ZONE_NORMAL] = kmem_caches,
};

static void cache_pool_init(struct kmem_cache_pool *pool)
{
        clist_init(&pool->list);
        atom_init(&pool->count, 0);
        spin_init(&pool->lock);
}

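/**
 * @brief Initialize the statically defined caches and the node cache itself.
 * The node cache gives up one slab per page because the management node
 * lives at the beginning of every page it allocates from.
 */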
void kmalloc_init(void)
{
        cache_pool_init(&kmem_cache_node_caches.empty);
        cache_pool_init(&kmem_cache_node_caches.partial);
        cache_pool_init(&kmem_cache_node_caches.full);
        /* for the management node at the beginning of the page */
        kmem_cache_node_caches.slabs_per_node--;
        clist_add(&kmem_cache_list, &kmem_cache_node_caches.link);

        for (int i = 0; i < MM_NR_ZONES; i++) {
                struct kmem_cache *cache = kmem_cache_zones[i];

                while (cache->object_size != 0) {
                        clist_init(&cache->empty.list);
                        clist_init(&cache->partial.list);
                        clist_init(&cache->full.list);
                        clist_add(&kmem_cache_list, &cache->link);
                        cache++;
                }
        }
}

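/**
 * @brief Register a new object cache for a single object type.
 * The object size is rounded up to `sizeof(long)`; requests larger than
 * PAGE_SIZE (or of size 0) are rejected and nil is returned.
 */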
kmem_cache_t kmem_cache_register(const char *name, u_int obj_size, enum slab_flags flags,
                                 void (*ctor)(void *ptr, kmem_cache_t cache),
                                 void (*dtor)(void *ptr, kmem_cache_t cache))
{
        obj_size = align_ceil(obj_size, sizeof(long));
        /* we only support objects up to PAGE_SIZE for now */
        if (obj_size > PAGE_SIZE || obj_size == 0)
                return nil;

        struct kmem_cache *cache = kmalloc(sizeof(*cache), M_KERN);

        if (cache) {
                cache->name = name;
                cache->object_size = obj_size;
                cache->flags = flags;
                cache->ctor = ctor;
                cache->dtor = dtor;
                cache_pool_init(&cache->empty);
                cache_pool_init(&cache->partial);
                cache_pool_init(&cache->full);

                /* XXX this is pretty wasteful for larger obj_sizes */
                cache->slabs_per_node = PAGE_SIZE / obj_size;
                cache->page_order = 0;

                clist_add(&kmem_cache_list, &cache->link);
        }

        return cache;
}

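/**
 * @brief Build the embedded freelist for a freshly allocated page.
 * Walks the page from back to front in steps of the cache's object size,
 * invokes the constructor on each slot if there is one, and links every slot
 * to the one after it.  Returns the head of the list (the first slot).
 */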
static inline void **freelist_init(vm_page_t page, struct kmem_cache *cache)
{
        void *prev = nil;
        void *start = __v(pg2paddr(page));
        void *end = start + align_floor(1 << (cache->page_order + PAGE_SHIFT), cache->object_size);
        void *pos = end;

        do {
                pos -= cache->object_size;
                if (cache->ctor)
                        cache->ctor(pos, cache);
                *(void **)pos = prev;
                prev = pos;
        } while (pos >= start + cache->object_size);

        return (void **)pos;
}

/** Attempt to remove the first cache node from a pool's list and return it */
/* call with interrupts disabled */
static inline struct kmem_cache_node *pool_del_first_node(struct kmem_cache_pool *pool)
{
        struct kmem_cache_node *node = nil;

        spin_lock(&pool->lock);
        if (!clist_is_empty(&pool->list)) {
                atom_dec(&pool->count);
                node = clist_del_first_entry(&pool->list, typeof(*node), link);
        }
        spin_unlock(&pool->lock);

        return node;
}

/* call with interrupts disabled */
static inline void pool_del_node(struct kmem_cache_pool *pool, struct kmem_cache_node *node)
{
        atom_dec(&pool->count);
        spin_lock(&pool->lock);
        clist_del(&node->link);
        spin_unlock(&pool->lock);
}

/* call with interrupts disabled */
static inline void pool_add_node(struct kmem_cache_pool *pool, struct kmem_cache_node *node)
{
        spin_lock(&pool->lock);
        clist_add(&pool->list, &node->link);
        spin_unlock(&pool->lock);
        atom_inc(&pool->count);
}

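/**
 * @brief Pop the first free slab off a node's freelist and refile the node.
 * The node must already have been removed from its pool; it gets inserted
 * into the full pool if this was its last free slab, or into the partial
 * pool otherwise.
 */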
/* call with interrupts disabled */
static inline void *pop_freelist_and_insert(struct kmem_cache *cache, struct kmem_cache_node *node)
{
        spin_lock(&node->lock);
        void *ret = node->freelist;
        node->freelist = *node->freelist;
        u_int free_count = --node->free_count;
        spin_unlock(&node->lock);

        latom_inc(&cache->total_used);
        if (free_count == 0)
                pool_add_node(&cache->full, node);
        else
                pool_add_node(&cache->partial, node);

        return ret;
}

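/**
 * @brief Get a management node from the self-hosting node cache.
 * Falls back from the partial to the empty pool, and if both are drained it
 * grabs a fresh page with M_ATOMIC and sets up the node governing that page
 * by hand at its beginning.
 */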
/* call with interrupts disabled */
static struct kmem_cache_node *node_alloc(void)
{
        /*
         * This is really the same basic procedure as kmem_cache_alloc(),
         * except that we allocate everything manually if we run out of caches
         * and interrupts are disabled.
         * It definitely needs a cleanup at some point, most of the stuff here
         * can probably be eliminated if kmem_cache_alloc() is split up.
         */
        struct kmem_cache_node *mgmt_node = pool_del_first_node(&kmem_cache_node_caches.partial);
        if (!mgmt_node) {
                mgmt_node = pool_del_first_node(&kmem_cache_node_caches.empty);
                if (!mgmt_node) {
                        vm_page_t page = page_alloc(0, M_ATOMIC);
                        if (!page)
                                return nil;

                        void **freelist = freelist_init(page, &kmem_cache_node_caches);
                        mgmt_node = (struct kmem_cache_node *)freelist;
                        mgmt_node->freelist = *freelist;

                        mgmt_node = __v(pg2paddr(page));
                        spin_init(&mgmt_node->lock);
                        mgmt_node->free_count = kmem_cache_node_caches.slabs_per_node;
                        mgmt_node->cache = &kmem_cache_node_caches;
                        mgmt_node->page = page;
                }
        }

        struct kmem_cache_node *new_node = pop_freelist_and_insert(&kmem_cache_node_caches,
                                                                   mgmt_node);
        return new_node;
}

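/**
 * @brief Allocate and initialize a new cache node for `cache`.
 * Interrupts are temporarily restored to `cpuflags` around the page
 * allocation and disabled again before returning.
 */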
/* call with interrupts disabled */
static inline struct kmem_cache_node *node_create(struct kmem_cache *cache, enum mflags flags,
                                                  register_t cpuflags)
{
        struct kmem_cache_node *node = node_alloc();

        if (node) {
                intr_restore(cpuflags);
                vm_page_t page = page_alloc(cache->page_order, flags | M_ZERO);
                if (page) {
                        pga_set_slab(page, true);
                        page->slab = node;

                        node->freelist = freelist_init(page, cache);
                        spin_init(&node->lock);
                        node->free_count = cache->slabs_per_node;
                        node->cache = cache;
                        node->page = page;
                } else {
                        kfree(node);
                        node = nil;
                }
                intr_disable();
        }

        return node;
}

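/**
 * @brief Allocate a single object from `cache`.
 * Takes a node out of the partial pool (falling back to the empty pool, and
 * to creating an entirely new node if both are drained), pops an object off
 * its freelist, and refiles the node into the partial or full pool.
 */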
void *kmem_cache_alloc(kmem_cache_t cache, enum mflags flags)
{
        SLAB_DEBUG_BLOCK {
                if (!(flags & _M_NOWAIT) && in_irq()) {
                        slab_debug("kmem_cache_alloc() called from irq %p w/o M_NOWAIT\n",
                                   ktrace_return_addr());
                        flags |= _M_NOWAIT;
                }
        }

        SLAB_ASSERT(_M_ZONE_INDEX(flags) < ARRAY_SIZE(kmem_cache_zones));
        slab_debug_noisy("alloc %u bytes from zone %d, cache %s\n",
                         cache->object_size, _M_ZONE_INDEX(flags), cache->name);

        /*
         * Before locking a node, we always remove it from its cache pool.
         * This is far from optimal, because if multiple CPUs allocate from the
         * same pool at the same time, we could end up creating several slabs
         * with one used entry each (not to mention the overhead of the mostly
         * unnecessary list deletions/insertions). However, it allows me to be
         * lazier when freeing unused slabs from a background thread since that
         * thread knows for sure that once it has removed a slab from free_list,
         * it can't possibly be used for allocations anymore.
         * This is probably not worth the overhead, though.
         */
        struct kmem_cache_node *node = nil;

        /* try to use a slab that is already partially used first */
        register_t cpuflags = intr_disable();

        node = pool_del_first_node(&cache->partial);
        if (!node) {
                /* no partially used node available, see if we have a completely free one */
                node = pool_del_first_node(&cache->empty);
                if (!node) {
                        /* we're completely out of usable nodes, allocate a new one */
                        node = node_create(cache, flags, cpuflags);
                        if (!node) {
                                slab_debug("kernel OOM\n");
                                intr_restore(cpuflags);
                                return nil;
                        }
                }
        }

        /* if we've made it to here, we have a cache node and interrupts are disabled */
        void *ret = pop_freelist_and_insert(cache, node);
        intr_restore(cpuflags);

        return ret;
}

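/**
 * @brief Allocate `size` bytes from the zone selected by `flags`.
 * Picks the smallest static cache whose object size fits the request (plus
 * the poison header if CFG_POISON_SLABS is enabled) and allocates from that.
 */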
void *kmalloc(usize size, enum mflags flags)
{
        if (size == 0)
                return nil;

#if CFG_POISON_SLABS
        size += sizeof(struct slab_poison);
#endif

        SLAB_ASSERT(_M_ZONE_INDEX(flags) < ARRAY_SIZE(kmem_cache_zones));
        struct kmem_cache *cache = kmem_cache_zones[_M_ZONE_INDEX(flags)];
        while (cache->object_size != 0) {
                if (cache->object_size >= size)
                        break;
                cache++;
        }

        if (cache->object_size == 0) {
                slab_debug("Refusing to allocate %zu bytes in zone %d (limit is %u)\n",
                           size, _M_ZONE_INDEX(flags), cache[-1].object_size);
                return nil;
        }

        void *ret = kmem_cache_alloc(cache, flags);

#if CFG_POISON_SLABS
        if (ret) {
                struct slab_poison *poison = ret;
                poison_on_alloc(poison, size - sizeof(*poison), ktrace_return_addr());
                ret = poison->data;
        }
#endif
        return ret;
}

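/**
 * @brief Return an object to the slab it was allocated from.
 * The owning cache node is looked up through the vm_page backing the pointer,
 * and the object is pushed back onto that node's freelist.
 */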
void kfree(void *ptr)
{
        if (ptr == nil)
                return;

        SLAB_ASSERT(ptr >= DMAP_START && ptr < DMAP_END);

        vm_page_t page = vaddr2pg(ptr);
        SLAB_ASSERT(pga_slab(page));
        struct kmem_cache_node *node = page_slab(page);
        struct kmem_cache *cache = node->cache;
#if CFG_POISON_SLABS
        if (cache->flags & SLAB_POISON) {
                struct slab_poison *poison = container_of(ptr, typeof(*poison), data);
                poison_on_free(poison);
                ptr = poison;
        }
#endif

        register_t cpuflags = intr_disable();

        spin_lock(&node->lock);
        *(void **)ptr = node->freelist;
        node->freelist = (void **)ptr;
        u_int free_count = ++node->free_count;
        spin_unlock(&node->lock);

        if (free_count == cache->slabs_per_node) {
                pool_del_node(&cache->partial, node);
                pool_add_node(&cache->empty, node);
        } else if (free_count == 1) {
                /* the node was in the full pool, put it back into the partial one */
                pool_del_node(&cache->full, node);
                pool_add_node(&cache->partial, node);
        }

        latom_dec(&cache->total_used);
        intr_restore(cpuflags);
}

#if CFG_POISON_SLABS

static inline void poison_on_alloc(struct slab_poison *poison, u_long exact_size,
                                   void *alloc_source)
{
        u_long offset = align_ceil(poison->exact_size, sizeof(long)) / sizeof(long);
        u_long *poison_start = &poison->low_poison;

        /*
         * page_alloc() always initializes the allocated page to zeroes.
         * Therefore, if exact_size is 0, we know this particular slab entry has
         * never been used before, and we can skip the check.
         */
        if (poison->exact_size != 0) {
                for (u_long *pos = poison_start; pos < &poison->high_poison[offset]; pos++) {
                        if (*pos != SLAB_POISON_FREE) {
                                kprintf("Use-after-free in %p (alloc by %p)\n",
                                        poison->data, poison->alloc_source);
                                break;
                        }
                }
        }

        /* update offset to the new size */
        offset = align_ceil(exact_size, sizeof(long)) / sizeof(long);

        poison->alloc_source = alloc_source;
        poison->exact_size = exact_size;
        for (u_long *pos = &poison->low_poison; pos <= &poison->high_poison[offset]; pos++)
                *pos = SLAB_POISON_ALLOC;
}

static inline void poison_on_free(struct slab_poison *poison)
{
        u_long offset = align_ceil(poison->exact_size, sizeof(long)) / sizeof(long);

        if (poison->low_poison != SLAB_POISON_ALLOC) {
                kprintf("Low out-of-bounds write to %p (alloc by %p)\n",
                        poison->data, poison->alloc_source);
        }

        if (poison->high_poison[offset] != SLAB_POISON_ALLOC) {
                kprintf("High out-of-bounds write to %p (alloc by %p)\n",
                        poison->data, poison->alloc_source);
        }

        for (u_long *pos = &poison->low_poison; pos <= &poison->high_poison[offset]; pos++)
                *pos = SLAB_POISON_FREE;
}

#endif /* CFG_POISON_SLABS */

/*
 * for certain libc routines
 */

__weak void *malloc(usize size)
{
        return kmalloc(size, M_KERN);
}

__weak void free(void *ptr)
{
        kfree(ptr);
}