mm: refactor entire mm subsystem, part 1

Another one of those larger endeavours that take
multiple commits.  This first one introduces the
basic vm_page data structure, as well as the x86
bootstrap code for initializing it.
Branch: main
Author: anna, 3 years ago
Parent: 2ace3d3505
Commit: 7285c2e076
Signed by: fef (GPG key ID EC22E476DC2D3D84)

@ -73,14 +73,14 @@ static void fb_init(enum vga_color fg, enum vga_color bg);
static void print_gay_propaganda(void);
static struct mb2_tag *next_tag(struct mb2_tag *tag);
static int handle_tag(struct mb2_tag *tag);
static int handle_mmap_tag(struct mb2_tag_mmap *tag);
static const char *mmap_type_name(u32 type);
static void handle_tag(struct mb2_tag *tag);
extern int main(int argc, char *argv[]);
__asmlink void _boot(void *address)
{
volatile int x = 69420;
while (x == 69420);
kprintf_set_printer(&fb_kprintf_printer);
fb_init(VGA_COLOR_LIGHT_GREY, VGA_COLOR_BLACK);
@ -88,24 +88,17 @@ __asmlink void _boot(void *address)
print_gay_propaganda();
int err = 0;
/* the +8 skips the total_size and reserved u32 fields at the start of
 * the multiboot2 boot information structure; the tag list is embedded
 * right after that fixed 8-byte header. */
for (struct mb2_tag *tag = address + 8; tag != NULL; tag = next_tag(tag)) {
err = handle_tag(tag);
if (err)
break;
}
for (struct mb2_tag *tag = address + 8; tag != NULL; tag = next_tag(tag))
handle_tag(tag);
if (!err)
main(0, NULL);
main(0, nil);
}
static inline int handle_tag(struct mb2_tag *tag)
static inline void handle_tag(struct mb2_tag *tag)
{
int ret = 0;
switch (tag->type) {
case MB2_TAG_TYPE_END:
break;
@ -113,62 +106,12 @@ static inline int handle_tag(struct mb2_tag *tag)
kprintf("Kernel command line: %s\n", ((struct mb2_tag_string *)tag)->string);
break;
case MB2_TAG_TYPE_MMAP:
ret = handle_mmap_tag((struct mb2_tag_mmap *)tag);
x86_paging_init((struct mb2_tag_mmap *)tag);
break;
default:
//kprintf("Unknown tag %u\n", tag->type);
break;
}
return ret;
}
static inline int handle_mmap_tag(struct mb2_tag_mmap *tag)
{
kprintf("Memory map:\n");
uintptr_t region = 0;
usize region_len = 0;
struct mb2_mmap_entry *entry = &tag->entries[0];
while ((void *)entry < (void *)tag + tag->tag.size) {
kprintf(" [0x%016llx-0x%016llx] %s\n",
entry->addr,
entry->addr + entry->len - 1,
mmap_type_name(entry->type));
usize safe_len;
# ifdef __x86_64__
safe_len = entry->len;
# else
if (entry->addr >= (1llu << 32))
safe_len = 0; /* we can't handle 64-bit pointers */
else if (entry->len > (1llu << 32) - entry->addr)
safe_len = (1llu << 32) - entry->addr; /* clip to 32-bit */
else
safe_len = entry->len;
# endif
if (entry->type == MB2_MEMORY_AVAILABLE && safe_len > region_len) {
region = entry->addr;
region_len = safe_len;
}
entry = (void *)entry + tag->entry_size;
}
if (region == 0 || region_len == 0) {
kprintf("No memory available! Aborting.\n");
return 1;
}
int err = kmalloc_init(region, region + region_len);
if (err) {
kprintf("kmalloc_init() failed! Aborting.\n");
return 1;
}
return 0;
}
static inline struct mb2_tag *next_tag(struct mb2_tag *tag)
@ -280,21 +223,3 @@ static void print_gay_propaganda(void)
fb_foreground = fg_before;
kprintf(", be gay do crime!\n\n");
}
static const char *mmap_type_name(u32 type)
{
switch (type) {
case MB2_MEMORY_AVAILABLE:
return "Available";
case MB2_MEMORY_RESERVED:
return "Reserved";
case MB2_MEMORY_ACPI_RECLAIMABLE:
return "ACPI";
case MB2_MEMORY_NVS: /* non-volatile storage */
return "NVS";
case MB2_MEMORY_BADRAM:
return "Bad RAM";
default:
return "Unknown";
}
}

@ -26,7 +26,7 @@ header_start: /* struct mb2_header */
/* header_length */
.long header_end - header_start
/* checksum */
.long (1 << 33) - MB2_HEADER_MAGIC - MB2_ARCHITECTURE_I386 - (header_end - header_start)
.long (1 << 32) - MB2_HEADER_MAGIC - MB2_ARCHITECTURE_I386 - (header_end - header_start)
#if 0 /* TODO: implement graphics */
.align MB2_TAG_ALIGN

@ -129,36 +129,36 @@ ENTRY(_setup)
movl $X86_KERN_TSS, %eax
ltr %ax
#if (KERNBASE % (1 << X86_PDP_SHIFT)) != 0
#if (KERNBASE % (1 << X86_PDPT_SHIFT)) != 0
#error "KERNBASE must be aligned to at least a PDP entry (1 GB)"
#endif
#if (X86_PMAP_OFFSET % (1 << X86_PML4_SHIFT)) != 0
#if (X86_PMAP_OFFSET % (1 << X86_PML4T_SHIFT)) != 0
#error "X86_PMAP_OFFSET must be aligned to at least a PML4 entry (512 GB)"
#endif
#define V48 0xffff000000000000
#define PDP_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDP_SHIFT) % 512 ) * 8)
#define PML4_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4_SHIFT) * 8 )
#define PDP_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDPT_SHIFT) % 512 ) * 8)
#define PML4_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4T_SHIFT) * 8 )
/*
* statically map the low 2 GB to itself and to the high kernel half
*/
/* for the identity mapping */
movl $0x00000083, PADDR(_pdp0) /* present (0), write (1), huge (7) */
movl $0x40000083, PADDR(_pdp0 + 8)
movl $0x00000083, PADDR(_pdpt0) /* present (0), write (1), huge (7) */
movl $0x40000083, PADDR(_pdpt0 + 8)
/* For the -2GB at the end of virtual memory. We use the same PDP for
* both low and high memory, so technically this creates a total of four
* mappings (+0 GB, +510 GB, -512 GB, -2 GB), but we remove all except
* the -2GB one once we have transitioned to high memory. */
movl $0x00000083, PADDR(_pdp0 + PDP_OFFSET(KERNBASE))
movl $0x40000083, PADDR(_pdp0 + PDP_OFFSET(KERNBASE + 0x40000000))
movl $0x00000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE))
movl $0x40000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE + 0x40000000))
movl $PADDR(_pdp0 + 0x003), PADDR(_pml4) /* present (0), write (1), huge (7) */
movl $PADDR(_pdp0 + 0x003), PADDR(_pml4 + PML4_OFFSET(KERNBASE))
movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t) /* present (0), write (1), huge (7) */
movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4_OFFSET(KERNBASE))
/* map the PML4 to itself */
movl $PADDR(_pml4 + 0x003), PADDR(_pml4 + PML4_OFFSET(X86_PMAP_OFFSET))
movb $0x80, PADDR(_pml4 + PML4_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */
movl $PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET))
movb $0x80, PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */
/*
* ensure paging is disabled by clearing CR0.PG (bit 31)
@ -178,7 +178,7 @@ ENTRY(_setup)
movl %eax, %cr4
/* load cr3 with the PML4 */
movl $PADDR(_pml4), %eax
movl $PADDR(_pml4t), %eax
movl %eax, %cr3
/*
@ -249,9 +249,9 @@ L_ENTRY(_setup_highmem)
popfq
/* remove the low memory identity mapping and bonk the TLB */
movl $0, _pdp0
movl $0, _pdp0 + 8
movl $0, _pml4
movl $0, _pdpt0
movl $0, _pdpt0 + 8
movl $0, _pml4t
movq %cr3, %rax
movq %rax, %cr3

@ -5,30 +5,77 @@
#error "This file is not meant to be included directly, use <arch/page.h>"
#endif
/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */
#define HUGEPAGE_SHIFT 21
/*
* Common abbreviations used throughout the entire x86 vm code base:
* PT - Page Table
* PDT - Page Directory Table
* PDPT - Page Directory Pointer Table
* PML4T - Page Map Level 4 Table
* PTE - Page Table Entry
* PDTE - Page Directory Table Entry
* PDPTE - Page Directory Pointer Table Entry
* PML4TE - Page Map Level 4 entry
* PTI - Page Table Index (range 0 - 511)
* PDTI - Page Directory Table Index (range 0 - 511)
* PDPTI - Page Directory Pointer Table Index (range 0 - 511)
* PML4TI - Page Map Level 4 Index (range 0 - 511)
*
* Quick recap on how the x86 transes virtual to physical addresses:
*
* |63 48|47 39|38 30|29 21|20 12|11 0|
* +------------------+-----------+-----------+-----------+-----------+--------------+
* | 16 bits | 9 bits | 9 bits | 9 bits | 9 bits | 12 bits |
* +------------------+-----------+-----------+-----------+-----------+--------------+
* (1) | (copy of bit 47) | PML4T | PDPT | PDT | PT | offset (4 K) |
* +------------------+-----------+-----------+-----------+-----------+--------------+
* (2) | (copy of bit 47) | PML4T | PDPT | PDT | offset (2 M) |
* +------------------+-----------+-----------+-----------+--------------------------+
* (3) | (copy of bit 47) | PML4T | PDPT | offset (1 G) |
* +------------------+-----------+-----------+--------------------------------------+
*
* %CR3: pointer to PML4T, 256 TB (2^36 pages)
* PML4T: 512 entries, 512 GB per entry (2^27 pages)
* PDPT: 512 entries, 1 GB per entry (2^18 pages)
* PDT: 512 entries, 2 MB per entry (2^9 pages)
* PT: 512 entries, 4 KB per entry (1 page)
*
* PDPT entries can either reference a PDT or a 1 GB region directly (if __P_HUGE is set)
* PDT entries can either reference a PT or a 2 MB region directly (if __P_HUGE is set)
*
* (1) shows a PML4T -> PDPT -> PDT -> PT regular mapping
* (2) shows a PML4T -> PDPT -> PDT hugepage mapping
* (3) shows a PML4T -> PDPT gigapage mapping
*
* Since the lowest 12 bits are always zero in any page map entry, they are
* used for flags. Additionally, bit 63 stores the NX (no execute) flag.
*/
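As a sanity check on the table above, the following standalone snippet (plain hosted C, purely illustrative and not part of this header) decomposes KERNBASE by hand; the shift amounts mirror the definitions that follow.

/* Illustration only: decompose KERNBASE (0xffffffff80000000, see vmparam.h)
 * into its four page map indices.  Compile and run as a normal user program. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t vaddr = 0xffffffff80000000ull;		/* KERNBASE, -2 GB */
	uint64_t v48 = vaddr & 0x0000ffffffffffffull;	/* strip the sign-extension bits */
	unsigned pml4ti = (v48 >> 39) & 511;		/* -> 511 (last PML4T slot) */
	unsigned pdpti  = (v48 >> 30) & 511;		/* -> 510 (second to last PDPT slot) */
	unsigned pdti   = (v48 >> 21) & 511;		/* -> 0 */
	unsigned pti    = (v48 >> 12) & 511;		/* -> 0 */
	printf("%u %u %u %u\n", pml4ti, pdpti, pdti, pti);
	return 0;
}

This matches what setup64.S does: the kernel image is reached through the last PML4T entry and PDPT entry 510.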
#include <arch/vmparam.h>
#define X86_PT_SHIFT PAGE_SHIFT
#define X86_PD_SHIFT (X86_PT_SHIFT + 9)
#define X86_PDP_SHIFT (X86_PD_SHIFT + 9)
#define X86_PML4_SHIFT (X86_PDP_SHIFT + 9)
#define X86_PT_SHIFT PAGE_SHIFT
#define X86_PDT_SHIFT (X86_PT_SHIFT + 9)
#define X86_PDPT_SHIFT (X86_PDT_SHIFT + 9)
#define X86_PML4T_SHIFT (X86_PDPT_SHIFT + 9)
#define __HAVE_NOEXEC
#define __HAVE_GIGAPAGES
/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */
#define HUGEPAGE_SHIFT X86_PDT_SHIFT
/** @brief Binary logarithm of `GIGAPAGE_SIZE`. */
#define GIGAPAGE_SHIFT X86_PDPT_SHIFT
#define GIGAPAGE_SIZE (1 << GIGAPAGE_SHIFT)
#ifndef _ASM_SOURCE
#include <gay/cdefs.h>
#include <gay/types.h>
#define __HAVE_NOEXEC
/**
* @brief A single 64-bit Page Table Entry.
* @brief A single 64-bit page map entry, split up into its individual bit flags.
* The layout matches that of the Intel SDM, vol 3, sect 4.3, fig 4-4.
* Bits 9 and 10 (`slab` and `atomic`) are marked as AVL in the manual and
* ignored by the MMU. We only use them for `get_pflags()`/`set_pflags()`.
*/
struct x86_page_flags {
struct x86_pmap_flags {
/* 0 */bool present:1; /**< Page Fault on access if 0 */
/* 1 */bool rw:1; /**< Page Fault on write if 0 */
/* 2 */bool user:1; /**< Page Fault on user mode access if 0 */
@ -36,27 +83,30 @@ struct x86_page_flags {
/* 4 */bool cache_disabled:1; /**< Disable caching in TLB */
/* 5 */bool accessed:1; /**< 1 if page has been accessed */
/* 6 */bool dirty:1; /**< 1 if page has been written to */
/* 7 */bool huge:1; /**< only valid for PDPTEs and PDEs */
/* 8 */bool global:1; /**< Don't update the TLB on table swap if 1 */
/* 9 */bool slab:1; /**< Used by the slab allocator */
/* 10 */bool atomic:1; /**< Allocated atomically */
/* 11 */unsigned _unused:1;
/* 12 */uintptr_t shifted_address:51;
/* 63 */bool noexec:1;
/* 7 */bool huge:1; /**< Only valid for PDPTEs and PDTEs */
/* 8 */bool global:1; /**< Entry survives `vm_flush()` if 1 */
/* 9 */unsigned _unused:3;
/* 12 */vm_paddr_t shifted_address:51;
/* 63 */bool noexec:1; /**< Prevent instruction fetches */
} __packed;
#define __PFLAG_PRESENT (1 << 0)
#define __PFLAG_RW (1 << 1)
#define __PFLAG_USER (1 << 2)
#define __PFLAG_WRITE_THROUGH (1 << 3)
#define __PFLAG_NOCACHE (1 << 4)
#define __PFLAG_ACCESSED (1 << 5)
#define __PFLAG_DIRTY (1 << 6)
#define __PFLAG_HUGE (1 << 7)
#define __PFLAG_GLOBAL (1 << 8)
#define __PFLAG_SLAB (1 << 9)
#define __PFLAG_ATOMIC (1 << 10)
#define __PFLAG_NOEXEC (1 << 63)
/* bitmasks for the structure above */
#define __P_PRESENT (1 << 0)
#define __P_RW (1 << 1)
#define __P_USER (1 << 2)
#define __P_WRITE_THROUGH (1 << 3)
#define __P_NOCACHE (1 << 4)
#define __P_ACCESSED (1 << 5)
#define __P_DIRTY (1 << 6)
#define __P_HUGE (1 << 7)
#define __P_GLOBAL (1 << 8)
#define __P_SLAB (1 << 9)
#define __P_ATOMIC (1 << 10)
#define __P_NOEXEC (1ul << 63)
/** @brief Bitmask for extracting the physical address from a page map entry. */
#define X86_PMAP_MASK 0x7ffffffffffff000
/*
* these types are deliberately not merged into one so that the
@ -64,39 +114,82 @@ struct x86_page_flags {
*/
#define __pmap_entry_union union { \
struct x86_page_flags flags; \
uintptr_t val; \
struct x86_pmap_flags flags; \
vm_paddr_t val; \
}
/** @brief x86 Page Table Entry. */
typedef __pmap_entry_union x86_pte_t;
typedef __pmap_entry_union x86_pde_t;
typedef __pmap_entry_union x86_pdpe_t;
typedef __pmap_entry_union x86_pml4e_t;
/** @brief x86 Page Directory Table Entry. */
typedef __pmap_entry_union x86_pdte_t;
/** @brief x86 Page Directory Pointer Table Entry. */
typedef __pmap_entry_union x86_pdpte_t;
/** @brief x86 Page Map Level 4 Table Entry. */
typedef __pmap_entry_union x86_pml4te_t;
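A tiny illustration (not part of the header) of what the union buys us: the bitfield view and the raw value always describe the same 64-bit entry.

/* Illustration only: setting the raw value and reading it back through the
 * bitfield view.  0x1000 | __P_PRESENT | __P_RW describes physical frame 1
 * mapped present and writable. */
static inline bool pte_union_demo(void)
{
	x86_pte_t e;
	e.val = 0x1000 | __P_PRESENT | __P_RW;
	return e.flags.present && e.flags.rw
	    && e.flags.shifted_address == 1;	/* 0x1000 >> 12 */
}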
/** @brief x86 Page Table. */
typedef struct { x86_pte_t entries[512]; } __aligned(PAGE_SIZE) x86_pt_t;
typedef struct { x86_pde_t entries[512]; } __aligned(PAGE_SIZE) x86_pd_t;
typedef struct { x86_pdpe_t entries[512]; } __aligned(PAGE_SIZE) x86_pdp_t;
typedef struct { x86_pml4e_t entries[512]; } __aligned(PAGE_SIZE) x86_pml4_t;
#define X86_PMAP_MASK 0x7ffffffffffff000
/** @brief x86 Page Directory Table. */
typedef struct { x86_pdte_t entries[512]; } __aligned(PAGE_SIZE) x86_pdt_t;
/** @brief x86 Page Directory Pointer Table. */
typedef struct { x86_pdpte_t entries[512]; } __aligned(PAGE_SIZE) x86_pdpt_t;
/** @brief x86 Page Map Level 4 Table. */
typedef struct { x86_pml4te_t entries[512]; } __aligned(PAGE_SIZE) x86_pml4t_t;
/* you aren't expected to understand any of these, they're just nasty offset calculations */
#define __V48_MASK ( ((uintptr_t)1 << 48) - 1 )
/** @brief Get the linear 48-bit address */
#define __V48ADDR(ptr) ((uintptr_t)(ptr) & 0x0000ffffffffffff)
#define X86_PT_INDEX(ptr) (( __V48ADDR(ptr) >> X86_PT_SHIFT ) % 512)
#define X86_PD_INDEX(ptr) (( __V48ADDR(ptr) >> X86_PD_SHIFT ) % 512)
#define X86_PDP_INDEX(ptr) (( __V48ADDR(ptr) >> X86_PDP_SHIFT ) % 512)
#define X86_PML4_INDEX(ptr) ( __V48ADDR(ptr) >> X86_PML4_SHIFT )
#define __PT_BASE X86_PMAP_OFFSET
#define __PD_BASE (__PT_BASE + (__V48ADDR(X86_PMAP_OFFSET) >> X86_PT_SHIFT))
#define __PDP_BASE (__PD_BASE + (__V48ADDR(X86_PMAP_OFFSET) >> X86_PD_SHIFT))
#define __PML4_BASE (__PDP_BASE + (__V48ADDR(X86_PMAP_OFFSET) >> X86_PDP_SHIFT))
#define X86_PTE(ptr) ((x86_pte_t *)( __PT_BASE + (__V48ADDR(ptr) >> X86_PT_SHIFT) ))
#define X86_PDE(ptr) ((x86_pde_t *)( __PD_BASE + (__V48ADDR(ptr) >> X86_PD_SHIFT) ))
#define X86_PDPE(ptr) ((x86_pdpe_t *)( __PDP_BASE + (__V48ADDR(ptr) >> X86_PDP_SHIFT) ))
#define X86_PML4E(ptr) ((x86_pml4e_t *)( __PML4_BASE + (__V48ADDR(ptr) >> X86_PML4_SHIFT) ))
#define __V48(ptr) ((uintptr_t)(ptr) & __V48_MASK)
/**
* @brief Generate a 48-bit virtual address in user space, based on its pmap indices.
* Every index must be less than 512, or you'll get a garbage address.
* `pml4ti` must be less than 256, or you'll hurt the MMU's feelings.
* This is because bits 63-48 of the virtual address must all match bit 47.
*/
#define UV48ADDR(pml4ti, pdpti, pdti, pti) ( \
(vm_paddr_t)(pml4ti) << X86_PML4T_SHIFT | \
(vm_paddr_t)(pdpti) << X86_PDPT_SHIFT | \
(vm_paddr_t)(pdti) << X86_PDT_SHIFT | \
(vm_paddr_t)(pti) << X86_PT_SHIFT \
)
/**
* @brief Generate a 48-bit virtual address in kernel space, based on its pmap indices.
* Every index must be less than 512, or you'll get a garbage address.
* `pml4ti` must be at least 256, or you'll hurt the MMU's feelings.
* This is because bits 63-48 of the virtual address must all match bit 47.
*/
#define KV48ADDR(pml4ti, pdpti, pdti, pti) ( \
(vm_paddr_t)0xffff000000000000 | \
UV48ADDR(pml4ti, pdpti, pdti, pti) \
)
/** @brief Get the Page Table index for a given virtual address. */
#define X86_PTI(ptr) ((__V48(ptr) >> X86_PT_SHIFT ) % 512)
/** @brief Get the Page Directory Table index for a given virtual address. */
#define X86_PDTI(ptr) ((__V48(ptr) >> X86_PDT_SHIFT ) % 512)
/** @brief Get the Page Directory Pointer Table index for a given virtual address. */
#define X86_PDPTI(ptr) ((__V48(ptr) >> X86_PDPT_SHIFT ) % 512)
/** @brief Get the Page Map Level 4 Table index for a given virtual address. */
#define X86_PML4TI(ptr) (__V48(ptr) >> X86_PML4T_SHIFT)
/* Page Map Level 4 Table index for the recursive page map */
#define __PML4TI (X86_PML4TI(X86_PMAP_OFFSET)) /* = 256 */
#define __PT_BASE ( (x86_pt_t *)KV48ADDR(__PML4TI, 0, 0, 0) )
#define __PDT_BASE ( (x86_pdt_t *)KV48ADDR(__PML4TI, __PML4TI, 0, 0) )
#define __PDPT_BASE ( (x86_pdpt_t *)KV48ADDR(__PML4TI, __PML4TI, __PML4TI, 0) )
#define __PML4T_BASE ( (x86_pml4t_t *)KV48ADDR(__PML4TI, __PML4TI, __PML4TI, __PML4TI) )
/** @brief Get the Page Table Entry for a given virtual address. */
#define X86_PTE(ptr) ( &__PT_BASE->entries[__V48(ptr) >> X86_PT_SHIFT] )
/** @brief Get the Page Directory Table Entry for a given virtual address. */
#define X86_PDTE(ptr) ( &__PDT_BASE->entries[__V48(ptr) >> X86_PDT_SHIFT] )
/** @brief Get the Page Directory Pointer Table Entry for a given virtual address. */
#define X86_PDPTE(ptr) ( &__PDPT_BASE->entries[__V48(ptr) >> X86_PDPT_SHIFT] )
/** @brief Get the Page Map Level 4 Table Entry for a given virtual address. */
#define X86_PML4TE(ptr) ( &__PML4T_BASE->entries[__V48(ptr) >> X86_PML4T_SHIFT] )
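To make the recursion concrete, here is a throwaway block of static asserts (literal values only, relying on __PML4TI being 256 as noted above; not meant to be committed) spelling out where each base lands:

/* Illustration only: following the self-referencing slot one more time per
 * level yields exactly the recursive map bases defined above. */
_Static_assert((0xffff000000000000ull | (256ull << 39)) == 0xffff800000000000ull,
	       "__PT_BASE == X86_PMAP_OFFSET");
_Static_assert((0xffff800000000000ull | (256ull << 30)) == 0xffff804000000000ull,
	       "__PDT_BASE");
_Static_assert((0xffff804000000000ull | (256ull << 21)) == 0xffff804020000000ull,
	       "__PDPT_BASE");
_Static_assert((0xffff804020000000ull | (256ull << 12)) == 0xffff804020100000ull,
	       "__PML4T_BASE");

Note that __PML4T_BASE plus one 4 KB page is exactly X86_PMAP_OFFSET + X86_PMAP_LENGTH, i.e. the ~256.5 GB recursive mapping window from vmparam.h.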
#endif /* not _ASM_SOURCE */

@ -6,20 +6,24 @@
#endif
/** @brief Userland memory region */
#define USER_OFFSET 0x0000000000000000 /* +0 TB */
#define USER_LENGTH 0x0000800000000000 /* 128 TB */
#define USER_OFFSET 0x0000000000000000 /* +0 TB */
#define USER_LENGTH 0x0000800000000000 /* 128 TB */
/** @brief Recursive Page Map Level 4 map */
#define X86_PMAP_OFFSET 0xffff800000000000 /* -128 TB */
#define X86_PMAP_LENGTH 0x0000004020101000 /* ~ 256.5 GB */
#define X86_PMAP_OFFSET 0xffff800000000000 /* -128 TB */
#define X86_PMAP_LENGTH 0x0000004020101000 /* ~ 256.5 GB */
/** @brief Direct (contiguous) mapping of physical memory */
#define DMAP_OFFSET 0xfffff80000000000 /* -8 TB */
#define DMAP_LENGTH 0x0000040000000000 /* 4 TB */
#define DMAP_OFFSET 0xfffff80000000000 /* -8 TB */
#define DMAP_LENGTH 0x0000040000000000 /* 4 TB */
/** @brief Kernel region (image, heap, etc) */
#define KERN_OFFSET 0xfffffe0000000000 /* -2 TB */
#define KERN_LENGTH 0x0000020000000000 /* 2 TB */
#define KERN_OFFSET 0xfffffe0000000000 /* -2 TB */
#define KERN_LENGTH 0x0000020000000000 /* 2 TB */
/** @brief Where the kernel image is actually mapped to */
#define KERNBASE 0xffffffff80000000 /* -2 GB */
#define KERNBASE_LENGTH 0x0000000080000000
#define KERNBASE 0xffffffff80000000 /* -2 GB */
#define KERNBASE_LENGTH 0x0000000080000000
#define VM_PAGE_ARRAY_OFFSET KERN_OFFSET
#define VM_PAGE_ARRAY_LENGTH (KERNBASE - KERN_OFFSET)

@ -140,6 +140,15 @@
#define PRIXMAX "jX" /* uintmax_t */
#define PRIXPTR __PRIptr"X" /* uintptr_t */
#ifdef _KERNEL
#define PRIxVM_PADDR __PRI64"x" /* vm_paddr_t */
#define PRIxVM_OFFSET __PRI64"x" /* vm_offset_t */
#define PRIdVM_OFFSET __PRI64"d" /* vm_offset_t */
#define PRIdVM_SIZE __PRI64"d" /* vm_size_t */
#endif /* _KERNEL */
/* fscanf(3) macros for signed integers. */
#define SCNd8 "hhd" /* int8_t */

@ -26,6 +26,11 @@
#ifndef _ASM_SOURCE
#include <arch/multiboot.h>
/** @brief Initialize `vm_page_array` based on the multiboot memory map. */
void x86_paging_init(struct mb2_tag_mmap *mmap);
/** @brief Pointer bitmask to get the base address of their page. */
#define PAGE_MASK ( ~((unsigned long)PAGE_SIZE - 1) )
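As a small illustration (a hypothetical helper, not part of the tree), rounding an address down to its page base with this mask looks like:

/* Hypothetical helper for illustration: round a pointer down to the base of
 * its 4 KB page (same result as align_floor() with PAGE_SIZE). */
static inline void *page_base(void *ptr)
{
	return (void *)((uintptr_t)ptr & PAGE_MASK);
}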
/** @brief Pointer bitmask to get the base address of their huge page. */
@ -48,8 +53,20 @@
* @brief Get the physical address a virtual one is currently mapped to.
*
* @param virt virtual address
* @returns The physical address, or `0` if there is no mapping
* @returns The physical address, or -1 cast to `vm_paddr_t` if there is no mapping
*/
uintptr_t vtophys(void *virt);
vm_paddr_t vtophys(void *virt);
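A minimal sketch of how a caller would honor the new error value; is_mapped() is a hypothetical helper, not something this commit adds. The point of the change is that physical address 0 becomes a valid result, so the "not mapped" sentinel has to move to -1.

/* Hypothetical helper: with the new contract, an unmapped address is
 * signalled by (vm_paddr_t)-1 rather than 0. */
static inline bool is_mapped(void *virt)
{
	return vtophys(virt) != (vm_paddr_t)-1;
}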
static inline void vm_flush(void)
{
register_t tmp;
__asm__ volatile(
" mov %%cr3, %0 \n"
" mov %0, %%cr3 \n"
: "=r"(tmp)
:
: "memory"
);
}
#endif /* not _ASM_SOURCE */

@ -34,17 +34,3 @@ static inline int smp_cpuid(void)
return 0;
#endif /* !CFG_SMP */
}
/*
* This file is part of GayBSD.
* Copyright (c) 2021 fef <owo@fef.moe>.
*
* GayBSD is nonviolent software: you may only use, redistribute, and/or
* modify it under the terms of the Cooperative Nonviolent Public License
* (CNPL) as found in the LICENSE file in the source code root directory
* or at <https://git.pixie.town/thufie/npl-builder>; either version 7
* of the license, or (at your option) any later version.
*
* GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent
* permitted by applicable law. See the CNPL for details.
*/

@ -1,5 +1,6 @@
# Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved.
target_sources(gay_arch PRIVATE
init.c
page.c
)

@ -0,0 +1,428 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#include <arch/atom.h>
#include <arch/multiboot.h>
#include <arch/vmparam.h>
#include <gay/linker.h>
#include <gay/mm.h>
#include <gay/vm/page.h>
#include <gay/systm.h>
#include <gay/util.h>
#include <inttypes.h>
#include <string.h>
/*
* This file is funny.
* Our job here seems simple at first glance: initialize the vm_page_array.
* The catch is that we can't use the regular kernel memory allocators for
* doing so, because those depend on vm_page_array. Classic chicken/egg stuff.
* So, how do we allocate (and map!) memory for the array? Simple, by using a
* completely separate page frame allocator that is so basic that it can't even
* free pages again. That's not a problem though, because it doesn't need to.
* Memory maps are created manually, which is very painful, but doable.
* HOWEVER! This boot page frame allocator needs to allocate memory for keeping
* track of which memory areas were already allocated and which ones are still
* free, too. Areas might also have to be split, if the region we want to
* allocate is not the exact size of the physical area. Therefore, we have
* *another* allocator, which is basically the most primitive slab allocator in
* existence. It uses a fixed-size "slab" (the `free_areas` array below), and
* keeps track of which free areas are available.
*
* To sum up:
* - The boot "slab" allocator hands out `struct free_area`s to ...
* - the boot page frame allocator, which is used to set up ...
* - the buddy page frame allocator, which serves as a backend to ...
* - the kernel slab allocator.
*
* XXX the boot memory allocator could probably be moved to an architecture
* independent file, because it is not really specific to the x86.
*/
struct vm_page *const vm_page_array = (vm_page_t)VM_PAGE_ARRAY_OFFSET;
#ifdef DEBUG
/* this gets updated in x86_setup_paging() once we know how big the array is */
vm_page_t _vm_page_array_end = (vm_page_t)(VM_PAGE_ARRAY_OFFSET + VM_PAGE_ARRAY_LENGTH);
#endif
/**
* @brief Memory area information for the boot page frame allocator.
* The multiboot bootloader gives us an array of memory areas, and tells us
* which ones are available and which aren't. We insert all available areas
* into a circular list (`free_area_list`), and the boot page frame allocator
* iterates over that list for getting memory.
*
* Also, this is probably one of the most unfortunately named structures in the
* entire system, because instances of this structure need to be allocated and,
* well, freed.
*/
struct free_area {
struct clist link;
vm_paddr_t start;
vm_paddr_t end;
};
/** @brief This is essentially a very basic slab. */
static struct free_area free_areas[16];
/** @brief List of all free memory areas, ordered by ascending address */
static CLIST(free_area_list);
/**
* @brief List of all the unused members in `free_areas`.
* This is essentially a very basic slab freelist.
*/
static CLIST(free_area_freelist);
/**
* @brief VERY early page frame allocator.
*
* Allocates `1 << log2` bytes of memory, aligned to at least its own size.
*
* @param log2 Binary logarithm of the allocation size. Must be at least `PAGE_SHIFT`.
* @returns Physical address of the allocated region, or `BOOT_PMALLOC_ERR` on failure
*/
static vm_paddr_t __boot_pmalloc(u_int log2);
#define BOOT_PMALLOC_ERR (~0ul)
/** @brief Zero out a single page (required for page tables) */
static void __boot_clear_page(vm_paddr_t paddr);
/** @brief Initialize the members of `vm_page_array` within the given range. */
static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags);
/** @brief Add a new entry to the list of free memory areas. */
static void insert_free_area(struct mb2_mmap_entry *entry);
static void init_free_area_freelist(void);
static void print_mem_area(struct mb2_mmap_entry *entry);
/*
* "Oh cool another deeply nested 100-liner that nobody understands"
*/
void x86_paging_init(struct mb2_tag_mmap *mmap)
{
init_free_area_freelist();
/*
* insert all free areas and find the end of physical memory
*/
struct mb2_mmap_entry *entry = mmap->entries;
vm_paddr_t end = 0;
kprintf("Memory map:\n");
while ((void *)entry - (void *)mmap < mmap->tag.size) {
vm_paddr_t entry_end = entry->addr + entry->len;
end = max(end, entry_end);
print_mem_area(entry);
if (entry->type == MB2_MEMORY_AVAILABLE)
insert_free_area(entry);
entry = (void *)entry + mmap->entry_size;
}
/*
* allocate and map vm_page_array into virtual memory at VM_PAGE_ARRAY_OFFSET
* (this is gonna be a long one)
*/
struct vm_page *vm_page_array_end = vm_page_array + (end >> PAGE_SHIFT);
#ifdef DEBUG
_vm_page_array_end = vm_page_array_end;
#endif
void *map_pos = vm_page_array;
usize remaining_size = (void *)vm_page_array_end - (void *)vm_page_array;
remaining_size = align_ceil(remaining_size, PAGE_SIZE);
kprintf("Mapping %zu bytes for vm_page_array\n", remaining_size);
while (remaining_size != 0) {
x86_pml4te_t *pml4te = X86_PML4TE(map_pos);
vm_paddr_t pml4te_val = __boot_pmalloc(PAGE_SHIFT);
KASSERT(pml4te_val != BOOT_PMALLOC_ERR);
__boot_clear_page(pml4te_val);
pml4te_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pml4te->val = pml4te_val;
vm_flush();
for (int pdpt_index = 0; pdpt_index < 512; pdpt_index++) {
x86_pdpte_t *pdpte = X86_PDPTE(map_pos);
vm_paddr_t pdpte_val;
/* try allocating a 1 GB gigapage first */
if (remaining_size >= 1 << X86_PDPT_SHIFT) {
pdpte_val = __boot_pmalloc(X86_PDPT_SHIFT);
/* CLion is warning about this condition being always true, but
* that is not the case. I've checked the disassembly with -O2,
* and clang is emitting the check. So it's fine, i guess. */
if (pdpte_val != BOOT_PMALLOC_ERR) {
pdpte_val |= __P_PRESENT | __P_RW | __P_HUGE
| __P_GLOBAL | __P_NOEXEC;
pdpte->val = pdpte_val;
remaining_size -= 1 << X86_PDPT_SHIFT;
map_pos += 1 << X86_PDPT_SHIFT;
if (remaining_size == 0)
goto map_done;
continue;
}
}
/* couldn't use a gigapage, continue in hugepage steps */
pdpte_val = __boot_pmalloc(PAGE_SHIFT);
KASSERT(pdpte_val != BOOT_PMALLOC_ERR);
__boot_clear_page(pdpte_val);
pdpte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pdpte->val = pdpte_val;
vm_flush();
for (int pdt_index = 0; pdt_index < 512; pdt_index++) {
x86_pdte_t *pdte = X86_PDTE(map_pos);
vm_paddr_t pdte_val;
/* try allocating a 2 MB hugepage first */
if (remaining_size >= (1 << X86_PDT_SHIFT)) {
pdte_val = __boot_pmalloc(X86_PDT_SHIFT);
if (pdte_val != BOOT_PMALLOC_ERR) {
pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL
| __P_HUGE | __P_NOEXEC;
pdte->val = pdte_val;
remaining_size -= 1 << X86_PDT_SHIFT;
map_pos += 1 << X86_PDT_SHIFT;
if (remaining_size == 0)
goto map_done;
continue;
}
}
/* couldn't use a hugepage, continue in page steps */
pdte_val = __boot_pmalloc(PAGE_SHIFT);
KASSERT(pdte_val != BOOT_PMALLOC_ERR);
__boot_clear_page(pdte_val);
pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pdte->val = pdte_val;
vm_flush();
for (int pt_index = 0; pt_index < 512; pt_index++) {
x86_pte_t *pte = X86_PTE(map_pos);
vm_paddr_t pte_val = __boot_pmalloc(X86_PT_SHIFT);
KASSERT(pte_val != BOOT_PMALLOC_ERR);
pte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC;
pte->val = pte_val;
remaining_size -= 1 << X86_PT_SHIFT;
map_pos += 1 << X86_PT_SHIFT;
if (remaining_size == 0)
goto map_done;
} /* end of PT loop */
} /* end of PD loop */
} /* end of PDP loop */
} /* end of PML4 loop */
map_done:
vm_flush();
/*
* initialize the individual pages and calculate the usable RAM size
*/
vm_paddr_t prev_end = 0;
vm_size_t available_ram = 0;
struct free_area *cursor;
clist_foreach_entry(&free_area_list, cursor, link) {
/* list should have been ordered by ascending address */
KASSERT(cursor->start >= prev_end);
if (cursor->start != prev_end) {
vm_paddr_t reserved_start = prev_end;
vm_paddr_t reserved_end = cursor->start;
init_page_range(reserved_start, reserved_end, PG_RESERVED);
}
init_page_range(cursor->start, cursor->end, 0);
prev_end = cursor->end;
available_ram += cursor->end - cursor->start;
}
kprintf("Available RAM: %"PRIdVM_SIZE" bytes\n", available_ram);
}
static struct free_area *alloc_free_area_entry(void)
{
/* XXX this should pretty much never happen, but it would still be nice to
* have at least some sort of error recovery rather than giving up */
if (clist_is_empty(&free_area_freelist))
panic("Boot memory allocator has run out of free_areas");
return clist_del_first_entry(&free_area_freelist, struct free_area, link);
}
static void free_free_area_entry(struct free_area *area)
{
#ifdef DEBUG
area->start = ~0ul;
area->end = ~0ul;
#endif
clist_add(&free_area_freelist, &area->link);
}
static void init_free_area_freelist(void)
{
for (u_int i = 0; i < ARRAY_SIZE(free_areas); i++)
clist_add(&free_area_freelist, &free_areas[i].link);
}
static void insert_free_area(struct mb2_mmap_entry *entry)
{
vm_paddr_t start = align_ceil(entry->addr, PAGE_SIZE);
vm_paddr_t end = align_floor(entry->addr + entry->len, PAGE_SIZE);
if (start <= image_start_phys && end >= image_end_phys) {
/*
* This is the area that the kernel image is loaded in, which we need
* to treat differently than all the others because it gets split up
* into two usable areas. Illustration (addresses are examples only):
*
* 0x01000000 ---------------------- end (high_end)
* : <free real estate>
* 0x00500000 ---------------------- image_end_phys (high_start)
* : <kernel code & data>
* 0x00400000 ---------------------- image_start_phys (low_end)
* : <free real estate>
* 0x00100000 ---------------------- start (low_start)
*
* (we silently assert that the image always spans only one region)
*/
vm_paddr_t low_start = start;
vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE);
if (low_start < low_end) {
struct free_area *area = alloc_free_area_entry();
area->start = low_start;
area->end = low_end;
clist_add(&free_area_list, &area->link);
}
vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE);
vm_paddr_t high_end = end;
if (high_start < high_end) {
struct free_area *area = alloc_free_area_entry();
area->start = high_start;
area->end = high_end;
clist_add(&free_area_list, &area->link);
}
} else {
struct free_area *area = alloc_free_area_entry();
area->start = start;
area->end = end;
clist_add(&free_area_list, &area->link);
}
}
static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags)
{
KASSERT(start <= end);
vm_page_t cursor = vm_page_array + (start >> PAGE_SHIFT);
usize count = (end - start) >> PAGE_SHIFT;
if (flags == 0) {
memset(cursor, 0, count * sizeof(*cursor));
} else {
while (count--) {
atom_init(&cursor->count, 0);
cursor->flags = flags;
cursor->try_free = nil;
cursor->extra = nil;
cursor++;
}
}
}
/*
* This works in a relatively simple way, actually.
* We iterate over the list of `struct free_area`s in reverse order because the
* list is sorted by ascending physical address and i've decided that we prefer
* using higher physical addresses for the page array. The first fit wins, and
* all that's left is to split up the area and insert the top and bottom
* remainder back into the list, if applicable.
*/
static vm_paddr_t __boot_pmalloc(u_int log2)
{
const usize alloc_size = 1 << log2;
KASSERT(log2 >= PAGE_SHIFT); /* never hand out less than a full page */
struct free_area *cursor;
clist_foreach_entry_rev(&free_area_list, cursor, link) {
vm_paddr_t area_start = cursor->start;
vm_paddr_t area_end = cursor->end;
KASSERT(area_start < area_end);
/* the areas tend to be aligned to greater sizes at their beginning */
vm_paddr_t alloc_start = align_ceil(area_start, alloc_size);
vm_paddr_t alloc_end = alloc_start + alloc_size;
if (alloc_start >= area_start && alloc_end <= area_end) {
/*
* Example with log2 == 21 (alloc_size == 0x00200000):
*
* 0x00500000 ------------------- area_end (not aligned)
* : <high_rest>
* 0x00400000 ------------------- alloc_end (aligned to alloc_size)
* : <allocated block>
* 0x00200000 ------------------- alloc_start (aligned to alloc_size)
* : <low_rest>
* 0x00100000 ------------------- area_start (not aligned)
*/
if (alloc_start > area_start) {
struct free_area *low_rest = alloc_free_area_entry();
low_rest->start = area_start;
low_rest->end = alloc_start;
clist_add(&cursor->link, &low_rest->link);
}
if (alloc_end < area_end) {
struct free_area *high_rest = alloc_free_area_entry();
high_rest->start = alloc_end;
high_rest->end = area_end;
clist_add_first(&cursor->link, &high_rest->link);
}
clist_del(&cursor->link);
free_free_area_entry(cursor);
return alloc_start;
}
}
return BOOT_PMALLOC_ERR;
}
/*
* It's really unfortunate that we have to zero a page before we can use it as
* a page table, yet also need to reference it in the page table structures
* (thereby mapping it into virtual memory) before we can zero it out.
* This little hack temporarily maps the area at the PDPT entry just below
* the one KERNBASE lives in (within _pdpt0), zeroes the area, and then
* unmaps it again.
*/
static void __boot_clear_page(vm_paddr_t paddr)
{
vm_paddr_t pbase = align_floor(paddr, 1 << X86_PDPT_SHIFT);
vm_offset_t offset = paddr - pbase;
void *vbase = (void *)KERNBASE - (1 << X86_PDPT_SHIFT);
x86_pdpte_t *pdpe = X86_PDPTE(vbase);
pdpe->val = pbase | __P_PRESENT | __P_RW | __P_HUGE | __P_NOEXEC;
vm_flush();
memset(vbase + offset, 0, PAGE_SIZE);
pdpe->flags.present = false;
vm_flush();
}
static void print_mem_area(struct mb2_mmap_entry *entry)
{
const char *name;
switch (entry->type) {
case MB2_MEMORY_AVAILABLE:
name = "Available";
break;
case MB2_MEMORY_RESERVED:
name = "Reserved";
break;
case MB2_MEMORY_ACPI_RECLAIMABLE:
name = "ACPI (reclaimable)";
break;
case MB2_MEMORY_NVS:
name = "Non-Volatile Storage";
break;
case MB2_MEMORY_BADRAM:
name = "Bad RAM";
break;
default:
name = "Unknown";
break;
}
kprintf(" [0x%016"PRIxVM_PADDR"-0x%016"PRIxVM_PADDR"] %s\n",
entry->addr, entry->addr + entry->len - 1, name);
}

@ -13,17 +13,18 @@
#include <string.h>
/* from linker script */
extern void _image_start_phys;
extern void _image_end_phys;
__asmlink x86_pdp_t _pdp0;
__asmlink x86_pml4_t _pml4;
/*
* Initial Page Directory Pointer Table and Page Map Level 4 Table for the
* assembly startup routine (see setup64.S). Used for statically mapping the
* lowest 2 GB of physical memory into the -2 GB virtual area.
*/
__asmlink x86_pdpt_t _pdpt0;
__asmlink x86_pml4t_t _pml4t;
int map_page(uintptr_t phys, void *virt, enum pflags flags)
{
flags |= P_PRESENT;
x86_pml4e_t *pml4e = X86_PML4E(virt);
x86_pml4te_t *pml4e = X86_PML4TE(virt);
if (!pml4e->flags.present) {
void *page = get_pages(0, M_ATOMIC);
if (page == nil)
@ -95,43 +96,31 @@ void x86_isr_page_fault(trap_frame_t *frame, u32 error_code)
panic("Page fault");
}
uintptr_t vtophys(void *virt)
vm_paddr_t vtophys(void *virt)
{
x86_pml4e_t *pml4e = X86_PML4E(virt);
if (!pml4e->flags.present)
return 0;
x86_pdpe_t *pdpe = X86_PDPE(virt);
if (!pml4e->flags.present)
return 0;
if (pml4e->flags.huge) {
uintptr_t phys_base = pdpe->val & X86_PMAP_MASK;
return phys_base + ((uintptr_t)virt % (1 << X86_PDP_SHIFT));
x86_pml4te_t *pml4te = X86_PML4TE(virt);
if (!pml4te->flags.present)
return (vm_paddr_t)-1;
x86_pdpte_t *pdpte = X86_PDPTE(virt);
if (!pdpte->flags.present)
return (vm_paddr_t)-1;
if (pdpte->flags.huge) {
vm_paddr_t phys_base = pdpte->val & X86_PMAP_MASK;
return phys_base + ((vm_paddr_t)virt % (1 << X86_PDPT_SHIFT));
}
x86_pde_t *pde = X86_PDE(virt);
if (!pde->flags.present)
return 0;
if (pde->flags.huge) {
uintptr_t phys_base = pde->val & X86_PMAP_MASK;
return phys_base + ((uintptr_t)virt % (1 << X86_PD_SHIFT));
x86_pdte_t *pdte = X86_PDTE(virt);
if (!pdte->flags.present)
return (vm_paddr_t)-1;
if (pdte->flags.huge) {
vm_paddr_t phys_base = pdte->val & X86_PMAP_MASK;
return phys_base + ((vm_paddr_t)virt % (1 << X86_PDT_SHIFT));
}
x86_pte_t *pte = X86_PTE(virt);
if (!pte->flags.present)
return 0;
uintptr_t phys_base = pte->val & X86_PMAP_MASK;
return phys_base + ((uintptr_t)virt % (1 << X86_PT_SHIFT));
}
void vm_flush(void)
{
register_t tmp;
__asm__ volatile(
" mov %%cr3, %0 \n"
" mov %0, %%cr3 \n"
: "=r"(tmp)
:
: "memory"
);
return (vm_paddr_t)-1;
vm_paddr_t phys_base = pte->val & X86_PMAP_MASK;
return phys_base + ((vm_paddr_t)virt % (1 << X86_PT_SHIFT));
}

@ -11,6 +11,12 @@
#ifdef __cplusplus
/** @brief Use `__restrict` in header files, and just `restrict` in C code */
#define __restrict
#define __BEGIN_DECLS extern "C" {
#define __END_DECLS }
#else
#define __BEGIN_DECLS
#define __END_DECLS
#endif
/** @brief Annotated symbol is an alias for another symbol. */
@ -110,12 +116,6 @@
* These are hints for clang's branch optimizer which will try to arrange the
* code to yield the best performance when a condition is true or false.
*
* - Use them sparingly and only in performance critical places because they
* come with a sometimes very significant code size overhead due to branches
* being rearranged and aligned
* - Only use them if you know *for sure* that a particular branch is *very*
* unlikely to be hit, for example when
*
* Use it sparingly and only in performance critical places because the overhead
* from rearranging and aligning the individual instructions can quickly make
* the kernel image too big.
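A usage sketch, assuming the branch hint this header declares follows the usual BSD __predict_false() naming (that name is an assumption, not confirmed by this hunk; everything else below is made up for illustration):

/* Sketch only: __predict_false() is assumed to be this header's "unlikely"
 * hint.  The rare error path is what gets moved off the hot path. */
static int checked_div(int a, int b, int *out)
{
	if (__predict_false(b == 0))
		return -1;	/* cold error path */
	*out = a / b;		/* hot path stays fall-through */
	return 0;
}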

@ -40,7 +40,7 @@ struct kprintf_printer {
/**
* @brief Write to the kernel log.
* The data itself may be cached in a buffer rather than written to the
* target immediately; `krpintf()` will call `flush()` when needed.
* target immediately; `kprintf()` will call `flush()` when needed.
*
* @param printer A reference to the original structure
* @param buf Data to write
@ -48,7 +48,7 @@ struct kprintf_printer {
* @returns The amount of bytes actually written,
* or a negative code from `errno.h` on failure
*/
ssize_t (*write)(struct kprintf_printer *printer, const void *buf, size_t len);
isize (*write)(struct kprintf_printer *printer, const void *buf, usize len);
/**
* @brief Flush the kernel log buffer.
* On implementations that don't have a buffer, this can be a no-op.
@ -58,7 +58,7 @@ struct kprintf_printer {
* @returns The amount of bytes flushed out (0 if none),
* or a negative code from `errno.h` on failure
*/
ssize_t (*flush)(struct kprintf_printer *printer);
isize (*flush)(struct kprintf_printer *printer);
};
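For illustration, a minimal backend matching the isize-based callbacks might look like the sketch below; serial_putc() is a made-up output primitive, and only the struct fields and kprintf_set_printer() (used in _boot()) appear in this commit.

/* Illustrative, unbuffered printer backend. */
static isize serial_write(struct kprintf_printer *printer, const void *buf, usize len)
{
	const char *p = buf;
	for (usize i = 0; i < len; i++)
		serial_putc(p[i]);	/* hypothetical output primitive */
	return (isize)len;
}

static isize serial_flush(struct kprintf_printer *printer)
{
	return 0;	/* nothing buffered, nothing to flush */
}

static struct kprintf_printer serial_kprintf_printer = {
	.write = serial_write,
	.flush = serial_flush,
};

/* somewhere in early init: kprintf_set_printer(&serial_kprintf_printer); */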
/**

@ -0,0 +1,29 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#pragma once
#include <gay/types.h>
extern void _image_start_phys;
#define image_start_phys ((vm_paddr_t)&_image_start_phys)
extern void _image_end_phys;
#define image_end_phys ((vm_paddr_t)&_image_end_phys)
extern void _image_start;
#define image_start (&_image_start)
extern void _image_end;
#define image_end (&_image_end)
extern void _kernel_start_phys;
#define kern_start_phys ((vm_paddr_t)&_kernel_start_phys)
extern void _kernel_end_phys;
#define kern_end_phys ((vm_paddr_t)&_kernel_end_phys)
extern void _kernel_start;
#define kern_start (&_kernel_start)
extern void _kernel_end;
#define kern_end (&_kernel_end)

@ -6,9 +6,10 @@
* @file include/gay/mm.h
* @brief Header for dynamic memory management
*
* To avoid possible confusion, physical memory addresses always use type
* `uintptr_t` and virtual ones are `void *`. This should give us at least some
* type of compiler warning if they are accidentally mixed up.
* To avoid possible confusion (and not break 32-bit systems, even though they
* aren't really supported anyway), physical memory addresses always use type
* `vm_paddr_t` and virtual ones are `void *`. This should give us at least
* some type of compiler warning if they are accidentally mixed up.
*
* GayBSD uses a classic slab algorithm for its own data structures, which is
* backed by a buddy page frame allocator. The latter is also used for getting
@ -25,22 +26,44 @@
#include <gay/kprintf.h>
#include <gay/types.h>
#define _M_ZONE_NORMAL 0
#define _M_ZONE_DMA 1
#define _M_ZONE_INDEX(flags) ((flags) & 1)
#define _M_EMERG (1 << 1)
#define _M_NOWAIT (1 << 2)
#define MM_ZONE_NORMAL 0
#define MM_ZONE_DMA 1
struct mm_zone {
patom_t freelist; /* -> struct page */
usize length;
};
/**
* @brief Map of all memory zones.
*
* Memory is currently divided into two zones: DMA and normal.
* The mm subsystem isn't NUMA aware, because it's not really a thing on desktop
* grade machines anyway and would only complicate things unnecessarily.
*/
extern struct mm_zone mm_zones[2];
/**
* @brief Memory allocation flags passed to `kmalloc()`.
*/
enum mflags {
/** @brief Physically contiguous memory for DMA. */
M_CONTIG = (1 << 0),
/** @brief Use emergency memory reserves if necessary. */
M_EMERG = (1 << 1),
/** @brief Don't sleep during the allocation. */
M_NOSLEEP = (1 << 2),
/** @brief Allocate userspace memory. */
M_USER = (1 << 4),
/** @brief Kernel memory */
M_KERN = M_CONTIG,
/** @brief Allocate memory in atomic (irq) context. */
M_ATOMIC = M_EMERG | M_NOSLEEP,
/** @brief Use emergency memory reserves if necessary */
M_EMERG = _M_EMERG,
/** @brief Don't sleep during the allocation (required for atomic context) */
M_NOWAIT = _M_NOWAIT,
/** @brief Regular kernel memory */
M_KERN = _M_ZONE_NORMAL,
/** @brief Don't sleep, and use emergency reserves if necessary */
M_ATOMIC = _M_EMERG | _M_NOWAIT,
/** @brief Allocate low memory suitable for DMA transfers */
M_DMA = _M_ZONE_DMA,
};
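A hedged usage sketch follows; the exact kmalloc() prototype isn't shown in this hunk, so the (size, flags) argument order is an assumption.

/* Sketch only: allocating scratch memory from interrupt context, where we
 * must not sleep, so the no-sleep and emergency-reserve bits are combined
 * via M_ATOMIC. */
static void *grab_irq_scratch(void)
{
	void *buf = kmalloc(64, M_ATOMIC);
	if (buf == nil)
		return nil;	/* atomic allocations may legitimately fail */
	return buf;
}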
/**
@ -69,22 +92,22 @@ void kfree(void *ptr);
* layout for better performance (no shifting around required).
*/
enum pflags {
P_PRESENT = __PFLAG_PRESENT, /**< @brief Page exists */
P_RW = __PFLAG_RW, /**< @brief Page is writable */
P_USER = __PFLAG_USER, /**< @brief Page is accessible from ring 3 */
P_ACCESSED = __PFLAG_ACCESSED, /**< @brief Page has been accessed */
P_DIRTY = __PFLAG_DIRTY, /**< @brief Page has been written */
P_GLOBAL = __PFLAG_GLOBAL, /**< @brief The entry survives `vm_flush()` */
P_NOCACHE = __PFLAG_NOCACHE, /**< @brief The TLB won't cache this entry */
P_SLAB = __PFLAG_SLAB, /**< @brief Page is used by the slab allocator */
P_NOSLEEP = __PFLAG_ATOMIC, /**< @brief Page is atomic */
P_PRESENT = __P_PRESENT, /**< @brief Page exists */
P_RW = __P_RW, /**< @brief Page is writable */
P_USER = __P_USER, /**< @brief Page is accessible from ring 3 */
P_ACCESSED = __P_ACCESSED, /**< @brief Page has been accessed */
P_DIRTY = __P_DIRTY, /**< @brief Page has been written */
P_GLOBAL = __P_GLOBAL, /**< @brief The entry survives `vm_flush()` */
P_NOCACHE = __P_NOCACHE, /**< @brief The TLB won't cache this entry */
P_SLAB = __P_SLAB, /**< @brief Page is used by the slab allocator */
P_NOSLEEP = __P_ATOMIC, /**< @brief Page is atomic */
#ifdef __HAVE_HUGEPAGES
/** @brief This page is `HUGEPAGE_SIZE` bytes long, rather than `PAGE_SIZE` */
P_HUGE = __PFLAG_HUGE,
P_HUGE = __P_HUGE,
#endif
#ifdef __HAVE_NOEXEC
/** @brief No instructions can be fetched from this page */
P_NOEXEC = __PFLAG_NOEXEC,
P_NOEXEC = __P_NOEXEC,
#endif
};
@ -143,9 +166,6 @@ enum pflags get_pflags(void *page);
*/
int set_pflags(void *page, enum pflags flags);
/** @brief Flush the TLB. */
void vm_flush(void);
/**
* @brief Initialize the memory allocator.
*
@ -180,7 +200,7 @@ int pages_init(void);
* The returned region will be `(1 << order) * PAGE_SIZE` bytes long.
*
* @param order Order of magnitude (as in `1 << order`) for the region size
* @param flags How to allocate (`order` must be 0 if `M_NOSLEEP` is specified)
* @param flags How to allocate (`order` must be 0 if `M_NOWAIT` is specified)
* @return A pointer to the beginning of the region in the direct mapping area,
* or `nil` if the allocation failed
*/

@ -0,0 +1,81 @@
/* Copyright (C) 2021 fef <owo@fef.moe>. All rights reserved. */
#pragma once
#include <arch/page.h>
#include <gay/cdefs.h>
#include <gay/systm.h>
#include <gay/types.h>
/**
* @brief Stores information about a single page in physical memory.
* There is exactly one of these for every physical page, no matter what that
* page is used for or whether it is usable at all.
*/
struct vm_page {
/** @brief Reference count (0 = unused) */
atom_t count;
/** @brief Various flags describing how and for what the page is used, see below */
u_int flags;
/** @brief Singly linked list, if the page is free */
patom_t next;
/**
* @brief Request this page to be freed if possible.
* This callback may be `nil` unless the `PG_FREEABLE` bit in `flags`
* is set. The presence of this bit does *not* guarantee that the page
* is actually reclaimable; it's merely a performance optimization to
* avoid having to call this function on pages that can never be
* reclaimed anyway.
*
* @param page Pointer to the page itself
* @return 0 if the page could be reclaimed and is now free
*/
int (*try_free)(struct vm_page *page);
/**
* @brief Optional extra data pointer, reserved for private use.
* The current owner of the page may use this to track the underlying
* object in memory (or pretty much anything else), for example the
* `struct slab` if this page is currently used by the slab allocator.
* Useful for implementing the `try_free()` callback.
*/
void *extra;
};
typedef struct vm_page *vm_page_t;
/* values for struct page::flags */
/** @brief Page must never be accessed */
#define PG_RESERVED (1 << 0)
/** @brief Page is in an atomic per-cpu cache */
#define PG_ATOMIC (1 << 1)
/** @brief Page is used by the slab allocator */
#define PG_SLAB (1 << 2)
/** @brief It **might** be possible to reclaim this page using `try_free()` */
#define PG_FREEABLE (1 << 3)
/** @brief Array of every single page in physical memory, indexed by page frame number. */
extern struct vm_page *const vm_page_array;
#ifdef DEBUG
extern vm_page_t _vm_page_array_end;
#endif
/** @brief Get the page frame number of a page. */
__pure2 static inline u_long pg2pfn(vm_page_t page)
{
KASSERT(page < _vm_page_array_end);
return page - vm_page_array;
}
__pure2 static inline u_long paddr2pfn(vm_paddr_t paddr)
{
KASSERT(&vm_page_array[paddr >> PAGE_SHIFT] < _vm_page_array_end);
return paddr >> PAGE_SHIFT;
}
__pure2 static inline vm_page_t paddr2pg(vm_paddr_t paddr)
{
vm_page_t page = vm_page_array + (paddr >> PAGE_SHIFT);
KASSERT(page < _vm_page_array_end);
return page;
}
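A small illustrative check (not part of the header) showing that the three helpers agree with each other:

/* Illustration only: physical address -> vm_page -> page frame number is
 * just a shift, whichever helper you go through. */
static inline bool pfn_helpers_consistent(vm_paddr_t paddr)
{
	vm_page_t pg = paddr2pg(paddr);
	return pg2pfn(pg) == paddr2pfn(paddr)
	    && pg2pfn(pg) == (u_long)(paddr >> PAGE_SHIFT);
}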

@ -43,8 +43,8 @@ typedef ___wchar_t wchar_t;
#endif
typedef struct {
intmax_t quot; /* Quotient. */
intmax_t rem; /* Remainder. */
__intmax_t quot; /* Quotient. */
__intmax_t rem; /* Remainder. */
} imaxdiv_t;
/* TODO: these haven't been ported over yet */
