diff --git a/arch/x86/boot/boot.c b/arch/x86/boot/boot.c index 34a9f28..e90a573 100644 --- a/arch/x86/boot/boot.c +++ b/arch/x86/boot/boot.c @@ -73,14 +73,14 @@ static void fb_init(enum vga_color fg, enum vga_color bg); static void print_gay_propaganda(void); static struct mb2_tag *next_tag(struct mb2_tag *tag); -static int handle_tag(struct mb2_tag *tag); -static int handle_mmap_tag(struct mb2_tag_mmap *tag); -static const char *mmap_type_name(u32 type); +static void handle_tag(struct mb2_tag *tag); extern int main(int argc, char *argv[]); __asmlink void _boot(void *address) { + volatile int x = 69420; + while (x == 69420); kprintf_set_printer(&fb_kprintf_printer); fb_init(VGA_COLOR_LIGHT_GREY, VGA_COLOR_BLACK); @@ -88,24 +88,17 @@ __asmlink void _boot(void *address) print_gay_propaganda(); - int err = 0; /* the +8 for the address has something to do with the tags * being embedded in another struct iirc, but i have no idea what * that was and quite honestly i'm just glad it works at all. */ - for (struct mb2_tag *tag = address + 8; tag != NULL; tag = next_tag(tag)) { - err = handle_tag(tag); - if (err) - break; - } + for (struct mb2_tag *tag = address + 8; tag != NULL; tag = next_tag(tag)) + handle_tag(tag); - if (!err) - main(0, NULL); + main(0, nil); } -static inline int handle_tag(struct mb2_tag *tag) +static inline void handle_tag(struct mb2_tag *tag) { - int ret = 0; - switch (tag->type) { case MB2_TAG_TYPE_END: break; @@ -113,62 +106,12 @@ static inline int handle_tag(struct mb2_tag *tag) kprintf("Kernel command line: %s\n", ((struct mb2_tag_string *)tag)->string); break; case MB2_TAG_TYPE_MMAP: - ret = handle_mmap_tag((struct mb2_tag_mmap *)tag); + x86_paging_init((struct mb2_tag_mmap *)tag); break; default: //kprintf("Unknown tag %u\n", tag->type); break; } - - return ret; -} - -static inline int handle_mmap_tag(struct mb2_tag_mmap *tag) -{ - kprintf("Memory map:\n"); - - uintptr_t region = 0; - usize region_len = 0; - - struct mb2_mmap_entry *entry = &tag->entries[0]; - while ((void *)entry < (void *)tag + tag->tag.size) { - kprintf(" [0x%016llx-0x%016llx] %s\n", - entry->addr, - entry->addr + entry->len - 1, - mmap_type_name(entry->type)); - - usize safe_len; -# ifdef __x86_64__ - safe_len = entry->len; -# else - if (entry->addr >= (1llu << 32)) - safe_len = 0; /* we can't handle 64-bit pointers */ - else if (entry->len > (1llu << 32) - entry->addr) - safe_len = (1llu << 32) - entry->addr; /* clip to 32-bit */ - else - safe_len = entry->len; -# endif - - if (entry->type == MB2_MEMORY_AVAILABLE && safe_len > region_len) { - region = entry->addr; - region_len = safe_len; - } - - entry = (void *)entry + tag->entry_size; - } - - if (region == 0 || region_len == 0) { - kprintf("No memory available! Aborting.\n"); - return 1; - } - - int err = kmalloc_init(region, region + region_len); - if (err) { - kprintf("kmalloc_init() failed! 
Aborting.\n"); - return 1; - } - - return 0; } static inline struct mb2_tag *next_tag(struct mb2_tag *tag) @@ -280,21 +223,3 @@ static void print_gay_propaganda(void) fb_foreground = fg_before; kprintf(", be gay do crime!\n\n"); } - -static const char *mmap_type_name(u32 type) -{ - switch (type) { - case MB2_MEMORY_AVAILABLE: - return "Available"; - case MB2_MEMORY_RESERVED: - return "Reserved"; - case MB2_MEMORY_ACPI_RECLAIMABLE: - return "ACPI"; - case MB2_MEMORY_NVS: /* non-volatile storage */ - return "NVS"; - case MB2_MEMORY_BADRAM: - return "Bad RAM"; - default: - return "Unknown"; - } -} diff --git a/arch/x86/boot/multiboot.S b/arch/x86/boot/multiboot.S index d0019ac..a471944 100644 --- a/arch/x86/boot/multiboot.S +++ b/arch/x86/boot/multiboot.S @@ -26,7 +26,7 @@ header_start: /* struct mb2_header */ /* heaer_length */ .long header_end - header_start /* checksum */ - .long (1 << 33) - MB2_HEADER_MAGIC - MB2_ARCHITECTURE_I386 - (header_end - header_start) + .long (1 << 32) - MB2_HEADER_MAGIC - MB2_ARCHITECTURE_I386 - (header_end - header_start) #if 0 /* TODO: implement graphics */ .align MB2_TAG_ALIGN diff --git a/arch/x86/boot/setup64.S b/arch/x86/boot/setup64.S index ee3a81e..05f9f5f 100644 --- a/arch/x86/boot/setup64.S +++ b/arch/x86/boot/setup64.S @@ -129,36 +129,36 @@ ENTRY(_setup) movl $X86_KERN_TSS, %eax ltr %ax -#if (KERNBASE % (1 << X86_PDP_SHIFT)) != 0 +#if (KERNBASE % (1 << X86_PDPT_SHIFT)) != 0 #error "KERNBASE must be aligned to at least a PDP entry (1 GB)" #endif -#if (X86_PMAP_OFFSET % (1 << X86_PML4_SHIFT)) != 0 +#if (X86_PMAP_OFFSET % (1 << X86_PML4T_SHIFT)) != 0 #error "X86_PMAP_OFFSET must be aligned to at least a PML4 entry (512 GB)" #endif #define V48 0xffff000000000000 -#define PDP_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDP_SHIFT) % 512 ) * 8) -#define PML4_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4_SHIFT) * 8 ) +#define PDP_OFFSET(ptr) (( (((ptr) - V48) >> X86_PDPT_SHIFT) % 512 ) * 8) +#define PML4_OFFSET(ptr) ( ((ptr) - V48) >> (X86_PML4T_SHIFT) * 8 ) /* * statically map the low 2 GB to itself and to the high kernel half */ /* for the identity mapping */ - movl $0x00000083, PADDR(_pdp0) /* present (0), write (1), huge (7) */ - movl $0x40000083, PADDR(_pdp0 + 8) + movl $0x00000083, PADDR(_pdpt0) /* present (0), write (1), huge (7) */ + movl $0x40000083, PADDR(_pdpt0 + 8) /* For the -2GB at the end of virtual memory. We use the same PDP for * both low and high memory, so techincally this creates a total of four * mappings (+0 GB, +510 GB, -512 GB, -2 GB), but we remove all except * the -2GB one once we have transitioned to high memory. 
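	 * (For reference, working PDP_OFFSET and PML4_OFFSET out by hand: the
	 * identity map lives in _pdpt0[0] and _pdpt0[1], the kernel map in
	 * _pdpt0[510] and _pdpt0[511], and _pdpt0 is referenced from both
	 * _pml4t[0] and _pml4t[511], which is exactly where the four mappings
	 * come from.)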
*/ - movl $0x00000083, PADDR(_pdp0 + PDP_OFFSET(KERNBASE)) - movl $0x40000083, PADDR(_pdp0 + PDP_OFFSET(KERNBASE + 0x40000000)) + movl $0x00000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE)) + movl $0x40000083, PADDR(_pdpt0 + PDP_OFFSET(KERNBASE + 0x40000000)) - movl $PADDR(_pdp0 + 0x003), PADDR(_pml4) /* present (0), write (1), huge (7) */ - movl $PADDR(_pdp0 + 0x003), PADDR(_pml4 + PML4_OFFSET(KERNBASE)) + movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t) /* present (0), write (1), huge (7) */ + movl $PADDR(_pdpt0 + 0x003), PADDR(_pml4t + PML4_OFFSET(KERNBASE)) /* map the PML4 to itself */ - movl $PADDR(_pml4 + 0x003), PADDR(_pml4 + PML4_OFFSET(X86_PMAP_OFFSET)) - movb $0x80, PADDR(_pml4 + PML4_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */ + movl $PADDR(_pml4t + 0x003), PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET)) + movb $0x80, PADDR(_pml4t + PML4_OFFSET(X86_PMAP_OFFSET) + 7) /* NX bit */ /* * ensure paging is disabled by clearing CR0.PG (bit 31) @@ -178,7 +178,7 @@ ENTRY(_setup) movl %eax, %cr4 /* load cr3 with the PML4 */ - movl $PADDR(_pml4), %eax + movl $PADDR(_pml4t), %eax movl %eax, %cr3 /* @@ -249,9 +249,9 @@ L_ENTRY(_setup_highmem) popfq /* remove the low memory identity mapping and bonk the TLB */ - movl $0, _pdp0 - movl $0, _pdp0 + 8 - movl $0, _pml4 + movl $0, _pdpt0 + movl $0, _pdpt0 + 8 + movl $0, _pml4t movq %cr3, %rax movq %rax, %cr3 diff --git a/arch/x86/include/amd64/page.h b/arch/x86/include/amd64/page.h index 30d8c15..71a0c19 100644 --- a/arch/x86/include/amd64/page.h +++ b/arch/x86/include/amd64/page.h @@ -5,30 +5,77 @@ #error "This file is not meant to be included directly, use " #endif -/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */ -#define HUGEPAGE_SHIFT 21 +/* + * Common abbreviations used throughout the entire x86 vm code base: + * PT - Page Table + * PDT - Page Directory Table + * PDPT - Page Directory Pointer Table + * PML4T - Page Map Level 4 Table + * PTE - Page Table Entry + * PDTE - Page Directory Table Entry + * PDPTE - Page Directory Pointer Table Entry + * PML4TE - Page Map Level 4 entry + * PTI - Page Table Index (range 0 - 511) + * PDTI - Page Directory Table Index (range 0 - 511) + * PDPTI - Page Directory Pointer Table Index (range 0 - 511) + * PML4TI - Page Map Level 4 Index (range 0 - 511) + * + * Quick recap on how the x86 transes virtual to physical addresses: + * + * |63 48|47 39|38 30|29 21|21 12|11 0| + * +------------------+-----------+-----------+-----------+-----------+--------------+ + * | 16 bits | 9 bits | 9 bits | 9 bits | 9 bits | 12 bits | + * +------------------+-----------+-----------+-----------+-----------+--------------+ + * (1) | (copy of bit 47) | PML4T | PDPT | PDT | PT | offset (4 K) | + * +------------------+-----------+-----------+-----------+-----------+--------------+ + * (2) | (copy of bit 47) | PML4T | PDPT | PDT | offset (2 M) | + * +------------------+-----------+-----------+-----------+--------------------------+ + * (3) | (copy of bit 47) | PML4T | PDPT | offset (1 G) | + * +------------------+-----------+-----------+--------------------------------------+ + * + * %CR3: pointer to PML4T, 256 TB (2^36 pages) + * PML4T: 512 entries, 512 GB per entry (2^27 pages) + * PDPT: 512 entries, 1 GB per entry (2^18 pages) + * PDT: 512 entries, 2 MB per entry (2^9 pages) + * PT: 512 entries, 4 KB per entry (1 page) + * + * PDPT entries can either reference a PDT or a 1 GB region directly (if __P_HUGE is set) + * PDT entries can either reference a PT or a 2 MB region directly (if __P_HUGE is set) + * + * (1) shows a PML4T -> PDPT -> PDT -> 
PT regular mapping + * (2) shows a PML4T -> PDPT -> PDT hugepage mapping + * (3) shows a PML4T -> PDPT gigapage mapping + * + * Since the lowest 12 bits are always zero in any page map entry, they are + * used for flags. Additionally, bit 63 stores the NX (no execute) flag. + */ #include -#define X86_PT_SHIFT PAGE_SHIFT -#define X86_PD_SHIFT (X86_PT_SHIFT + 9) -#define X86_PDP_SHIFT (X86_PD_SHIFT + 9) -#define X86_PML4_SHIFT (X86_PDP_SHIFT + 9) +#define X86_PT_SHIFT PAGE_SHIFT +#define X86_PDT_SHIFT (X86_PT_SHIFT + 9) +#define X86_PDPT_SHIFT (X86_PDT_SHIFT + 9) +#define X86_PML4T_SHIFT (X86_PDPT_SHIFT + 9) + +#define __HAVE_NOEXEC +#define __HAVE_GIGAPAGES + +/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */ +#define HUGEPAGE_SHIFT X86_PDT_SHIFT +/** @brief Binary logarithm of `GIGAPAGE_SIZE`. */ +#define GIGAPAGE_SHIFT +#define GIGAPAGE_SIZE (1 << GIGAPAGE_SHIFT) #ifndef _ASM_SOURCE #include #include -#define __HAVE_NOEXEC - /** - * @brief A single 64-bit Page Table Entry. + * @brief A single 64-bit page map entry, split up into its individual bit flags. * The layout matches that of the Intel SDM, vol 3, sect 4.3, fig 4-4. - * Bits 9 and 10 (`slab` and `atomic`) are marked as AVL in the manual and - * ignored by the MMU. We only use them for `get_pflags()`/`set_pflags()`. */ -struct x86_page_flags { +struct x86_pmap_flags { /* 0 */bool present:1; /**< Page Fault on access if 0 */ /* 1 */bool rw:1; /**< Page Fault on write if 0 */ /* 2 */bool user:1; /**< Page Fault on user mode access if 0 */ @@ -36,27 +83,30 @@ struct x86_page_flags { /* 4 */bool cache_disabled:1; /**< Disable caching in TLB */ /* 5 */bool accessed:1; /**< 1 if page has been accessed */ /* 6 */bool dirty:1; /**< 1 if page has been written to */ -/* 7 */bool huge:1; /**< only valid for PDPTEs and PDEs */ -/* 8 */bool global:1; /**< Don't update the TLB on table swap if 1 */ -/* 9 */bool slab:1; /**< Used by the slab allocator */ -/* 10 */bool atomic:1; /**< Allocated atomically */ -/* 11 */unsigned _unused:1; -/* 12 */uintptr_t shifted_address:51; -/* 63 */bool noexec:1; +/* 7 */bool huge:1; /**< Only valid for PDPTEs and PDTEs */ +/* 8 */bool global:1; /**< Entry survives `vm_flush()` if 1 */ +/* 9 */unsigned _unused:3; +/* 12 */vm_paddr_t shifted_address:51; +/* 63 */bool noexec:1; /**< Prevent instruction fetches */ } __packed; -#define __PFLAG_PRESENT (1 << 0) -#define __PFLAG_RW (1 << 1) -#define __PFLAG_USER (1 << 2) -#define __PFLAG_WRITE_THROUGH (1 << 3) -#define __PFLAG_NOCACHE (1 << 4) -#define __PFLAG_ACCESSED (1 << 5) -#define __PFLAG_DIRTY (1 << 6) -#define __PFLAG_HUGE (1 << 7) -#define __PFLAG_GLOBAL (1 << 8) -#define __PFLAG_SLAB (1 << 9) -#define __PFLAG_ATOMIC (1 << 10) -#define __PFLAG_NOEXEC (1 << 63) +/* bitmasks for the structure above */ + +#define __P_PRESENT (1 << 0) +#define __P_RW (1 << 1) +#define __P_USER (1 << 2) +#define __P_WRITE_THROUGH (1 << 3) +#define __P_NOCACHE (1 << 4) +#define __P_ACCESSED (1 << 5) +#define __P_DIRTY (1 << 6) +#define __P_HUGE (1 << 7) +#define __P_GLOBAL (1 << 8) +#define __P_SLAB (1 << 9) +#define __P_ATOMIC (1 << 10) +#define __P_NOEXEC (1ul << 63) + +/** @brief Bitmask for extracting the physical address from a page map entry. */ +#define X86_PMAP_MASK 0x7ffffffffffff000 /* * these types are deliberately not merged into one so that the @@ -64,39 +114,82 @@ struct x86_page_flags { */ #define __pmap_entry_union union { \ - struct x86_page_flags flags; \ - uintptr_t val; \ + struct x86_pmap_flags flags; \ + vm_paddr_t val; \ } +/** @brief x86 Page Table Entry. 
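+ * Illustrative use, mirroring what vtophys() in page.c does (nothing in this
+ * header depends on it): an entry can be read either through its bit fields,
+ * e.g. X86_PTE(ptr)->flags.present, or as a raw integer, e.g.
+ * X86_PTE(ptr)->val & X86_PMAP_MASK for the physical base address it maps to.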
*/ typedef __pmap_entry_union x86_pte_t; -typedef __pmap_entry_union x86_pde_t; -typedef __pmap_entry_union x86_pdpe_t; -typedef __pmap_entry_union x86_pml4e_t; - +/** @brief x86 Page Directory Table Entry. */ +typedef __pmap_entry_union x86_pdte_t; +/** @brief x86 Page Directory Pointer Table Entry. */ +typedef __pmap_entry_union x86_pdpte_t; +/** @brief x86 Page Map Level 4 Table Entry. */ +typedef __pmap_entry_union x86_pml4te_t; + +/** @brief x86 Page Table. */ typedef struct { x86_pte_t entries[512]; } __aligned(PAGE_SIZE) x86_pt_t; -typedef struct { x86_pde_t entries[512]; } __aligned(PAGE_SIZE) x86_pd_t; -typedef struct { x86_pdpe_t entries[512]; } __aligned(PAGE_SIZE) x86_pdp_t; -typedef struct { x86_pml4e_t entries[512]; } __aligned(PAGE_SIZE) x86_pml4_t; - -#define X86_PMAP_MASK 0x7ffffffffffff000 +/** @brief x86 Page Directory Table. */ +typedef struct { x86_pdte_t entries[512]; } __aligned(PAGE_SIZE) x86_pdt_t; +/** @brief x86 Page Directory Pointer Table. */ +typedef struct { x86_pdpte_t entries[512]; } __aligned(PAGE_SIZE) x86_pdpt_t; +/** @brief x86 Page Map Level 4 Table. */ +typedef struct { x86_pml4te_t entries[512]; } __aligned(PAGE_SIZE) x86_pml4t_t; /* you aren't expected to understand any of these, they're just nasty offset calculations */ +#define __V48_MASK ( ((uintptr_t)1 << 48) - 1 ) + /** @brief Get the linear 48-bit address */ -#define __V48ADDR(ptr) ((uintptr_t)(ptr) & 0x0000ffffffffffff) - -#define X86_PT_INDEX(ptr) (( __V48ADDR(ptr) >> X86_PT_SHIFT ) % 512) -#define X86_PD_INDEX(ptr) (( __V48ADDR(ptr) >> X86_PD_SHIFT ) % 512) -#define X86_PDP_INDEX(ptr) (( __V48ADDR(ptr) >> X86_PDP_SHIFT ) % 512) -#define X86_PML4_INDEX(ptr) ( __V48ADDR(ptr) >> X86_PML4_SHIFT ) - -#define __PT_BASE X86_PMAP_OFFSET -#define __PD_BASE (__PT_BASE + (__V48ADDR(X86_PMAP_OFFSET) >> X86_PT_SHIFT)) -#define __PDP_BASE (__PD_BASE + (__V48ADDR(X86_PMAP_OFFSET) >> X86_PD_SHIFT)) -#define __PML4_BASE (__PDP_BASE + (__V48ADDR(X86_PMAP_OFFSET) >> X86_PDP_SHIFT)) - -#define X86_PTE(ptr) ((x86_pte_t *)( __PT_BASE + (__V48ADDR(ptr) >> X86_PT_SHIFT) )) -#define X86_PDE(ptr) ((x86_pde_t *)( __PD_BASE + (__V48ADDR(ptr) >> X86_PD_SHIFT) )) -#define X86_PDPE(ptr) ((x86_pdpe_t *)( __PDP_BASE + (__V48ADDR(ptr) >> X86_PDP_SHIFT) )) -#define X86_PML4E(ptr) ((x86_pml4e_t *)( __PML4_BASE + (__V48ADDR(ptr) >> X86_PML4_SHIFT) )) +#define __V48(ptr) ((uintptr_t)(ptr) & __V48_MASK) + +/** + * @brief Generate a 48-bit virtual address in user space, based on its pmap indices. + * Every index must be less than 512, or you'll get a garbage address. + * `pml4i` must be less than 256, or you'll hurt the MMU's feelings. + * This is because bits 63-48 of the virtual address must all match bit 47. + */ +#define UV48ADDR(pml4ti, pdpti, pdti, pti) ( \ + (vm_paddr_t)(pml4ti) << X86_PML4T_SHIFT | \ + (vm_paddr_t)(pdpti) << X86_PDPT_SHIFT | \ + (vm_paddr_t)(pdti) << X86_PDT_SHIFT | \ + (vm_paddr_t)(pti) << X86_PT_SHIFT \ +) + +/** + * @brief Generate a 48-bit virtual address in kernel space, based on its pmap indices. + * Every index must be less than 512, or you'll get a garbage address. + * `pml4i` must be at least 256, or you'll hurt the MMU's feelings. + * This is because bits 63-48 of the virtual address must all match bit 47. + */ +#define KV48ADDR(pml4ti, pdpti, pdti, pti) ( \ + (vm_paddr_t)0xffff000000000000 | \ + UV48ADDR(pml4ti, pdpti, pdti, pti) \ +) + +/** @brief Get the Page Table index for a given virtual address. 
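+ * Worked example, derived by hand from KV48ADDR() and the shift constants
+ * (purely illustrative): KERNBASE == KV48ADDR(511, 510, 0, 0), so
+ * X86_PML4TI(KERNBASE) == 511, X86_PDPTI(KERNBASE) == 510, and both
+ * X86_PDTI(KERNBASE) and X86_PTI(KERNBASE) are 0.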
*/ +#define X86_PTI(ptr) ((__V48(ptr) >> X86_PT_SHIFT ) % 512) +/** @brief Get the Page Directory Table index for a given virtual address. */ +#define X86_PDTI(ptr) ((__V48(ptr) >> X86_PDT_SHIFT ) % 512) +/** @brief Get the Page Directory Pointer Table index for a given virtual address. */ +#define X86_PDPTI(ptr) ((__V48(ptr) >> X86_PDPT_SHIFT ) % 512) +/** @brief Get the Page Map Level 4 Table index for a given virtual address. */ +#define X86_PML4TI(ptr) (__V48(ptr) >> X86_PML4T_SHIFT) + +/* Page Map Level 4 Table index for the recursive page map */ +#define __PML4TI (X86_PML4TI(X86_PMAP_OFFSET)) /* = 256 */ + +#define __PT_BASE ( (x86_pt_t *)KV48ADDR(__PML4TI, 0, 0, 0) ) +#define __PDT_BASE ( (x86_pdt_t *)KV48ADDR(__PML4TI, __PML4TI, 0, 0) ) +#define __PDPT_BASE ( (x86_pdpt_t *)KV48ADDR(__PML4TI, __PML4TI, __PML4TI, 0) ) +#define __PML4T_BASE ( (x86_pml4t_t *)KV48ADDR(__PML4TI, __PML4TI, __PML4TI, __PML4TI) ) + +/** @brief Get the Page Table Entry for a given virtual address. */ +#define X86_PTE(ptr) ( &__PT_BASE->entries[__V48(ptr) >> X86_PT_SHIFT] ) +/** @brief Get the Page Directory Table Entry for a given virtual address. */ +#define X86_PDTE(ptr) ( &__PDT_BASE->entries[__V48(ptr) >> X86_PDT_SHIFT] ) +/** @brief Get the Page Directory Pointer Table Entry for a given virtual address. */ +#define X86_PDPTE(ptr) ( &__PDPT_BASE->entries[__V48(ptr) >> X86_PDPT_SHIFT] ) +/** @brief Get the Page Map Level 4 Table Entry for a given virtual address. */ +#define X86_PML4TE(ptr) ( &__PML4T_BASE->entries[__V48(ptr) >> X86_PML4T_SHIFT] ) #endif /* not _ASM_SOURCE */ diff --git a/arch/x86/include/amd64/vmparam.h b/arch/x86/include/amd64/vmparam.h index 69ecb6a..b7df17d 100644 --- a/arch/x86/include/amd64/vmparam.h +++ b/arch/x86/include/amd64/vmparam.h @@ -6,20 +6,24 @@ #endif /** @brief Userland memory region */ -#define USER_OFFSET 0x0000000000000000 /* +0 TB */ -#define USER_LENGTH 0x0000800000000000 /* 128 TB */ +#define USER_OFFSET 0x0000000000000000 /* +0 TB */ +#define USER_LENGTH 0x0000800000000000 /* 128 TB */ /** @brief Recursive Page Map Level 4 map */ -#define X86_PMAP_OFFSET 0xffff800000000000 /* -128 TB */ -#define X86_PMAP_LENGTH 0x0000004020101000 /* ~ 256.5 GB */ +#define X86_PMAP_OFFSET 0xffff800000000000 /* -128 TB */ +#define X86_PMAP_LENGTH 0x0000004020101000 /* ~ 256.5 GB */ /** @brief Direct (contiguous) mapping of physical memory */ -#define DMAP_OFFSET 0xfffff80000000000 /* -8 TB */ -#define DMAP_LENGTH 0x0000040000000000 /* 4 TB */ +#define DMAP_OFFSET 0xfffff80000000000 /* -8 TB */ +#define DMAP_LENGTH 0x0000040000000000 /* 4 TB */ /** @brief Kernel region (image, heap, etc) */ -#define KERN_OFFSET 0xfffffe0000000000 /* -2 TB */ -#define KERN_LENGTH 0x0000020000000000 /* 2 TB */ +#define KERN_OFFSET 0xfffffe0000000000 /* -2 TB */ +#define KERN_LENGTH 0x0000020000000000 /* 2 TB */ + /** @brief Where the kernel image is actually mapped to */ -#define KERNBASE 0xffffffff80000000 /* -2 GB */ -#define KERNBASE_LENGTH 0x0000000080000000 +#define KERNBASE 0xffffffff80000000 /* -2 GB */ +#define KERNBASE_LENGTH 0x0000000080000000 + +#define VM_PAGE_ARRAY_OFFSET KERN_OFFSET +#define VM_PAGE_ARRAY_LENGTH (KERN_OFFSET - KERNBASE) diff --git a/arch/x86/include/arch/_inttypes.h b/arch/x86/include/arch/_inttypes.h index fe7604a..b3bd603 100644 --- a/arch/x86/include/arch/_inttypes.h +++ b/arch/x86/include/arch/_inttypes.h @@ -140,6 +140,15 @@ #define PRIXMAX "jX" /* uintmax_t */ #define PRIXPTR __PRIptr"X" /* uintptr_t */ +#ifdef _KERNEL + +#define PRIxVM_PADDR __PRI64"x" /* 
vm_paddr_t */ +#define PRIxVM_OFFSET __PRI64"x" /* vm_offset_t */ +#define PRIdVM_OFFSET __PRI64"d" /* vm_offset_t */ +#define PRIdVM_SIZE __PRI64"d" /* vm_size_t */ + +#endif /* _KERNEL */ + /* fscanf(3) macros for signed integers. */ #define SCNd8 "hhd" /* int8_t */ diff --git a/arch/x86/include/arch/page.h b/arch/x86/include/arch/page.h index c84c73e..176c453 100644 --- a/arch/x86/include/arch/page.h +++ b/arch/x86/include/arch/page.h @@ -26,6 +26,11 @@ #ifndef _ASM_SOURCE +#include + +/** @brief Initialize `vm_page_array` based on the multiboot memory map. */ +void x86_paging_init(struct mb2_tag_mmap *mmap); + /** @brief Pointer bitmask to get the base address of their page. */ #define PAGE_MASK ( ~((unsigned long)PAGE_SIZE - 1) ) /** @brief Pointer bitmask to get the base address of their huge page. */ @@ -48,8 +53,20 @@ * @brief Get the physical address a virtual one is currently mapped to. * * @param virt virtual address - * @returns The physical address, or `0` if there is no mapping + * @returns The physical address, or -1 cast to `vm_paddr_t` if there is no mapping */ -uintptr_t vtophys(void *virt); +vm_paddr_t vtophys(void *virt); + +static inline void vm_flush(void) +{ + register_t tmp; + __asm__ volatile( + " mov %%cr3, %0 \n" + " mov %0, %%cr3 \n" + : "=r"(tmp) + : + : "memory" + ); +} #endif /* not _ASM_SOURCE */ diff --git a/arch/x86/include/arch/smp.h b/arch/x86/include/arch/smp.h index 10f6e9b..159fc64 100644 --- a/arch/x86/include/arch/smp.h +++ b/arch/x86/include/arch/smp.h @@ -34,17 +34,3 @@ static inline int smp_cpuid(void) return 0; #endif /* !CFG_SMP */ } - -/* - * This file is part of GayBSD. - * Copyright (c) 2021 fef . - * - * GayBSD is nonviolent software: you may only use, redistribute, and/or - * modify it under the terms of the Cooperative Nonviolent Public License - * (CNPL) as found in the LICENSE file in the source code root directory - * or at ; either version 7 - * of the license, or (at your option) any later version. - * - * GayBSD comes with ABSOLUTELY NO WARRANTY, to the extent - * permitted by applicable law. See the CNPL for details. - */ diff --git a/arch/x86/mm/amd64/CMakeLists.txt b/arch/x86/mm/amd64/CMakeLists.txt index 1b063ef..cb14a7f 100644 --- a/arch/x86/mm/amd64/CMakeLists.txt +++ b/arch/x86/mm/amd64/CMakeLists.txt @@ -1,5 +1,6 @@ # Copyright (C) 2021 fef . All rights reserved. target_sources(gay_arch PRIVATE + init.c page.c ) diff --git a/arch/x86/mm/amd64/init.c b/arch/x86/mm/amd64/init.c new file mode 100644 index 0000000..2540a1a --- /dev/null +++ b/arch/x86/mm/amd64/init.c @@ -0,0 +1,428 @@ +/* Copyright (C) 2021 fef . All rights reserved. */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +/* + * This file is funny. + * Our job here seems simple at first glance: initialize the vm_page_array. + * The catch is that we can't use the regular kernel memory allocators for + * doing so, because those depend on vm_page_array. Classic chicken/egg stuff. + * So, how do we allocate (and map!) memory for the array? Simple, by using a + * completely separate page frame allocator that is so basic that it can't even + * free pages again. That's not a problem though, because it doesn't need to. + * Memory maps are created manually, which is very painful, but doable. + * HOWEVER! This boot page frame allocator needs to allocate memory for keeping + * track of which memory areas were already allocated and which ones are still + * free, too. 
Areas might also have to be split, if the region we want to + * allocate is not the exact size of the physical area. Therefore, we have + * *another* allocator, which is basically the most primitive slab allocator in + * existence. It uses a fixed-size "slab" (the `free_areas` array below), and + * keeps track of which free areas are available. + * + * To sum up: + * - The boot "slab" allocator hands out `struct free_area`s to ... + * - the boot page frame allocator, which is used to set up ... + * - the buddy page frame allocator, which serves as a backend to ... + * - the kernel slab allocator. + * + * XXX the boot memory allocator could probably be moved to an architecture + * independent file, because it is not really specific to the x86. + */ + +struct vm_page *const vm_page_array = (vm_page_t)VM_PAGE_ARRAY_OFFSET; +#ifdef DEBUG +/* this gets updated in x86_setup_paging() once we know how big the array is */ +vm_page_t _vm_page_array_end = (vm_page_t)(VM_PAGE_ARRAY_OFFSET + VM_PAGE_ARRAY_LENGTH); +#endif + +/** + * @brief Memory area information for the boot page frame allocator. + * The multiboot bootloader gives us an array of memory areas, and tells us + * which ones are available and which aren't. We insert all available areas + * into a circular list (`free_area_list`), and the boot page frame allocator + * iterates over that list for getting memory. + * + * Also, this is probably one of the most unfortunately named structures in the + * entire system, because instances of this structure need to be allocated and, + * well, freed. + */ +struct free_area { + struct clist link; + vm_paddr_t start; + vm_size_t end; +}; +/** @brief This is essentially a very basic slab. */ +static struct free_area free_areas[16]; +/** @brief List of all free memory areas, ordered by ascending address */ +static CLIST(free_area_list); +/** + * @brief List of all the unused members in `free_areas`. + * This is essentially a very basic slab freelist. + */ +static CLIST(free_area_freelist); + +/** + * @brief VERY early page frame allocator. + * + * Allocates `1 << log2` bytes of memory, aligned to at least its own size. + * + * @param log2 Binary logarithm of the allocation size. Must be at least `PAGE_SHIFT`. + * @returns Physical address of the allocated region, or `BOOT_PMALLOC_ERR` on failure + */ +static vm_paddr_t __boot_pmalloc(u_int log2); +#define BOOT_PMALLOC_ERR (~0ul) +/** @brief Zero out a single page (required for page tables) */ +static void __boot_clear_page(vm_paddr_t paddr); + +/** @brief Initialize the members of `vm_page_array` within the given range. */ +static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags); +/** @brief Add a new entry to the list of free memory areas. 
*/ +static void insert_free_area(struct mb2_mmap_entry *entry); +static void init_free_area_freelist(void); +static void print_mem_area(struct mb2_mmap_entry *entry); + +/* + * "Oh cool another deeply nested 100-liner that nobody understands" + */ +void x86_paging_init(struct mb2_tag_mmap *mmap) +{ + init_free_area_freelist(); + + /* + * insert all free areas and find the end of physical memory + */ + struct mb2_mmap_entry *entry = mmap->entries; + vm_paddr_t end = 0; + kprintf("Memory map:\n"); + while ((void *)entry - (void *)mmap < mmap->tag.size) { + vm_paddr_t entry_end = entry->addr + entry->len; + end = max(end, entry_end); + print_mem_area(entry); + if (entry->type == MB2_MEMORY_AVAILABLE) + insert_free_area(entry); + entry = (void *)entry + mmap->entry_size; + } + + /* + * allocate and map vm_page_array into virtual memory at VM_PAGE_ARRAY_OFFSET + * (this is gonna be a long one) + */ + struct vm_page *vm_page_array_end = vm_page_array + (end >> PAGE_SHIFT); +#ifdef DEBUG + _vm_page_array_end = vm_page_array_end; +#endif + void *map_pos = vm_page_array; + usize remaining_size = (void *)vm_page_array_end - (void *)vm_page_array; + remaining_size = align_ceil(remaining_size, PAGE_SIZE); + kprintf("Mapping %zu bytes for vm_page_array\n", remaining_size); + + while (remaining_size != 0) { + x86_pml4te_t *pml4te = X86_PML4TE(map_pos); + vm_paddr_t pml4te_val = __boot_pmalloc(PAGE_SHIFT); + KASSERT(pml4te_val != BOOT_PMALLOC_ERR); + __boot_clear_page(pml4te_val); + pml4te_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; + pml4te->val = pml4te_val; + vm_flush(); + + for (int pdpt_index = 0; pdpt_index < 512; pdpt_index++) { + x86_pdpte_t *pdpte = X86_PDPTE(map_pos); + vm_paddr_t pdpte_val; + + /* try allocating a 1 GB gigapage first */ + if (remaining_size >= 1 << X86_PDPT_SHIFT) { + pdpte_val = __boot_pmalloc(X86_PDPT_SHIFT); + /* CLion is warning about this condition being always true, but + * that is not the case. I've checked the disassembly with -O2, + * and clang is emitting the check. So it's fine, i guess. 
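+			 * (The failure case is real: __boot_pmalloc() returns
+			 * BOOT_PMALLOC_ERR whenever no free area can hold a naturally
+			 * aligned 1 GB block, e.g. on machines with little or heavily
+			 * fragmented physical memory.)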
*/ + if (pdpte_val != BOOT_PMALLOC_ERR) { + pdpte_val |= __P_PRESENT | __P_RW | __P_HUGE + | __P_GLOBAL | __P_NOEXEC; + pdpte->val = pdpte_val; + remaining_size -= 1 << X86_PDPT_SHIFT; + map_pos += 1 << X86_PDPT_SHIFT; + if (remaining_size == 0) + goto map_done; + continue; + } + } + + /* couldn't use a gigapage, continue in hugepage steps */ + pdpte_val = __boot_pmalloc(PAGE_SHIFT); + KASSERT(pdpte_val != BOOT_PMALLOC_ERR); + __boot_clear_page(pdpte_val); + pdpte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; + pdpte->val = pdpte_val; + vm_flush(); + + for (int pdt_index = 0; pdt_index < 512; pdt_index++) { + x86_pdte_t *pdte = X86_PDTE(map_pos); + vm_paddr_t pdte_val; + + /* try allocating a 2 MB hugepage first */ + if (remaining_size >= (1 << X86_PDT_SHIFT)) { + pdte_val = __boot_pmalloc(X86_PDT_SHIFT); + if (pdte_val != BOOT_PMALLOC_ERR) { + pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL + | __P_HUGE | __P_NOEXEC; + pdte->val = pdte_val; + remaining_size -= 1 << X86_PDT_SHIFT; + map_pos += 1 << X86_PDT_SHIFT; + if (remaining_size == 0) + goto map_done; + continue; + } + } + + /* couldn't use a hugepage, continue in page steps */ + pdte_val = __boot_pmalloc(PAGE_SHIFT); + KASSERT(pdte_val != BOOT_PMALLOC_ERR); + __boot_clear_page(pdpte_val); + pdte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; + pdte->val = pdte_val; + vm_flush(); + + for (int pt_index = 0; pt_index < 512; pt_index++) { + x86_pte_t *pte = X86_PTE(map_pos); + vm_paddr_t pte_val = __boot_pmalloc(X86_PT_SHIFT); + KASSERT(pte_val != BOOT_PMALLOC_ERR); + pte_val |= __P_PRESENT | __P_RW | __P_GLOBAL | __P_NOEXEC; + pte->val = pte_val; + + remaining_size -= 1 << X86_PT_SHIFT; + map_pos += 1 << X86_PT_SHIFT; + if (remaining_size == 0) + goto map_done; + } /* end of PT loop */ + } /* end of PD loop */ + } /* end of PDP loop */ + } /* end of PML4 loop */ + +map_done: + vm_flush(); + + /* + * initialize the individual pages and calculate the usable RAM size + */ + vm_paddr_t prev_end = 0; + vm_size_t available_ram = 0; + struct free_area *cursor; + clist_foreach_entry(&free_area_list, cursor, link) { + /* list should have been ordered by ascending size */ + KASSERT(cursor->start >= prev_end); + + if (cursor->start != prev_end) { + vm_paddr_t reserved_start = prev_end; + vm_paddr_t reserved_end = cursor->start; + init_page_range(reserved_start, reserved_end, PG_RESERVED); + } + + init_page_range(cursor->start, cursor->end, 0); + prev_end = cursor->end; + available_ram += cursor->end - cursor->start; + } + + kprintf("Available RAM: %"PRIdVM_SIZE" bytes\n", available_ram); +} + +static struct free_area *alloc_free_area_entry(void) +{ + /* XXX this should pretty much never happen, but it would still be nice to + * have at least some sort of error recovery rather than giving up */ + if (clist_is_empty(&free_area_freelist)) + panic("Boot memory allocator has run out of free_areas"); + return clist_del_first_entry(&free_area_freelist, struct free_area, link); +} + +static void free_free_area_entry(struct free_area *area) +{ +#ifdef DEBUG + area->start = ~0ul; + area->end = ~0ul; +#endif + clist_add(&free_area_freelist, &area->link); +} + +static void init_free_area_freelist(void) +{ + for (u_int i = 0; i < ARRAY_SIZE(free_areas); i++) + clist_add(&free_area_freelist, &free_areas[i].link); +} + +static void insert_free_area(struct mb2_mmap_entry *entry) +{ + vm_paddr_t start = align_ceil(entry->addr, PAGE_SIZE); + vm_paddr_t end = align_floor(entry->addr + entry->len, PAGE_SIZE); + if (start <= image_start_phys && end >= 
image_end_phys) { + /* + * This is the area that the kernel image is loaded in, which we need + * to treat differently than all the others because it gets split up + * into two usable areas. Illustration (addresses are examples only): + * + * 0x01000000 ---------------------- end (high_end) + * : + * 0x00500000 ---------------------- image_end_phys (high_start) + * : + * 0x00400000 ---------------------- image_start_phys (low_end) + * : + * 0x00100000 ---------------------- start (low_start) + * + * (we silently assert that the image always spans only one region) + */ + vm_paddr_t low_start = start; + vm_paddr_t low_end = align_floor(image_start_phys, PAGE_SIZE); + if (low_start < low_end) { + struct free_area *area = alloc_free_area_entry(); + area->start = low_start; + area->end = low_end; + clist_add(&free_area_list, &area->link); + } + + vm_paddr_t high_start = align_ceil(image_end_phys, PAGE_SIZE); + vm_paddr_t high_end = end; + if (high_start < high_end) { + struct free_area *area = alloc_free_area_entry(); + area->start = high_start; + area->end = high_end; + clist_add(&free_area_list, &area->link); + } + } else { + struct free_area *area = alloc_free_area_entry(); + area->start = start; + area->end = end; + clist_add(&free_area_list, &area->link); + } +} + +static void init_page_range(vm_paddr_t start, vm_paddr_t end, u_int flags) +{ + KASSERT(start <= end); + vm_page_t cursor = vm_page_array + (start >> PAGE_SHIFT); + usize count = (end - start) >> PAGE_SHIFT; + + if (flags == 0) { + memset(cursor, 0, count * sizeof(*cursor)); + } else { + while (count--) { + atom_init(&cursor->count, 0); + cursor->flags = flags; + cursor->try_free = nil; + cursor->extra = nil; + cursor++; + } + } +} + +/* + * This works relatively simple, actually. + * We iterate over the list of `struct free_area`s in reverse order because the + * list is sorted by ascending physical address and i've decided that we prefer + * using higher physical addresses for the page array. The first fit wins, and + * all that's left is to split up the area and insert the top and bottom + * remainder back into the list, if applicable. 
+ */ +static vm_paddr_t __boot_pmalloc(u_int log2) +{ + const usize alloc_size = 1 << log2; + KASSERT(log2 >= PAGE_SHIFT); /* never hand out less than a full page */ + + struct free_area *cursor; + clist_foreach_entry_rev(&free_area_list, cursor, link) { + vm_paddr_t area_start = cursor->start; + vm_paddr_t area_end = cursor->end; + KASSERT(area_start < area_end); + /* the areas tend to be aligned to greater sizes at their beginning */ + vm_paddr_t alloc_start = align_ceil(area_start, alloc_size); + vm_paddr_t alloc_end = alloc_start + alloc_size; + + if (alloc_start >= area_start && alloc_end <= area_end) { + /* + * Example with log2 == 21 (alloc_size == 0x00200000): + * + * 0x00500000 ------------------- area_end (not aligned) + * : + * 0x00400000 ------------------- alloc_end (aligned to alloc_size) + * : + * 0x00200000 ------------------- alloc_start (aligned to alloc_size) + * : + * 0x00100000 ------------------- area_start (not aligned) + */ + + if (alloc_start > area_start) { + struct free_area *low_rest = alloc_free_area_entry(); + low_rest->start = area_start; + low_rest->end = alloc_start; + clist_add(&cursor->link, &low_rest->link); + } + + if (alloc_end < area_end) { + struct free_area *high_rest = alloc_free_area_entry(); + high_rest->start = alloc_end; + high_rest->end = area_end; + clist_add_first(&cursor->link, &high_rest->link); + } + + clist_del(&cursor->link); + free_free_area_entry(cursor); + return alloc_start; + } + } + + return BOOT_PMALLOC_ERR; +} + +/* + * It's really unfortunate that we have to zero a page before we can use it as + * a page table, yet also need to reference it in the page table structures + * (thereby mapping it into virtual memory) before we can zero it out. + * This little hack temporarily maps the area at one PDP entry before KERNBASE + * (meaning index 1022 of _pdp0), zeroes the area, and then unmaps it again. + */ +static void __boot_clear_page(vm_paddr_t paddr) +{ + vm_paddr_t pbase = align_floor(paddr, 1 << X86_PDPT_SHIFT); + vm_offset_t offset = paddr - pbase; + void *vbase = (void *)KERNBASE - (1 << X86_PDPT_SHIFT); + x86_pdpte_t *pdpe = X86_PDPTE(vbase); + pdpe->val = pbase | __P_PRESENT | __P_RW | __P_HUGE | __P_NOEXEC; + vm_flush(); + memset(vbase + offset, 0, PAGE_SIZE); + pdpe->flags.present = false; + vm_flush(); +} + +static void print_mem_area(struct mb2_mmap_entry *entry) +{ + const char *name; + switch (entry->type) { + case MB2_MEMORY_AVAILABLE: + name = "Available"; + break; + case MB2_MEMORY_RESERVED: + name = "Reserved"; + break; + case MB2_MEMORY_ACPI_RECLAIMABLE: + name = "ACPI (reclaimable)"; + break; + case MB2_MEMORY_NVS: + name = "Non-Volatile Storage"; + break; + case MB2_MEMORY_BADRAM: + name = "Bad RAM"; + break; + } + + kprintf(" [0x%016"PRIxVM_PADDR"-0x%016"PRIxVM_PADDR"] %s\n", + entry->addr, entry->addr + entry->len - 1, name); +} diff --git a/arch/x86/mm/amd64/page.c b/arch/x86/mm/amd64/page.c index a3455b1..ce5b18d 100644 --- a/arch/x86/mm/amd64/page.c +++ b/arch/x86/mm/amd64/page.c @@ -13,17 +13,18 @@ #include -/* from linker script */ -extern void _image_start_phys; -extern void _image_end_phys; - -__asmlink x86_pdp_t _pdp0; -__asmlink x86_pml4_t _pml4; +/* + * Initial Page Directory Pointer Table and Page Map Level 4 Table for the + * assembly startup routine (see setup64.S). Used for statically mapping the + * lowest 2 GB of physical memory into the -2 GB virtual area. 
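+ * The same tables also carry the temporary low-memory identity mapping;
+ * _setup_highmem in setup64.S clears those low _pdpt0/_pml4t entries again
+ * and reloads %cr3 once execution has moved to the high half.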
+ */ +__asmlink x86_pdpt_t _pdpt0; +__asmlink x86_pml4t_t _pml4t; int map_page(uintptr_t phys, void *virt, enum pflags flags) { flags |= P_PRESENT; - x86_pml4e_t *pml4e = X86_PML4E(virt); + x86_pml4te_t *pml4e = X86_PML4TE(virt); if (!pml4e->flags.present) { void *page = get_pages(0, M_ATOMIC); if (page == nil) @@ -95,43 +96,31 @@ void x86_isr_page_fault(trap_frame_t *frame, u32 error_code) panic("Page fault"); } -uintptr_t vtophys(void *virt) +vm_paddr_t vtophys(void *virt) { - x86_pml4e_t *pml4e = X86_PML4E(virt); - if (!pml4e->flags.present) - return 0; - - x86_pdpe_t *pdpe = X86_PDPE(virt); - if (!pml4e->flags.present) - return 0; - if (pml4e->flags.huge) { - uintptr_t phys_base = pdpe->val & X86_PMAP_MASK; - return phys_base + ((uintptr_t)virt % (1 << X86_PDP_SHIFT)); + x86_pml4te_t *pml4te = X86_PML4TE(virt); + if (!pml4te->flags.present) + return (vm_paddr_t)-1; + + x86_pdpte_t *pdpte = X86_PDPTE(virt); + if (!pdpte->flags.present) + return (vm_paddr_t)-1; + if (pdpte->flags.huge) { + vm_paddr_t phys_base = pdpte->val & X86_PMAP_MASK; + return phys_base + ((vm_paddr_t)virt % (1 << X86_PDPT_SHIFT)); } - x86_pde_t *pde = X86_PDE(virt); - if (!pde->flags.present) - return 0; - if (pde->flags.huge) { - uintptr_t phys_base = pde->val & X86_PMAP_MASK; - return phys_base + ((uintptr_t)virt % (1 << X86_PD_SHIFT)); + x86_pdte_t *pdte = X86_PDTE(virt); + if (!pdte->flags.present) + return (vm_paddr_t)-1; + if (pdte->flags.huge) { + vm_paddr_t phys_base = pdte->val & X86_PMAP_MASK; + return phys_base + ((vm_paddr_t)virt % (1 << X86_PDT_SHIFT)); } x86_pte_t *pte = X86_PTE(virt); if (!pte->flags.present) - return 0; - uintptr_t phys_base = pte->val & X86_PMAP_MASK; - return phys_base + ((uintptr_t)virt % (1 << X86_PT_SHIFT)); -} - -void vm_flush(void) -{ - register_t tmp; - __asm__ volatile( -" mov %%cr3, %0 \n" -" mov %0, %%cr3 \n" - : "=r"(tmp) - : - : "memory" - ); + return (vm_paddr_t)-1; + vm_paddr_t phys_base = pte->val & X86_PMAP_MASK; + return phys_base + ((vm_paddr_t)virt % (1 << X86_PT_SHIFT)); } diff --git a/include/gay/cdefs.h b/include/gay/cdefs.h index 2289e92..c8882ba 100644 --- a/include/gay/cdefs.h +++ b/include/gay/cdefs.h @@ -11,6 +11,12 @@ #ifdef __cplusplus /** @brief Use `__restrict` in header files, and just `restrict` in C code */ #define __restrict + +#define __BEGIN_DELCS extern "C" { +#define __END_DECLS } +#else +#define __BEGIN_DECLS +#define __END_DECLS #endif /** @brief Annotated symbol is an alias for another symbol. */ @@ -110,12 +116,6 @@ * These are hints for clang's branch optimizer which will try to arrange the * code to yield the best performance when a condition is true or false. * - * - Use them sparingly and only in performance critical places because they - * come with a sometimes very significant code size overhead due to branches - * being rearranged and aligned - * - Only use them if you know *for sure* that a particular branch is *very* - * unlikely to be hit, for example when - * * Use it sparingly and only in performance critical places because the overhead * from rearranging and aligning the individual instructions can quickly make * the kernel image too big. diff --git a/include/gay/kprintf.h b/include/gay/kprintf.h index 534d3e9..c3c5ef0 100644 --- a/include/gay/kprintf.h +++ b/include/gay/kprintf.h @@ -40,7 +40,7 @@ struct kprintf_printer { /** * @brief Write to the kernel log. * The data itself may be cached in a buffer rather than written to the - * target immediately; `krpintf()` will call `flush()` when needed. 
+ * target immediately; `kprintf()` will call `flush()` when needed. * * @param printer A reference to the original structure * @param buf Data to write @@ -48,7 +48,7 @@ struct kprintf_printer { * @returns The amount of bytes actually written, * or a negative code from `errno.h` on failure */ - ssize_t (*write)(struct kprintf_printer *printer, const void *buf, size_t len); + isize (*write)(struct kprintf_printer *printer, const void *buf, usize len); /** * @brief Flush the kernel log buffer. * On implementations that don't have a buffer, this can be a no-op. @@ -58,7 +58,7 @@ struct kprintf_printer { * @returns The amount of bytes flushed out (0 if none), * or a negative code from `errno.h` on failure */ - ssize_t (*flush)(struct kprintf_printer *printer); + isize (*flush)(struct kprintf_printer *printer); }; /** diff --git a/include/gay/linker.h b/include/gay/linker.h new file mode 100644 index 0000000..e5af387 --- /dev/null +++ b/include/gay/linker.h @@ -0,0 +1,29 @@ +/* Copyright (C) 2021 fef . All rights reserved. */ + +#pragma once + +#include + +extern void _image_start_phys; +#define image_start_phys ((vm_paddr_t)&_image_start_phys) + +extern void _image_end_phys; +#define image_end_phys ((vm_paddr_t)&_image_end_phys) + +extern void _image_start; +#define image_start (&_image_start) + +extern void _image_end; +#define image_end (&_image_end) + +extern void _kernel_start_phys; +#define kern_start_phys ((vm_paddr_t)&_kernel_start_phys) + +extern void _kernel_end_phys; +#define kern_end_phys ((vm_paddr_t)&_kernel_end_phys) + +extern void _kernel_start; +#define kern_start (&_kernel_start) + +extern void _kernel_end; +#define kern_end (&_kernel_end) diff --git a/include/gay/mm.h b/include/gay/mm.h index 304c657..3e07e55 100644 --- a/include/gay/mm.h +++ b/include/gay/mm.h @@ -6,9 +6,10 @@ * @file include/gay/mm.h * @brief Header for dynamic memory management * - * To avoid possible confusion, physical memory addresses always use type - * `uintptr_t` and virtual ones are `void *`. This should give us at least some - * type of compiler warning if they are accidentally mixed up. + * To avoid possible confusion (and Not break 32-bit systems, even though they + * aren't really supported anyway), physical memory addresses always use type + * `vm_paddr_t` and virtual ones are `void *`. This should give us at least + * some type of compiler warning if they are accidentally mixed up. * * GayBSD uses a classic slab algorithm for its own data structures, which is * backed by a buddy page frame allocator. The latter is also used for getting @@ -25,22 +26,44 @@ #include #include +#define _M_ZONE_NORMAL 0 +#define _M_ZONE_DMA 1 +#define _M_ZONE_INDEX(flags) ((flags) & 1) + +#define _M_EMERG (1 << 1) +#define _M_NOWAIT (1 << 2) + +#define MM_ZONE_NORMAL 0 +#define MM_ZONE_DMA 1 + +struct mm_zone { + patom_t freelist; /* -> struct page */ + usize length; +}; + +/** + * @brief Map of all memory zones. + * + * Memory is currently divided into two zones: DMA and normal. + * The mm subsystem isn't NUMA aware, because it's not really a thing on desktop + * grade machines anyway and would only complicate things unnecessarily. + */ +extern struct mm_zone mm_zones[2]; + /** * @brief Memory allocation flags passed to `kmalloc()`. */ enum mflags { - /** @brief Physically contiguous memory for DMA. */ - M_CONTIG = (1 << 0), - /** @brief Use emergency memory reserves if necessary. */ - M_EMERG = (1 << 1), - /** @brief Don't sleep during the allocation. 
*/ - M_NOSLEEP = (1 << 2), - /** @brief Allocate userspace memory. */ - M_USER = (1 << 4), - /** @brief Kernel memory */ - M_KERN = M_CONTIG, - /** @brief Allocate memory in atomic (irq) context. */ - M_ATOMIC = M_EMERG | M_NOSLEEP, + /** @brief Use emergency memory reserves if necessary */ + M_EMERG = _M_EMERG, + /** @brief Don't sleep during the allocation (required for atomic context) */ + M_NOWAIT = _M_NOWAIT, + /** @brief Regular kernel memory */ + M_KERN = _M_ZONE_NORMAL, + /** @brief Don't sleep, and use emergency reserves if necessary */ + M_ATOMIC = _M_EMERG | _M_NOWAIT, + /** @brief Allocate low memory suitable for DMA transfers */ + M_DMA = _M_ZONE_DMA, }; /** @@ -69,22 +92,22 @@ void kfree(void *ptr); * layout for better performance (no shifting around required). */ enum pflags { - P_PRESENT = __PFLAG_PRESENT, /**< @brief Page exists */ - P_RW = __PFLAG_RW, /**< @brief Page is writable */ - P_USER = __PFLAG_USER, /**< @brief Page is accessible from ring 3 */ - P_ACCESSED = __PFLAG_ACCESSED, /**< @brief Page has been accessed */ - P_DIRTY = __PFLAG_DIRTY, /**< @brief Page has been written */ - P_GLOBAL = __PFLAG_GLOBAL, /**< @brief The entry survives `vm_flush()` */ - P_NOCACHE = __PFLAG_NOCACHE, /**< @brief The TLB won't cache this entry */ - P_SLAB = __PFLAG_SLAB, /**< @brief Page is used by the slab allocator */ - P_NOSLEEP = __PFLAG_ATOMIC, /**< @brief Page is atomic */ + P_PRESENT = __P_PRESENT, /**< @brief Page exists */ + P_RW = __P_RW, /**< @brief Page is writable */ + P_USER = __P_USER, /**< @brief Page is accessible from ring 3 */ + P_ACCESSED = __P_ACCESSED, /**< @brief Page has been accessed */ + P_DIRTY = __P_DIRTY, /**< @brief Page has been written */ + P_GLOBAL = __P_GLOBAL, /**< @brief The entry survives `vm_flush()` */ + P_NOCACHE = __P_NOCACHE, /**< @brief The TLB won't cache this entry */ + P_SLAB = __P_SLAB, /**< @brief Page is used by the slab allocator */ + P_NOSLEEP = __P_ATOMIC, /**< @brief Page is atomic */ #ifdef __HAVE_HUGEPAGES /** @brief This page is `HUGEPAGE_SIZE` bytes long, rather than `PAGE_SIZE` */ - P_HUGE = __PFLAG_HUGE, + P_HUGE = __P_HUGE, #endif #ifdef __HAVE_NOEXEC /** @brief No instructions can be fetched from this page */ - P_NOEXEC = __PFLAG_NOEXEC, + P_NOEXEC = __P_NOEXEC, #endif }; @@ -143,9 +166,6 @@ enum pflags get_pflags(void *page); */ int set_pflags(void *page, enum pflags flags); -/** @brief Flush the TLB. */ -void vm_flush(void); - /** * @brief Initialize the memory allocator. * @@ -180,7 +200,7 @@ int pages_init(void); * The returned region will be `(1 << order) * PAGE_SIZE` bytes long. * * @param order Order of magnitude (as in `1 << order`) for the region size - * @param flags How to allocate (`order` must be 0 if `M_NOSLEEP` is specified) + * @param flags How to allocate (`order` must be 0 if `M_NOWAIT` is specified) * @return A pointer to the beginning of the region in the direct mapping area, * or `nil` if the allocation failed */ diff --git a/include/gay/vm/page.h b/include/gay/vm/page.h new file mode 100644 index 0000000..d7245be --- /dev/null +++ b/include/gay/vm/page.h @@ -0,0 +1,81 @@ +/* Copyright (C) 2021 fef . All rights reserved. */ + +#pragma once + +#include + +#include +#include +#include + +/** + * @brief Stores information about a single page in physical memory. + * There is exactly one of these for every physical page, no matter what that + * page is used for or whether it is usable at all. 
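+ * Illustrative invariants, following from the helpers at the bottom of this
+ * file: paddr2pg(paddr) == &vm_page_array[paddr >> PAGE_SHIFT], and
+ * pg2pfn(paddr2pg(paddr)) << PAGE_SHIFT recovers the page-aligned base of
+ * paddr.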
+ */ +struct vm_page { + /** @brief Reference count (0 = unused) */ + atom_t count; + /** @brief Various flags describing how and for what the page is used, see below */ + u_int flags; + /** @brief Singly linked list, if the page is free */ + patom_t next; + /** + * @brief Request this page to be freed if possible. + * This callback may be `nil` unless the `PG_FREEABLE` bit in `flags` + * is set. The presence of this bit does *not* guarantee that the page + * is actually reclaimable, it's merely a performance optimization to + * avoid having to call this function on pages that can never be + * reclaimed anyway. + * + * @param page Pointer to the page itself + * @return 0 if the page could be reclaimed and is now free + */ + int (*try_free)(struct vm_page *page); + /** + * @brief Optional extra data pointer, reserved for private use. + * The current owner of the page may use this to track the underlying + * object in memory (or pretty much anything else), for example the + * `struct slab` if this page is currently used by the slab allocator. + * Useful for implementing the `try_free()` callback. + */ + void *extra; +}; + +typedef struct vm_page *vm_page_t; + +/* values for struct page::flags */ +/** @brief Page must never be accessed */ +#define PG_RESERVED (1 << 0) +/** @brief Page is in an atomic per-cpu cache */ +#define PG_ATOMIC (1 << 1) +/** @brief Page is used by the slab allocator */ +#define PG_SLAB (1 << 2) +/** @brief It **might** be possible to reclaim this page using `try_free()` */ +#define PG_FREEABLE (1 << 3) + +/** @brief Array of every single page in physical memory, indexed by page frame number. */ +extern struct vm_page *const vm_page_array; +#ifdef DEBUG +extern vm_page_t _vm_page_array_end; +#endif + +/** @brief Get the page frame number of a page. */ +__pure2 static inline u_long pg2pfn(vm_page_t page) +{ + KASSERT(page < _vm_page_array_end); + return page - vm_page_array; +} + +__pure2 static inline u_long paddr2pfn(vm_paddr_t paddr) +{ + KASSERT(&vm_page_array[paddr >> PAGE_SHIFT] < _vm_page_array_end); + return paddr >> PAGE_SHIFT; +} + +__pure2 static inline vm_page_t paddr2pg(vm_paddr_t paddr) +{ + vm_page_t page = vm_page_array + (paddr >> PAGE_SHIFT); + KASSERT(page < _vm_page_array_end); + return page; +} diff --git a/lib/c/include/inttypes.h b/lib/c/include/inttypes.h index e2a57ec..718e96d 100644 --- a/lib/c/include/inttypes.h +++ b/lib/c/include/inttypes.h @@ -43,8 +43,8 @@ typedef ___wchar_t wchar_t; #endif typedef struct { - intmax_t quot; /* Quotient. */ - intmax_t rem; /* Remainder. */ + __intmax_t quot; /* Quotient. */ + __intmax_t rem; /* Remainder. */ } imaxdiv_t; /* TODO: these haven't been ported over yet */