x86/mm: add huge page support

This is part of a series of commits where I completely
rewrite kmalloc(), because it needs to be able to return
physically contiguous memory for DMA operations.
Yes, I know the build isn't working right now.
anna 2021-10-17 01:09:51 +02:00
parent 6865864444
commit c911ff9009
Signed by: fef
GPG key ID: EC22E476DC2D3D84
5 changed files with 103 additions and 190 deletions
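For a sense of what the rewrite is aiming at: a driver that needs a buffer it can hand to a DMA engine has to get physically contiguous pages and a physical address for them. The sketch below is only an illustration built on the flags and helpers touched by this commit (MM_CONTIG, virt_to_phys()); the setup function, buffer length, and device register are made up.

#include <errno.h>      /* -ENOMEM, as used elsewhere in the tree */
#include <gay/mm.h>     /* kmalloc(), virt_to_phys(), enum mm_flags */
#include <gay/types.h>  /* usize, uintptr_t */

static void *dma_buf;

/* hypothetical driver init: one physically contiguous buffer for a DMA engine */
static int example_dma_setup(usize dma_buf_len)
{
        /* MM_CONTIG requests physically contiguous pages (the point of the rewrite) */
        dma_buf = kmalloc(dma_buf_len, MM_CONTIG);
        if (dma_buf == NULL)
                return -ENOMEM;

        /*
         * The device sees physical addresses, so it would be programmed with
         * virt_to_phys(dma_buf) rather than the kernel virtual pointer.
         */
        uintptr_t dma_addr = virt_to_phys(dma_buf);
        (void)dma_addr;         /* would be written into a device register here */
        return 0;
}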


@@ -6,15 +6,24 @@
 * @brief Data structures and constants for paging on x86 (please end my suffering).
 */
+#define __HAS_HUGEPAGES
/** @brief Binary logarithm of `PAGE_SIZE`. */
#define PAGE_SHIFT 12
/** @brief Page size in bytes. */
#define PAGE_SIZE (1 << PAGE_SHIFT)
+/** @brief Binary logarithm of `HUGEPAGE_SIZE`. */
+#define HUGEPAGE_SHIFT 22
+/** @brief Huge page size in bytes. */
+#define HUGEPAGE_SIZE (1 << HUGEPAGE_SHIFT)
#ifndef _ASM_SOURCE
/** @brief Pointer bitmask to get the base address of their page. */
-#define PAGE_MASK (~((unsigned long)PAGE_SIZE - 1))
+#define PAGE_MASK ( ~((unsigned long)PAGE_SIZE - 1) )
+/** @brief Pointer bitmask to get the base address of their huge page. */
+#define HUGEPAGE_MASK ( ~((unsigned long)HUGEPAGE_SIZE - 1) )
#include <gay/cdefs.h>
#include <gay/types.h>

@@ -34,6 +43,7 @@ struct x86_page_table_entry {
} __packed;
#define PAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & PAGE_MASK ))
+#define HUGEPAGE_ALIGN(ptr) ((typeof(ptr))( (uintptr_t)(ptr) & HUGEPAGE_MASK ))
struct x86_page_table {
struct x86_page_table_entry entries[1024];

@@ -59,7 +69,7 @@ struct x86_page_directory_entry {
unsigned cache_disabled:1; /**< Disable caching in TLB */
unsigned accessed:1; /**< 1 if page has been accessed */
unsigned _reserved0:1;
-unsigned large:1; /**< 0 = 4K, 1 = 4M */
+unsigned huge:1; /**< 0 = 4K, 1 = 4M */
unsigned _reserved1:1;
unsigned _ignored2:3;
uintptr_t shifted_address:20; /**< Aligned pointer to `struct x86_page_table` */
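A quick sanity check of the new constants, assuming plain 32-bit paging without PAE (which is what these structures describe): one page-directory entry covers 1024 page-table entries of 4 KiB each, so a huge page is exactly 4 MiB and the masks split an address as shown below. The example address is arbitrary, and the _Static_assert assumes a C11 compiler.

/* HUGEPAGE_SIZE == 1 << 22 == 1024 * PAGE_SIZE == 4 MiB (one full PDE) */
_Static_assert(HUGEPAGE_SIZE == 1024 * PAGE_SIZE, "one PDE spans 1024 small pages");

/*
 * For the (made up) virtual address 0xc0123456:
 *   PAGE_ALIGN(0xc0123456)     == 0xc0123000   (PAGE_MASK     == 0xfffff000)
 *   HUGEPAGE_ALIGN(0xc0123456) == 0xc0000000   (HUGEPAGE_MASK == 0xffc00000)
 * i.e. bits 31-22 select the page directory entry, and for a huge page the
 * remaining 22 bits are simply the offset into the 4 MiB page.
 */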


@@ -27,20 +27,11 @@
#include <gay/util.h>
#include <string.h>
-#include <strings.h>
/* from linker script */
extern void _image_start_phys;
extern void _image_end_phys;
-/**
- * @brief Page allocation bitmap.
- * 0 = free, 1 = allocated.
- */
-static unsigned long *pagemap;
-/** @brief Pagemap length as in number of `unsigned long`s, *not* bytes! */
-static usize pagemap_len;
/* first and last dynamic page address (watch out, these are physical) */
static uintptr_t dynpage_start;
static uintptr_t dynpage_end;

@@ -50,11 +41,9 @@ static uintptr_t dynpage_end;
 * This is initialized by the early boot routine in assembly so that paging
 * can be enabled (the kernel itself is mapped to `0xc0100000` by default).
 */
-struct x86_page_table pt0;
+__asmlink struct x86_page_table pt0;
/** @brief First page directory for low memory. */
-struct x86_page_directory pd0;
+__asmlink struct x86_page_directory pd0;
-static void setup_pagemap(void);
int mem_init(uintptr_t start_phys, uintptr_t end_phys)
{

@@ -83,8 +72,6 @@ int mem_init(uintptr_t start_phys, uintptr_t end_phys)
 */
dynpage_start += 32 * PAGE_SIZE;
-setup_pagemap();
kprintf("Available memory: %zu bytes (%lu pages)\n",
dynpage_end - dynpage_start,
(unsigned long)(dynpage_end - dynpage_start) / PAGE_SIZE);
@@ -104,6 +91,30 @@ int map_page(uintptr_t phys, void *virt, enum mm_page_flags flags)
usize pd_index = ((uintptr_t)virt >> PAGE_SHIFT) / 1024;
usize pt_index = ((uintptr_t)virt >> PAGE_SHIFT) % 1024;
+struct x86_page_directory_entry *pde = &X86_CURRENT_PD->entries[pd_index];
+if (flags & MM_PAGE_HUGE) {
+# ifdef DEBUG
+if (phys != HUGEPAGE_ALIGN(phys)) {
+kprintf("map_page(): unaligned physical address %p!\n",
+(void *)phys);
+phys = HUGEPAGE_ALIGN(phys);
+}
+if (virt != HUGEPAGE_ALIGN(virt)) {
+kprintf("map_page(): unaligned virtual address %p!\n",
+virt);
+}
+# endif
+*(unsigned long *)pde = 0;
+/* point the directory entry at the 4 MiB frame (same convention as virt_to_phys()) */
+pde->shifted_address = phys >> PAGE_SHIFT;
+pde->present = 1;
+pde->huge = 1;
+pde->rw = (flags & MM_PAGE_RW) != 0;
+pde->user = (flags & MM_PAGE_USER) != 0;
+pde->accessed = (flags & MM_PAGE_ACCESSED) != 0;
+pde->cache_disabled = (flags & MM_PAGE_NOCACHE) != 0;
+return 0;
+}
/*
 * warning: pt might not be present yet before the if block below,
 * we only define it here already so we can easily call memset() in
@@ -111,27 +122,26 @@ int map_page(uintptr_t phys, void *virt, enum mm_page_flags flags)
 */
struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
-struct x86_page_directory_entry *pd_entry = &X86_CURRENT_PD->entries[pd_index];
-if (!pd_entry->present) {
+if (!pde->present) {
uintptr_t pt_phys = get_page();
if (!pt_phys)
return -ENOMEM;
-*(unsigned long *)pd_entry = 0;
-pd_entry->shifted_address = pt_phys >> PAGE_SHIFT;
-pd_entry->rw = 1;
-pd_entry->present = 1;
+*(unsigned long *)pde = 0;
+pde->shifted_address = pt_phys >> PAGE_SHIFT;
+pde->rw = 1;
+pde->present = 1;
vm_flush();
memset(pt, 0, sizeof(*pt));
}
-struct x86_page_table_entry *pt_entry = &pt->entries[pt_index];
-*(unsigned long *)pt_entry = 0; /* zero out the entire entry first */
-pt_entry->rw = (flags & MM_PAGE_RW) != 0;
-pt_entry->user = (flags & MM_PAGE_USER) != 0;
-pt_entry->cache_disabled = (flags & MM_PAGE_NOCACHE) != 0;
-pt_entry->shifted_address = phys >> PAGE_SHIFT;
-pt_entry->present = 1;
+struct x86_page_table_entry *pte = &pt->entries[pt_index];
+*(unsigned long *)pte = 0; /* zero out the entire entry first */
+pte->rw = (flags & MM_PAGE_RW) != 0;
+pte->user = (flags & MM_PAGE_USER) != 0;
+pte->cache_disabled = (flags & MM_PAGE_NOCACHE) != 0;
+pte->shifted_address = phys >> PAGE_SHIFT;
+pte->present = 1;
return 0;
}
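With the new MM_PAGE_HUGE path, a large physically contiguous region can be mapped with one directory entry per 4 MiB instead of a full page table each. A rough usage sketch follows; the addresses are invented, and since map_page() only calls vm_flush() when it has to allocate a fresh page table, the caller finishes with an explicit vm_flush().

/* sketch: map 16 MiB of physical memory as four 4 MiB huge pages (example addresses) */
/* assumes the usual paging/mm headers from this tree are in scope */
static int example_map_big_region(void)
{
        uintptr_t phys = 0x10000000;            /* 4 MiB aligned physical base */
        void *virt = (void *)0xd0000000;        /* assumed-free kernel virtual window */

        for (int i = 0; i < 4; i++) {
                int err = map_page(phys + i * HUGEPAGE_SIZE,
                                   (char *)virt + i * HUGEPAGE_SIZE,
                                   MM_PAGE_RW | MM_PAGE_HUGE);
                if (err != 0)
                        return err;
        }

        vm_flush();     /* make the new directory entries visible to the MMU */
        return 0;
}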
@@ -148,76 +158,33 @@ uintptr_t unmap_page(void *virt)
usize pd_index = ((uintptr_t)virt >> PAGE_SHIFT) / 1024;
usize pt_index = ((uintptr_t)virt >> PAGE_SHIFT) % 1024;
-struct x86_page_directory_entry *pd_entry = &pd->entries[pd_index];
-if (!pd_entry->present)
+struct x86_page_directory_entry *pde = &pd->entries[pd_index];
+if (!pde->present)
return 0;
-struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
-struct x86_page_table_entry *pt_entry = &pt->entries[pt_index];
-if (!pt_entry->present)
-return 0;
-uintptr_t phys_shifted = pt_entry->shifted_address;
-*(unsigned long *)pt_entry = 0;
-return phys_shifted << PAGE_SHIFT;
-}
+uintptr_t phys = 0;
+if (pde->huge) {
+phys = pde->shifted_address;
+phys <<= PAGE_SHIFT; /* same 20-bit field as for small pages, so *not* HUGEPAGE_SHIFT */
+*(unsigned long *)pde = 0;
+} else {
+struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
+struct x86_page_table_entry *pte = &pt->entries[pt_index];
+if (pte->present) {
+phys = pte->shifted_address;
+phys <<= PAGE_SHIFT;
+*(unsigned long *)pte = 0;
+}
+}
+return phys;
+}
-uintptr_t get_page(void)
-{
-uintptr_t page = 0;
-for (usize i = 0; i < pagemap_len; i++) {
-if (~pagemap[i] != 0) {
-/*
- * for some stupid reason, the bit index returned by
- * ffsl() starts at 1 rather than 0
- * (and is 0 if there is no bit set)
- */
-int bit = ffsl((long)~pagemap[i]) - 1;
-if (bit >= 0) {
-unsigned long page_number = i * sizeof(*pagemap) * 8 + bit;
-page = dynpage_start + page_number * PAGE_SIZE;
-pagemap[i] |= (1lu << bit);
-} else {
-kprintf("Throw your computer in the garbage\n");
-}
-break;
-}
-}
-return page;
-}
-void put_page(uintptr_t phys)
-{
-# ifdef DEBUG
-if (phys % PAGE_SIZE != 0) {
-kprintf("Unaligned ptr %p passed to put_page()!\n", (void *)phys);
-return;
-}
-if (phys < dynpage_start || phys >= dynpage_end) {
-kprintf("Page %p passed to put_page() is not in the dynamic area!\n",
-(void *)phys);
-return;
-}
-# endif
-usize page_number = (phys - dynpage_start) >> PAGE_SHIFT;
-usize index = page_number / (sizeof(*pagemap) * 8);
-int bit = page_number % (sizeof(*pagemap) * 8);
-if ((pagemap[index] & (1lu << bit)) == 0)
-kprintf("Double free of page %p!\n", (void *)phys);
-pagemap[index] &= ~(1lu << bit);
-}
void x86_isr_page_fault(struct x86_trap_frame *frame, u32 error_code)
{
void *address;
__asm__ volatile(
" mov %%cr2, %0 \n"
: "=r"(address)
:
);

@@ -247,9 +214,9 @@ void x86_isr_page_fault(struct x86_trap_frame *frame, u32 error_code)
x86_print_regs(frame);
kprintf("system halted");
__asm__ volatile(
" cli \n"
"1: hlt \n"
" jmp 1b \n"
);
}
@@ -258,17 +225,26 @@ uintptr_t virt_to_phys(void *virt)
usize pd_index = ((uintptr_t)virt >> PAGE_SHIFT) / 1024;
usize pt_index = ((uintptr_t)virt >> PAGE_SHIFT) % 1024;
-struct x86_page_directory *pd = X86_CURRENT_PD;
-if (!pd->entries[pd_index].present)
+struct x86_page_directory_entry *pde = &X86_CURRENT_PD->entries[pd_index];
+if (!pde->present)
return 0;
-struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
-if (!pt->entries[pt_index].present)
-return 0;
-uintptr_t phys = pt->entries[pt_index].shifted_address << PAGE_SHIFT;
-/* if the virtual address wasn't page aligned, add the offset into the page */
-return phys | ((uintptr_t)virt & ~PAGE_MASK);
+uintptr_t phys = 0;
+if (pde->huge) {
+phys = pde->shifted_address;
+phys <<= PAGE_SHIFT; /* attention, this is not HUGEPAGE_SHIFT */
+phys |= (uintptr_t)virt & ~HUGEPAGE_MASK;
+} else {
+struct x86_page_table *pt = X86_CURRENT_PT(pd_index);
+struct x86_page_table_entry *pte = &pt->entries[pt_index];
+if (pte->present) {
+phys = pte->shifted_address;
+phys <<= PAGE_SHIFT;
+phys |= (uintptr_t)virt & ~PAGE_MASK;
+}
+}
+return phys;
}
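Regarding the "this is not HUGEPAGE_SHIFT" remark: shifted_address is the same 20-bit field (bits 31-12 of the entry) for both small and huge pages, so it always holds phys >> PAGE_SHIFT; for a 4 MiB page the low ten bits of the field are simply zero. A worked example with made-up addresses:

/*
 * A 4 MiB page at physical 0x00c00000, mapped at virtual 0xd0000000:
 *
 *   pde->shifted_address = 0x00c00000 >> PAGE_SHIFT = 0x00c00
 *
 * Translating virt = 0xd0123456:
 *
 *   phys  = 0x00c00 << PAGE_SHIFT        = 0x00c00000  (4 MiB aligned base)
 *   phys |= 0xd0123456 & ~HUGEPAGE_MASK  = 0x00123456  (offset within the huge page)
 *   result                               = 0x00d23456
 *
 * Shifting by HUGEPAGE_SHIFT instead would compute 0x00c00 << 22, which does not
 * even fit in 32 bits, hence the warning in the comment above.
 */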
void vm_flush(void)

@@ -280,85 +256,6 @@ void vm_flush(void)
);
}
-/**
- * So, this is going to be a little awkward. Pretty much the entire mm code
- * depends on the page bitmap, so we can't use any of it before the bitmap is
- * actually present. This means we have to do *everything* by hand here.
- */
-static void setup_pagemap(void)
-{
-/*
- * If we blow up the pagemap we blow up the entire system, so we give
- * it its very own page table and map it somewhere far, far away from
- * anything else. A page table takes up exactly one page, so we cut
- * that away from the usable dynamic page area. So these two lines are
- * basically a replacement for a call to get_page().
- */
-uintptr_t pt_phys = dynpage_start;
-dynpage_start += PAGE_SIZE;
-/*
- * As described in multiboot.S, the last entry in the page directory points
- * to the page directory itself so we can still manipulate it while we
- * are in virtual address space. The second-last entry in the page
- * directory is still free, so we put the page table for the bitmap there.
- * If you do the math, that page table therefore maps addresses
- * 0xff800000-0xffbfffff, which is where we start off with the bitmap.
- */
-pagemap = (unsigned long *)0xff800000;
-/*
- * Now that we have a physical page for the page table, we need to
- * map it to a virtual address so we can fill its entries.
- * So this is basically a replacement for a call to map_page().
- */
-struct x86_page_directory_entry *pd_entry = &X86_CURRENT_PD->entries[1022];
-*(unsigned long *)pd_entry = 0;
-pd_entry->shifted_address = pt_phys >> PAGE_SHIFT;
-pd_entry->rw = 1;
-pd_entry->present = 1;
-vm_flush();
-struct x86_page_table *pt = X86_CURRENT_PT(1022);
-memset(pt, 0, sizeof(*pt));
-/*
- * Alright, now we can actually fill the page table with entries for
- * the bitmap. Again, we just take away pages from the dynpage area,
- * until there is enough space. We also need to map those pages to the
- * virtual address, of course.
- */
-uintptr_t pagemap_phys = dynpage_start;
-usize pt_index = 0;
-do {
-/*
- * take one page away from the dynamic area and reserve it for
- * the bitmap, and recalculate the required bitmap length
- */
-dynpage_start += PAGE_SIZE;
-pagemap_len = (dynpage_end - dynpage_start) / (PAGE_SIZE * sizeof(*pagemap) * 8);
-/* now add a page table entry for that page */
-struct x86_page_table_entry *pt_entry = &pt->entries[pt_index];
-*(unsigned long *)pt_entry = 0;
-uintptr_t address = pagemap_phys + pt_index * PAGE_SIZE;
-pt_entry->shifted_address = address >> PAGE_SHIFT;
-pt_entry->present = 1;
-pt_entry->rw = 1;
-pt_index++;
-} while (pagemap_len * sizeof(*pagemap) * 8 > (dynpage_start - pagemap_phys));
-/*
- * Great! We have enough space for the bitmap, and it is mapped
- * correctly (at least i hope so). Now all that's left is to flush
- * the TLB once again to make the updated entries take effect, and
- * clear the bitmap.
- */
-vm_flush();
-memset(pagemap, 0, pagemap_len * sizeof(*pagemap));
-}
/*
 * This file is part of GayBSD.
 * Copyright (c) 2021 fef <owo@fef.moe>.
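A note on the X86_CURRENT_PD and X86_CURRENT_PT() helpers used throughout mm/page.c: they rely on the recursive mapping described in the deleted comment above (the last page-directory entry points back at the directory itself). Their definitions are not part of this diff; presumably they look roughly like the sketch below, where the exact constants are an assumption derived from that comment (entry 1022 mapping 0xff800000-0xffbfffff implies entry 1023 covers 0xffc00000-0xffffffff).

/*
 * Sketch, not taken from this commit: with PD entry 1023 pointing at the page
 * directory itself, every page table shows up at 0xffc00000 + pd_index * 4 KiB,
 * and the page directory appears as the very last page of the address space.
 */
#define X86_CURRENT_PT(pd_index) \
        ((struct x86_page_table *)(0xffc00000 + ((pd_index) << PAGE_SHIFT)))

#define X86_CURRENT_PD \
        ((struct x86_page_directory *)0xfffff000)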


@@ -21,15 +21,18 @@
 * @brief Memory allocation flags passed to `kmalloc()`.
 */
enum mm_flags {
+/** @brief Physically contiguous memory for DMA. */
+MM_CONTIG = (1 << 0),
+/** @brief Use emergency memory reserves if necessary. */
+MM_EMERG = (1 << 1),
+/** @brief Don't sleep during the allocation. */
+MM_NOSLEEP = (1 << 2),
+/** @brief Allocate userspace memory. */
+MM_USER = (1 << 4),
/** @brief Kernel memory */
-MM_KERNEL = (1 << 0),
-/*
- * This will be extended once we have a base system and everything
- * in place. I've only defined this because i know we'll need it
- * later anyway and it would take more effort to change all occurrences
- * of kmalloc() to use flags rather than do it properly right now.
- */
+MM_KERN = MM_CONTIG,
+/** @brief Allocate memory in atomic (irq) context. */
+MM_ATOMIC = MM_EMERG | MM_NOSLEEP,
};

/**
@@ -58,6 +61,9 @@ enum mm_page_flags {
MM_PAGE_DIRTY = (1 << 4),
MM_PAGE_GLOBAL = (1 << 5),
MM_PAGE_NOCACHE = (1 << 6),
+#ifdef __HAS_HUGEPAGES
+MM_PAGE_HUGE = (1 << 7),
+#endif
};

/**


@@ -7,7 +7,7 @@
#ifdef _KERNEL
#include <gay/mm.h>
-#define malloc(size) kmalloc(size, MM_KERNEL)
+#define malloc(size) kmalloc(size, MM_KERN)
#define free(ptr) kfree(ptr)
#else
/*


@@ -144,7 +144,7 @@ int kmalloc_init(uintptr_t phys_start, uintptr_t phys_end)
void *kmalloc(usize size, enum mm_flags flags)
{
-if (flags != MM_KERNEL) {
+if (flags != MM_KERN) {
kprintf("Invalid flags passed to kmalloc()\n");
return NULL;
}

@@ -216,7 +216,7 @@ void kfree(void *ptr)
__weak void *malloc(usize size)
{
-return kmalloc(size, MM_KERNEL);
+return kmalloc(size, MM_KERN);
}
__weak void free(void *ptr)