/* Copyright (C) 2021,2022 fef. All rights reserved. */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

struct vm_page *const vm_page_array = (vm_page_t)VM_PAGE_ARRAY_OFFSET;

#if CFG_DEBUG_PGADDRS
/* this gets updated in x86_setup_paging() once we know how big the array is */
vm_page_t _vm_page_array_end = (vm_page_t)(VM_PAGE_ARRAY_OFFSET + VM_PAGE_ARRAY_LENGTH);
#endif

static void print_mem_area(struct mb2_mmap_entry *entry);

static void register_area(struct mb2_mmap_entry *entry)
{
        vm_paddr_t start = entry->addr;
        vm_paddr_t end = start + entry->len;

        if (start >= DMA_LIMIT) {
                __boot_register_mem_area(start, end, MM_ZONE_NORMAL);
        } else if (start < DMA_LIMIT && end > DMA_LIMIT) {
                __boot_register_mem_area(start, DMA_LIMIT, MM_ZONE_DMA);
                __boot_register_mem_area(DMA_LIMIT, end, MM_ZONE_NORMAL);
        } else if (start < DMA_LIMIT && end <= DMA_LIMIT) {
                __boot_register_mem_area(start, end, MM_ZONE_DMA);
        } else {
                panic("congratulations, you reached an unreachable branch");
        }
}

/**
 * @brief Map the entire physical memory to `DMAP_OFFSET`.
 *
 * This may overshoot by up to 1 GB because we only use gigapages, but
 * considering the fact that mapping literally the entire physical RAM is
 * probably the bigger problem here, i'd say it's fine.
 *
 * @param end End of physical memory
 */
static void map_direct_area(vm_paddr_t end)
{
        vm_paddr_t ppos = 0;
        void *vpos = __v(0);
        void *const vend = __v(end);

        /* This assertion fails if > 4 TB of physical memory are available.
         * Sorry gamers, we don't support enough RAM for all your Chrome tabs. */
        KASSERT(vend < DMAP_END);

        while (vpos < vend) {
                x86_pml4te_t *pml4te = X86_PML4TE(vpos);
                vm_paddr_t pdpt_phys = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL);
                panic_if(pdpt_phys == BOOT_PMALLOC_ERR,
                         "cannot allocate memory for direct mapping");
                __boot_clear_page(pdpt_phys);
                pml4te->val = pdpt_phys | __P_PRESENT | __P_RW | __P_NOEXEC;
                vm_flush();

                for (int pdpti = 0; pdpti < 512; pdpti++) {
                        x86_pdpte_t *pdpte = X86_PDPTE(vpos);
                        pdpte->val = ppos | __P_PRESENT | __P_RW | __P_GLOBAL
                                   | __P_HUGE | __P_NOEXEC;
                        ppos += GIGAPAGE_SIZE;
                        vpos += GIGAPAGE_SIZE;
                        if (vpos >= vend)
                                break;
                }

                pml4te->flags.global = 1;
        }

        vm_flush();
}

/*
 * "Oh cool another deeply nested 100-liner that nobody understands"
 */
void x86_paging_init(struct mb2_tag_mmap *mmap)
{
        __boot_pmalloc_init();

        /*
         * insert all free areas and find the end of physical memory
         */
        struct mb2_mmap_entry *entry = mmap->entries;
        vm_paddr_t end = 0;
        kprintf("Memory map:\n");
        while ((void *)entry - (void *)mmap < mmap->tag.size) {
                vm_paddr_t entry_end = entry->addr + entry->len;
                end = max(end, entry_end);
                print_mem_area(entry);
                if (entry->type == MB2_MEMORY_AVAILABLE)
                        register_area(entry);
                entry = (void *)entry + mmap->entry_size;
        }

        /*
         * allocate and map vm_page_array into virtual memory at VM_PAGE_ARRAY_OFFSET
         * (this is gonna be a long one)
         */
        struct vm_page *vm_page_array_end = vm_page_array + (end >> PAGE_SHIFT);
#if CFG_DEBUG_PGADDRS
        _vm_page_array_end = vm_page_array_end;
#endif
        void *map_pos = vm_page_array;
        void *map_end = map_pos + ((void *)vm_page_array_end - (void *)vm_page_array);
        kprintf("Mapping %zu bytes for vm_page_array\n", map_end - map_pos);
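
        /*
         * The nested loops below walk the paging hierarchy top down
         * (PML4T -> PDPT -> PDT -> PT) and always map with the largest page
         * size that still fits: a 1 GB gigapage while more than GIGAPAGE_SIZE
         * is left, then 2 MB hugepages, and plain 4 KB pages for the tail.
         * Both the backing memory for the array and any intermediate page
         * tables come from __boot_pmalloc(); only the tables are zeroed with
         * __boot_clear_page() before being hooked into the hierarchy, and the
         * TLB is flushed after each new table is installed.
         *
         * Rough scale (illustrative numbers only, assuming a 64 byte
         * struct vm_page): 16 GB of RAM is 4 M page frames, i.e. roughly
         * 256 MB of vm_page_array, so most of it ends up in large mappings.
         */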

        /* PML4T loop */
        while (map_pos < map_end) {
                /* Is vm_page_array so huge that it spans almost the entire 2 TB
                 * kernel region?  If that's the case, something has gone terribly
                 * wrong, unless we somehow happen to have about an Exabyte of RAM
                 * (which is not physically addressable by the CPU's 40-bit bus). */
                KASSERT(map_pos < (void *)KERNBASE);

                x86_pml4te_t *pml4te = X86_PML4TE(map_pos);
                vm_paddr_t pml4te_val = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL);
                panic_if(pml4te_val == BOOT_PMALLOC_ERR,
                         "cannot reserve memory for vm_page_array");
                __boot_clear_page(pml4te_val);
                pml4te_val |= __P_PRESENT | __P_RW | __P_NOCACHE | __P_GLOBAL | __P_NOEXEC;
                pml4te->val = pml4te_val;
                vm_flush();

                /* PDPT loop */
                for (int pdpt_index = 0; pdpt_index < 512; pdpt_index++) {
                        x86_pdpte_t *pdpte = X86_PDPTE(map_pos);
                        vm_paddr_t pdpte_val;

                        /* try allocating a 1 GB gigapage first */
                        if (map_end - map_pos > GIGAPAGE_SIZE) {
                                pdpte_val = __boot_pmalloc(X86_PDPT_SHIFT, MM_ZONE_NORMAL);
                                /* CLion is warning about this condition being always true,
                                 * but that is not the case.  I've checked the disassembly
                                 * with -O2, and clang is emitting the check.
                                 * So it's fine, i guess. */
                                if (pdpte_val != BOOT_PMALLOC_ERR) {
                                        pdpte_val |= __P_PRESENT | __P_RW | __P_NOCACHE
                                                   | __P_HUGE | __P_GLOBAL | __P_NOEXEC;
                                        pdpte->val = pdpte_val;
                                        map_pos += GIGAPAGE_SIZE;
                                        if (map_pos >= map_end)
                                                goto map_done;
                                        continue;
                                }
                        }

                        /* couldn't use a gigapage, continue in hugepage steps */
                        pdpte_val = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL);
                        panic_if(pdpte_val == BOOT_PMALLOC_ERR,
                                 "cannot reserve memory for vm_page_array");
                        __boot_clear_page(pdpte_val);
                        pdpte_val |= __P_PRESENT | __P_RW | __P_NOCACHE | __P_GLOBAL | __P_NOEXEC;
                        pdpte->val = pdpte_val;
                        vm_flush();

                        /* PDT loop */
                        for (int pdt_index = 0; pdt_index < 512; pdt_index++) {
                                x86_pdte_t *pdte = X86_PDTE(map_pos);
                                vm_paddr_t pdte_val;

                                /* try allocating a 2 MB hugepage first */
                                if (map_end - map_pos >= HUGEPAGE_SIZE) {
                                        pdte_val = __boot_pmalloc(X86_PDT_SHIFT, MM_ZONE_NORMAL);
                                        if (pdte_val != BOOT_PMALLOC_ERR) {
                                                pdte_val |= __P_PRESENT | __P_RW | __P_NOCACHE
                                                          | __P_GLOBAL | __P_HUGE | __P_NOEXEC;
                                                pdte->val = pdte_val;
                                                map_pos += HUGEPAGE_SIZE;
                                                if (map_pos >= map_end)
                                                        goto map_done;
                                                continue;
                                        }
                                }

                                /* couldn't use a hugepage, continue in page steps */
                                pdte_val = __boot_pmalloc(PAGE_SHIFT, MM_ZONE_NORMAL);
                                panic_if(pdte_val == BOOT_PMALLOC_ERR,
                                         "cannot reserve memory for vm_page_array");
                                __boot_clear_page(pdte_val);
                                pdte_val |= __P_PRESENT | __P_RW | __P_NOCACHE | __P_GLOBAL | __P_NOEXEC;
                                pdte->val = pdte_val;
                                vm_flush();

                                /* PT loop */
                                for (int pt_index = 0; pt_index < 512; pt_index++) {
                                        x86_pte_t *pte = X86_PTE(map_pos);
                                        vm_paddr_t pte_val = __boot_pmalloc(X86_PT_SHIFT, MM_ZONE_NORMAL);
                                        panic_if(pte_val == BOOT_PMALLOC_ERR,
                                                 "cannot reserve memory for vm_page_array");
                                        pte_val |= __P_PRESENT | __P_RW | __P_NOCACHE | __P_GLOBAL | __P_NOEXEC;
                                        pte->val = pte_val;
                                        map_pos += PAGE_SIZE;
                                        if (map_pos >= map_end)
                                                goto map_done;
                                } /* end of PT loop */
                        } /* end of PDT loop */
                } /* end of PDPT loop */
        } /* end of PML4T loop */

map_done:
        map_direct_area(end);
        paging_init(end);
}
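
/*
 * At this point (summary of the function above): every MB2_MEMORY_AVAILABLE
 * area is registered with the boot allocator (split at DMA_LIMIT into
 * MM_ZONE_DMA and MM_ZONE_NORMAL), vm_page_array is backed and mapped at
 * VM_PAGE_ARRAY_OFFSET with one struct vm_page per physical page frame, the
 * whole of physical memory is direct-mapped at DMAP_OFFSET, and control is
 * handed to the architecture independent paging_init().
 *
 * Illustrative sketch, not part of the original file: because the array is
 * indexed by page frame number, translating a physical address to its page
 * metadata is plain pointer arithmetic.  A hypothetical helper would be:
 *
 *	static inline vm_page_t paddr_to_vm_page(vm_paddr_t paddr)
 *	{
 *		return &vm_page_array[paddr >> PAGE_SHIFT];
 *	}
 */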

/*
 * It's really unfortunate that we have to zero a page before we can use it as
 * a page table, yet also need to reference it in the page table structures
 * (thereby mapping it into virtual memory) before we can zero it out.
 * This little hack temporarily maps the area at one PDP entry before KERNBASE
 * (meaning index 510 of _pdp0), zeroes the area, and then unmaps it again.
 */
void __boot_clear_page(vm_paddr_t paddr)
{
        vm_paddr_t pbase = align_floor(paddr, 1 << X86_PDPT_SHIFT);
        vm_offset_t offset = paddr - pbase;
        void *vbase = (void *)KERNBASE - (1 << X86_PDPT_SHIFT);
        x86_pdpte_t *pdpe = X86_PDPTE(vbase);
        pdpe->val = pbase | __P_PRESENT | __P_RW | __P_NOCACHE | __P_HUGE | __P_NOEXEC;
        vm_flush();
        memset64(vbase + offset, 0, PAGE_SIZE);
        pdpe->val = 0;
        vm_flush();
}

static void print_mem_area(struct mb2_mmap_entry *entry)
{
        const char *name;

        switch (entry->type) {
        case MB2_MEMORY_AVAILABLE:
                name = "Available";
                break;
        case MB2_MEMORY_RESERVED:
                name = "Reserved";
                break;
        case MB2_MEMORY_ACPI_RECLAIMABLE:
                name = "ACPI (reclaimable)";
                break;
        case MB2_MEMORY_NVS:
                name = "Non-Volatile Storage";
                break;
        case MB2_MEMORY_BADRAM:
                name = "Bad RAM";
                break;
        default:
                /* don't print an uninitialized pointer if the bootloader
                 * hands us a type we don't know about */
                name = "Unknown";
                break;
        }

        kprintf(" [0x%016"PRIxVM_PADDR"-0x%016"PRIxVM_PADDR"] %s\n",
                entry->addr, entry->addr + entry->len - 1, name);
}
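
/*
 * With the format string above, a typical "Available" entry comes out as
 * (illustrative values only):
 *
 *	 [0x0000000000100000-0x000000003ffeffff] Available
 */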