From e5cf32c7b39d084dc56fe3184a7e5b46705071de Mon Sep 17 00:00:00 2001 From: apio Date: Wed, 2 Nov 2022 19:32:28 +0100 Subject: [PATCH] Kernel: Introduce page ownership Some pages, such as framebuffer pages, are not physical memory frames reserved for the current process. Some, such as the framebuffer, may be shared between all processes. Yet, on exit() or on exec(), a process frees all frames mapped into its address space. And on fork(), it copies all data between frames. So how could we map framebuffers? Simple: we use one of the bits in page table entries which are available to the OS, and mark whether that page is owned by the current process. If it is owned, it will be: - Freed on address space destruction - Its data will be copied to a new page owned by the child process on fork() If it is not owned, it will be: - Left alone on address space destruction - On fork(), the child's virtual page will be mapped to the same physical frame as the parent This still needs a bit more work, such as keeping a reference count of how many processes use a page, so that it can be freed once all processes using it exit/exec. This should be done for MAP_SHARED mappings, for example, since they are not permanent, unlike the framebuffer. 
--- kernel/include/memory/MemoryManager.h | 1 + kernel/include/memory/Paging.h | 3 +- kernel/include/memory/VMM.h | 3 +- kernel/src/memory/AddressSpace.cpp | 58 +++++++++++++++++++++------ kernel/src/memory/VMM.cpp | 9 ++++- kernel/src/sys/elf/ELFLoader.cpp | 2 +- kernel/src/sys/exec.cpp | 2 +- kernel/src/sys/mem.cpp | 19 +++++---- kernel/src/thread/Scheduler.cpp | 2 +- 9 files changed, 73 insertions(+), 26 deletions(-) diff --git a/kernel/include/memory/MemoryManager.h b/kernel/include/memory/MemoryManager.h index 072c004c..b509a198 100644 --- a/kernel/include/memory/MemoryManager.h +++ b/kernel/include/memory/MemoryManager.h @@ -8,6 +8,7 @@ #define MAP_READ_WRITE 1 << 0 #define MAP_USER 1 << 1 #define MAP_EXEC 1 << 2 +#define MAP_AS_OWNED_BY_TASK 1 << 3 namespace MemoryManager { diff --git a/kernel/include/memory/Paging.h b/kernel/include/memory/Paging.h index 38998b5d..baedd9a3 100644 --- a/kernel/include/memory/Paging.h +++ b/kernel/include/memory/Paging.h @@ -18,7 +18,8 @@ struct PageDirectoryEntry bool ignore1 : 1; uint8_t available : 3; uint64_t address : 48; - uint8_t available2 : 3; + bool owned_by_task : 1; // Part of the available for OS use bits. 
+ uint8_t available2 : 2; bool no_execute : 1; void set_address(uint64_t addr); diff --git a/kernel/include/memory/VMM.h b/kernel/include/memory/VMM.h index b2b56a77..69fd97e0 100644 --- a/kernel/include/memory/VMM.h +++ b/kernel/include/memory/VMM.h @@ -6,7 +6,8 @@ enum Flags { ReadWrite = 1 << 0, User = 1 << 1, - Execute = 1 << 2 + Execute = 1 << 2, + OwnedByTask = 1 << 3, }; namespace VMM { diff --git a/kernel/src/memory/AddressSpace.cpp b/kernel/src/memory/AddressSpace.cpp index e844fe54..2a499bbf 100644 --- a/kernel/src/memory/AddressSpace.cpp +++ b/kernel/src/memory/AddressSpace.cpp @@ -26,8 +26,10 @@ void AddressSpace::destroy() if (!pdp_pde.present) continue; if (pdp_pde.larger_pages) { - pages_freed++; - PMM::free_page((void*)pdp_pde.get_address()); + if(pdp_pde.owned_by_task) { + pages_freed++; + PMM::free_page((void*)pdp_pde.get_address()); + } continue; } PageTable* pdp = (PageTable*)pdp_pde.get_address(); @@ -37,8 +39,10 @@ void AddressSpace::destroy() if (!pd_pde.present) continue; if (pd_pde.larger_pages) { - pages_freed++; - PMM::free_page((void*)pd_pde.get_address()); + if(pd_pde.owned_by_task) { + pages_freed++; + PMM::free_page((void*)pd_pde.get_address()); + } continue; } PageTable* pd = (PageTable*)pd_pde.get_address(); @@ -48,8 +52,10 @@ void AddressSpace::destroy() if (!pt_pde.present) continue; if (pt_pde.larger_pages) { - pages_freed++; - PMM::free_page((void*)pt_pde.get_address()); + if(pt_pde.owned_by_task) { + pages_freed++; + PMM::free_page((void*)pt_pde.get_address()); + } continue; } PageTable* pt = (PageTable*)pt_pde.get_address(); @@ -57,6 +63,7 @@ void AddressSpace::destroy() { PageDirectoryEntry& pde = pt->entries[l]; if (!pde.present) continue; + if (!pde.owned_by_task) continue; pages_freed++; PMM::free_page((void*)pde.get_address()); } @@ -84,8 +91,10 @@ void AddressSpace::clear() if (!pdp_pde.present) continue; if (pdp_pde.larger_pages) { - pages_freed++; - PMM::free_page((void*)pdp_pde.get_address()); + 
if(pdp_pde.owned_by_task) { + pages_freed++; + PMM::free_page((void*)pdp_pde.get_address()); + } continue; } PageTable* pdp = (PageTable*)pdp_pde.get_address(); @@ -95,8 +104,10 @@ void AddressSpace::clear() if (!pd_pde.present) continue; if (pd_pde.larger_pages) { - pages_freed++; - PMM::free_page((void*)pd_pde.get_address()); + if(pd_pde.owned_by_task) { + pages_freed++; + PMM::free_page((void*)pd_pde.get_address()); + } continue; } PageTable* pd = (PageTable*)pd_pde.get_address(); @@ -106,8 +117,10 @@ void AddressSpace::clear() if (!pt_pde.present) continue; if (pt_pde.larger_pages) { - pages_freed++; - PMM::free_page((void*)pt_pde.get_address()); + if(pt_pde.owned_by_task) { + pages_freed++; + PMM::free_page((void*)pt_pde.get_address()); + } continue; } PageTable* pt = (PageTable*)pt_pde.get_address(); @@ -115,6 +128,7 @@ void AddressSpace::clear() { PageDirectoryEntry& pde = pt->entries[l]; if (!pde.present) continue; + if (!pde.owned_by_task) continue; pages_freed++; PMM::free_page((void*)pde.get_address()); } @@ -154,6 +168,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu if (!pdp_pde.present) continue; if (pdp_pde.larger_pages) { + if(!pdp_pde.owned_by_task) + { + memcpy(&cloned_pdp_pde, &pdp_pde, sizeof(PageDirectoryEntry)); + continue; + } void* cloned = try_clone_page_table((PageTable*)pdp_pde.get_address()); if (!cloned) { @@ -178,6 +197,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu if (!pd_pde.present) continue; if (pd_pde.larger_pages) { + if(!pd_pde.owned_by_task) + { + memcpy(&cloned_pd_pde, &pd_pde, sizeof(PageDirectoryEntry)); + continue; + } void* cloned = try_clone_page_table((PageTable*)pd_pde.get_address()); if (!cloned) { @@ -202,6 +226,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu if (!pt_pde.present) continue; if (pt_pde.larger_pages) { + if(!pt_pde.owned_by_task) + { + memcpy(&cloned_pt_pde, &pt_pde, 
sizeof(PageDirectoryEntry)); + continue; + } void* cloned = try_clone_page_table((PageTable*)pt_pde.get_address()); if (!cloned) { @@ -223,6 +252,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu { PageDirectoryEntry& pde = pt->entries[l]; PageDirectoryEntry& cloned_pde = cloned_pt->entries[l]; + if(!pde.owned_by_task) + { + memcpy(&cloned_pde, &pde, sizeof(PageDirectoryEntry)); + continue; + } if (!pde.present) continue; void* cloned = try_clone_page_table((PageTable*)pde.get_address()); if (!cloned) diff --git a/kernel/src/memory/VMM.cpp b/kernel/src/memory/VMM.cpp index 3e2be67d..5bd042b8 100644 --- a/kernel/src/memory/VMM.cpp +++ b/kernel/src/memory/VMM.cpp @@ -90,6 +90,9 @@ void VMM::remap(uint64_t vaddr, int flags) if(flags & Execute) pde->no_execute = false; else pde->no_execute = true; + if(flags & OwnedByTask) pde->owned_by_task = true; + else + pde->owned_by_task = false; flush_tlb(vaddr); } @@ -104,12 +107,13 @@ uint64_t VMM::get_physical(uint64_t vaddr) uint64_t VMM::get_flags(uint64_t vaddr) { PageDirectoryEntry* pde = find_pde(current_pml4, round_down_to_nearest_page(vaddr)); - if (!pde) return 0; // Not mapped + if (!pde) return (uint64_t)-1; // Not mapped uint64_t flags = 0; if (pde->user) flags |= User; if (pde->read_write) flags |= ReadWrite; if(!pde->no_execute) flags |= Execute; + if(pde->owned_by_task) flags |= OwnedByTask; return flags; } @@ -139,6 +143,9 @@ void VMM::map(uint64_t vaddr, uint64_t paddr, int flags) if(flags & Execute) pde->no_execute = false; else pde->no_execute = true; + if(flags & OwnedByTask) pde->owned_by_task = true; + else + pde->owned_by_task = false; if (will_flush_tlb) flush_tlb(vaddr); } diff --git a/kernel/src/sys/elf/ELFLoader.cpp b/kernel/src/sys/elf/ELFLoader.cpp index 3b8be7dc..d037754b 100644 --- a/kernel/src/sys/elf/ELFLoader.cpp +++ b/kernel/src/sys/elf/ELFLoader.cpp @@ -105,7 +105,7 @@ ELFImage* ELFLoader::load_elf_from_vfs(VFS::Node* node) VMM::apply_address_space(); 
VMM::switch_to_previous_user_address_space(); - int new_flags = MAP_USER; + int new_flags = MAP_USER | MAP_AS_OWNED_BY_TASK; if(phdr.p_flags & 2) new_flags |= MAP_READ_WRITE; if(phdr.p_flags & 1) new_flags |= MAP_EXEC; diff --git a/kernel/src/sys/exec.cpp b/kernel/src/sys/exec.cpp index c110c661..937808d0 100644 --- a/kernel/src/sys/exec.cpp +++ b/kernel/src/sys/exec.cpp @@ -216,7 +216,7 @@ void sys_execv(Context* context, const char* pathname, char** argv) task->address_space.clear(); task->allocated_stack = (uint64_t)MemoryManager::get_pages_at( 0x100000, TASK_PAGES_IN_STACK, - MAP_USER | MAP_READ_WRITE); // If we had enough space for the old stack, there should be enough space for the + MAP_USER | MAP_READ_WRITE | MAP_AS_OWNED_BY_TASK); // If we had enough space for the old stack, there should be enough space for the // new stack. ELFImage* image = ELFLoader::load_elf_from_vfs(program); diff --git a/kernel/src/sys/mem.cpp b/kernel/src/sys/mem.cpp index 6f81f841..69c3ba59 100644 --- a/kernel/src/sys/mem.cpp +++ b/kernel/src/sys/mem.cpp @@ -30,8 +30,8 @@ static const char* format_prot(int prot) static int mman_flags_from_prot(int prot) { prot &= 0b111; - int flags = MAP_USER; - if (prot == PROT_NONE) return 0; + int flags = MAP_USER | MAP_AS_OWNED_BY_TASK; + if (prot == PROT_NONE) return MAP_AS_OWNED_BY_TASK; if ((prot & PROT_WRITE) > 0) { flags |= MAP_READ_WRITE; } @@ -138,8 +138,8 @@ void sys_munmap(Context* context, void* address, size_t size) context->rax = -EINVAL; return; } - uint64_t phys = VMM::get_physical((uint64_t)address); - if (phys == (uint64_t)-1) + uint64_t flags = VMM::get_flags((uint64_t)address); + if (flags == (uint64_t)-1) { kwarnln("munmap() failed: attempted to unmap a non-existent page"); context->rax = -EINVAL; @@ -148,7 +148,10 @@ void sys_munmap(Context* context, void* address, size_t size) uint64_t offset = (uint64_t)address % PAGE_SIZE; Scheduler::current_task()->allocator.free_virtual_pages(((uint64_t)address - offset), 
Utilities::get_blocks_from_size(PAGE_SIZE, size)); - MemoryManager::release_pages((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size)); + if(flags & MAP_AS_OWNED_BY_TASK) + MemoryManager::release_pages((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size)); + else + MemoryManager::release_unaligned_mappings((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size)); kdbgln("munmap() succeeded"); context->rax = 0; return; @@ -182,8 +185,8 @@ void sys_mprotect(Context* context, void* address, size_t size, int prot) context->rax = -EINVAL; return; } - uint64_t phys = VMM::get_physical((uint64_t)address); - if (phys == (uint64_t)-1) + uint64_t flags = VMM::get_flags((uint64_t)address); + if (flags == (uint64_t)-1) { kwarnln("mprotect() failed: attempted to protect a non-existent page"); context->rax = -EINVAL; @@ -192,7 +195,7 @@ void sys_mprotect(Context* context, void* address, size_t size, int prot) uint64_t offset = (uint64_t)address % PAGE_SIZE; MemoryManager::protect((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size), - mman_flags_from_prot(prot)); + flags & MAP_AS_OWNED_BY_TASK ? mman_flags_from_prot(prot) : mman_flags_from_prot(prot) & ~(MAP_AS_OWNED_BY_TASK)); kdbgln("mprotect() succeeded"); context->rax = 0; return; diff --git a/kernel/src/thread/Scheduler.cpp b/kernel/src/thread/Scheduler.cpp index 8248095d..d3fbd356 100644 --- a/kernel/src/thread/Scheduler.cpp +++ b/kernel/src/thread/Scheduler.cpp @@ -196,7 +196,7 @@ long Scheduler::load_user_task(const char* filename) new_task->regs.rip = image->entry; new_task->image = image; new_task->allocated_stack = (uint64_t)MemoryManager::get_pages_at( - 0x100000, TASK_PAGES_IN_STACK, MAP_READ_WRITE | MAP_USER); // 16 KB is enough for everyone, right? + 0x100000, TASK_PAGES_IN_STACK, MAP_READ_WRITE | MAP_USER | MAP_AS_OWNED_BY_TASK); // 16 KB is enough for everyone, right? 
if (!new_task->allocated_stack) { new_task->address_space.destroy();