Kernel: Introduce page ownership

Some pages, such as framebuffer pages, are not physical memory frames reserved for the current process.
Some, such as the framebuffer, may even be shared between all processes.
Yet on exit() or exec(), a process frees every frame mapped into its address space,
and on fork() it copies the data of every frame into new frames for the child. So how could we map framebuffers?

Simple: we use one of the bits in page table entries that are left available to the OS, and mark whether that page is owned by the current process (sketched in code below).

If the page is owned, it will be:
- Freed on address space destruction
- Copied into a new page owned by the child process on fork()

If it is not owned, it will be:
- Left alone on address space destruction
- Mapped in the child at the same physical frame as in the parent on fork()
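
In code terms, the rule boils down to checking that bit before touching a frame. A condensed sketch, reusing the PageDirectoryEntry fields and helpers from the diff below (free_if_owned and clone_into_child are made-up names for illustration; the real code walks the full paging hierarchy and propagates out-of-memory errors):

void free_if_owned(PageDirectoryEntry& pde)
{
    if (!pde.present) return;
    if (!pde.owned_by_task) return; // shared or foreign frame (e.g. framebuffer): leave it alone
    PMM::free_page((void*)pde.get_address());
}

void clone_into_child(PageDirectoryEntry& pde, PageDirectoryEntry& cloned_pde)
{
    if (!pde.owned_by_task)
    {
        cloned_pde = pde; // not owned: the child maps the very same physical frame
        return;
    }
    // Owned: give the child its own copy of the data in a fresh frame.
    void* copy = try_clone_page_table((PageTable*)pde.get_address());
    if (!copy) return; // out of memory (the real code reports this to the caller)
    cloned_pde = pde;
    cloned_pde.set_address((uint64_t)copy);
}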

This still needs a bit more work, such as keeping a reference count of how many processes use a page,
so that it can be freed once every process using it has exited or called exec().
That should be done for MAP_SHARED mappings, for example, since they are not permanent, unlike the framebuffer.
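
One possible shape for that reference counting, purely as a hypothetical sketch (frame_refcount, frame_ref, frame_unref and MAX_FRAMES do not exist in this commit; PMM::free_page and PAGE_SIZE do):

static uint16_t frame_refcount[MAX_FRAMES]; // MAX_FRAMES: assumed to cover all physical frames

void frame_ref(uint64_t paddr)
{
    frame_refcount[paddr / PAGE_SIZE]++;
}

void frame_unref(uint64_t paddr)
{
    if (--frame_refcount[paddr / PAGE_SIZE] == 0)
        PMM::free_page((void*)paddr); // last user is gone: the frame can really be released
}

fork() would then call frame_ref() on shared frames instead of copying them, and exit()/exec()/munmap() would call frame_unref() instead of freeing unconditionally.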
apio 2022-11-02 19:32:28 +01:00
parent 875d971d3b
commit e5cf32c7b3
9 changed files with 73 additions and 26 deletions


@ -8,6 +8,7 @@
#define MAP_READ_WRITE 1 << 0
#define MAP_USER 1 << 1
#define MAP_EXEC 1 << 2
#define MAP_AS_OWNED_BY_TASK 1 << 3
namespace MemoryManager
{


@ -18,7 +18,8 @@ struct PageDirectoryEntry
bool ignore1 : 1;
uint8_t available : 3;
uint64_t address : 48;
uint8_t available2 : 3;
bool owned_by_task : 1; // Part of the available for OS use bits.
uint8_t available2 : 2;
bool no_execute : 1;
void set_address(uint64_t addr);


@ -6,7 +6,8 @@ enum Flags
{
ReadWrite = 1 << 0,
User = 1 << 1,
Execute = 1 << 2
Execute = 1 << 2,
OwnedByTask = 1 << 3,
};
namespace VMM
{


@ -26,8 +26,10 @@ void AddressSpace::destroy()
if (!pdp_pde.present) continue;
if (pdp_pde.larger_pages)
{
pages_freed++;
PMM::free_page((void*)pdp_pde.get_address());
if(pdp_pde.owned_by_task) {
pages_freed++;
PMM::free_page((void*)pdp_pde.get_address());
}
continue;
}
PageTable* pdp = (PageTable*)pdp_pde.get_address();
@ -37,8 +39,10 @@ void AddressSpace::destroy()
if (!pd_pde.present) continue;
if (pd_pde.larger_pages)
{
pages_freed++;
PMM::free_page((void*)pd_pde.get_address());
if(pd_pde.owned_by_task) {
pages_freed++;
PMM::free_page((void*)pd_pde.get_address());
}
continue;
}
PageTable* pd = (PageTable*)pd_pde.get_address();
@ -48,8 +52,10 @@ void AddressSpace::destroy()
if (!pt_pde.present) continue;
if (pt_pde.larger_pages)
{
pages_freed++;
PMM::free_page((void*)pt_pde.get_address());
if(pt_pde.owned_by_task) {
pages_freed++;
PMM::free_page((void*)pt_pde.get_address());
}
continue;
}
PageTable* pt = (PageTable*)pt_pde.get_address();
@ -57,6 +63,7 @@ void AddressSpace::destroy()
{
PageDirectoryEntry& pde = pt->entries[l];
if (!pde.present) continue;
if (!pde.owned_by_task) continue;
pages_freed++;
PMM::free_page((void*)pde.get_address());
}
@ -84,8 +91,10 @@ void AddressSpace::clear()
if (!pdp_pde.present) continue;
if (pdp_pde.larger_pages)
{
pages_freed++;
PMM::free_page((void*)pdp_pde.get_address());
if(pdp_pde.owned_by_task) {
pages_freed++;
PMM::free_page((void*)pdp_pde.get_address());
}
continue;
}
PageTable* pdp = (PageTable*)pdp_pde.get_address();
@ -95,8 +104,10 @@ void AddressSpace::clear()
if (!pd_pde.present) continue;
if (pd_pde.larger_pages)
{
pages_freed++;
PMM::free_page((void*)pd_pde.get_address());
if(pd_pde.owned_by_task) {
pages_freed++;
PMM::free_page((void*)pd_pde.get_address());
}
continue;
}
PageTable* pd = (PageTable*)pd_pde.get_address();
@ -106,8 +117,10 @@ void AddressSpace::clear()
if (!pt_pde.present) continue;
if (pt_pde.larger_pages)
{
pages_freed++;
PMM::free_page((void*)pt_pde.get_address());
if(pt_pde.owned_by_task) {
pages_freed++;
PMM::free_page((void*)pt_pde.get_address());
}
continue;
}
PageTable* pt = (PageTable*)pt_pde.get_address();
@ -115,6 +128,7 @@ void AddressSpace::clear()
{
PageDirectoryEntry& pde = pt->entries[l];
if (!pde.present) continue;
if (!pde.owned_by_task) continue;
pages_freed++;
PMM::free_page((void*)pde.get_address());
}
@ -154,6 +168,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu
if (!pdp_pde.present) continue;
if (pdp_pde.larger_pages)
{
if(!pdp_pde.owned_by_task)
{
memcpy(&cloned_pdp_pde, &pdp_pde, sizeof(PageDirectoryEntry));
continue;
}
void* cloned = try_clone_page_table((PageTable*)pdp_pde.get_address());
if (!cloned)
{
@ -178,6 +197,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu
if (!pd_pde.present) continue;
if (pd_pde.larger_pages)
{
if(!pd_pde.owned_by_task)
{
memcpy(&cloned_pd_pde, &pd_pde, sizeof(PageDirectoryEntry));
continue;
}
void* cloned = try_clone_page_table((PageTable*)pd_pde.get_address());
if (!cloned)
{
@ -202,6 +226,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu
if (!pt_pde.present) continue;
if (pt_pde.larger_pages)
{
if(!pt_pde.owned_by_task)
{
memcpy(&cloned_pt_pde, &pt_pde, sizeof(PageDirectoryEntry));
continue;
}
void* cloned = try_clone_page_table((PageTable*)pt_pde.get_address());
if (!cloned)
{
@ -223,6 +252,11 @@ AddressSpace AddressSpace::clone() // FIXME: Add out-of-memory checks to this fu
{
PageDirectoryEntry& pde = pt->entries[l];
PageDirectoryEntry& cloned_pde = cloned_pt->entries[l];
if(!pde.owned_by_task)
{
memcpy(&cloned_pde, &pde, sizeof(PageDirectoryEntry));
continue;
}
if (!pde.present) continue;
void* cloned = try_clone_page_table((PageTable*)pde.get_address());
if (!cloned)


@ -90,6 +90,9 @@ void VMM::remap(uint64_t vaddr, int flags)
if(flags & Execute) pde->no_execute = false;
else
pde->no_execute = true;
if(flags & OwnedByTask) pde->owned_by_task = true;
else
pde->owned_by_task = false;
flush_tlb(vaddr);
}
@ -104,12 +107,13 @@ uint64_t VMM::get_physical(uint64_t vaddr)
uint64_t VMM::get_flags(uint64_t vaddr)
{
PageDirectoryEntry* pde = find_pde(current_pml4, round_down_to_nearest_page(vaddr));
if (!pde) return 0; // Not mapped
if (!pde) return (uint64_t)-1; // Not mapped
uint64_t flags = 0;
if (pde->user) flags |= User;
if (pde->read_write) flags |= ReadWrite;
if(!pde->no_execute) flags |= Execute;
if(pde->owned_by_task) flags |= OwnedByTask;
return flags;
}
@ -139,6 +143,9 @@ void VMM::map(uint64_t vaddr, uint64_t paddr, int flags)
if(flags & Execute) pde->no_execute = false;
else
pde->no_execute = true;
if(flags & OwnedByTask) pde->owned_by_task = true;
else
pde->owned_by_task = false;
if (will_flush_tlb) flush_tlb(vaddr);
}


@ -105,7 +105,7 @@ ELFImage* ELFLoader::load_elf_from_vfs(VFS::Node* node)
VMM::apply_address_space();
VMM::switch_to_previous_user_address_space();
int new_flags = MAP_USER;
int new_flags = MAP_USER | MAP_AS_OWNED_BY_TASK;
if(phdr.p_flags & 2) new_flags |= MAP_READ_WRITE;
if(phdr.p_flags & 1) new_flags |= MAP_EXEC;


@ -216,7 +216,7 @@ void sys_execv(Context* context, const char* pathname, char** argv)
task->address_space.clear();
task->allocated_stack = (uint64_t)MemoryManager::get_pages_at(
0x100000, TASK_PAGES_IN_STACK,
MAP_USER | MAP_READ_WRITE); // If we had enough space for the old stack, there should be enough space for the
MAP_USER | MAP_READ_WRITE | MAP_AS_OWNED_BY_TASK); // If we had enough space for the old stack, there should be enough space for the
// new stack.
ELFImage* image = ELFLoader::load_elf_from_vfs(program);


@ -30,8 +30,8 @@ static const char* format_prot(int prot)
static int mman_flags_from_prot(int prot)
{
prot &= 0b111;
int flags = MAP_USER;
if (prot == PROT_NONE) return 0;
int flags = MAP_USER | MAP_AS_OWNED_BY_TASK;
if (prot == PROT_NONE) return MAP_AS_OWNED_BY_TASK;
if ((prot & PROT_WRITE) > 0) {
flags |= MAP_READ_WRITE;
}
@ -138,8 +138,8 @@ void sys_munmap(Context* context, void* address, size_t size)
context->rax = -EINVAL;
return;
}
uint64_t phys = VMM::get_physical((uint64_t)address);
if (phys == (uint64_t)-1)
uint64_t flags = VMM::get_flags((uint64_t)address);
if (flags == (uint64_t)-1)
{
kwarnln("munmap() failed: attempted to unmap a non-existent page");
context->rax = -EINVAL;
@ -148,7 +148,10 @@ void sys_munmap(Context* context, void* address, size_t size)
uint64_t offset = (uint64_t)address % PAGE_SIZE;
Scheduler::current_task()->allocator.free_virtual_pages(((uint64_t)address - offset),
Utilities::get_blocks_from_size(PAGE_SIZE, size));
MemoryManager::release_pages((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size));
if(flags & MAP_AS_OWNED_BY_TASK)
MemoryManager::release_pages((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size));
else
MemoryManager::release_unaligned_mappings((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size));
kdbgln("munmap() succeeded");
context->rax = 0;
return;
@ -182,8 +185,8 @@ void sys_mprotect(Context* context, void* address, size_t size, int prot)
context->rax = -EINVAL;
return;
}
uint64_t phys = VMM::get_physical((uint64_t)address);
if (phys == (uint64_t)-1)
uint64_t flags = VMM::get_flags((uint64_t)address);
if (flags == (uint64_t)-1)
{
kwarnln("mprotect() failed: attempted to protect a non-existent page");
context->rax = -EINVAL;
@ -192,7 +195,7 @@ void sys_mprotect(Context* context, void* address, size_t size, int prot)
uint64_t offset = (uint64_t)address % PAGE_SIZE;
MemoryManager::protect((void*)((uint64_t)address - offset), Utilities::get_blocks_from_size(PAGE_SIZE, size),
mman_flags_from_prot(prot));
flags & MAP_AS_OWNED_BY_TASK ? mman_flags_from_prot(prot) : mman_flags_from_prot(prot) & ~(MAP_AS_OWNED_BY_TASK));
kdbgln("mprotect() succeeded");
context->rax = 0;
return;


@ -196,7 +196,7 @@ long Scheduler::load_user_task(const char* filename)
new_task->regs.rip = image->entry;
new_task->image = image;
new_task->allocated_stack = (uint64_t)MemoryManager::get_pages_at(
0x100000, TASK_PAGES_IN_STACK, MAP_READ_WRITE | MAP_USER); // 16 KB is enough for everyone, right?
0x100000, TASK_PAGES_IN_STACK, MAP_READ_WRITE | MAP_USER | MAP_AS_OWNED_BY_TASK); // 16 KB is enough for everyone, right?
if (!new_task->allocated_stack)
{
new_task->address_space.destroy();