kernel: Update the VM allocator for userspace to use a linked list

Unlike the old bitmap, which started small and had to grow as allocations crept upwards, a linked list of regions can cover the entire address space at once in a more memory-efficient way.

Stress-tested using 'base64 /bin/ls', which allocates enough contiguous
virtual memory to store the entirety of /bin/ls :)

A couple of bugs and fixes later, here we are!
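
Note for readers skimming the diff: the old allocator tracked pages with a growable bitmap based at VM_BASE, while the new one keeps a sorted linked list of VMRegion nodes that always spans the whole address space, splitting a free region on allocation and re-merging neighbors on free. Here is a minimal userspace model of that idea (illustrative only: std::list stands in for the kernel's intrusive LinkedList, and every name below is invented):

    // Minimal model of the region-list allocator (not the kernel's code).
    #include <cstddef>
    #include <cstdint>
    #include <list>
    #include <optional>

    constexpr uint64_t PAGE_SIZE = 4096; // assumption: 4 KiB pages
    constexpr uint64_t VM_START = PAGE_SIZE;
    constexpr uint64_t VM_END = 0x0000800000000000;

    struct Region
    {
        uint64_t start, end; // half-open range [start, end), page-aligned
        bool used;
    };

    // One free node initially covers the whole address space, so memory use is
    // proportional to the number of distinct mappings, not to the space's size.
    std::list<Region> regions { { VM_START, VM_END, false } };

    std::optional<uint64_t> alloc_pages(std::size_t count)
    {
        const uint64_t bytes = count * PAGE_SIZE;
        // Walk backwards, like the kernel's alloc_region(), carving allocations
        // out of the end of the last free region that is large enough.
        for (auto it = regions.rbegin(); it != regions.rend(); ++it)
        {
            if (it->used || it->end - it->start < bytes) continue;
            if (it->end - it->start == bytes) // exact fit: flip the whole region
            {
                it->used = true;
                return it->start;
            }
            const uint64_t boundary = it->end - bytes;
            it->end = boundary; // shrink the free region...
            regions.insert(it.base(), { boundary, boundary + bytes, true }); // ...and add a used one after it
            return boundary;
        }
        return std::nullopt; // address space exhausted (the kernel returns ENOMEM)
    }

The model omits freeing and merging; the diff below shows how the kernel handles both.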
apio 2023-06-18 01:48:36 +02:00
parent 2f08e0f5b0
commit 67ed18629d
Signed by: apio
GPG Key ID: B8A7D06E42258954
6 changed files with 204 additions and 92 deletions

View File

@@ -5,118 +5,197 @@
 #include <luna/CString.h>
 #include <luna/ScopeGuard.h>
-static constexpr u64 VM_BASE = 0x10000000;
-static constexpr usize INITIAL_VM_SIZE = 80;
-static constexpr usize MAX_VM_SIZE = 1024 * 1024 * 16;
+static constexpr u64 VM_START = ARCH_PAGE_SIZE;
+static constexpr u64 VM_END = 0x0000800000000000;
 Result<OwnedPtr<UserVM>> UserVM::try_create()
 {
-    void* const base = TRY(kmalloc(INITIAL_VM_SIZE));
-    auto guard = make_scope_guard([&] { kfree(base); });
-    OwnedPtr<UserVM> ptr = TRY(make_owned<UserVM>(base, INITIAL_VM_SIZE));
-    guard.deactivate();
+    OwnedPtr<UserVM> ptr = TRY(make_owned<UserVM>());
+    TRY(ptr->create_null_region());
+    TRY(ptr->create_default_region());
     return move(ptr);
 }
+Result<void> UserVM::create_null_region()
+{
+    // Create a small region at the start of the address space to prevent anyone from mapping page 0.
+    auto* region = TRY(make<VMRegion>());
+    region->start = 0;
+    region->end = VM_START;
+    region->count = 1;
+    region->used = true;
+    region->persistent = true;
+    m_regions.append(region);
+    return {};
+}
+Result<void> UserVM::create_default_region()
+{
+    // Create a free region covering the rest of the address space.
+    auto* region = TRY(make<VMRegion>());
+    region->start = VM_START;
+    region->end = VM_END;
+    region->count = (VM_END / ARCH_PAGE_SIZE) - 1;
+    region->used = false;
+    m_regions.append(region);
+    return {};
+}
 Result<OwnedPtr<UserVM>> UserVM::clone()
 {
-    void* const base = TRY(kmalloc(m_bitmap.size_in_bytes()));
-    auto guard = make_scope_guard([&] { kfree(base); });
-    OwnedPtr<UserVM> ptr = TRY(make_owned<UserVM>(base, m_bitmap.size_in_bytes()));
-    memcpy(ptr->m_bitmap.location(), m_bitmap.location(), m_bitmap.size_in_bytes());
-    guard.deactivate();
+    OwnedPtr<UserVM> ptr = TRY(make_owned<UserVM>());
+    for (const auto* region : m_regions)
+    {
+        auto* copied_region = TRY(make<VMRegion>());
+        copied_region->start = region->start;
+        copied_region->end = region->end;
+        copied_region->count = region->count;
+        copied_region->used = region->used;
+        copied_region->persistent = region->persistent;
+        ptr->m_regions.append(copied_region);
+    }
     return move(ptr);
 }
-UserVM::UserVM(void* base, usize size)
+UserVM::UserVM()
 {
-    m_bitmap.initialize(base, size);
-    m_bitmap.clear(false);
 }
-Result<bool> UserVM::try_expand(usize size)
+Result<u64> UserVM::alloc_region(usize count, bool persistent)
 {
-    if (m_bitmap.size_in_bytes() == MAX_VM_SIZE) { return false; }
-    const usize old_size = m_bitmap.size_in_bytes();
-    usize new_size = old_size + size;
-    if (new_size > MAX_VM_SIZE) new_size = MAX_VM_SIZE;
-    m_bitmap.resize(new_size);
-    m_bitmap.clear_region(old_size * 8, (new_size - old_size) * 8, false);
-    return true;
-}
-Result<u64> UserVM::alloc_one_page()
-{
-    u64 index;
-    bool ok = m_bitmap.find_and_toggle(false).try_set_value(index);
-    if (!ok)
+    for (auto* region = m_regions.expect_last(); region; region = m_regions.previous(region).value_or(nullptr))
     {
-        bool success = TRY(try_expand());
-        if (!success) return err(ENOMEM);
-        index = TRY(Result<u64>::from_option(m_bitmap.find_and_toggle(false), ENOMEM));
+        if (!region->used)
+        {
+            if (region->count < count) continue;
+            if (region->count == count)
+            {
+                region->used = true;
+                region->persistent = persistent;
+                u64 address = region->start;
+                try_merge_region_with_neighbors(region);
+                return address;
+            }
+            u64 boundary = region->end - (count * ARCH_PAGE_SIZE);
+            auto* new_region = TRY(split_region(region, boundary));
+            new_region->used = true;
+            new_region->persistent = persistent;
+            try_merge_region_with_neighbors(new_region);
+            return boundary;
+        }
     }
-    return VM_BASE + index * ARCH_PAGE_SIZE;
+    return err(ENOMEM);
 }
-Result<u64> UserVM::alloc_several_pages(usize count)
+Result<bool> UserVM::set_region(u64 address, usize count, bool used)
 {
-    u64 index;
-    bool ok = m_bitmap.find_and_toggle_region(false, count).try_set_value(index);
-    if (!ok)
+    if (address >= VM_END) return err(EINVAL);
+    u64 end = address + (count * ARCH_PAGE_SIZE);
+    for (auto* region : m_regions)
     {
-        bool success = TRY(try_expand((count / 8) + INITIAL_VM_SIZE));
-        if (!success) return err(ENOMEM);
-        index = TRY(Result<u64>::from_option(m_bitmap.find_and_toggle_region(false, count), ENOMEM));
+        if (region->end < address) continue;
+        if (region->start > end) return false;
+        if (region->persistent) return false;
+        if (region->used == used)
+        {
+            if (used) return false;
+            continue;
+        }
+        if (region->start >= address && region->end <= end)
+        {
+            region->used = used;
+            if (region->start == address && region->end == end)
+            {
+                try_merge_region_with_neighbors(region);
+                return true;
+            }
+            continue;
+        }
+        if (region->end > end && region->start < address)
+        {
+            auto* middle_region = TRY(split_region(region, address));
+            TRY(split_region(middle_region, end));
+            middle_region->used = used;
+            return true;
+        }
+        if (region->start < address)
+        {
+            bool finished = region->end == end;
+            auto* split = TRY(split_region(region, address));
+            split->used = used;
+            try_merge_region_with_neighbors(split);
+            if (!finished) continue;
+            return true;
+        }
+        if (region->end > end)
+        {
+            TRY(split_region(region, end));
+            region->used = used;
+            try_merge_region_with_neighbors(region);
+            return true;
+        }
     }
-    return VM_BASE + index * ARCH_PAGE_SIZE;
-}
-Result<bool> UserVM::free_one_page(u64 address)
-{
-    if (address < VM_BASE) return err(EINVAL);
-    const u64 index = (address - VM_BASE) / ARCH_PAGE_SIZE;
-    if (index > (MAX_VM_SIZE * 8)) return err(EINVAL);
-    // NOTE: POSIX says munmap() should silently do nothing if the address is not mapped, instead of throwing an error
-    // like EFAULT.
-    if (!m_bitmap.get(index)) return false;
-    m_bitmap.set(index, false);
     return true;
 }
-Result<bool> UserVM::free_several_pages(u64 address, usize count)
+void UserVM::merge_contiguous_regions(VMRegion* a, VMRegion* b)
 {
-    if (address < VM_BASE) return err(EINVAL);
-    const u64 index = (address - VM_BASE) / ARCH_PAGE_SIZE;
-    if ((index + count) > (MAX_VM_SIZE * 8)) return err(EINVAL);
-    // NOTE: Same as above.
-    if (!TRY(m_bitmap.try_match_region(index, count, true))) return false;
-    m_bitmap.clear_region(index, count, false);
-    return true;
+    a->end = b->end;
+    a->count += b->count;
+    m_regions.remove(b);
+    delete b;
+}
+void UserVM::try_merge_region_with_neighbors(VMRegion* region)
+{
+    auto prev = m_regions.previous(region);
+    if (prev.has_value() && (*prev)->used == region->used && (*prev)->persistent == region->persistent)
+    {
+        merge_contiguous_regions(*prev, region);
+        region = *prev;
+    }
+    auto next = m_regions.next(region);
+    if (next.has_value() && (*next)->used == region->used && (*next)->persistent == region->persistent)
+    {
+        merge_contiguous_regions(region, *next);
+    }
+}
+Result<VMRegion*> UserVM::split_region(VMRegion* parent, u64 boundary)
+{
+    auto* region = TRY(make<VMRegion>());
+    region->start = boundary;
+    region->end = parent->end;
+    region->count = (region->end - region->start) / ARCH_PAGE_SIZE;
+    region->used = parent->used;
+    region->persistent = parent->persistent;
+    m_regions.add_after(parent, region);
+    parent->end = boundary;
+    parent->count -= region->count;
+    return region;
+}
 UserVM::~UserVM()
 {
-    m_bitmap.deallocate();
+    m_regions.consume([](VMRegion* region) { delete region; });
 }
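
Two structural properties make this scheme safe, and split_region() and merge_contiguous_regions() above are careful to preserve both: the list stays contiguous from 0 to VM_END, and every node's count matches its byte range. A sketch of a checker for these invariants (illustrative types, not kernel code):

    // Sketch of the structural invariants the region list maintains.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct R { uint64_t start, end, count; bool used, persistent; };

    void check(const std::vector<R>& regions, uint64_t page_size, uint64_t vm_end)
    {
        uint64_t expected_start = 0;
        for (const R& r : regions)
        {
            assert(r.start == expected_start);                // contiguous: no gaps or overlaps
            assert(r.count == (r.end - r.start) / page_size); // count mirrors the byte range
            expected_start = r.end;
        }
        assert(expected_start == vm_end); // the list always covers the whole space
    }

Merging itself is best-effort (hence the try_ prefix): some set_region() paths can leave a pair of same-flag neighbors unmerged, which wastes a node but never corrupts the ranges.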

View File

@@ -1,25 +1,46 @@
 #pragma once
-#include <luna/Bitmap.h>
+#include <luna/LinkedList.h>
 #include <luna/OwnedPtr.h>
 #include <luna/Result.h>
+class VMRegion : LinkedListNode<VMRegion>
+{
+  public:
+    u64 start;
+    u64 end;
+    usize count;
+    bool used { true };
+    bool persistent { false };
+};
 class UserVM
 {
   public:
-    UserVM(void* base, usize size);
+    UserVM();
     ~UserVM();
-    Result<u64> alloc_one_page();
-    Result<u64> alloc_several_pages(usize count);
-    Result<bool> free_one_page(u64 address);
-    Result<bool> free_several_pages(u64 address, usize count);
+    Result<u64> alloc_region(usize count, bool persistent = false);
+    Result<bool> test_and_alloc_region(u64 address, usize count)
+    {
+        return set_region(address, count, true);
+    }
+    Result<bool> free_region(u64 address, usize count)
+    {
+        return set_region(address, count, false);
+    }
     static Result<OwnedPtr<UserVM>> try_create();
     Result<OwnedPtr<UserVM>> clone();
   private:
-    Result<bool> try_expand(usize size = 160);
-    Bitmap m_bitmap;
+    Result<bool> set_region(u64 address, usize count, bool used);
+    Result<void> create_default_region();
+    Result<void> create_null_region();
+    void try_merge_region_with_neighbors(VMRegion* region);
+    void merge_contiguous_regions(VMRegion* a, VMRegion* b);
+    Result<VMRegion*> split_region(VMRegion* parent, u64 boundary);
+    LinkedList<VMRegion> m_regions;
 };
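
Making test_and_alloc_region() and free_region() inline wrappers over one private set_region() keeps the traversal and splitting logic in a single place. A hypothetical caller, for illustration (reserve_fixed_range() and its constants are invented; UserVM, TRY and err are from this codebase):

    // Hypothetical kernel-side usage sketch, not part of this commit.
    Result<u64> reserve_fixed_range(UserVM& vm, u64 base, usize pages)
    {
        // set_region() distinguishes "range unavailable" (false) from real
        // errors (EINVAL), so callers can decide how to react to each.
        if (!TRY(vm.test_and_alloc_region(base, pages))) return err(ENOMEM);
        return base;
    }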

View File

@@ -37,11 +37,13 @@ Result<u64> sys_mmap(Registers*, SyscallArgs args)
     Thread* current = Scheduler::current();
     u64 address;
-    if (!addr) address = TRY(current->vm_allocator->alloc_several_pages(get_blocks_from_size(len, ARCH_PAGE_SIZE)));
+    if (!addr) address = TRY(current->vm_allocator->alloc_region(get_blocks_from_size(len, ARCH_PAGE_SIZE)));
     else
     {
-        kwarnln("mmap: FIXME: tried to mmap at a given address, instead of letting us choose");
-        return err(ENOTSUP);
+        // FIXME: We should be more flexible if MAP_FIXED was not specified.
+        if (!TRY(current->vm_allocator->test_and_alloc_region((u64)addr, get_blocks_from_size(len, ARCH_PAGE_SIZE))))
+            return err(ENOMEM);
+        address = (u64)addr;
     }
     int mmu_flags = MMU::User | MMU::NoExecute;
@@ -53,6 +55,7 @@ Result<u64> sys_mmap(Registers*, SyscallArgs args)
     kdbgln("mmap: mapping memory at %#lx, size=%zu", address, len);
 #endif
+    // FIXME: This leaks VM if it fails.
     return MemoryManager::alloc_at_zeroed(address, get_blocks_from_size(len, ARCH_PAGE_SIZE), mmu_flags);
 }
@@ -66,7 +69,7 @@ Result<u64> sys_munmap(Registers*, SyscallArgs args)
     Thread* current = Scheduler::current();
-    bool ok = TRY(current->vm_allocator->free_several_pages(address, get_blocks_from_size(size, ARCH_PAGE_SIZE)));
+    bool ok = TRY(current->vm_allocator->free_region(address, get_blocks_from_size(size, ARCH_PAGE_SIZE)));
     // POSIX says munmap should silently do nothing if the memory was not already mapped.
     if (!ok) return 0;
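
The new "// FIXME: This leaks VM if it fails." comment marks the gap between reserving the range in the allocator and backing it with physical memory. One possible shape of a fix, using the codebase's own make_scope_guard (an untested sketch relying on sys_mmap's locals, not part of this commit):

    // Sketch only: roll back the VM reservation if the physical allocation fails.
    auto vm_guard = make_scope_guard(
        [&] { current->vm_allocator->free_region(address, get_blocks_from_size(len, ARCH_PAGE_SIZE)); });
    u64 result = TRY(MemoryManager::alloc_at_zeroed(address, get_blocks_from_size(len, ARCH_PAGE_SIZE), mmu_flags));
    vm_guard.deactivate(); // success: keep the mapping
    return result;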

View File

@@ -25,7 +25,7 @@ static bool can_write_segment(u32 flags)
 namespace ELFLoader
 {
-    Result<ELFData> load(SharedPtr<VFS::Inode> inode)
+    Result<ELFData> load(SharedPtr<VFS::Inode> inode, UserVM* vm)
     {
         Elf64_Ehdr elf_header;
         usize nread = TRY(inode->read((u8*)&elf_header, 0, sizeof elf_header));
@@ -100,6 +100,11 @@ namespace ELFLoader
                 if (can_write_segment(program_header.p_flags)) flags |= MMU::ReadWrite;
                 if (can_execute_segment(program_header.p_flags)) flags &= ~MMU::NoExecute;
+                // FIXME: Set this memory range to persistent so that munmap() cannot remove it.
+                if (!TRY(vm->test_and_alloc_region(
+                        base_vaddr, get_blocks_from_size(program_header.p_memsz + vaddr_diff, ARCH_PAGE_SIZE))))
+                    return err(ENOMEM);
                 // Allocate physical memory for the segment
                 TRY(MemoryManager::alloc_at(
                     base_vaddr, get_blocks_from_size(program_header.p_memsz + vaddr_diff, ARCH_PAGE_SIZE), flags));
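
Both the VM reservation and the physical allocation round the segment size up to whole pages via get_blocks_from_size(). Judging by its call sites this is a ceiling division (an assumption; the helper's definition is not part of this diff):

    // Assumed equivalent of get_blocks_from_size(), based on how it is called.
    #include <cstddef>
    constexpr std::size_t blocks_from_size(std::size_t size, std::size_t block_size)
    {
        return (size + block_size - 1) / block_size; // round up to whole blocks
    }
    static_assert(blocks_from_size(1, 4096) == 1);    // a 1-byte segment still needs a page
    static_assert(blocks_from_size(4096, 4096) == 1); // exact fit
    static_assert(blocks_from_size(4097, 4096) == 2); // one byte over spills into a second page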

View File

@@ -1,5 +1,6 @@
 #pragma once
 #include "fs/VFS.h"
+#include "memory/UserVM.h"
 #include <luna/Types.h>
 #define ELFMAG "\177ELF"
@@ -53,5 +54,5 @@ struct ELFData
 namespace ELFLoader
 {
-    Result<ELFData> load(SharedPtr<VFS::Inode> inode);
+    Result<ELFData> load(SharedPtr<VFS::Inode> inode, UserVM* vm);
 };

View File

@@ -6,10 +6,13 @@
 static constexpr usize DEFAULT_USER_STACK_PAGES = 6;
 static constexpr usize DEFAULT_USER_STACK_SIZE = DEFAULT_USER_STACK_PAGES * ARCH_PAGE_SIZE;
-static Result<void> create_stacks(Stack& user_stack, Stack& kernel_stack)
+static Result<void> create_stacks(Stack& user_stack, Stack& kernel_stack, UserVM* vm)
 {
     const u64 THREAD_STACK_BASE = 0x10000;
+    // FIXME: Set this memory range to persistent so that munmap() cannot remove it.
+    if (!TRY(vm->test_and_alloc_region(THREAD_STACK_BASE, DEFAULT_USER_STACK_PAGES))) return err(ENOMEM);
     TRY(MemoryManager::alloc_at_zeroed(THREAD_STACK_BASE, DEFAULT_USER_STACK_PAGES,
                                        MMU::ReadWrite | MMU::NoExecute | MMU::User));
@@ -42,11 +45,11 @@ Result<OwnedPtr<ThreadImage>> ThreadImage::try_load_from_elf(SharedPtr<VFS::Inod
         MMU::switch_page_directory(old_directory);
     });
-    const ELFData data = TRY(ELFLoader::load(inode));
+    const ELFData data = TRY(ELFLoader::load(inode, vm_allocator.ptr()));
     Stack user_stack;
     Stack kernel_stack;
-    TRY(create_stacks(user_stack, kernel_stack));
+    TRY(create_stacks(user_stack, kernel_stack, vm_allocator.ptr()));
     guard.deactivate();
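
For concreteness, with the constants above and 4 KiB pages (an assumption about ARCH_PAGE_SIZE on this target), the reserved stack range works out as follows; stack_top is an invented name:

    // Illustrative arithmetic; only THREAD_STACK_BASE and the page count
    // come from the diff, the rest is assumed.
    constexpr unsigned long THREAD_STACK_BASE = 0x10000;
    constexpr unsigned long DEFAULT_USER_STACK_PAGES = 6;
    constexpr unsigned long PAGE_SIZE = 4096;
    constexpr unsigned long stack_top = THREAD_STACK_BASE + DEFAULT_USER_STACK_PAGES * PAGE_SIZE;
    static_assert(stack_top == 0x16000); // the stack occupies [0x10000, 0x16000)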