From dc766e1da7ba37afbf90b84915bd90dcd2e5e1bc Mon Sep 17 00:00:00 2001 From: apio Date: Fri, 6 Dec 2024 21:35:59 +0100 Subject: [PATCH 1/5] kernel: Rework VFS access checking + add processes VFS functions now accept a single Process* pointer instead of credentials and groups. There is now a distinction between processes and threads Now to fix all the bugs... waitpid crashes the process with an NX error... --- base/etc/loginui.conf | 2 +- kernel/src/Pledge.cpp | 37 ++++--- kernel/src/Pledge.h | 2 +- kernel/src/binfmt/Script.cpp | 9 +- kernel/src/fs/InitRD.cpp | 5 +- kernel/src/fs/Pipe.cpp | 2 +- kernel/src/fs/VFS.cpp | 123 ++++++++++++++++----- kernel/src/fs/VFS.h | 19 ++-- kernel/src/fs/devices/MasterPTY.cpp | 17 +-- kernel/src/fs/devices/SlavePTY.cpp | 14 +-- kernel/src/fs/devices/TTYLink.cpp | 4 +- kernel/src/lib/Mutex.cpp | 12 +-- kernel/src/lib/Mutex.h | 79 ++++++++++++++ kernel/src/main.cpp | 4 +- kernel/src/net/UnixSocket.cpp | 34 +++--- kernel/src/sys/chdir.cpp | 13 ++- kernel/src/sys/clock_gettime.cpp | 2 +- kernel/src/sys/exec.cpp | 115 +++++++++++++------- kernel/src/sys/exit.cpp | 4 +- kernel/src/sys/file.cpp | 72 +++++++------ kernel/src/sys/getdents.cpp | 2 +- kernel/src/sys/id.cpp | 135 +++++++++++------------ kernel/src/sys/link.cpp | 24 +++-- kernel/src/sys/memstat.cpp | 2 +- kernel/src/sys/mkdir.cpp | 10 +- kernel/src/sys/mmap.cpp | 22 ++-- kernel/src/sys/mount.cpp | 19 ++-- kernel/src/sys/open.cpp | 32 +++--- kernel/src/sys/pledge.cpp | 2 +- kernel/src/sys/poll.cpp | 7 +- kernel/src/sys/pstat.cpp | 33 +++--- kernel/src/sys/resource.cpp | 2 +- kernel/src/sys/setitimer.cpp | 14 +-- kernel/src/sys/signal.cpp | 23 ++-- kernel/src/sys/socket.cpp | 19 ++-- kernel/src/sys/stat.cpp | 16 +-- kernel/src/sys/uname.cpp | 4 +- kernel/src/sys/usleep.cpp | 4 +- kernel/src/sys/waitpid.cpp | 37 ++++--- kernel/src/thread/Clock.cpp | 2 +- kernel/src/thread/Scheduler.cpp | 137 +++++++++++++++--------- kernel/src/thread/Scheduler.h | 37 +++++-- 
kernel/src/thread/Thread.cpp | 159 +++++++++++++++++++--------- kernel/src/thread/Thread.h | 140 +++++++++++++++--------- kernel/src/thread/ThreadImage.cpp | 7 +- kernel/src/thread/Timer.h | 4 +- 46 files changed, 915 insertions(+), 547 deletions(-) diff --git a/base/etc/loginui.conf b/base/etc/loginui.conf index 9a514af7..55ca66d6 100644 --- a/base/etc/loginui.conf +++ b/base/etc/loginui.conf @@ -1,5 +1,5 @@ # Configuration file for loginui. # If this parameter is set to "true", loginui automatically spawns a UI session as the below user instead of prompting for a username and password. -Autologin=true +Autologin=false # The user to create a session for if "Autologin" is set to true (see above). If the username is invalid, loginui will behave as if "Autologin" was set to false. AutologinUser=selene diff --git a/kernel/src/Pledge.cpp b/kernel/src/Pledge.cpp index 0f91474d..f5666e43 100644 --- a/kernel/src/Pledge.cpp +++ b/kernel/src/Pledge.cpp @@ -1,6 +1,7 @@ #include "Pledge.h" #include "Log.h" #include "memory/MemoryManager.h" +#include "thread/Scheduler.h" static const char* promise_names[] = { #define __enumerate(promise) #promise, @@ -8,30 +9,34 @@ static const char* promise_names[] = { #undef __enumerate }; -Result check_pledge(Thread* thread, Promise promise) +Result check_pledge(Process* process, Promise promise) { // Thread has not called pledge(). - if (thread->promises < 0) return {}; + if (process->promises < 0) return {}; int mask = (1 << (int)promise); - if ((thread->promises & mask) != mask) + if ((process->promises & mask) != mask) { - kerrorln("Pledge violation in thread %d! Has not pledged %s", thread->id, promise_names[(int)promise]); - if (thread->promises & (1 << (int)Promise::p_error)) return err(ENOSYS); + kerrorln("Pledge violation in process %d! Has not pledged %s", process->id, promise_names[(int)promise]); + if (process->promises & (1 << (int)Promise::p_error)) return err(ENOSYS); - // Kill this thread with an uncatchable SIGABRT. 
For this, we reset the disposition of SIGABRT to the default - // (dump core). We could just kill the thread here and be done, but that discards anything on the current stack, - // which means that some destructors might not be called. Instead, leave the job to the next call of - // Thread::process_pending_signals(). - thread->signal_handlers[SIGABRT - 1].sa_handler = SIG_DFL; + Scheduler::for_each_thread(process, [](Thread* thread) { + // Kill this thread with an uncatchable SIGABRT. For this, we reset the disposition of SIGABRT to the + // default (dump core). We could just kill the thread here and be done, but that discards anything on the + // current stack, which means that some destructors might not be called. Instead, leave the job to the next + // call of Thread::process_pending_signals(). + thread->signal_handlers[SIGABRT - 1].sa_handler = SIG_DFL; - // Unblock SIGABRT. - thread->signal_mask.set(SIGABRT - 1, false); + // Unblock SIGABRT. + thread->signal_mask.set(SIGABRT - 1, false); - // If there are any other pending signals, they might be processed before SIGABRT. Avoid that by resetting the - // thread's pending signals. - thread->pending_signals.clear(); + // If there are any other pending signals, they might be processed before SIGABRT. Avoid that by resetting + // the thread's pending signals. + thread->pending_signals.clear(); - thread->send_signal(SIGABRT); + thread->send_signal(SIGABRT); + + return true; + }); // This should never arrive to userspace, unless we're init and have ignored SIGABRT. 
return err(ENOSYS); diff --git a/kernel/src/Pledge.h b/kernel/src/Pledge.h index f7ea264f..77b92e71 100644 --- a/kernel/src/Pledge.h +++ b/kernel/src/Pledge.h @@ -14,6 +14,6 @@ enum class Promise num_promises, }; -Result check_pledge(Thread* thread, Promise promise); +Result check_pledge(Process* process, Promise promise); Result parse_promises(u64 pledge); diff --git a/kernel/src/binfmt/Script.cpp b/kernel/src/binfmt/Script.cpp index acf5f6c2..78b6fef9 100644 --- a/kernel/src/binfmt/Script.cpp +++ b/kernel/src/binfmt/Script.cpp @@ -17,7 +17,7 @@ Result ScriptLoader::load(AddressSpace* space) { u8 buf[256]; memset(buf, 0, sizeof(buf)); - + usize nread = TRY(m_inode->read(buf, 2, 255)); if (!nread) return err(ENOEXEC); for (usize i = 0; i < nread; i++) @@ -35,11 +35,10 @@ Result ScriptLoader::load(AddressSpace* space) if (!m_interpreter_cmdline.size()) return err(ENOEXEC); auto& interpreter_path = m_interpreter_cmdline[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); - auto interpreter = TRY(VFS::resolve_path(interpreter_path.chars(), current->auth, &current->extra_groups, - current->current_directory, true)); - if (!VFS::can_execute(interpreter, current->auth, &current->extra_groups)) return err(EACCES); + auto interpreter = TRY(VFS::resolve_path(interpreter_path.chars(), current, current->current_directory, true)); + if (!VFS::can_execute(interpreter, current)) return err(EACCES); auto loader = TRY(BinaryFormat::create_loader(interpreter, m_recursion_level + 1)); u64 entry = TRY(loader->load(space)); diff --git a/kernel/src/fs/InitRD.cpp b/kernel/src/fs/InitRD.cpp index 0cf712c5..c0b7f581 100644 --- a/kernel/src/fs/InitRD.cpp +++ b/kernel/src/fs/InitRD.cpp @@ -20,7 +20,7 @@ void InitRD::initialize() static Result vfs_create_dir_if_not_exists(const char* path, mode_t mode) { - auto rc = VFS::create_directory(path, mode & (mode_t)~S_IFMT, Credentials {}, nullptr); + auto rc = VFS::create_directory(path, mode & (mode_t)~S_IFMT, nullptr); if 
(rc.has_error()) { if (rc.error() == EEXIST) return {}; @@ -37,8 +37,7 @@ Result InitRD::populate_vfs() { if (entry.type == TarStream::EntryType::RegularFile) { - auto file = - TRY(VFS::create_file(entry.name.chars(), entry.mode & (mode_t)~S_IFMT, Credentials {}, nullptr)); + auto file = TRY(VFS::create_file(entry.name.chars(), entry.mode & (mode_t)~S_IFMT, nullptr)); file->write(entry.data(), 0, entry.size); } else if (entry.type == TarStream::EntryType::Directory) diff --git a/kernel/src/fs/Pipe.cpp b/kernel/src/fs/Pipe.cpp index ff9f8384..cc4f015f 100644 --- a/kernel/src/fs/Pipe.cpp +++ b/kernel/src/fs/Pipe.cpp @@ -8,7 +8,7 @@ Result Pipe::create(SharedPtr& rpipe, SharedPtr& w auto writer = TRY(make_shared()); auto reader = TRY(make_shared()); - auto auth = Scheduler::current()->auth; + auto auth = Process::current()->credentials(); pipe->m_writer = writer.ptr(); pipe->m_reader = reader.ptr(); diff --git a/kernel/src/fs/VFS.cpp b/kernel/src/fs/VFS.cpp index f809ec20..b9b150f5 100644 --- a/kernel/src/fs/VFS.cpp +++ b/kernel/src/fs/VFS.cpp @@ -17,9 +17,8 @@ namespace VFS static constexpr int MAX_SYMLINKS = 8; - Result> resolve_path_impl(const char* path, Credentials auth, const Vector* extra_groups, - SharedPtr current_inode, bool follow_last_symlink, - int& symlinks_followed) + Result> resolve_path_impl(const char* path, Process* process, SharedPtr current_inode, + bool follow_last_symlink, int& symlinks_followed) { if (symlinks_followed >= MAX_SYMLINKS) return err(ELOOP); @@ -32,7 +31,7 @@ namespace VFS const char* section; while (parser.next().try_set_value(section)) { - if (!can_execute(current_inode, auth, extra_groups)) return err(EACCES); + if (!can_execute(current_inode, process)) return err(EACCES); current_inode = TRY(current_inode->find(section)); if (current_inode->type() == VFS::InodeType::Symlink && (follow_last_symlink || parser.has_next())) @@ -46,8 +45,7 @@ namespace VFS symlink_root = parent_inode; symlinks_followed++; - current_inode = - 
TRY(resolve_path_impl(link.chars(), auth, extra_groups, symlink_root, true, symlinks_followed)); + current_inode = TRY(resolve_path_impl(link.chars(), process, symlink_root, true, symlinks_followed)); symlinks_followed--; } @@ -57,8 +55,8 @@ namespace VFS return current_inode; } - Result> resolve_path(const char* path, Credentials auth, const Vector* extra_groups, - SharedPtr working_directory, bool follow_last_symlink) + Result> resolve_path(const char* path, Process* process, SharedPtr working_directory, + bool follow_last_symlink) { SharedPtr current_inode; @@ -68,17 +66,17 @@ namespace VFS int symlinks_followed = 0; - return resolve_path_impl(path, auth, extra_groups, current_inode, follow_last_symlink, symlinks_followed); + return resolve_path_impl(path, process, current_inode, follow_last_symlink, symlinks_followed); } - Result> create_directory(const char* path, mode_t mode, Credentials auth, - const Vector* extra_groups, SharedPtr working_directory) + Result> create_directory(const char* path, mode_t mode, Process* process, + SharedPtr working_directory) { auto parent_path = TRY(PathParser::dirname(path)); - auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory)); + auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory)); - if (!can_write(parent_inode, auth, extra_groups)) return err(EACCES); + if (!can_write(parent_inode, process)) return err(EACCES); auto child_name = TRY(PathParser::basename(path)); @@ -87,14 +85,14 @@ namespace VFS return parent_inode->create_subdirectory(child_name.chars(), mode); } - Result> create_file(const char* path, mode_t mode, Credentials auth, - const Vector* extra_groups, SharedPtr working_directory) + Result> create_file(const char* path, mode_t mode, Process* process, + SharedPtr working_directory) { auto parent_path = TRY(PathParser::dirname(path)); - auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory)); + 
auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory)); - if (!can_write(parent_inode, auth, extra_groups)) return err(EACCES); + if (!can_write(parent_inode, process)) return err(EACCES); auto child_name = TRY(PathParser::basename(path)); @@ -135,6 +133,81 @@ namespace VFS return {}; } + // FIXME: Check all three permissions even if the UID or GID match. + bool can_execute(SharedPtr inode, Process* process) + { + const auto& metadata = inode->metadata(); + + Credentials auth { 0 }; + if (process) auth = process->credentials(); + + if (auth.euid == 0) return true; + + if (metadata.uid == auth.euid) { return metadata.mode & S_IXUSR; } + if (metadata.gid == auth.egid) { return metadata.mode & S_IXGRP; } + + if (process) + { + auto groups = process->extra_groups.lock(); + for (gid_t group : *groups) + { + if (metadata.gid == group) return metadata.mode & S_IXGRP; + } + } + + return metadata.mode & S_IXOTH; + } + + // FIXME: Check all three permissions even if the UID or GID match. + bool can_write(SharedPtr inode, Process* process) + { + const auto& metadata = inode->metadata(); + + Credentials auth { 0 }; + if (process) auth = process->credentials(); + + if (auth.euid == 0) return true; + + if (metadata.uid == auth.euid) { return metadata.mode & S_IWUSR; } + if (metadata.gid == auth.egid) { return metadata.mode & S_IWGRP; } + + if (process) + { + auto groups = process->extra_groups.lock(); + for (gid_t group : *groups) + { + if (metadata.gid == group) return metadata.mode & S_IWGRP; + } + } + + return metadata.mode & S_IWOTH; + } + + // FIXME: Check all three permissions even if the UID or GID match. 
+ bool can_read(SharedPtr inode, Process* process) + { + const auto& metadata = inode->metadata(); + + Credentials auth { 0 }; + if (process) auth = process->credentials(); + + if (auth.euid == 0) return true; + + if (metadata.uid == auth.euid) { return metadata.mode & S_IRUSR; } + if (metadata.gid == auth.egid) { return metadata.mode & S_IRGRP; } + + if (process) + { + auto groups = process->extra_groups.lock(); + for (gid_t group : *groups) + { + if (metadata.gid == group) return metadata.mode & S_IRGRP; + } + } + + return metadata.mode & S_IROTH; + } + // FIXME: Check all three permissions even if the UID or GID match. bool can_execute(SharedPtr inode, Credentials auth, const Vector* extra_groups) { @@ -232,8 +305,7 @@ namespace VFS auto new_root_parent = TRY(PathParser::dirname(new_root)); auto new_root_path = TRY(PathParser::basename(new_root)); - auto new_root_parent_inode = - TRY(VFS::resolve_path(new_root_parent.chars(), Credentials {}, nullptr, working_directory)); + auto new_root_parent_inode = TRY(VFS::resolve_path(new_root_parent.chars(), nullptr, working_directory)); auto new_root_inode = TRY(new_root_parent_inode->find(new_root_path.chars())); if (new_root_inode->type() != VFS::InodeType::Directory) return err(ENOTDIR); @@ -245,7 +317,7 @@ namespace VFS kdbgln("vfs: Pivoting root from / to %s, using %s as new root", put_old, new_root); - auto parent_inode = TRY(resolve_path(parent_path.chars(), Credentials {}, nullptr, working_directory)); + auto parent_inode = TRY(resolve_path(parent_path.chars(), nullptr, working_directory)); auto inode = TRY(parent_inode->find(child.chars())); if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR); @@ -265,8 +337,8 @@ namespace VFS return {}; } - Result mount(const char* path, SharedPtr fs, Credentials auth, - const Vector* extra_groups, SharedPtr working_directory) + Result mount(const char* path, SharedPtr fs, Process* process, + SharedPtr working_directory) { auto parent_path = 
TRY(PathParser::dirname(path)); auto child = TRY(PathParser::basename(path)); @@ -275,7 +347,7 @@ namespace VFS kdbgln("vfs: Mounting filesystem on target %s", path); #endif - auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory)); + auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory)); auto inode = TRY(parent_inode->find(child.chars())); if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR); @@ -290,8 +362,7 @@ namespace VFS return {}; } - Result umount(const char* path, Credentials auth, const Vector* extra_groups, - SharedPtr working_directory) + Result umount(const char* path, Process* process, SharedPtr working_directory) { auto parent_path = TRY(PathParser::dirname(path)); auto child = TRY(PathParser::basename(path)); @@ -300,7 +371,7 @@ namespace VFS kinfoln("vfs: Unmounting filesystem on target %s", path); - auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory)); + auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory)); auto inode = TRY(parent_inode->find(child.chars())); if (!inode->is_mountpoint()) return err(EINVAL); diff --git a/kernel/src/fs/VFS.h b/kernel/src/fs/VFS.h index b28a974d..4d50b7b5 100644 --- a/kernel/src/fs/VFS.h +++ b/kernel/src/fs/VFS.h @@ -7,6 +7,7 @@ #include #include +struct Process; struct Credentials; namespace VFS @@ -319,20 +320,21 @@ namespace VFS virtual ~DeviceInode() = default; }; - Result> resolve_path(const char* path, Credentials auth, const Vector* extra_groups, + Result> resolve_path(const char* path, Process* process, SharedPtr working_directory = {}, bool follow_last_symlink = true); - Result> create_directory(const char* path, mode_t mode, Credentials auth, - const Vector* extra_groups, + Result> create_directory(const char* path, mode_t mode, Process* process, SharedPtr working_directory = {}); - Result> create_file(const char* path, mode_t mode, 
Credentials auth, - const Vector* extra_groups, + Result> create_file(const char* path, mode_t mode, Process* process, SharedPtr working_directory = {}); Result validate_filename(StringView name); + bool can_execute(SharedPtr inode, Process* process); + bool can_read(SharedPtr inode, Process* process); + bool can_write(SharedPtr inode, Process* process); bool can_execute(SharedPtr inode, Credentials auth, const Vector* extra_groups); bool can_read(SharedPtr inode, Credentials auth, const Vector* extra_groups); bool can_write(SharedPtr inode, Credentials auth, const Vector* extra_groups); @@ -346,9 +348,8 @@ namespace VFS Result mount_root(SharedPtr fs); Result pivot_root(const char* new_root, const char* put_old, SharedPtr working_directory); - Result mount(const char* path, SharedPtr fs, Credentials auth, - const Vector* extra_groups, SharedPtr working_directory = {}); + Result mount(const char* path, SharedPtr fs, Process* process, + SharedPtr working_directory = {}); - Result umount(const char* path, Credentials auth, const Vector* extra_groups, - SharedPtr working_directory = {}); + Result umount(const char* path, Process* process, SharedPtr working_directory = {}); } diff --git a/kernel/src/fs/devices/MasterPTY.cpp b/kernel/src/fs/devices/MasterPTY.cpp index 5d43016a..1cd87d55 100644 --- a/kernel/src/fs/devices/MasterPTY.cpp +++ b/kernel/src/fs/devices/MasterPTY.cpp @@ -31,8 +31,9 @@ Result> MasterPTY::create_pair(int index) slave->m_master = master.ptr(); slave->m_metadata.devid = luna_dev_makedev(DeviceRegistry::Terminal, index + 2); - slave->m_metadata.uid = Scheduler::current()->auth.euid; - slave->m_metadata.gid = Scheduler::current()->auth.egid; + auto credentials = Process::current()->credentials(); + slave->m_metadata.uid = credentials.euid; + slave->m_metadata.gid = credentials.egid; slave->m_metadata.mode = 0620; slave->m_metadata.initialize_times(); @@ -46,7 +47,7 @@ Result MasterPTY::handle_background_process_group(bool can_succeed, int si auto 
foreground_pgrp = m_foreground_process_group.value(); auto* current = Scheduler::current(); - if (current->pgid == foreground_pgrp) return {}; + if (current->process->pgid == foreground_pgrp) return {}; if ((current->signal_mask.get(signo - 1)) || (current->signal_handlers[signo - 1].sa_handler == SIG_IGN)) { @@ -112,8 +113,8 @@ Result MasterPTY::handle_input(u8 key) if (!(m_settings.c_lflag & NOFLSH)) m_current_line_buffer.clear(); if (m_foreground_process_group.has_value()) - Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Thread* thread) { - thread->send_signal(SIGINT); + Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Process* p) { + p->send_signal(SIGINT); return true; }); @@ -125,8 +126,8 @@ Result MasterPTY::handle_input(u8 key) if (!(m_settings.c_lflag & NOFLSH)) m_current_line_buffer.clear(); if (m_foreground_process_group.has_value()) - Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Thread* thread) { - thread->send_signal(SIGQUIT); + Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Process* p) { + p->send_signal(SIGQUIT); return true; }); @@ -202,7 +203,7 @@ Result MasterPTY::write(const u8* buf, usize, usize length) Result MasterPTY::ioctl(int request, void* arg) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_tty)); switch (request) diff --git a/kernel/src/fs/devices/SlavePTY.cpp b/kernel/src/fs/devices/SlavePTY.cpp index 223d412b..c3be46d5 100644 --- a/kernel/src/fs/devices/SlavePTY.cpp +++ b/kernel/src/fs/devices/SlavePTY.cpp @@ -47,7 +47,7 @@ bool SlavePTY::will_block_if_read() const Result SlavePTY::ioctl(int request, void* arg) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_tty)); if (!m_master) return err(EIO); @@ -69,9 +69,9 @@ Result SlavePTY::ioctl(int request, void* arg) bool pgid_exists = false; pid_t sid; - 
Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Thread* thread) { + Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Process* p) { pgid_exists = true; - sid = thread->sid; // should be the same for all threads in the process group + sid = p->sid.load(); // should be the same for all threads in the process group return false; }); if (!pgid_exists) return err(EPERM); @@ -95,13 +95,13 @@ Result SlavePTY::ioctl(int request, void* arg) if (this->m_master->m_session.has_value()) return err(EPERM); if (!current->is_session_leader()) return err(EPERM); - Scheduler::for_each_in_session(current->sid, [this](Thread* thread) { - thread->controlling_terminal = this; + Scheduler::for_each_in_session(current->sid, [this](Process* p) { + p->controlling_terminal = this; return true; }); - m_master->m_session = current->sid; - m_master->m_foreground_process_group = current->pgid; + m_master->m_session = current->sid.load(); + m_master->m_foreground_process_group = current->pgid.load(); return 0; } diff --git a/kernel/src/fs/devices/TTYLink.cpp b/kernel/src/fs/devices/TTYLink.cpp index 423338ed..ae1b268e 100644 --- a/kernel/src/fs/devices/TTYLink.cpp +++ b/kernel/src/fs/devices/TTYLink.cpp @@ -10,7 +10,7 @@ TTYLink::TTYLink() Result> TTYLink::open() { - if (!Scheduler::current()->controlling_terminal) return err(ENXIO); + if (!Process::current()->controlling_terminal) return err(ENXIO); - return Scheduler::current()->controlling_terminal; + return Process::current()->controlling_terminal; } diff --git a/kernel/src/lib/Mutex.cpp b/kernel/src/lib/Mutex.cpp index 0919471a..17a56d2a 100644 --- a/kernel/src/lib/Mutex.cpp +++ b/kernel/src/lib/Mutex.cpp @@ -6,7 +6,7 @@ void Mutex::lock() { auto* current = Scheduler::current(); - const pid_t desired = current->id; + const pid_t desired = current->tid; check(desired > 0); // Why the hell would the idle thread be touching a mutex? 
while (true) @@ -19,7 +19,7 @@ void Mutex::lock() { if (expected == desired) { - kerrorln("DEADLOCK! KMutex::lock() recursively called by the same thread (%d)", current->id); + kerrorln("DEADLOCK! KMutex::lock() recursively called by the same thread (%d)", current->tid); fail("Mutex deadlock detected"); } @@ -40,7 +40,7 @@ void Mutex::lock() void Mutex::unlock() { auto* current = Scheduler::current(); - pid_t expected = current->id; + pid_t expected = current->tid; check(expected > 0); // Why the hell would the idle thread be touching a mutex? m_spinlock.lock(); @@ -48,7 +48,7 @@ void Mutex::unlock() if (!m_thread.compare_exchange_strong(expected, 0)) { kerrorln("KMutex::unlock() called on a lock already locked by another thread (%d, current is %d)", expected, - current->id); + current->tid); fail("Mutex unlock by different thread"); } @@ -70,7 +70,7 @@ void Mutex::unlock() bool Mutex::try_lock() { auto* current = Scheduler::current(); - const pid_t desired = current->id; + const pid_t desired = current->tid; check(desired > 0); // Why the hell would the idle thread be touching a mutex? // Make sure only one thread is touching the mutex at the same time. @@ -83,7 +83,7 @@ bool Mutex::try_lock() { kwarnln("Deadlock avoided! 
KMutex::try_lock() failed because it was already locked by the same thread " "(%d), this is not supposed to happen", - current->id); + current->tid); CPU::print_stack_trace(); } diff --git a/kernel/src/lib/Mutex.h b/kernel/src/lib/Mutex.h index 57f46368..fb2bd07b 100644 --- a/kernel/src/lib/Mutex.h +++ b/kernel/src/lib/Mutex.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -18,6 +19,84 @@ class Mutex Atomic m_thread; }; +template class MutexLocked +{ + struct MutexLockedGuard + { + MutexLockedGuard(MutexLocked& value_ref) : m_value_ref(&value_ref) + { + } + + MutexLockedGuard(const MutexLockedGuard& other) = delete; + MutexLockedGuard(MutexLockedGuard&& other) + { + m_value_ref = other.m_value_ref; + other.m_value_ref = nullptr; + } + + ~MutexLockedGuard() + { + if (m_value_ref) m_value_ref->m_lock.unlock(); + } + + T& ref() + { + expect(m_value_ref, "MutexLockedGuard::ref() called on a moved MutexLockedGuard"); + return m_value_ref->m_value; + } + + void set(const T& other) + { + ref() = other; + } + + T* operator->() + { + return &ref(); + } + + T& operator*() + { + return ref(); + } + + private: + MutexLocked* m_value_ref; + }; + + public: + MutexLocked() : m_value() + { + } + + MutexLocked(T value) : m_value(move(value)) + { + } + + MutexLockedGuard lock() + { + m_lock.lock(); + return { *this }; + } + + Option try_lock() + { + if (m_lock.try_lock()) { return { *this }; } + return {}; + } + + void with_lock(Function callback) + { + m_lock.lock(); + callback(m_value); + m_lock.unlock(); + } + + private: + T m_value; + Mutex m_lock; +}; + class ScopedMutexLock { public: diff --git a/kernel/src/main.cpp b/kernel/src/main.cpp index 76de5daa..378ecb57 100644 --- a/kernel/src/main.cpp +++ b/kernel/src/main.cpp @@ -65,8 +65,8 @@ void oom_thread() mark_critical(BinaryFormat::init(), "Failed to register initial binary formats"); mark_critical(FSRegistry::init(), "Failed to register initial file systems"); - auto init = 
mark_critical(VFS::resolve_path("/bin/preinit", Credentials {}, nullptr), - "Can't find init in the initial ramfs!"); + auto init = + mark_critical(VFS::resolve_path("/bin/preinit", nullptr, nullptr), "Can't find init in the initial ramfs!"); auto init_thread = mark_critical(Scheduler::create_init_process(init, "/bin/preinit"), "Failed to create PID 1 process for init"); diff --git a/kernel/src/net/UnixSocket.cpp b/kernel/src/net/UnixSocket.cpp index 751ea959..40bc861d 100644 --- a/kernel/src/net/UnixSocket.cpp +++ b/kernel/src/net/UnixSocket.cpp @@ -56,14 +56,14 @@ Result UnixSocket::recv(u8* buf, usize length, int) const return m_data.dequeue_data(buf, length); } -static Result bind_socket_to_fs(const char* path, Credentials auth, const Vector* extra_groups, - SharedPtr working_directory, SharedPtr socket) +static Result bind_socket_to_fs(const char* path, Process* process, SharedPtr working_directory, + SharedPtr socket) { auto parent_path = TRY(PathParser::dirname(path)); - auto parent_inode = TRY(VFS::resolve_path(parent_path.chars(), auth, extra_groups, working_directory)); + auto parent_inode = TRY(VFS::resolve_path(parent_path.chars(), process, working_directory)); - if (!VFS::can_write(parent_inode, auth, extra_groups)) return err(EACCES); + if (!VFS::can_write(parent_inode, process)) return err(EACCES); auto child_name = TRY(PathParser::basename(path)); @@ -89,14 +89,14 @@ Result UnixSocket::bind(struct sockaddr* addr, socklen_t addrlen) String path = TRY(String::from_string_view( StringView::from_fixed_size_cstring(un_address->sun_path, addrlen - sizeof(sa_family_t)))); - auto* current = Scheduler::current(); + auto* current = Process::current(); m_metadata.mode = 0777 & ~current->umask; - m_metadata.uid = current->auth.euid; - m_metadata.gid = current->auth.egid; + auto auth = current->credentials(); + m_metadata.uid = auth.euid; + m_metadata.gid = auth.egid; - auto rc = bind_socket_to_fs(path.chars(), current->auth, &current->extra_groups, 
current->current_directory, - SharedPtr { this }); + auto rc = bind_socket_to_fs(path.chars(), current, current->current_directory, SharedPtr { this }); if (rc.has_error()) { if (rc.error() == EEXIST) return err(EADDRINUSE); @@ -126,13 +126,13 @@ Result UnixSocket::connect(Registers* regs, int flags, struct sockaddr* ad String path = TRY(String::from_string_view( StringView::from_fixed_size_cstring(un_address->sun_path, addrlen - sizeof(sa_family_t)))); - auto* current = Scheduler::current(); + auto* current = Process::current(); + auto* thread = Scheduler::current(); - auto inode = - TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory)); + auto inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory)); if (inode->type() != VFS::InodeType::Socket) return err(ENOTSOCK); // FIXME: POSIX doesn't say what error to return here? - if (!VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES); + if (!VFS::can_write(inode, current)) return err(EACCES); auto socket = (SharedPtr)inode; if (socket->m_state != State::Listening) return err(ECONNREFUSED); @@ -144,14 +144,14 @@ Result UnixSocket::connect(Registers* regs, int flags, struct sockaddr* ad while (1) { - m_blocked_thread = current; + m_blocked_thread = thread; kernel_wait_for_event(); m_blocked_thread = nullptr; - if (current->interrupted) + if (thread->interrupted) { - if (current->will_ignore_pending_signal()) + if (thread->will_ignore_pending_signal()) { - current->process_pending_signals(regs); + thread->process_pending_signals(regs); continue; } return err(EINTR); diff --git a/kernel/src/sys/chdir.cpp b/kernel/src/sys/chdir.cpp index 70baeda5..03221aeb 100644 --- a/kernel/src/sys/chdir.cpp +++ b/kernel/src/sys/chdir.cpp @@ -8,16 +8,16 @@ Result sys_chdir(Registers*, SyscallArgs args) { auto path = TRY(MemoryManager::strdup_from_user(args[0])); - Thread* current = Scheduler::current(); + Process* current = Process::current(); 
TRY(check_pledge(current, Promise::p_rpath)); if (PathParser::is_absolute(path.view())) { - SharedPtr inode = TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups)); + SharedPtr inode = TRY(VFS::resolve_path(path.chars(), current)); if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR); - if (!VFS::can_execute(inode, current->auth, &current->extra_groups)) return err(EACCES); + if (!VFS::can_execute(inode, current)) return err(EACCES); inode->add_handle(); if (current->current_directory) current->current_directory->remove_handle(); @@ -29,11 +29,10 @@ Result sys_chdir(Registers*, SyscallArgs args) } else { - SharedPtr inode = - TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory)); + SharedPtr inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory)); if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR); - if (!VFS::can_execute(inode, current->auth, &current->extra_groups)) return err(EACCES); + if (!VFS::can_execute(inode, current)) return err(EACCES); auto old_wdir = current->current_directory_path.view(); @@ -54,7 +53,7 @@ Result sys_getcwd(Registers*, SyscallArgs args) u8* buf = (u8*)args[0]; usize size = (usize)args[1]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); StringView cwd = current->current_directory_path.view(); if (cwd.is_empty()) cwd = "/"_sv; diff --git a/kernel/src/sys/clock_gettime.cpp b/kernel/src/sys/clock_gettime.cpp index 9511d312..1af6db85 100644 --- a/kernel/src/sys/clock_gettime.cpp +++ b/kernel/src/sys/clock_gettime.cpp @@ -11,7 +11,7 @@ Result sys_clock_gettime(Registers*, SyscallArgs args) clockid_t id = (clockid_t)args[0]; struct timespec* ts = (struct timespec*)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); diff --git a/kernel/src/sys/exec.cpp b/kernel/src/sys/exec.cpp index 1da8b799..c952da8c 100644 --- 
a/kernel/src/sys/exec.cpp +++ b/kernel/src/sys/exec.cpp @@ -64,14 +64,14 @@ Result sys_execve(Registers* regs, SyscallArgs args) if ((calculate_userspace_stack_size(argv) + calculate_userspace_stack_size(envp)) > MAX_ARGV_STACK_SIZE) return err(E2BIG); - auto current = Scheduler::current(); + auto current = Process::current(); + auto thread = Scheduler::current(); TRY(check_pledge(current, Promise::p_exec)); - auto inode = - TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory)); + auto inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory)); - if (!VFS::can_execute(inode, current->auth, &current->extra_groups)) return err(EACCES); + if (!VFS::can_execute(inode, current)) return err(EACCES); #ifdef EXEC_DEBUG kdbgln("exec: attempting to replace current image with %s", path.chars()); @@ -88,7 +88,7 @@ Result sys_execve(Registers* regs, SyscallArgs args) kdbgln("exec: created loader for binary format %s", loader->format().chars()); #endif - auto guard = make_scope_guard([current] { MMU::switch_page_directory(current->self_directory()); }); + auto guard = make_scope_guard([thread] { MMU::switch_page_directory(thread->self_directory()); }); auto image = TRY(ThreadImage::try_load_from_binary(loader)); @@ -108,6 +108,14 @@ Result sys_execve(Registers* regs, SyscallArgs args) guard.deactivate(); + // Terminate all other threads.
+ Scheduler::for_each_thread(current, [thread](Thread* t) { + if (t != thread) t->quit(); + return true; + }); + + current->thread_count = 1; + current->real_timer.disarm(); current->virtual_timer.disarm(); current->profiling_timer.disarm(); @@ -120,32 +128,38 @@ Result sys_execve(Registers* regs, SyscallArgs args) } } - for (int i = 0; i < FD_MAX; i++) { - auto& descriptor = current->fd_table[i]; - if (!descriptor.has_value()) continue; - if (descriptor->flags & O_CLOEXEC) { descriptor = {}; } + auto table = current->fd_table.lock(); + for (int i = 0; i < FD_MAX; i++) + { + auto& descriptor = (*table)[i]; + if (!descriptor.has_value()) continue; + if (descriptor->flags & O_CLOEXEC) { descriptor = {}; } + } } - if (is_setuid) current->auth.euid = current->auth.suid = inode->metadata().uid; - if (is_setgid) current->auth.egid = current->auth.sgid = inode->metadata().gid; + { + auto auth = current->auth.lock(); + if (is_setuid) (*auth).euid = (*auth).suid = inode->metadata().uid; + if (is_setgid) (*auth).egid = (*auth).sgid = inode->metadata().gid; + } current->cmdline = cmdline.chars(); - image->apply(current); + image->apply(thread); - MMU::switch_page_directory(current->self_directory()); + MMU::switch_page_directory(thread->self_directory()); - current->set_arguments(user_argc, user_argv, user_envc, user_envp); + thread->set_arguments(user_argc, user_argv, user_envc, user_envp); current->promises = current->execpromises; current->execpromises = -1; - memcpy(regs, &current->regs, sizeof(*regs)); + memcpy(regs, &thread->regs, sizeof(*regs)); for (int i = 0; i < NSIG; i++) { - current->signal_handlers[i] = { .sa_handler = SIG_DFL, .sa_mask = 0, .sa_flags = 0 }; + thread->signal_handlers[i] = { .sa_handler = SIG_DFL, .sa_mask = 0, .sa_flags = 0 }; } current->has_called_exec = true; @@ -157,57 +171,78 @@ Result sys_execve(Registers* regs, SyscallArgs args) Result sys_fork(Registers* regs, SyscallArgs) { - auto current = Scheduler::current(); + auto current =
Process::current(); + auto current_thread = Scheduler::current(); TRY(check_pledge(current, Promise::p_proc)); - auto extra_groups = TRY(current->extra_groups.shallow_copy()); + Vector extra_groups = TRY(current->copy_groups()); + Credentials auth = current->credentials(); - auto guard = make_scope_guard([current] { MMU::switch_page_directory(current->self_directory()); }); + auto guard = make_scope_guard([current_thread] { MMU::switch_page_directory(current_thread->self_directory()); }); - memcpy(&current->regs, regs, sizeof(*regs)); + memcpy(&current_thread->regs, regs, sizeof(*regs)); auto current_directory_path = TRY(current->current_directory_path.clone()); - auto image = TRY(ThreadImage::clone_from_thread(current)); + auto image = TRY(ThreadImage::clone_from_thread(current_thread)); auto thread = TRY(new_thread()); + auto process = TRY(make()); + + Option fds[FD_MAX]; + { + auto table = current->fd_table.lock(); + for (int i = 0; i < FD_MAX; i++) { fds[i] = (*table)[i]; } + } thread->state = ThreadState::Runnable; - thread->is_kernel = false; thread->fp_data.save(); - thread->cmdline = current->cmdline; - thread->auth = current->auth; - thread->current_directory = current->current_directory; - thread->current_directory_path = move(current_directory_path); - thread->umask = current->umask; - thread->parent = current; - thread->promises = current->promises; - thread->execpromises = current->execpromises; - thread->controlling_terminal = current->controlling_terminal; - thread->pgid = current->pgid; - thread->sid = current->sid; - thread->extra_groups = move(extra_groups); + thread->cmdline = current_thread->cmdline; + thread->process = process; - thread->virtual_clock.set_resolution(1'000'000); - thread->profiling_clock.set_resolution(1'000'000); + process->thread_count = 1; + process->id = thread->tid; + process->current_directory = current->current_directory; + process->current_directory_path = move(current_directory_path); + process->umask = current->umask; +
process->parent = current; + process->promises = current->promises; + process->execpromises = current->execpromises; + process->controlling_terminal = current->controlling_terminal; + process->pgid = current->pgid; + process->sid = current->sid; + process->extra_groups = move(extra_groups); + process->cmdline = current->cmdline; - for (int i = 0; i < FD_MAX; i++) { thread->fd_table[i] = current->fd_table[i]; } + process->virtual_clock.set_resolution(1'000'000); + process->profiling_clock.set_resolution(1'000'000); + + { + auto credentials = process->auth.lock(); + *credentials = auth; + } + + { + auto table = process->fd_table.lock(); + for (int i = 0; i < FD_MAX; i++) { (*table)[i] = fds[i]; } + } image->apply(thread); memcpy(&thread->regs, regs, sizeof(*regs)); - for (int i = 0; i < NSIG; i++) thread->signal_handlers[i] = current->signal_handlers[i]; - thread->signal_mask = current->signal_mask; + for (int i = 0; i < NSIG; i++) thread->signal_handlers[i] = current_thread->signal_handlers[i]; + thread->signal_mask = current_thread->signal_mask; thread->set_return(0); Scheduler::add_thread(thread); + Scheduler::add_process(process); #ifdef FORK_DEBUG kdbgln("fork: thread %d forked into child %d", current->id, thread->id); #endif - return thread->id; + return process->id; } diff --git a/kernel/src/sys/exit.cpp b/kernel/src/sys/exit.cpp index b6a33dcc..bc18c095 100644 --- a/kernel/src/sys/exit.cpp +++ b/kernel/src/sys/exit.cpp @@ -5,7 +5,7 @@ Result sys_exit(Registers*, SyscallArgs args) { u8 status = (u8)args[0]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); - current->exit_and_signal_parent(status); + current->exit(status); } diff --git a/kernel/src/sys/file.cpp b/kernel/src/sys/file.cpp index 8a768b46..49e02f25 100644 --- a/kernel/src/sys/file.cpp +++ b/kernel/src/sys/file.cpp @@ -25,9 +25,9 @@ Result sys_read(Registers* regs, SyscallArgs args) Thread* current = Scheduler::current(); - TRY(check_pledge(current, 
Promise::p_stdio)); + TRY(check_pledge(current->process, Promise::p_stdio)); - auto& descriptor = *TRY(current->resolve_fd(fd)); + auto& descriptor = *TRY(current->process->resolve_fd(fd)); if (!descriptor.is_readable()) return err(EBADF); @@ -66,7 +66,7 @@ Result sys_write(Registers*, SyscallArgs args) if (!MemoryManager::validate_user_read(buf, size)) return err(EFAULT); - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -90,7 +90,7 @@ Result sys_lseek(Registers*, SyscallArgs args) off_t offset = (long)args[1]; int whence = (int)args[2]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -122,7 +122,7 @@ Result sys_fcntl(Registers*, SyscallArgs args) int fd = (int)args[0]; int cmd = (int)args[1]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -135,13 +135,13 @@ Result sys_fcntl(Registers*, SyscallArgs args) case F_DUPFD: is_cloexec = false; [[fallthrough]]; case F_DUPFD_CLOEXEC: { int arg = (int)args[2]; - int new_fd = TRY(current->allocate_fd(arg)); + int new_fd = TRY(current->allocate_fd(arg, descriptor)); - current->fd_table[new_fd] = descriptor; + auto table = current->fd_table.lock(); - if (is_cloexec) current->fd_table[new_fd]->flags |= O_CLOEXEC; + if (is_cloexec) (*table)[new_fd]->flags |= O_CLOEXEC; else - current->fd_table[new_fd]->flags &= ~O_CLOEXEC; + (*table)[new_fd]->flags &= ~O_CLOEXEC; return (u64)new_fd; } @@ -174,7 +174,7 @@ Result sys_ioctl(Registers*, SyscallArgs args) int request = (int)args[1]; void* arg = (void*)args[2]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); auto& descriptor = *TRY(current->resolve_fd(fd)); return descriptor.inode()->ioctl(request, arg); @@ -184,7 +184,7 @@ Result sys_isatty(Registers*, SyscallArgs args) { int fd = (int)args[0]; - 
Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); auto& descriptor = *TRY(current->resolve_fd(fd)); @@ -196,7 +196,7 @@ Result sys_dup2(Registers*, SyscallArgs args) int oldfd = (int)args[0]; int newfd = (int)args[1]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -206,8 +206,10 @@ Result sys_dup2(Registers*, SyscallArgs args) if (newfd == oldfd) return (u64)newfd; - current->fd_table[newfd] = descriptor; - current->fd_table[newfd]->flags &= ~O_CLOEXEC; + auto table = current->fd_table.lock(); + + (*table)[newfd] = descriptor; + (*table)[newfd]->flags &= ~O_CLOEXEC; return (u64)newfd; } @@ -216,23 +218,23 @@ Result sys_pipe(Registers*, SyscallArgs args) { int* pfds = (int*)args[0]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - int rfd = TRY(current->allocate_fd(0)); - int wfd = TRY(current->allocate_fd(rfd + 1)); - - if (!MemoryManager::copy_to_user_typed(pfds, &rfd)) return err(EFAULT); - if (!MemoryManager::copy_to_user_typed(pfds + 1, &wfd)) return err(EFAULT); - SharedPtr rpipe; SharedPtr wpipe; TRY(Pipe::create(rpipe, wpipe)); - current->fd_table[rfd] = FileDescriptor { TRY(make_shared(rpipe, O_RDONLY)), 0 }; - current->fd_table[wfd] = FileDescriptor { TRY(make_shared(wpipe, O_WRONLY)), 0 }; + auto rd = FileDescriptor { TRY(make_shared(rpipe, O_RDONLY)), 0 }; + auto wd = FileDescriptor { TRY(make_shared(wpipe, O_WRONLY)), 0 }; + + int rfd = TRY(current->allocate_fd(0, rd)); + int wfd = TRY(current->allocate_fd(rfd + 1, wd)); + + if (!MemoryManager::copy_to_user_typed(pfds, &rfd)) return err(EFAULT); + if (!MemoryManager::copy_to_user_typed(pfds + 1, &wfd)) return err(EFAULT); return 0; } @@ -241,7 +243,7 @@ Result sys_umask(Registers*, SyscallArgs args) { mode_t new_umask = (mode_t)args[0]; - auto* current = 
Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -257,12 +259,12 @@ Result sys_truncate(Registers*, SyscallArgs args) auto path = TRY(MemoryManager::strdup_from_user(args[0])); size_t length = (size_t)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_wpath)); - auto inode = - TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory)); - if (!VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES); + auto inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory)); + + if (!VFS::can_write(inode, current)) return err(EACCES); TRY(inode->truncate(length)); @@ -274,7 +276,7 @@ Result sys_ftruncate(Registers*, SyscallArgs args) int fd = (int)args[0]; size_t length = (size_t)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); auto description = TRY(current->resolve_fd(fd))->description; if (!(description->flags & O_WRONLY)) return err(EBADF); @@ -291,9 +293,12 @@ Result sys_utimensat(Registers*, SyscallArgs args) const auto* times = (const struct timespec*)args[2]; int flags = (int)args[3]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_fattr)); - auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW))); + + auto* process = Process::current(); + auto credentials = process->credentials(); + auto inode = TRY(process->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW))); struct timespec ktimes[2]; ktimes[0].tv_sec = ktimes[1].tv_sec = 0; @@ -309,11 +314,10 @@ Result sys_utimensat(Registers*, SyscallArgs args) if (allow_write_access) { - if (!VFS::can_write(inode, current->auth, &current->extra_groups) && - current->auth.euid !=
inode->metadata().uid && current->auth.euid != 0) + if (!VFS::can_write(inode, current) && credentials.euid != inode->metadata().uid && credentials.euid != 0) return err(EACCES); } - else if (current->auth.euid != inode->metadata().uid && current->auth.euid != 0) + else if (credentials.euid != inode->metadata().uid && credentials.euid != 0) return err(EPERM); auto metadata = inode->metadata(); diff --git a/kernel/src/sys/getdents.cpp b/kernel/src/sys/getdents.cpp index 3e268d0e..23262723 100644 --- a/kernel/src/sys/getdents.cpp +++ b/kernel/src/sys/getdents.cpp @@ -11,7 +11,7 @@ Result sys_getdents(Registers*, SyscallArgs args) luna_dirent* ent = (luna_dirent*)args[1]; usize count = (usize)args[2]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); auto& descriptor = *TRY(current->resolve_fd(fd)); diff --git a/kernel/src/sys/id.cpp b/kernel/src/sys/id.cpp index 12dadd98..d079d9b7 100644 --- a/kernel/src/sys/id.cpp +++ b/kernel/src/sys/id.cpp @@ -6,14 +6,14 @@ Result sys_getpid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); return current->id; } Result sys_getppid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); auto* parent = current->parent; return parent ? 
parent->id : 0; @@ -21,48 +21,48 @@ Result sys_getppid(Registers*, SyscallArgs) Result sys_getuid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - return current->auth.uid; + return current->credentials().uid; } Result sys_geteuid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - return current->auth.euid; + return current->credentials().euid; } Result sys_getgid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - return current->auth.gid; + return current->credentials().gid; } Result sys_getegid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - return current->auth.egid; + return current->credentials().egid; } Result sys_setuid(Registers*, SyscallArgs args) { u32 uid = (u32)args[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_id)); - Credentials& auth = current->auth; + auto auth = current->auth.lock(); - if (auth.euid == 0) + if (auth->euid == 0) { - auth.uid = auth.euid = auth.suid = uid; + auth->uid = auth->euid = auth->suid = uid; return 0; } - if (uid != auth.uid && uid != auth.suid) return err(EPERM); - auth.euid = uid; + if (uid != auth->uid && uid != auth->suid) return err(EPERM); + auth->euid = uid; return 0; } @@ -71,12 +71,12 @@ Result sys_seteuid(Registers*, SyscallArgs args) { u32 uid = (u32)args[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_id)); - Credentials& auth = current->auth; + auto auth = current->auth.lock(); - if (auth.euid != 0 && uid != auth.uid && uid != auth.suid) return err(EPERM); - 
auth.euid = uid; + if (auth->euid != 0 && uid != auth->uid && uid != auth->suid) return err(EPERM); + auth->euid = uid; return 0; } @@ -85,18 +85,18 @@ Result sys_setgid(Registers*, SyscallArgs args) { u32 gid = (u32)args[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_id)); - Credentials& auth = current->auth; + auto auth = current->auth.lock(); - if (auth.euid == 0) + if (auth->euid == 0) { - auth.gid = auth.egid = auth.sgid = gid; + auth->gid = auth->egid = auth->sgid = gid; return 0; } - if (gid != auth.gid && gid != auth.sgid) return err(EPERM); - auth.egid = gid; + if (gid != auth->gid && gid != auth->sgid) return err(EPERM); + auth->egid = gid; return 0; } @@ -105,12 +105,12 @@ Result sys_setegid(Registers*, SyscallArgs args) { u32 gid = (u32)args[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_id)); - Credentials& auth = current->auth; + auto auth = current->auth.lock(); - if (auth.euid != 0 && gid != auth.gid && gid != auth.sgid) return err(EPERM); - auth.egid = gid; + if (auth->euid != 0 && gid != auth->gid && gid != auth->sgid) return err(EPERM); + auth->egid = gid; return 0; } @@ -120,34 +120,34 @@ Result sys_setpgid(Registers*, SyscallArgs args) pid_t pid = (pid_t)args[0]; pid_t pgid = (pid_t)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_proc)); if (pid == 0) pid = current->id; if (pgid == 0) pgid = current->id; if (pgid < 0) return err(EINVAL); - auto* thread = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); - if (thread != current && thread->parent != current) return err(ESRCH); + auto* target = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); + if (target != current && target->parent != current) return err(ESRCH); - if (thread->is_session_leader() || thread->sid != current->sid) return err(EPERM); + if 
(target->is_session_leader() || target->sid != current->sid) return err(EPERM); - if (thread->has_called_exec) return err(EPERM); + if (target->has_called_exec) return err(EACCES); if (pgid != current->id) { bool pgid_exists = false; pid_t sid; - Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Thread* t) { + Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Process* p) { pgid_exists = true; - sid = t->sid; // this should be the same for all threads in the process group + sid = p->sid; // this should be the same for all threads in the process group return false; }); if (!pgid_exists) return err(EPERM); - if (sid != thread->sid) return err(EPERM); + if (sid != target->sid) return err(EPERM); } - thread->pgid = (u64)pgid; + target->pgid = (u64)pgid; return 0; } @@ -156,20 +156,20 @@ Result sys_getpgid(Registers*, SyscallArgs args) { pid_t pid = (pid_t)args[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); if (pid == 0) pid = current->id; if (pid < 0) return err(EINVAL); - auto* thread = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); + auto* process = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); - return (u64)thread->pgid; + return (u64)process->pgid.load(); } Result sys_setsid(Registers*, SyscallArgs) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_proc)); if (current->pgid == current->id) return err(EPERM); @@ -177,21 +177,21 @@ Result sys_setsid(Registers*, SyscallArgs) current->sid = current->pgid = current->id; current->controlling_terminal = {}; - return current->sid; + return current->sid.load(); } Result sys_getsid(Registers*, SyscallArgs args) { pid_t pid = (pid_t)args[0]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); if (pid == 0) pid = current->id; - auto* thread = 
TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); + auto* p = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); - return thread->sid; + return p->sid.load(); } Result sys_fchmodat(Registers*, SyscallArgs args) @@ -201,12 +201,13 @@ Result sys_fchmodat(Registers*, SyscallArgs args) mode_t mode = (mode_t)args[2]; int flags = (int)args[3]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_wpath)); + auto credentials = current->credentials(); auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW))); - if (current->auth.euid != 0 && current->auth.euid != inode->metadata().uid) return err(EPERM); + if (credentials.euid != 0 && credentials.euid != inode->metadata().uid) return err(EPERM); auto metadata = inode->metadata(); metadata.mode = mode; @@ -223,12 +224,13 @@ Result sys_fchownat(Registers*, SyscallArgs args) gid_t gid = (u32)args[3]; int flags = (int)args[4]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_chown)); + auto credentials = current->credentials(); auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW))); - if (current->auth.euid != 0) return err(EPERM); + if (credentials.euid != 0) return err(EPERM); auto metadata = inode->metadata(); if (uid != (uid_t)-1) metadata.uid = uid; @@ -243,20 +245,20 @@ Result sys_getgroups(Registers*, SyscallArgs args) int ngroups = (int)args[0]; gid_t* grouplist = (gid_t*)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - if (!ngroups) return current->extra_groups.size(); + auto groups = current->extra_groups.lock(); + + if (!ngroups) return groups->size(); if (ngroups < 0) return err(EINVAL); - if (static_cast(ngroups) < current->extra_groups.size()) return err(EINVAL); + if 
(static_cast(ngroups) < groups->size()) return err(EINVAL); - if (!MemoryManager::copy_to_user(grouplist, current->extra_groups.data(), - current->extra_groups.size() * sizeof(gid_t))) - return err(EFAULT); + if (!MemoryManager::copy_to_user(grouplist, groups->data(), groups->size() * sizeof(gid_t))) return err(EFAULT); - return current->extra_groups.size(); + return groups->size(); } Result sys_setgroups(Registers*, SyscallArgs args) @@ -264,26 +266,27 @@ Result sys_setgroups(Registers*, SyscallArgs args) int ngroups = (int)args[0]; const gid_t* grouplist = (const gid_t*)args[1]; - auto* current = Scheduler::current(); - + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_id)); - Credentials& auth = current->auth; - if (auth.euid != 0) return err(EPERM); + auto credentials = current->credentials(); + if (credentials.euid != 0) return err(EPERM); + + auto groups = current->extra_groups.lock(); if (!ngroups) { - current->extra_groups.clear(); + groups->clear(); return 0; } if (ngroups < 0 || ngroups > 32) return err(EINVAL); - TRY(current->extra_groups.try_reserve(ngroups)); + TRY(groups->try_reserve(ngroups)); - current->extra_groups.mutate([&](gid_t* list, usize) -> usize { + groups->mutate([&](gid_t* list, usize) -> usize { if (MemoryManager::copy_from_user(grouplist, list, ngroups * sizeof(gid_t))) return ngroups; - return current->extra_groups.size(); + return groups->size(); }); return 0; diff --git a/kernel/src/sys/link.cpp b/kernel/src/sys/link.cpp index 909aa460..0be1bff0 100644 --- a/kernel/src/sys/link.cpp +++ b/kernel/src/sys/link.cpp @@ -12,7 +12,7 @@ Result sys_unlinkat(Registers*, SyscallArgs args) auto path = TRY(MemoryManager::strdup_from_user(args[1])); int flags = (int)args[2]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_cpath)); auto dirname = TRY(PathParser::dirname(path.view())); @@ -23,13 +23,14 @@ Result sys_unlinkat(Registers*, SyscallArgs 
args) kinfoln("unlinkat: remove %s from directory %s, dirfd is %d", basename.chars(), dirname.chars(), dirfd); auto inode = TRY(current->resolve_atfile(dirfd, dirname, false, false)); - if (!VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES); + auto auth = current->credentials(); + if (!VFS::can_write(inode, current)) return err(EACCES); auto child = TRY(inode->find(basename.chars())); if (flags == AT_REMOVEDIR && child->type() != VFS::InodeType::Directory) return err(ENOTDIR); - if (current->auth.euid != 0 && VFS::is_sticky(inode) && current->auth.euid != inode->metadata().uid && - current->auth.euid != child->metadata().uid) + if (auth.euid != 0 && VFS::is_sticky(inode) && auth.euid != inode->metadata().uid && + auth.euid != child->metadata().uid) return err(EACCES); TRY(inode->remove_entry(basename.chars())); @@ -45,14 +46,14 @@ Result sys_symlinkat(Registers*, SyscallArgs args) if (target.is_empty()) return err(ENOENT); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_cpath)); auto parent = TRY(PathParser::dirname(linkpath.view())); auto parent_inode = TRY(current->resolve_atfile(dirfd, parent, false, true)); - if (!VFS::can_write(parent_inode, current->auth, &current->extra_groups)) return err(EACCES); + if (!VFS::can_write(parent_inode, current)) return err(EACCES); auto child_name = TRY(PathParser::basename(linkpath.view())); @@ -60,8 +61,9 @@ Result sys_symlinkat(Registers*, SyscallArgs args) auto inode = TRY(parent_inode->fs()->create_symlink_inode(target.view())); auto metadata = inode->metadata(); - metadata.uid = current->auth.euid; - metadata.gid = current->auth.egid; + auto auth = current->credentials(); + metadata.uid = auth.euid; + metadata.gid = auth.egid; TRY(inode->set_metadata(metadata)); TRY(parent_inode->add_entry(inode, child_name.chars())); @@ -75,7 +77,7 @@ Result sys_readlinkat(Registers*, SyscallArgs args) char* buf = (char*)args[2]; usize bufsiz =
(usize)args[3]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_rpath)); auto symlink = TRY(current->resolve_atfile(dirfd, path, true, false)); @@ -101,7 +103,7 @@ Result sys_linkat(Registers*, SyscallArgs args) auto newpath = TRY(MemoryManager::strdup_from_user(args[3])); int flags = (int)args[4]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_cpath)); auto parent = TRY(PathParser::dirname(newpath.view())); @@ -115,7 +117,7 @@ Result sys_linkat(Registers*, SyscallArgs args) if (target->fs() != parent_inode->fs()) return err(EXDEV); - if (!VFS::can_write(parent_inode, current->auth, &current->extra_groups)) return err(EACCES); + if (!VFS::can_write(parent_inode, current)) return err(EACCES); auto child_name = TRY(PathParser::basename(newpath.view())); diff --git a/kernel/src/sys/memstat.cpp b/kernel/src/sys/memstat.cpp index 8d42a781..85d682db 100644 --- a/kernel/src/sys/memstat.cpp +++ b/kernel/src/sys/memstat.cpp @@ -6,7 +6,7 @@ Result sys_memstat(Registers*, SyscallArgs args) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); struct membuf buf; diff --git a/kernel/src/sys/mkdir.cpp b/kernel/src/sys/mkdir.cpp index a07c3e17..5e8d731c 100644 --- a/kernel/src/sys/mkdir.cpp +++ b/kernel/src/sys/mkdir.cpp @@ -10,14 +10,14 @@ Result sys_mkdir(Registers*, SyscallArgs args) auto path = TRY(MemoryManager::strdup_from_user(args[0])); mode_t mode = (mode_t)args[1]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); + auto credentials = current->credentials(); TRY(check_pledge(current, Promise::p_cpath)); - auto inode = TRY(VFS::create_directory(path.chars(), mode & ~current->umask, current->auth, &current->extra_groups, - current->current_directory)); + auto inode = TRY(VFS::create_directory(path.chars(), mode & ~current->umask, current,
current->current_directory)); auto metadata = inode->metadata(); - metadata.uid = current->auth.euid; - metadata.gid = current->auth.egid; + metadata.uid = credentials.euid; + metadata.gid = credentials.egid; TRY(inode->set_metadata(metadata)); return 0; diff --git a/kernel/src/sys/mmap.cpp b/kernel/src/sys/mmap.cpp index 08648436..87928933 100644 --- a/kernel/src/sys/mmap.cpp +++ b/kernel/src/sys/mmap.cpp @@ -20,7 +20,7 @@ Result sys_mmap(Registers*, SyscallArgs args) if (params.flags < 0) return err(EINVAL); - Thread* current = Scheduler::current(); + Process* current = Process::current(); if (params.prot & PROT_EXEC) TRY(check_pledge(current, Promise::p_prot_exec)); TRY(check_pledge(current, Promise::p_stdio)); @@ -55,15 +55,15 @@ Result sys_mmap(Registers*, SyscallArgs args) shmem->prot |= params.prot; } + auto space = current->address_space.lock(); + u64 address; - if (!params.addr) - address = TRY(current->address_space->alloc_region(pages, params.prot, params.flags, params.offset, shmid)); + if (!params.addr) address = TRY((*space)->alloc_region(pages, params.prot, params.flags, params.offset, shmid)); else { // FIXME: We should be more flexible if MAP_FIXED was not specified. 
address = align_down((u64)params.addr); - if (!TRY(current->address_space->test_and_alloc_region(address, pages, params.prot, params.flags, params.offset, - shmid))) + if (!TRY((*space)->test_and_alloc_region(address, pages, params.prot, params.flags, params.offset, shmid))) return err(ENOMEM); } @@ -94,10 +94,12 @@ Result sys_munmap(Registers*, SyscallArgs args) if (size == 0) return err(EINVAL); if (!is_aligned(address)) return err(EINVAL); - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - bool ok = TRY(current->address_space->free_region(address, ceil_div(size, ARCH_PAGE_SIZE))); + auto space = current->address_space.lock(); + + bool ok = TRY((*space)->free_region(address, ceil_div(size, ARCH_PAGE_SIZE))); // POSIX says munmap should silently do nothing if the memory was not already mapped. if (!ok) return 0; @@ -119,10 +121,12 @@ Result sys_msync(Registers*, SyscallArgs args) if (!size) return 0; if (!is_aligned(address)) return err(EINVAL); - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - TRY(current->address_space->sync_regions(address, ceil_div(size, ARCH_PAGE_SIZE))); + auto space = current->address_space.lock(); + + TRY((*space)->sync_regions(address, ceil_div(size, ARCH_PAGE_SIZE))); return { 0 }; } diff --git a/kernel/src/sys/mount.cpp b/kernel/src/sys/mount.cpp index fe14ca74..5c6922e4 100644 --- a/kernel/src/sys/mount.cpp +++ b/kernel/src/sys/mount.cpp @@ -14,13 +14,12 @@ Result sys_mount(Registers*, SyscallArgs args) auto fstype = TRY(MemoryManager::strdup_from_user(args[1])); auto source = TRY(MemoryManager::strdup_from_user(args[2])); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_mount)); - if (current->auth.euid != 0) return err(EPERM); + if (current->credentials().euid != 0) return err(EPERM); auto get_source = 
[current, &source]() -> Result> { - auto inode = - TRY(VFS::resolve_path(source.chars(), current->auth, &current->extra_groups, current->current_directory)); + auto inode = TRY(VFS::resolve_path(source.chars(), current, current->current_directory)); if (inode->type() != VFS::InodeType::BlockDevice) return err(ENOTBLK); dev_t device_id = inode->metadata().devid; return TRY(DeviceRegistry::fetch_special_device(luna_dev_major(device_id), luna_dev_minor(device_id))); @@ -41,7 +40,7 @@ Result sys_mount(Registers*, SyscallArgs args) fs = TRY(factory(device)); } - TRY(VFS::mount(target.chars(), fs, current->auth, &current->extra_groups, current->current_directory)); + TRY(VFS::mount(target.chars(), fs, current, current->current_directory)); return 0; } @@ -50,11 +49,11 @@ Result sys_umount(Registers*, SyscallArgs args) { auto target = TRY(MemoryManager::strdup_from_user(args[0])); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_mount)); - if (current->auth.euid != 0) return err(EPERM); + if (current->credentials().euid != 0) return err(EPERM); - TRY(VFS::umount(target.chars(), current->auth, &current->extra_groups, current->current_directory)); + TRY(VFS::umount(target.chars(), current, current->current_directory)); return 0; } @@ -64,9 +63,9 @@ Result sys_pivot_root(Registers*, SyscallArgs args) auto new_root = TRY(MemoryManager::strdup_from_user(args[0])); auto put_old = TRY(MemoryManager::strdup_from_user(args[1])); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_mount)); - if (current->auth.euid != 0) return err(EPERM); + if (current->credentials().euid != 0) return err(EPERM); TRY(VFS::pivot_root(new_root.chars(), put_old.chars(), current->current_directory)); diff --git a/kernel/src/sys/open.cpp b/kernel/src/sys/open.cpp index c4ec95d0..4dd8b2f7 100644 --- a/kernel/src/sys/open.cpp +++ b/kernel/src/sys/open.cpp @@ -17,7 +17,7 @@ Result
sys_openat(Registers*, SyscallArgs args) int flags = (int)args[2]; mode_t mode = (mode_t)args[3]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); SharedPtr inode; @@ -44,12 +44,12 @@ Result sys_openat(Registers*, SyscallArgs args) { if (error == ENOENT && (flags & O_CREAT) && !path.is_empty()) { - inode = TRY(VFS::create_file(path.chars(), mode & ~current->umask, current->auth, &current->extra_groups, - parent_inode)); + auto auth = current->credentials(); + inode = TRY(VFS::create_file(path.chars(), mode & ~current->umask, current, parent_inode)); // FIXME: Pass these in create_file(). auto metadata = inode->metadata(); - metadata.uid = current->auth.euid; - metadata.gid = current->auth.egid; + metadata.uid = auth.euid; + metadata.gid = auth.egid; TRY(inode->set_metadata(metadata)); } else @@ -59,8 +59,8 @@ Result sys_openat(Registers*, SyscallArgs args) return err(EEXIST); else { - if ((flags & O_RDONLY) && !VFS::can_read(inode, current->auth, &current->extra_groups)) return err(EACCES); - if ((flags & O_WRONLY) && !VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES); + if ((flags & O_RDONLY) && !VFS::can_read(inode, current)) return err(EACCES); + if ((flags & O_WRONLY) && !VFS::can_write(inode, current)) return err(EACCES); } inode = TRY(inode->open()); @@ -72,11 +72,12 @@ Result sys_openat(Registers*, SyscallArgs args) if (flags & O_TMPFILE) { + auto auth = current->credentials(); if (inode->type() != VFS::InodeType::Directory) return err(EINVAL); inode = TRY(inode->fs()->create_file_inode(mode & current->umask)); auto metadata = inode->metadata(); - metadata.uid = current->auth.euid; - metadata.gid = current->auth.egid; + metadata.uid = auth.euid; + metadata.gid = auth.egid; TRY(inode->set_metadata(metadata)); } @@ -91,15 +92,15 @@ Result sys_openat(Registers*, SyscallArgs args) if ((flags & O_WRONLY) && (flags & O_TRUNC)) inode->truncate(0); - int fd = TRY(current->allocate_fd(0)); + auto descriptor = + 
FileDescriptor { TRY(make_shared(inode, flags & FLAGS_TO_KEEP)), 0, flags & O_CLOEXEC }; + + int fd = TRY(current->allocate_fd(0, descriptor)); #ifdef OPEN_DEBUG kdbgln("openat: opening file %s from dirfd %d, flags %d, mode %#o = fd %d", path.chars(), dirfd, flags, mode, fd); #endif - current->fd_table[fd] = - FileDescriptor { TRY(make_shared(inode, flags & FLAGS_TO_KEEP)), 0, flags & O_CLOEXEC }; - return (u64)fd; } @@ -108,10 +109,11 @@ Result sys_close(Registers*, SyscallArgs args) int fd = (int)args[0]; if (fd < 0 || fd >= FD_MAX) return err(EBADF); - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); - Option& descriptor = current->fd_table[fd]; + auto table = current->fd_table.lock(); + Option& descriptor = (*table)[fd]; if (!descriptor.has_value()) return err(EBADF); diff --git a/kernel/src/sys/pledge.cpp b/kernel/src/sys/pledge.cpp index cec37ef7..16dd103a 100644 --- a/kernel/src/sys/pledge.cpp +++ b/kernel/src/sys/pledge.cpp @@ -7,7 +7,7 @@ Result sys_pledge(Registers*, SyscallArgs args) int promises = TRY(parse_promises(args[0])); int execpromises = TRY(parse_promises(args[1])); - auto* current = Scheduler::current(); + auto* current = Process::current(); if (promises >= 0) { diff --git a/kernel/src/sys/poll.cpp b/kernel/src/sys/poll.cpp index 4f09241c..0138238f 100644 --- a/kernel/src/sys/poll.cpp +++ b/kernel/src/sys/poll.cpp @@ -18,7 +18,8 @@ Result sys_poll(Registers*, SyscallArgs args) if (!MemoryManager::copy_from_user(fds, kfds, nfds * sizeof(pollfd))) return err(EFAULT); - auto* current = Scheduler::current(); + auto* current = Process::current(); + auto* thread = Scheduler::current(); TRY(check_pledge(current, Promise::p_stdio)); Vector> inodes; @@ -78,8 +79,8 @@ Result sys_poll(Registers*, SyscallArgs args) if (!fds_with_events && (timeout > 0 || infinite)) { kernel_sleep(10); - timeout -= (10 - (int)current->sleep_ticks_left); - if (current->interrupted) + timeout -= 
(10 - (int)thread->sleep_ticks_left); + if (thread->interrupted) { guard.deactivate(); free_impl(kfds); diff --git a/kernel/src/sys/pstat.cpp b/kernel/src/sys/pstat.cpp index a402eb14..51746f4b 100644 --- a/kernel/src/sys/pstat.cpp +++ b/kernel/src/sys/pstat.cpp @@ -15,28 +15,29 @@ Result sys_pstat(Registers*, SyscallArgs args) pid_t pid = (pid_t)args[0]; struct process* ps = (struct process*)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_proc)); // If pid == -1, return the PID of the last spawned thread. - if (pid == -1) return g_threads.expect_last()->id; + if (pid == -1) return g_processes.expect_last()->id; - auto* thread = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); + auto* target = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); struct process proc; - proc.ps_pid = thread->id; - proc.ps_ppid = thread->parent ? thread->parent->id : 0; - proc.ps_uid = thread->auth.uid; - proc.ps_gid = thread->auth.gid; - proc.ps_euid = thread->auth.euid; - proc.ps_egid = thread->auth.egid; - proc.ps_state = (int)thread->state; - proc.ps_flags = thread->is_kernel ? PS_FLAG_KRNL : 0; - set_timespec(proc.ps_time, thread->user_ticks_self + thread->kernel_ticks_self); - set_timespec(proc.ps_ktime, thread->kernel_ticks_self); - set_timespec(proc.ps_utime, thread->kernel_ticks_children); - strlcpy(proc.ps_name, thread->cmdline.chars(), sizeof(proc.ps_name)); - strlcpy(proc.ps_cwd, thread->current_directory_path.is_empty() ? "/" : thread->current_directory_path.chars(), + proc.ps_pid = target->id; + proc.ps_ppid = target->parent ? 
target->parent->id : 0; + auto auth = target->credentials(); + proc.ps_uid = auth.uid; + proc.ps_gid = auth.gid; + proc.ps_euid = auth.euid; + proc.ps_egid = auth.egid; + proc.ps_state = 0; // FIXME: this is thread-specific now + proc.ps_flags = 0; // FIXME: add flags + set_timespec(proc.ps_time, target->user_ticks_self + target->kernel_ticks_self); + set_timespec(proc.ps_ktime, target->kernel_ticks_self); + set_timespec(proc.ps_utime, target->kernel_ticks_children); + strlcpy(proc.ps_name, target->cmdline.chars(), sizeof(proc.ps_name)); + strlcpy(proc.ps_cwd, target->current_directory_path.is_empty() ? "/" : target->current_directory_path.chars(), sizeof(proc.ps_cwd)); if (!MemoryManager::copy_to_user_typed(ps, &proc)) return err(EFAULT); diff --git a/kernel/src/sys/resource.cpp b/kernel/src/sys/resource.cpp index d4a38937..d4c2a535 100644 --- a/kernel/src/sys/resource.cpp +++ b/kernel/src/sys/resource.cpp @@ -16,7 +16,7 @@ Result sys_getrusage(Registers*, SyscallArgs args) int who = (int)args[0]; struct rusage* ru = (struct rusage*)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); struct rusage kru; switch (who) diff --git a/kernel/src/sys/setitimer.cpp b/kernel/src/sys/setitimer.cpp index d3b03fa2..b2c57198 100644 --- a/kernel/src/sys/setitimer.cpp +++ b/kernel/src/sys/setitimer.cpp @@ -16,7 +16,7 @@ Result sys_setitimer(Registers*, SyscallArgs args) const struct itimerval* new_timer = (const struct itimerval*)args[1]; struct itimerval* old_timer = (struct itimerval*)args[2]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -66,7 +66,7 @@ Result sys_setitimer(Registers*, SyscallArgs args) if (!MemoryManager::copy_from_user_typed(new_timer, &itimer)) return err(EFAULT); timer->signo = SIGALRM; // FIXME: Also use SIGVTALRM or SIGPROF for other timer types. 
- timer->thread = current; + timer->process = current; if (itimer.it_interval.tv_sec != 0 || itimer.it_interval.tv_usec != 0) { @@ -93,7 +93,7 @@ Result sys_timer_create(Registers*, SyscallArgs args) struct sigevent* sevp = (struct sigevent*)args[1]; timer_t* timerid = (timer_t*)args[2]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -137,7 +137,7 @@ Result sys_timer_settime(Registers*, SyscallArgs args) if (timerid < 0 || timerid >= MAX_POSIX_TIMERS) return err(EINVAL); if (flags > 0) return err(ENOTSUP); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -169,7 +169,7 @@ Result sys_timer_settime(Registers*, SyscallArgs args) Clock* clock = timer->designated_clock; check(clock); - timer->thread = current; + timer->process = current; if (itimer.it_interval.tv_sec != 0 || itimer.it_interval.tv_nsec != 0) { @@ -195,7 +195,7 @@ Result sys_timer_gettime(Registers*, SyscallArgs args) struct itimerspec* value = (struct itimerspec*)args[1]; if (timerid < 0 || timerid >= MAX_POSIX_TIMERS) return err(EINVAL); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); @@ -224,7 +224,7 @@ Result sys_timer_delete(Registers*, SyscallArgs args) timer_t timerid = (timer_t)args[0]; if (timerid < 0 || timerid >= MAX_POSIX_TIMERS) return err(EINVAL); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_stdio)); diff --git a/kernel/src/sys/signal.cpp b/kernel/src/sys/signal.cpp index c1034411..b74a1a40 100644 --- a/kernel/src/sys/signal.cpp +++ b/kernel/src/sys/signal.cpp @@ -19,7 +19,7 @@ Result sys_sigreturn(Registers* regs, SyscallArgs) Result sys_sigaction(Registers*, SyscallArgs args) { auto* current = Scheduler::current(); - TRY(check_pledge(current, Promise::p_stdio)); + 
TRY(check_pledge(current->process, Promise::p_stdio)); int signo = (int)args[0]; const struct sigaction* act = (const struct sigaction*)args[1]; @@ -48,15 +48,16 @@ Result sys_sigaction(Registers*, SyscallArgs args) Result sys_kill(Registers*, SyscallArgs args) { - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_proc)); pid_t pid = (pid_t)args[0]; int signo = (int)args[1]; - auto send_signal = [&](Thread* target) -> Result { - if (current->auth.euid != 0 && current->auth.euid != target->auth.euid && - current->auth.egid != target->auth.egid) + auto send_signal = [&](Process* target) -> Result { + auto this_auth = current->credentials(); + auto other_auth = target->credentials(); + if (this_auth.euid != 0 && this_auth.euid != other_auth.euid && this_auth.egid != other_auth.egid) return err(EPERM); if (target->is_kernel) return {}; if (signo == 0) return {}; @@ -68,14 +69,14 @@ Result sys_kill(Registers*, SyscallArgs args) if (pid > 0) { - auto* target = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); + auto* target = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); TRY(send_signal(target)); } else if (pid == 0) { int errno = -1; bool pgid_exists = false; - Scheduler::for_each_in_process_group(current->pgid, [&](Thread* target) { + Scheduler::for_each_in_process_group(current->pgid, [&](Process* target) { pgid_exists = true; auto rc = send_signal(target); if (rc.has_error()) @@ -90,17 +91,17 @@ Result sys_kill(Registers*, SyscallArgs args) } else if (pid == -1) { - for (auto* thread : g_threads) + for (auto* process : g_processes) { // We ignore permission errors here. 
- if (thread != current && thread->id != 1) send_signal(thread); + if (process != current && process->id != 1) send_signal(process); } } else if (pid < -1) { int errno = -1; bool pgid_exists = false; - Scheduler::for_each_in_process_group(-pid, [&](Thread* target) { + Scheduler::for_each_in_process_group(-pid, [&](Process* target) { pgid_exists = true; auto rc = send_signal(target); if (rc.has_error()) @@ -120,7 +121,7 @@ Result sys_kill(Registers*, SyscallArgs args) Result sys_sigprocmask(Registers*, SyscallArgs args) { auto* current = Scheduler::current(); - TRY(check_pledge(current, Promise::p_stdio)); + TRY(check_pledge(current->process, Promise::p_stdio)); int how = (int)args[0]; const sigset_t* set = (const sigset_t*)args[1]; diff --git a/kernel/src/sys/socket.cpp b/kernel/src/sys/socket.cpp index 908575d8..23fb7a5a 100644 --- a/kernel/src/sys/socket.cpp +++ b/kernel/src/sys/socket.cpp @@ -15,14 +15,13 @@ Result sys_socket(Registers*, SyscallArgs args) if (type != SOCK_STREAM) return err(EPROTOTYPE); if (domain != AF_UNIX) return err(EAFNOSUPPORT); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_unix)); auto socket = TRY(make_shared()); - int fd = TRY(current->allocate_fd(0)); - - current->fd_table[fd] = FileDescriptor { TRY(make_shared(socket, O_RDWR)), 0 }; + auto descriptor = FileDescriptor { TRY(make_shared(socket, O_RDWR)), 0 }; + int fd = TRY(current->allocate_fd(0, descriptor)); return fd; } @@ -37,7 +36,7 @@ Result sys_bind(Registers*, SyscallArgs args) if ((usize)addrlen > sizeof(storage)) return err(EINVAL); if (!MemoryManager::copy_from_user(addr, &storage, addrlen)) return err(EFAULT); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_unix)); auto inode = TRY(current->resolve_fd(sockfd))->inode(); @@ -61,7 +60,7 @@ Result sys_connect(Registers* regs, SyscallArgs args) if ((usize)addrlen > sizeof(storage)) return 
err(EINVAL); if (!MemoryManager::copy_from_user(addr, &storage, addrlen)) return err(EFAULT); - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_unix)); auto description = TRY(current->resolve_fd(sockfd))->description; @@ -80,7 +79,7 @@ Result sys_listen(Registers*, SyscallArgs args) int sockfd = (int)args[0]; int backlog = (int)args[1]; - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_unix)); auto inode = TRY(current->resolve_fd(sockfd))->inode(); @@ -108,7 +107,7 @@ Result sys_accept(Registers* regs, SyscallArgs args) if (!MemoryManager::copy_from_user_typed(addrlen, &len)) return err(EFAULT); } - auto* current = Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_unix)); auto description = TRY(current->resolve_fd(sockfd))->description; @@ -121,8 +120,8 @@ Result sys_accept(Registers* regs, SyscallArgs args) socklen_t client_len; auto new_description = TRY(socket->accept(regs, description->flags, &client, &client_len)); - int fd = TRY(current->allocate_fd(0)); - current->fd_table[fd] = FileDescriptor { new_description, 0 }; + auto descriptor = FileDescriptor { new_description, 0 }; + int fd = TRY(current->allocate_fd(0, descriptor)); if (client_len < len) len = client_len; if (addr) diff --git a/kernel/src/sys/stat.cpp b/kernel/src/sys/stat.cpp index 1573d7b5..59be0ed1 100644 --- a/kernel/src/sys/stat.cpp +++ b/kernel/src/sys/stat.cpp @@ -33,7 +33,7 @@ Result sys_fstatat(Registers*, SyscallArgs args) stat* st = (stat*)args[2]; int flags = (int)args[3]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_rpath)); auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW))); @@ -68,22 +68,24 @@ Result sys_faccessat(Registers*, SyscallArgs args) Credentials creds; - auto* current 
= Scheduler::current(); + auto* current = Process::current(); TRY(check_pledge(current, Promise::p_rpath)); + auto auth = current->credentials(); - if (flags & AT_EACCESS) creds = current->auth; + if (flags & AT_EACCESS) creds = auth; else { - auto auth = current->auth; creds.euid = auth.uid; creds.egid = auth.gid; } auto inode = TRY(current->resolve_atfile(dirfd, path, false, true)); - if ((amode & R_OK) && !VFS::can_read(inode, creds, &current->extra_groups)) return err(EACCES); - if ((amode & W_OK) && !VFS::can_write(inode, creds, &current->extra_groups)) return err(EACCES); - if ((amode & X_OK) && !VFS::can_execute(inode, creds, &current->extra_groups)) return err(EACCES); + auto groups = current->extra_groups.lock(); + + if ((amode & R_OK) && !VFS::can_read(inode, creds, &groups.ref())) return err(EACCES); + if ((amode & W_OK) && !VFS::can_write(inode, creds, &groups.ref())) return err(EACCES); + if ((amode & X_OK) && !VFS::can_execute(inode, creds, &groups.ref())) return err(EACCES); // Either all checks succeeded, or amode == F_OK and the file exists, since resolve_atfile() would have failed // otherwise. 
diff --git a/kernel/src/sys/uname.cpp b/kernel/src/sys/uname.cpp index 28e4785c..67796bcc 100644 --- a/kernel/src/sys/uname.cpp +++ b/kernel/src/sys/uname.cpp @@ -40,9 +40,9 @@ Result sys_sethostname(Registers*, SyscallArgs args) const char* buf = (const char*)args[0]; usize length = (usize)args[1]; - Thread* current = Scheduler::current(); + Process* current = Process::current(); TRY(check_pledge(current, Promise::p_host)); - if (current->auth.euid != 0) return err(EPERM); + if (current->credentials().euid != 0) return err(EPERM); if (length >= _UTSNAME_LENGTH) return err(EINVAL); diff --git a/kernel/src/sys/usleep.cpp b/kernel/src/sys/usleep.cpp index 8a16c50d..7394f023 100644 --- a/kernel/src/sys/usleep.cpp +++ b/kernel/src/sys/usleep.cpp @@ -8,12 +8,12 @@ Result sys_usleep(Registers*, SyscallArgs args) useconds_t us = (useconds_t)args[0]; auto* current = Scheduler::current(); - TRY(check_pledge(current, Promise::p_stdio)); + TRY(check_pledge(current->process, Promise::p_stdio)); // FIXME: Allow usleep() to use a more precise resolution. 
if (us < 1000) return 0; kernel_sleep(us / 1000); - return current->sleep_ticks_left; + return current->sleep_ticks_left.load(); } diff --git a/kernel/src/sys/waitpid.cpp b/kernel/src/sys/waitpid.cpp index a385e166..4241a131 100644 --- a/kernel/src/sys/waitpid.cpp +++ b/kernel/src/sys/waitpid.cpp @@ -12,20 +12,20 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) int options = (int)args[2]; Thread* current = Scheduler::current(); - TRY(check_pledge(current, Promise::p_stdio)); + TRY(check_pledge(current->process, Promise::p_stdio)); - Thread* thread; + Process* target; if (pid > 0) { - thread = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); + target = TRY(Result::from_option(Scheduler::find_by_pid(pid), ESRCH)); - if (thread->parent && thread->parent != current) return err(ECHILD); + if (target->parent && target->parent != current->process) return err(ECHILD); if (options & WNOHANG) return err(EAGAIN); wait_for_child: - if (thread->state != ThreadState::Exited) kernel_wait(pid); + if (target->alive()) kernel_wait(pid); if (current->interrupted) { kdbgln("signal: waitpid interrupted by signal"); @@ -37,13 +37,13 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) return err(EINTR); } - check(thread->state == ThreadState::Exited); + check(!target->alive()); } else if (pid == -1) { - if (!Scheduler::has_children(current)) return err(ECHILD); + if (!Scheduler::has_children(current->process)) return err(ECHILD); - auto child = Scheduler::find_exited_child(current); + auto child = Scheduler::find_exited_child(current->process); if (!child.has_value()) { if (options & WNOHANG) return err(EAGAIN); @@ -61,27 +61,26 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) return err(EINTR); } - check(current->child_being_waited_for.value_or(-1) != -1); + check(current->child_being_waited_for != -1); - thread = TRY(Result::from_option(Scheduler::find_by_pid(*current->child_being_waited_for), ESRCH)); - check(thread->state == ThreadState::Exited); + 
target = TRY(Result::from_option(Scheduler::find_by_pid(current->child_being_waited_for), ESRCH)); + check(!target->alive()); } else - thread = child.value(); + target = child.value(); } else // FIXME: Now that we have process groups, implement the cases where pid = 0 and pid < -1. return err(ENOTSUP); - current->child_being_waited_for = {}; + current->child_being_waited_for = -2; - int status = (int)thread->status; - u64 id = thread->id; + int status = (int)target->status; + u64 id = target->id; - current->user_ticks_children += thread->user_ticks_self + thread->user_ticks_children; - current->kernel_ticks_children += thread->kernel_ticks_self + thread->kernel_ticks_children; + current->process->user_ticks_children += target->user_ticks_self + target->user_ticks_children; + current->process->kernel_ticks_children += target->kernel_ticks_self + target->kernel_ticks_children; - thread->state = ThreadState::Dying; - Scheduler::signal_reap_thread(); + Scheduler::reap_process(target); if (status_ptr) if (!MemoryManager::copy_to_user_typed(status_ptr, &status)) return err(EFAULT); diff --git a/kernel/src/thread/Clock.cpp b/kernel/src/thread/Clock.cpp index 2cf28b3b..88a1d1ae 100644 --- a/kernel/src/thread/Clock.cpp +++ b/kernel/src/thread/Clock.cpp @@ -124,7 +124,7 @@ void Clock::tick() { this->m_timer_queue.remove(t); t->active_clock = nullptr; - t->thread->send_signal(t->signo); + t->process->send_signal(t->signo); if (t->restart) timers_to_be_restarted.append(t); return true; } diff --git a/kernel/src/thread/Scheduler.cpp b/kernel/src/thread/Scheduler.cpp index 4b869a01..263cbe11 100644 --- a/kernel/src/thread/Scheduler.cpp +++ b/kernel/src/thread/Scheduler.cpp @@ -11,8 +11,9 @@ #include static Thread g_idle; +static Process g_idle_process; static Thread* g_current = nullptr; -static Thread* g_init = nullptr; +static Process* g_init = nullptr; static Thread* g_reap = nullptr; static Thread* g_oom = nullptr; @@ -22,15 +23,20 @@ namespace Scheduler { void init() { - 
g_idle.id = 0; + g_idle.tid = 0; g_idle.init_regs_kernel(); g_idle.set_ip((u64)CPU::idle_loop); g_idle.state = ThreadState::Idle; g_idle.is_kernel = true; - g_idle.parent = nullptr; + g_idle.process = &g_idle_process; g_idle.cmdline = "[idle]"; g_idle.active_directory = nullptr; + g_idle_process.id = 0; + g_idle_process.parent = nullptr; + g_idle_process.thread_count = 1; + g_idle_process.is_kernel = true; + g_idle.ticks_left = 1; // Map some stack for the idle task @@ -42,7 +48,7 @@ namespace Scheduler g_idle.stack = idle_stack; - kinfoln("Created idle thread: id %d with ip %#lx and sp %#lx", g_idle.id, g_idle.ip(), g_idle.sp()); + kinfoln("Created idle thread: id %d with ip %#lx and sp %#lx", g_idle_process.id, g_idle.ip(), g_idle.sp()); g_current = &g_idle; } @@ -57,7 +63,7 @@ namespace Scheduler return &g_idle; } - Thread* init_thread() + Process* init_process() { return g_init; } @@ -88,30 +94,37 @@ namespace Scheduler // If anything fails, make sure to clean up. auto guard = make_scope_guard([&] { delete thread; }); + Process* process = TRY(make()); + + auto guard2 = make_scope_guard([&] { delete process; }); + const u64 thread_stack_vm = TRY(MemoryManager::alloc_for_kernel(4, MMU::NoExecute | MMU::ReadWrite)); guard.deactivate(); + guard2.deactivate(); const Stack thread_stack { thread_stack_vm, ARCH_PAGE_SIZE * 4 }; thread->set_sp(thread_stack.top()); - thread->stack = thread_stack; - thread->cmdline = name; - thread->is_kernel = true; thread->active_directory = MMU::kernel_page_directory(); - thread->virtual_clock.set_resolution(1'000'000); - thread->profiling_clock.set_resolution(1'000'000); + thread->process = process; - thread->auth = Credentials { .uid = 0, .euid = 0, .suid = 0, .gid = 0, .egid = 0, .sgid = 0 }; + process->id = thread->tid; + process->parent = nullptr; + process->thread_count = 1; + process->virtual_clock.set_resolution(1'000'000); + process->profiling_clock.set_resolution(1'000'000); + process->is_kernel = true; 
g_threads.append(thread); + g_processes.append(process); thread->state = ThreadState::Runnable; - kinfoln("Created kernel thread: id %d with ip %#lx and sp %#lx", thread->id, thread->ip(), thread->sp()); + kinfoln("Created kernel thread: id %d with ip %#lx and sp %#lx", process->id, thread->ip(), thread->sp()); return thread; } @@ -149,14 +162,16 @@ namespace Scheduler check(!g_init); Thread* const thread = TRY(make()); + Process* const process = TRY(make()); thread->state = ThreadState::None; - thread->is_kernel = false; - thread->id = 1; - thread->pgid = 1; + thread->tid = 1; thread->cmdline = name; - thread->auth = Credentials { .uid = 0, .euid = 0, .suid = 0, .gid = 0, .egid = 0, .sgid = 0 }; - thread->extra_groups = {}; + thread->process = process; + + process->id = 1; + process->pgid = 1; + process->thread_count = 1; Vector args; auto name_string = TRY(String::from_cstring(name)); @@ -164,7 +179,10 @@ namespace Scheduler Vector env; - auto guard = make_scope_guard([&] { delete thread; }); + auto guard = make_scope_guard([&] { + delete thread; + delete process; + }); // Contrary to other programs, which use BinaryFormat::create_loader(), init must be a native executable. 
auto loader = TRY(ELFLoader::create(inode, nullptr, 0)); @@ -188,11 +206,12 @@ namespace Scheduler thread->signal_handlers[i] = { .sa_handler = SIG_DFL, .sa_mask = 0, .sa_flags = 0 }; } - kinfoln("Created userspace thread: id %d with ip %#.16lx and sp %#.16lx (ksp %#lx)", thread->id, thread->ip(), + kinfoln("Created userspace thread: id %d with ip %#.16lx and sp %#.16lx (ksp %#lx)", process->id, thread->ip(), thread->sp(), thread->kernel_stack.top()); g_threads.append(thread); - g_init = thread; + g_processes.append(process); + g_init = process; return thread; } @@ -202,6 +221,25 @@ namespace Scheduler g_threads.append(thread); } + void add_process(Process* process) + { + g_processes.append(process); + } + + void reap_process(Process* process) + { + // FIXME: Shouldn't all this be done when the timers' destructors are called? + process->real_timer.disarm(); + process->virtual_timer.disarm(); + process->profiling_timer.disarm(); + for (int i = 0; i < MAX_POSIX_TIMERS; i++) + { + if (process->posix_timers[i].has_value()) process->posix_timers[i]->disarm(); + } + + delete process; + } + void reap_thread(Thread* thread) { CPU::disable_interrupts(); @@ -221,15 +259,6 @@ namespace Scheduler MemoryManager::unmap_owned_and_free_vm(stack.bottom(), stack.bytes() / ARCH_PAGE_SIZE).release_value(); } - // FIXME: Shouldn't all this be done when the timers' destructors are called? 
- thread->real_timer.disarm(); - thread->virtual_timer.disarm(); - thread->profiling_timer.disarm(); - for (int i = 0; i < MAX_POSIX_TIMERS; i++) - { - if (thread->posix_timers[i].has_value()) thread->posix_timers[i]->disarm(); - } - delete thread; CPU::enable_interrupts(); @@ -307,14 +336,19 @@ namespace Scheduler { CPU::disable_interrupts(); - if (is_in_kernel(regs)) g_current->kernel_ticks_self++; + if (is_in_kernel(regs)) + { + g_current->process->kernel_ticks_self++; + g_current->kernel_ticks_self++; + } else { - g_current->virtual_clock.tick(); + g_current->process->virtual_clock.tick(); + g_current->process->user_ticks_self++; g_current->user_ticks_self++; } - g_current->profiling_clock.tick(); + g_current->process->profiling_clock.tick(); g_current->ticks_left--; @@ -344,21 +378,21 @@ namespace Scheduler return result; } - Option find_by_pid(pid_t pid) + Option find_by_pid(pid_t pid) { - for (auto* const thread : g_threads) + for (auto* const process : g_processes) { - if (thread->id == pid && thread->state != ThreadState::Dying) return thread; + if (process->id == pid) return process; } return {}; } - bool has_children(Thread* thread) + bool has_children(Process* process) { bool result { false }; - for_each_child(thread, [&](Thread*) { + for_each_child(process, [&](Process*) { result = true; return false; }); @@ -366,12 +400,12 @@ namespace Scheduler return result; } - Option find_exited_child(Thread* thread) + Option find_exited_child(Process* process) { - Option result; + Option result; - for_each_child(thread, [&](Thread* child) { - if (!result.has_value() && child->state == ThreadState::Exited) + for_each_child(process, [&](Process* child) { + if (!result.has_value() && !child->alive()) { result = child; return false; @@ -387,16 +421,23 @@ namespace Scheduler CPU::disable_interrupts(); kdbgln("--- BEGIN SCHEDULER DUMP ---"); - kdbgln("current at %p, id = %d", g_current, g_current->id); + kdbgln("Current thread at %p, tid = %d", g_current, 
g_current->tid); + kdbgln("Current process at %p, pid = %d", g_current->process, g_current->process->id); for (const auto* thread : g_threads) { - kdbgln("%p %c [%-20s] %4d, parent = (%-18p,%d), state = %d, ticks: (k:%04zu,u:%04zu), status = " - "%d, cwd = %s", - thread, thread->is_kernel ? 'k' : 'u', thread->cmdline.chars(), thread->id, thread->parent, - thread->parent ? thread->parent->id : 0, (int)thread->state, thread->kernel_ticks_self, - thread->user_ticks_self, thread->status, - thread->current_directory_path.is_empty() ? "/" : thread->current_directory_path.chars()); + kdbgln("Thread %p (belongs to pid %4d) %c [%-20s] %4d, state = %d", thread, thread->process->id, + thread->is_kernel ? 'k' : 'u', thread->cmdline.chars(), thread->tid, (int)thread->state); + } + + for (const auto* process : g_processes) + { + kdbgln("Process %p (%zu threads) %4d, parent = (%-18p,%d), cwd = %s, ticks: (k:%04zu,u:%04zu), " + "status = %d", + process, process->thread_count.load(), process->id, process->parent, + process->parent ? process->parent->id : 0, + process->current_directory_path.is_empty() ? 
"/" : process->current_directory_path.chars(), + process->kernel_ticks_self.load(), process->user_ticks_self.load(), process->status); } kdbgln("--- END SCHEDULER DUMP ---"); diff --git a/kernel/src/thread/Scheduler.h b/kernel/src/thread/Scheduler.h index 1f9e4024..78b112a5 100644 --- a/kernel/src/thread/Scheduler.h +++ b/kernel/src/thread/Scheduler.h @@ -8,7 +8,7 @@ namespace Scheduler Thread* current(); Thread* idle(); - Thread* init_thread(); + Process* init_process(); void set_reap_thread(Thread*); void signal_reap_thread(); @@ -23,10 +23,12 @@ namespace Scheduler Result create_init_process(SharedPtr inode, const char* name); void add_thread(Thread* thread); + void add_process(Process* process); Thread* pick_task(); void reap_thread(Thread* thread); + void reap_process(Process* thread); void switch_task(Registers* regs); @@ -34,13 +36,26 @@ namespace Scheduler LinkedList check_for_dying_threads(); - Option find_by_pid(pid_t pid); + Option find_by_pid(pid_t pid); - template void for_each_child(Thread* thread, Callback callback) + template void for_each_child(Process* process, Callback callback) { - for (Thread* current = thread; current; current = g_threads.next(current).value_or(nullptr)) + for (Process* current = process; current; current = g_processes.next(current).value_or(nullptr)) { - if (current->parent == thread) + if (current->parent == process) + { + bool should_continue = callback(current); + if (!should_continue) return; + } + } + } + + template void for_each_thread(Process* process, Callback callback) + { + for (Thread* current = g_threads.first().value_or(nullptr); current; + current = g_threads.next(current).value_or(nullptr)) + { + if (current->process == process) { bool should_continue = callback(current); if (!should_continue) return; @@ -50,8 +65,8 @@ namespace Scheduler template void for_each_in_process_group(pid_t group, Callback callback) { - for (Thread* current = g_threads.first().value_or(nullptr); current; - current = 
g_threads.next(current).value_or(nullptr)) + for (Process* current = g_processes.first().value_or(nullptr); current; + current = g_processes.next(current).value_or(nullptr)) { if (current->pgid == group) { @@ -63,8 +78,8 @@ namespace Scheduler template void for_each_in_session(pid_t sid, Callback callback) { - for (Thread* current = g_threads.first().value_or(nullptr); current; - current = g_threads.next(current).value_or(nullptr)) + for (Process* current = g_processes.first().value_or(nullptr); current; + current = g_processes.next(current).value_or(nullptr)) { if (current->sid == sid) { @@ -76,9 +91,9 @@ namespace Scheduler void dump_state(); - bool has_children(Thread* thread); + bool has_children(Process* thread); - Option find_exited_child(Thread* thread); + Option find_exited_child(Process* thread); } extern "C" void kernel_yield(); diff --git a/kernel/src/thread/Thread.cpp b/kernel/src/thread/Thread.cpp index 6909739c..2d8211ea 100644 --- a/kernel/src/thread/Thread.cpp +++ b/kernel/src/thread/Thread.cpp @@ -15,6 +15,7 @@ static Atomic g_next_id; LinkedList g_threads; +LinkedList g_processes; void Thread::init() { @@ -25,7 +26,7 @@ Result new_thread() { Thread* const thread = TRY(make()); - thread->id = g_next_id++; + thread->tid = g_next_id++; return thread; } @@ -35,31 +36,49 @@ pid_t next_thread_id() return g_next_id.load(); } -Result Thread::allocate_fd(int min) +Result Process::allocate_fd(int min, FileDescriptor& descriptor) { if (min < 0 || min >= FD_MAX) return err(EINVAL); + + auto table = fd_table.lock(); for (int i = min; i < FD_MAX; i++) { - // FIXME: Possible race condition if multiple threads share a FileDescriptorTable? Let's not worry about it for - // now, we're still a long way away from reaching that point. 
- if (!fd_table[i].has_value()) { return i; } + if (!(*table)[i].has_value()) + { + (*table)[i] = descriptor; + return i; + } } return err(EMFILE); } -Result Thread::resolve_fd(int fd) +Result Process::resolve_fd(int fd) { if (fd < 0 || fd >= FD_MAX) return err(EBADF); - Option& maybe_descriptor = fd_table[fd]; + auto table = fd_table.lock(); + + Option& maybe_descriptor = (*table)[fd]; if (!maybe_descriptor.has_value()) return err(EBADF); return maybe_descriptor.value_ptr(); } -Result Thread::allocate_timerid() +Credentials Process::credentials() +{ + auto credentials = auth.lock(); + return *credentials; +} + +Result> Process::copy_groups() +{ + auto groups = extra_groups.lock(); + return groups->shallow_copy(); +} + +Result Process::allocate_timerid() { ScopedMutexLock lock(posix_timer_mutex); @@ -75,28 +94,25 @@ Result Thread::allocate_timerid() return err(EMFILE); } -Result Thread::resolve_timerid(int tid) +Result Process::resolve_timerid(int _tid) { - if (tid < 0 || tid >= MAX_POSIX_TIMERS) return err(EBADF); + if (_tid < 0 || _tid >= MAX_POSIX_TIMERS) return err(EBADF); - Option& maybe_timer = posix_timers[tid]; + Option& maybe_timer = posix_timers[_tid]; if (!maybe_timer.has_value()) return err(EINVAL); return maybe_timer.value_ptr(); } -Result> Thread::resolve_atfile(int dirfd, const String& path, bool allow_empty_path, - bool follow_last_symlink, SharedPtr* parent_inode) +Result> Process::resolve_atfile(int dirfd, const String& path, bool allow_empty_path, + bool follow_last_symlink, SharedPtr* parent_inode) { if (parent_inode) *parent_inode = this->current_directory; - if (PathParser::is_absolute(path.view())) - return VFS::resolve_path(path.chars(), this->auth, &this->extra_groups, {}, follow_last_symlink); + if (PathParser::is_absolute(path.view())) return VFS::resolve_path(path.chars(), this, {}, follow_last_symlink); - if (dirfd == AT_FDCWD) - return VFS::resolve_path(path.chars(), this->auth, &this->extra_groups, this->current_directory, - 
follow_last_symlink); + if (dirfd == AT_FDCWD) return VFS::resolve_path(path.chars(), this, this->current_directory, follow_last_symlink); auto descriptor = TRY(resolve_fd(dirfd)); @@ -104,60 +120,91 @@ Result> Thread::resolve_atfile(int dirfd, const String& pa if (path.is_empty() && allow_empty_path) return descriptor->inode(); - return VFS::resolve_path(path.chars(), this->auth, &this->extra_groups, descriptor->inode(), follow_last_symlink); + return VFS::resolve_path(path.chars(), this, descriptor->inode(), follow_last_symlink); } -[[noreturn]] void Thread::exit_and_signal_parent(int _status) +[[noreturn]] void Process::exit(int _status) { - check(!is_kernel); + check(this == Process::current()); // Process::exit() should only be called by the process itself. #ifndef MOON_ENABLE_TESTING_FEATURES - if (this->id == 1) fail("the init process exited"); + if (id == 1) fail("the init process exited"); #else - if (this->id == 1) CPU::magic_exit(_status); + if (id == 1) CPU::magic_exit(_status); #endif - Scheduler::for_each_child(this, [](Thread* child) { - child->parent = Scheduler::init_thread(); + Scheduler::for_each_thread(this, [](Thread* thread) { + thread->quit(); + return true; + }); + + thread_count = 0; + + status = _status; + + Scheduler::for_each_child(this, [](Process* child) { + child->parent = Scheduler::init_process(); return true; }); if (is_session_leader()) { - kinfoln("thread %d is exiting as a session leader, sending signals to session", id); + kinfoln("process %d is exiting as a session leader, sending signals to session", id); // FIXME: Send SIGHUP only to the foreground process group if the session has a controlling terminal. 
- Scheduler::for_each_in_session(sid, [this](Thread* thread) { - if (thread == this) return true; - thread->sid = 0; - thread->controlling_terminal = {}; - thread->send_signal(SIGHUP); - kinfoln("reparenting and sending SIGHUP to %d", thread->id); + Scheduler::for_each_in_session(sid, [this](Process* p) { + if (p == this) return true; + p->sid = 0; + p->controlling_terminal = {}; + p->send_signal(SIGHUP); + kinfoln("reparenting and sending SIGHUP to %d", p->id); return true; }); } if (parent) { - if (parent->state == ThreadState::Waiting) - { - auto child = *parent->child_being_waited_for; - if (child == -1 || child == id) + Scheduler::for_each_thread(parent, [&](Thread* t) { + if (t->state == ThreadState::Waiting) { - parent->child_being_waited_for = id; - parent->wake_up(); + pid_t expected = -1; + if (t->child_being_waited_for.compare_exchange_strong(expected, id)) + { + t->wake_up(); + return false; + } + + expected = id; + if (t->child_being_waited_for.compare_exchange_strong(expected, id)) + { + t->wake_up(); + return false; + } } - } - while (parent->pending_signals.get(SIGCHLD - 1)) kernel_yield(); + return true; + }); + parent->send_signal(SIGCHLD); } - state = ThreadState::Exited; - - status = _status; kernel_yield(); unreachable(); } +void Thread::quit() +{ + state = ThreadState::Dying; +} + +void Thread::exit(bool yield) +{ + quit(); + + process->thread_count--; + if (process->thread_count == 0) { process->exit(0); } + + if (yield) kernel_yield(); +} + enum class DefaultSignalAction { Ignore, @@ -202,7 +249,7 @@ void Thread::process_pending_signals(Registers* current_regs) if (handler.sa_handler == SIG_DFL || signo == SIGKILL || signo == SIGSTOP) { default_signal: - if (id == 1) + if (process->id == 1) { kwarnln("signal: init got a signal it has no handler for, ignoring"); return; @@ -213,9 +260,10 @@ void Thread::process_pending_signals(Registers* current_regs) { case DefaultSignalAction::Ignore: return; case DefaultSignalAction::Terminate: - 
kwarnln("Terminating thread %d with signal %d", id, signo); + kwarnln("Terminating thread %d with signal %d", tid, signo); CPU::print_stack_trace_at(current_regs); - exit_and_signal_parent(signo | _SIGBIT); + process->exit(signo | _SIGBIT); + unreachable(); case DefaultSignalAction::Stop: stop(); default: return; } @@ -246,6 +294,14 @@ bool Thread::will_ignore_pending_signal() return false; } +void Process::send_signal(int signo) +{ + Scheduler::for_each_thread(this, [signo](Thread* t) { + t->send_signal(signo); + return false; + }); +} + void Thread::send_signal(int signo) { if (is_kernel) return; @@ -299,9 +355,11 @@ bool Thread::check_stack_on_exception(u64 stack_pointer) return false; } + auto address_space = process->address_space.lock(); + // If we can, we'll add 2 more pages of buffer space, otherwise we use whatever we can. usize bytes_to_grow = min(stack_space_remaining, exceeded_bytes + 2 * ARCH_PAGE_SIZE); - auto maybe_base = address_space->grow_region(stack.bottom(), bytes_to_grow / ARCH_PAGE_SIZE, true); + auto maybe_base = (*address_space)->grow_region(stack.bottom(), bytes_to_grow / ARCH_PAGE_SIZE, true); if (maybe_base.has_error()) { kwarnln("Failed to grow stack: could not allocate virtual memory space (%s)", maybe_base.error_string()); @@ -313,7 +371,7 @@ bool Thread::check_stack_on_exception(u64 stack_pointer) MMU::ReadWrite | MMU::NoExecute | MMU::User); if (result.has_error()) { - address_space->free_region(base, bytes_to_grow / ARCH_PAGE_SIZE); + (*address_space)->free_region(base, bytes_to_grow / ARCH_PAGE_SIZE); kwarnln("Failed to grow stack: could not allocate physical pages (%s)", result.error_string()); return false; } @@ -333,3 +391,8 @@ void Thread::stop() state = ThreadState::Stopped; kernel_yield(); } + +Process* Process::current() +{ + return Scheduler::current()->process; +} diff --git a/kernel/src/thread/Thread.h b/kernel/src/thread/Thread.h index 2d19cc69..34fc0564 100644 --- a/kernel/src/thread/Thread.h +++ 
b/kernel/src/thread/Thread.h @@ -1,5 +1,4 @@ #pragma once - #include "arch/MMU.h" #include "fs/OpenFileDescription.h" #include "fs/VFS.h" @@ -48,59 +47,33 @@ struct Credentials u32 sgid { 0 }; }; -struct Thread : public LinkedListNode +struct Process : public LinkedListNode { - Registers regs; + Atomic thread_count; pid_t id; - pid_t pgid { 0 }; - pid_t sid { 0 }; + Atomic pgid { 0 }; + Atomic sid { 0 }; - Credentials auth; - Vector extra_groups; + bool has_called_exec { false }; - u64 user_ticks_self = 0; - u64 kernel_ticks_self = 0; - u64 user_ticks_children = 0; - u64 kernel_ticks_children = 0; - - u64 ticks_left; - u64 sleep_ticks_left; + mode_t umask { 0 }; int promises { -1 }; int execpromises { -1 }; - Stack stack; - Stack kernel_stack; + Process* parent { nullptr }; - OwnedPtr address_space; - Option fd_table[FD_MAX] = {}; + MutexLocked auth { Credentials { 0, 0, 0, 0, 0, 0 } }; - Result allocate_fd(int min); - Result resolve_fd(int fd); - Result> resolve_atfile(int dirfd, const String& path, bool allow_empty_path, - bool follow_last_symlink, - SharedPtr* parent_inode = nullptr); + MutexLocked> extra_groups { {} }; - struct sigaction signal_handlers[NSIG]; - Bitset signal_mask { 0 }; - Bitset pending_signals { 0 }; - bool interrupted { false }; + Credentials credentials(); + Result> copy_groups(); - SharedPtr controlling_terminal; + MutexLocked> address_space; - bool unrestricted_task { false }; - - FPData fp_data; - - ThreadState state = ThreadState::Runnable; - - bool is_kernel { true }; - bool has_called_exec { false }; - - int status { 0 }; - - mode_t umask { 0 }; + MutexLocked[FD_MAX]> fd_table = {}; Timer real_timer; Timer virtual_timer; @@ -109,28 +82,99 @@ struct Thread : public LinkedListNode Clock virtual_clock; Clock profiling_clock; + bool is_kernel { false }; + Option posix_timers[MAX_POSIX_TIMERS]; Mutex posix_timer_mutex; + StaticString<128> cmdline; + + Atomic user_ticks_self = 0; + Atomic kernel_ticks_self = 0; + Atomic user_ticks_children 
= 0; + Atomic kernel_ticks_children = 0; + Result allocate_timerid(); Result resolve_timerid(int id); - StaticString<128> cmdline; + Result allocate_fd(int min, FileDescriptor& descriptor); + Result resolve_fd(int fd); + Result> resolve_atfile(int dirfd, const String& path, bool allow_empty_path, + bool follow_last_symlink, + SharedPtr* parent_inode = nullptr); String current_directory_path = {}; SharedPtr current_directory = {}; - Thread* parent { nullptr }; - Option child_being_waited_for = {}; + SharedPtr controlling_terminal; + + int status { 0 }; + + void send_signal(int signo); + + bool is_session_leader() + { + return id == sid; + } + + bool alive() + { + return thread_count > 0; + } + + static Process* current(); + + [[noreturn]] void exit(int status); +}; + +struct Thread : public LinkedListNode +{ + Process* process; + + pid_t tid; + + Registers regs; + + Atomic ticks_left; + Atomic sleep_ticks_left; + + Atomic user_ticks_self = 0; + Atomic kernel_ticks_self = 0; + + Stack stack; + Stack kernel_stack; + + struct sigaction signal_handlers[NSIG]; + Bitset signal_mask { 0 }; + Bitset pending_signals { 0 }; + bool interrupted { false }; + + Atomic child_being_waited_for = -2; + + bool unrestricted_task { false }; + + FPData fp_data; + + ThreadState state = ThreadState::Runnable; + + bool is_kernel { false }; + + StaticString<128> cmdline; PageDirectory* self_directory() const { - return address_space->page_directory(); + PageDirectory* result; + auto lambda = Function&>::wrap([&](OwnedPtr& space) { + result = space->page_directory(); + }).release_value(); + process->address_space.with_lock(move(lambda)); + return result; } PageDirectory* active_directory { nullptr }; - [[noreturn]] void exit_and_signal_parent(int status); + void quit(); + void exit(bool yield = true); bool is_idle() { @@ -142,11 +186,6 @@ struct Thread : public LinkedListNode state = ThreadState::Runnable; } - bool is_session_leader() - { - return id == sid; - } - void init_regs_kernel(); 
void init_regs_user(); @@ -190,3 +229,4 @@ Result new_thread(); pid_t next_thread_id(); extern LinkedList g_threads; +extern LinkedList g_processes; diff --git a/kernel/src/thread/ThreadImage.cpp b/kernel/src/thread/ThreadImage.cpp index 85323724..93558f93 100644 --- a/kernel/src/thread/ThreadImage.cpp +++ b/kernel/src/thread/ThreadImage.cpp @@ -51,7 +51,9 @@ Result> ThreadImage::clone_from_thread(Thread* parent) { auto image = TRY(make_owned()); - auto address_space = TRY(parent->address_space->clone()); + auto space = parent->process->address_space.lock(); + + auto address_space = TRY((*space)->clone()); const u64 kernel_stack_base = TRY(MemoryManager::alloc_for_kernel(4, MMU::ReadWrite | MMU::NoExecute)); Stack kernel_stack { kernel_stack_base, 4 * ARCH_PAGE_SIZE }; @@ -98,5 +100,6 @@ void ThreadImage::apply(Thread* thread) thread->active_directory = m_address_space->page_directory(); - thread->address_space = move(m_address_space); + auto space = thread->process->address_space.lock(); + *space = move(m_address_space); } diff --git a/kernel/src/thread/Timer.h b/kernel/src/thread/Timer.h index f7b7e99c..e967d0c6 100644 --- a/kernel/src/thread/Timer.h +++ b/kernel/src/thread/Timer.h @@ -2,7 +2,7 @@ #include #include -struct Thread; +struct Process; struct Clock; class Timer : public LinkedListNode @@ -10,7 +10,7 @@ class Timer : public LinkedListNode public: u64 delta_ticks { 0 }; u64 interval_ticks { 0 }; - Thread* thread; + Process* process; int signo { SIGALRM }; bool restart { false }; -- 2.34.1 From 8e30e0e19d8d84b674942683473a11409caf8210 Mon Sep 17 00:00:00 2001 From: apio Date: Fri, 6 Dec 2024 21:38:29 +0100 Subject: [PATCH 2/5] base: Revert loginui.conf change Oops, was using this for loginui testing, it should be turned off by default. 
--- base/etc/loginui.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/etc/loginui.conf b/base/etc/loginui.conf index 55ca66d6..9a514af7 100644 --- a/base/etc/loginui.conf +++ b/base/etc/loginui.conf @@ -1,5 +1,5 @@ # Configuration file for loginui. # If this parameter is set to "true", loginui automatically spawns a UI session as the below user instead of prompting for a username and password. -Autologin=false +Autologin=true # The user to create a session for if "Autologin" is set to true (see above). If the username is invalid, loginui will behave as if "Autologin" was set to false. AutologinUser=selene -- 2.34.1 From 853a6d7b3854c69b0ce2c5cd3aa1a86bd8867c53 Mon Sep 17 00:00:00 2001 From: apio Date: Sat, 7 Dec 2024 13:02:25 +0100 Subject: [PATCH 3/5] kernel/x86_64: Dump the process address space ranges on exception --- kernel/src/arch/x86_64/CPU.cpp | 3 +++ kernel/src/memory/AddressSpace.cpp | 11 +++++++++++ kernel/src/memory/AddressSpace.h | 4 +++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/src/arch/x86_64/CPU.cpp b/kernel/src/arch/x86_64/CPU.cpp index afd2d4e8..2b9704e2 100644 --- a/kernel/src/arch/x86_64/CPU.cpp +++ b/kernel/src/arch/x86_64/CPU.cpp @@ -91,6 +91,9 @@ void handle_cpu_exception(int signo, const char* err, Registers* regs) auto* current = Scheduler::current(); if (current->check_stack_on_exception(regs->rsp)) return; + auto space = current->process->address_space.lock(); + (*space)->debug_log(); + current->send_signal(signo); current->process_pending_signals(regs); return; diff --git a/kernel/src/memory/AddressSpace.cpp b/kernel/src/memory/AddressSpace.cpp index 7aa0e717..1e7b0a30 100644 --- a/kernel/src/memory/AddressSpace.cpp +++ b/kernel/src/memory/AddressSpace.cpp @@ -1,4 +1,5 @@ #include "memory/AddressSpace.h" +#include "Log.h" #include "arch/MMU.h" #include "memory/Heap.h" #include "memory/MemoryManager.h" @@ -385,3 +386,13 @@ void VMRegion::sync_shared() } } } + +void 
AddressSpace::debug_log() +{ + m_regions.for_each([this](VMRegion* region) { + kdbgln("VMRegion start: %p, end: %p, count: %zu, used: %s, persistent: %s, flags: %d, prot: %d, shmid: %lu, " + "offset: %ld", + (void*)region->start, (void*)region->end, region->count, region->used ? "true" : "false", + region->persistent ? "true" : "false", region->flags, region->prot, region->shmid, region->offset); + }); +} diff --git a/kernel/src/memory/AddressSpace.h b/kernel/src/memory/AddressSpace.h index 2dfa8f2b..d9f6fe94 100644 --- a/kernel/src/memory/AddressSpace.h +++ b/kernel/src/memory/AddressSpace.h @@ -15,7 +15,7 @@ class VMRegion : LinkedListNode bool persistent { false }; int flags { 0 }; int prot { 0 }; - u64 shmid; + u64 shmid { 0 }; off_t offset { 0 }; void cleanup_shared(); @@ -52,6 +52,8 @@ class AddressSpace static Result> try_create(); + void debug_log(); + Result> clone(); PageDirectory* page_directory() const -- 2.34.1 From 42afef5ccba9bff83c7e0a90913edb1cfc2c2c98 Mon Sep 17 00:00:00 2001 From: apio Date: Sat, 7 Dec 2024 13:05:38 +0100 Subject: [PATCH 4/5] kernel: Leave reaping to the reap thread This seems to fix previous problems. Apparently reaping a thread somewhat corrupts/replaces the calling thread's address space. I should've known there's a reason we do it in a separate kernel thread... 
--- kernel/src/main.cpp | 2 ++ kernel/src/sys/exec.cpp | 2 ++ kernel/src/sys/waitpid.cpp | 13 +++++++------ kernel/src/thread/Scheduler.cpp | 24 +++++++++++++++++++++++- kernel/src/thread/Scheduler.h | 1 + kernel/src/thread/Thread.cpp | 10 ++++++++++ kernel/src/thread/Thread.h | 9 ++++++++- 7 files changed, 53 insertions(+), 8 deletions(-) diff --git a/kernel/src/main.cpp b/kernel/src/main.cpp index 378ecb57..b96fb020 100644 --- a/kernel/src/main.cpp +++ b/kernel/src/main.cpp @@ -26,9 +26,11 @@ void reap_thread() { CPU::disable_interrupts(); auto dying_threads = Scheduler::check_for_dying_threads(); + auto dead_processes = Scheduler::check_for_dead_processes(); CPU::enable_interrupts(); dying_threads.consume([](Thread* thread) { Scheduler::reap_thread(thread); }); + dead_processes.consume([](Process* p) { Scheduler::reap_process(p); }); kernel_wait_for_event(); } diff --git a/kernel/src/sys/exec.cpp b/kernel/src/sys/exec.cpp index c952da8c..70101810 100644 --- a/kernel/src/sys/exec.cpp +++ b/kernel/src/sys/exec.cpp @@ -113,6 +113,7 @@ Result sys_execve(Registers* regs, SyscallArgs args) if (t != thread) t->quit(); return true; }); + Scheduler::signal_reap_thread(); current->thread_count = 1; @@ -145,6 +146,7 @@ Result sys_execve(Registers* regs, SyscallArgs args) } current->cmdline = cmdline.chars(); + thread->cmdline = cmdline.chars(); image->apply(thread); diff --git a/kernel/src/sys/waitpid.cpp b/kernel/src/sys/waitpid.cpp index 4241a131..3ad215c2 100644 --- a/kernel/src/sys/waitpid.cpp +++ b/kernel/src/sys/waitpid.cpp @@ -25,8 +25,8 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) if (options & WNOHANG) return err(EAGAIN); wait_for_child: - if (target->alive()) kernel_wait(pid); - if (current->interrupted) + if (!target->dead()) kernel_wait(pid); + if (current->interrupted && current->pending_signal() != SIGCHLD) { kdbgln("signal: waitpid interrupted by signal"); if (current->will_ignore_pending_signal()) @@ -37,7 +37,7 @@ Result sys_waitpid(Registers* 
regs, SyscallArgs args) return err(EINTR); } - check(!target->alive()); + check(target->dead()); } else if (pid == -1) { @@ -50,7 +50,7 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) wait_for_any_child: kernel_wait(pid); - if (current->interrupted) + if (current->interrupted && current->pending_signal() != SIGCHLD) { kdbgln("signal: waitpid interrupted by signal"); if (current->will_ignore_pending_signal()) @@ -64,7 +64,7 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) check(current->child_being_waited_for != -1); target = TRY(Result::from_option(Scheduler::find_by_pid(current->child_being_waited_for), ESRCH)); - check(!target->alive()); + check(!target->dead()); } else target = child.value(); @@ -80,7 +80,8 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) current->process->user_ticks_children += target->user_ticks_self + target->user_ticks_children; current->process->kernel_ticks_children += target->kernel_ticks_self + target->kernel_ticks_children; - Scheduler::reap_process(target); + target->thread_count = PROCESS_SHOULD_REAP; + Scheduler::signal_reap_thread(); if (status_ptr) if (!MemoryManager::copy_to_user_typed(status_ptr, &status)) return err(EFAULT); diff --git a/kernel/src/thread/Scheduler.cpp b/kernel/src/thread/Scheduler.cpp index 263cbe11..a8247e7d 100644 --- a/kernel/src/thread/Scheduler.cpp +++ b/kernel/src/thread/Scheduler.cpp @@ -117,6 +117,7 @@ namespace Scheduler process->thread_count = 1; process->virtual_clock.set_resolution(1'000'000); process->profiling_clock.set_resolution(1'000'000); + process->cmdline = name; process->is_kernel = true; g_threads.append(thread); @@ -172,6 +173,7 @@ namespace Scheduler process->id = 1; process->pgid = 1; process->thread_count = 1; + process->cmdline = name; Vector args; auto name_string = TRY(String::from_cstring(name)); @@ -228,6 +230,8 @@ namespace Scheduler void reap_process(Process* process) { + CPU::disable_interrupts(); + // FIXME: Shouldn't all this be done when the timers' 
destructors are called? process->real_timer.disarm(); process->virtual_timer.disarm(); @@ -238,6 +242,8 @@ namespace Scheduler } delete process; + + CPU::enable_interrupts(); } void reap_thread(Thread* thread) @@ -378,6 +384,21 @@ namespace Scheduler return result; } + LinkedList check_for_dead_processes() + { + LinkedList result; + + g_processes.delayed_for_each([&](Process* p) { + if (p->thread_count == PROCESS_SHOULD_REAP) + { + g_processes.remove(p); + result.append(p); + } + }); + + return result; + } + Option find_by_pid(pid_t pid) { for (auto* const process : g_processes) @@ -405,7 +426,7 @@ namespace Scheduler Option result; for_each_child(process, [&](Process* child) { - if (!result.has_value() && !child->alive()) + if (!result.has_value() && child->dead()) { result = child; return false; @@ -469,6 +490,7 @@ void kernel_wait_for_event() [[noreturn]] void kernel_exit() { g_current->state = ThreadState::Dying; + g_current->process->thread_count = PROCESS_SHOULD_REAP; Scheduler::signal_reap_thread(); kernel_yield(); unreachable(); diff --git a/kernel/src/thread/Scheduler.h b/kernel/src/thread/Scheduler.h index 78b112a5..a7244947 100644 --- a/kernel/src/thread/Scheduler.h +++ b/kernel/src/thread/Scheduler.h @@ -35,6 +35,7 @@ namespace Scheduler void invoke(Registers* regs); LinkedList check_for_dying_threads(); + LinkedList check_for_dead_processes(); Option find_by_pid(pid_t pid); diff --git a/kernel/src/thread/Thread.cpp b/kernel/src/thread/Thread.cpp index 2d8211ea..967c8dc5 100644 --- a/kernel/src/thread/Thread.cpp +++ b/kernel/src/thread/Thread.cpp @@ -137,6 +137,7 @@ Result> Process::resolve_atfile(int dirfd, const String& p thread->quit(); return true; }); + Scheduler::signal_reap_thread(); thread_count = 0; @@ -276,6 +277,15 @@ void Thread::process_pending_signals(Registers* current_regs) } } +int Thread::pending_signal() +{ + for (int i = 0; i < NSIG; i++) + { + if (pending_signals.get(i)) { return i + 1; } + } + return 0; +} + bool 
Thread::will_ignore_pending_signal() { for (int i = 0; i < NSIG; i++) diff --git a/kernel/src/thread/Thread.h b/kernel/src/thread/Thread.h index 34fc0564..5bec29ea 100644 --- a/kernel/src/thread/Thread.h +++ b/kernel/src/thread/Thread.h @@ -20,6 +20,7 @@ #endif constexpr int MAX_POSIX_TIMERS = 64; +constexpr i64 PROCESS_SHOULD_REAP = -1; class Timer; @@ -49,7 +50,7 @@ struct Credentials struct Process : public LinkedListNode { - Atomic thread_count; + Atomic thread_count; pid_t id; Atomic pgid { 0 }; @@ -122,6 +123,11 @@ struct Process : public LinkedListNode return thread_count > 0; } + bool dead() + { + return thread_count == 0; + } + static Process* current(); [[noreturn]] void exit(int status); @@ -202,6 +208,7 @@ struct Thread : public LinkedListNode void process_pending_signals(Registers* current_regs); + int pending_signal(); bool will_ignore_pending_signal(); bool deliver_signal(int signo, Registers* current_regs); -- 2.34.1 From d05d6fad0bca1b1dcea893e3326c32eabc888f4a Mon Sep 17 00:00:00 2001 From: apio Date: Sat, 7 Dec 2024 13:15:58 +0100 Subject: [PATCH 5/5] kernel: Interrupt waitpid (even when SIGCHLD is pending) when other signals are also pending This fixes init not receiving the kill signal when running tests. 
--- kernel/src/sys/waitpid.cpp | 4 ++-- kernel/src/thread/Thread.cpp | 10 ++++++++++ kernel/src/thread/Thread.h | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/kernel/src/sys/waitpid.cpp b/kernel/src/sys/waitpid.cpp index 3ad215c2..f533768f 100644 --- a/kernel/src/sys/waitpid.cpp +++ b/kernel/src/sys/waitpid.cpp @@ -26,7 +26,7 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) wait_for_child: if (!target->dead()) kernel_wait(pid); - if (current->interrupted && current->pending_signal() != SIGCHLD) + if (current->interrupted && (current->pending_signal_count() > 1 || current->pending_signal() != SIGCHLD)) { kdbgln("signal: waitpid interrupted by signal"); if (current->will_ignore_pending_signal()) @@ -50,7 +50,7 @@ Result sys_waitpid(Registers* regs, SyscallArgs args) wait_for_any_child: kernel_wait(pid); - if (current->interrupted && current->pending_signal() != SIGCHLD) + if (current->interrupted && (current->pending_signal_count() > 1 || current->pending_signal() != SIGCHLD)) { kdbgln("signal: waitpid interrupted by signal"); if (current->will_ignore_pending_signal()) diff --git a/kernel/src/thread/Thread.cpp b/kernel/src/thread/Thread.cpp index 967c8dc5..74771917 100644 --- a/kernel/src/thread/Thread.cpp +++ b/kernel/src/thread/Thread.cpp @@ -277,6 +277,16 @@ void Thread::process_pending_signals(Registers* current_regs) } } +int Thread::pending_signal_count() +{ + int result = 0; + for (int i = 0; i < NSIG; i++) + { + if (pending_signals.get(i)) { result++; } + } + return result; +} + int Thread::pending_signal() { for (int i = 0; i < NSIG; i++) diff --git a/kernel/src/thread/Thread.h b/kernel/src/thread/Thread.h index 5bec29ea..082734e3 100644 --- a/kernel/src/thread/Thread.h +++ b/kernel/src/thread/Thread.h @@ -208,6 +208,7 @@ struct Thread : public LinkedListNode void process_pending_signals(Registers* current_regs); + int pending_signal_count(); int pending_signal(); bool will_ignore_pending_signal(); -- 2.34.1