kernel: Rework VFS access checking + add processes
Some checks failed
Build and test / build (push) Has been cancelled

VFS functions now accept a single Process* pointer instead of credentials and groups.
There is now a distinction between processes and threads.
Now to fix all the bugs... waitpid crashes the process with an NX error...
This commit is contained in:
apio 2024-12-06 21:35:59 +01:00
parent 6fc49a0be5
commit dc766e1da7
Signed by: apio
GPG Key ID: B8A7D06E42258954
46 changed files with 915 additions and 547 deletions

View File

@ -1,5 +1,5 @@
# Configuration file for loginui.
# If this parameter is set to "true", loginui automatically spawns a UI session as the below user instead of prompting for a username and password.
Autologin=true
Autologin=false
# The user to create a session for if "Autologin" is set to true (see above). If the username is invalid, loginui will behave as if "Autologin" was set to false.
AutologinUser=selene

View File

@ -1,6 +1,7 @@
#include "Pledge.h"
#include "Log.h"
#include "memory/MemoryManager.h"
#include "thread/Scheduler.h"
static const char* promise_names[] = {
#define __enumerate(promise) #promise,
@ -8,30 +9,34 @@ static const char* promise_names[] = {
#undef __enumerate
};
Result<void> check_pledge(Thread* thread, Promise promise)
Result<void> check_pledge(Process* process, Promise promise)
{
// Thread has not called pledge().
if (thread->promises < 0) return {};
if (process->promises < 0) return {};
int mask = (1 << (int)promise);
if ((thread->promises & mask) != mask)
if ((process->promises & mask) != mask)
{
kerrorln("Pledge violation in thread %d! Has not pledged %s", thread->id, promise_names[(int)promise]);
if (thread->promises & (1 << (int)Promise::p_error)) return err(ENOSYS);
kerrorln("Pledge violation in process %d! Has not pledged %s", process->id, promise_names[(int)promise]);
if (process->promises & (1 << (int)Promise::p_error)) return err(ENOSYS);
// Kill this thread with an uncatchable SIGABRT. For this, we reset the disposition of SIGABRT to the default
// (dump core). We could just kill the thread here and be done, but that discards anything on the current stack,
// which means that some destructors might not be called. Instead, leave the job to the next call of
// Thread::process_pending_signals().
thread->signal_handlers[SIGABRT - 1].sa_handler = SIG_DFL;
Scheduler::for_each_thread(process, [](Thread* thread) {
// Kill this thread with an uncatchable SIGABRT. For this, we reset the disposition of SIGABRT to the
// default (dump core). We could just kill the thread here and be done, but that discards anything on the
// current stack, which means that some destructors might not be called. Instead, leave the job to the next
// call of Thread::process_pending_signals().
thread->signal_handlers[SIGABRT - 1].sa_handler = SIG_DFL;
// Unblock SIGABRT.
thread->signal_mask.set(SIGABRT - 1, false);
// Unblock SIGABRT.
thread->signal_mask.set(SIGABRT - 1, false);
// If there are any other pending signals, they might be processed before SIGABRT. Avoid that by resetting the
// thread's pending signals.
thread->pending_signals.clear();
// If there are any other pending signals, they might be processed before SIGABRT. Avoid that by resetting
// the thread's pending signals.
thread->pending_signals.clear();
thread->send_signal(SIGABRT);
thread->send_signal(SIGABRT);
return true;
});
// This should never arrive to userspace, unless we're init and have ignored SIGABRT.
return err(ENOSYS);

View File

@ -14,6 +14,6 @@ enum class Promise
num_promises,
};
Result<void> check_pledge(Thread* thread, Promise promise);
Result<void> check_pledge(Process* process, Promise promise);
Result<int> parse_promises(u64 pledge);

View File

@ -17,7 +17,7 @@ Result<u64> ScriptLoader::load(AddressSpace* space)
{
u8 buf[256];
memset(buf, 0, sizeof(buf));
usize nread = TRY(m_inode->read(buf, 2, 255));
if (!nread) return err(ENOEXEC);
for (usize i = 0; i < nread; i++)
@ -35,11 +35,10 @@ Result<u64> ScriptLoader::load(AddressSpace* space)
if (!m_interpreter_cmdline.size()) return err(ENOEXEC);
auto& interpreter_path = m_interpreter_cmdline[0];
auto* current = Scheduler::current();
auto* current = Process::current();
auto interpreter = TRY(VFS::resolve_path(interpreter_path.chars(), current->auth, &current->extra_groups,
current->current_directory, true));
if (!VFS::can_execute(interpreter, current->auth, &current->extra_groups)) return err(EACCES);
auto interpreter = TRY(VFS::resolve_path(interpreter_path.chars(), current, current->current_directory, true));
if (!VFS::can_execute(interpreter, current)) return err(EACCES);
auto loader = TRY(BinaryFormat::create_loader(interpreter, m_recursion_level + 1));
u64 entry = TRY(loader->load(space));

View File

@ -20,7 +20,7 @@ void InitRD::initialize()
static Result<void> vfs_create_dir_if_not_exists(const char* path, mode_t mode)
{
auto rc = VFS::create_directory(path, mode & (mode_t)~S_IFMT, Credentials {}, nullptr);
auto rc = VFS::create_directory(path, mode & (mode_t)~S_IFMT, nullptr);
if (rc.has_error())
{
if (rc.error() == EEXIST) return {};
@ -37,8 +37,7 @@ Result<void> InitRD::populate_vfs()
{
if (entry.type == TarStream::EntryType::RegularFile)
{
auto file =
TRY(VFS::create_file(entry.name.chars(), entry.mode & (mode_t)~S_IFMT, Credentials {}, nullptr));
auto file = TRY(VFS::create_file(entry.name.chars(), entry.mode & (mode_t)~S_IFMT, nullptr));
file->write(entry.data(), 0, entry.size);
}
else if (entry.type == TarStream::EntryType::Directory)

View File

@ -8,7 +8,7 @@ Result<void> Pipe::create(SharedPtr<VFS::Inode>& rpipe, SharedPtr<VFS::Inode>& w
auto writer = TRY(make_shared<PipeWriter>());
auto reader = TRY(make_shared<PipeReader>());
auto auth = Scheduler::current()->auth;
auto auth = Process::current()->credentials();
pipe->m_writer = writer.ptr();
pipe->m_reader = reader.ptr();

View File

@ -17,9 +17,8 @@ namespace VFS
static constexpr int MAX_SYMLINKS = 8;
Result<SharedPtr<Inode>> resolve_path_impl(const char* path, Credentials auth, const Vector<gid_t>* extra_groups,
SharedPtr<Inode> current_inode, bool follow_last_symlink,
int& symlinks_followed)
Result<SharedPtr<Inode>> resolve_path_impl(const char* path, Process* process, SharedPtr<Inode> current_inode,
bool follow_last_symlink, int& symlinks_followed)
{
if (symlinks_followed >= MAX_SYMLINKS) return err(ELOOP);
@ -32,7 +31,7 @@ namespace VFS
const char* section;
while (parser.next().try_set_value(section))
{
if (!can_execute(current_inode, auth, extra_groups)) return err(EACCES);
if (!can_execute(current_inode, process)) return err(EACCES);
current_inode = TRY(current_inode->find(section));
if (current_inode->type() == VFS::InodeType::Symlink && (follow_last_symlink || parser.has_next()))
@ -46,8 +45,7 @@ namespace VFS
symlink_root = parent_inode;
symlinks_followed++;
current_inode =
TRY(resolve_path_impl(link.chars(), auth, extra_groups, symlink_root, true, symlinks_followed));
current_inode = TRY(resolve_path_impl(link.chars(), process, symlink_root, true, symlinks_followed));
symlinks_followed--;
}
@ -57,8 +55,8 @@ namespace VFS
return current_inode;
}
Result<SharedPtr<Inode>> resolve_path(const char* path, Credentials auth, const Vector<gid_t>* extra_groups,
SharedPtr<VFS::Inode> working_directory, bool follow_last_symlink)
Result<SharedPtr<Inode>> resolve_path(const char* path, Process* process, SharedPtr<VFS::Inode> working_directory,
bool follow_last_symlink)
{
SharedPtr<Inode> current_inode;
@ -68,17 +66,17 @@ namespace VFS
int symlinks_followed = 0;
return resolve_path_impl(path, auth, extra_groups, current_inode, follow_last_symlink, symlinks_followed);
return resolve_path_impl(path, process, current_inode, follow_last_symlink, symlinks_followed);
}
Result<SharedPtr<Inode>> create_directory(const char* path, mode_t mode, Credentials auth,
const Vector<gid_t>* extra_groups, SharedPtr<Inode> working_directory)
Result<SharedPtr<Inode>> create_directory(const char* path, mode_t mode, Process* process,
SharedPtr<Inode> working_directory)
{
auto parent_path = TRY(PathParser::dirname(path));
auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory));
auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory));
if (!can_write(parent_inode, auth, extra_groups)) return err(EACCES);
if (!can_write(parent_inode, process)) return err(EACCES);
auto child_name = TRY(PathParser::basename(path));
@ -87,14 +85,14 @@ namespace VFS
return parent_inode->create_subdirectory(child_name.chars(), mode);
}
Result<SharedPtr<Inode>> create_file(const char* path, mode_t mode, Credentials auth,
const Vector<gid_t>* extra_groups, SharedPtr<Inode> working_directory)
Result<SharedPtr<Inode>> create_file(const char* path, mode_t mode, Process* process,
SharedPtr<Inode> working_directory)
{
auto parent_path = TRY(PathParser::dirname(path));
auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory));
auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory));
if (!can_write(parent_inode, auth, extra_groups)) return err(EACCES);
if (!can_write(parent_inode, process)) return err(EACCES);
auto child_name = TRY(PathParser::basename(path));
@ -135,6 +133,81 @@ namespace VFS
return {};
}
// FIXME: Check all three permissions even if the UID or GID match.
// Reports whether `process` may execute `inode`. Exactly one permission class is consulted: owner if the
// effective UID matches the inode's UID, otherwise group if the effective GID or any of the process's extra
// groups matches the inode's GID, otherwise other. An effective UID of 0 is always granted access.
bool can_execute(SharedPtr<Inode> inode, Process* process)
{
    const auto& meta = inode->metadata();

    // With a null process the credentials stay as initialized here.
    // NOTE(review): this appears to make a null process pass the euid == 0 check below - confirm against
    // the definition of Credentials.
    Credentials creds { 0 };
    if (process) creds = process->credentials();

    if (creds.euid == 0) return true;

    if (creds.euid == meta.uid) return (meta.mode & S_IXUSR) != 0;

    bool in_group = (creds.egid == meta.gid);
    if (!in_group && process)
    {
        // The extra group list is only read while its lock guard is alive.
        auto extra = process->extra_groups.lock();
        for (gid_t candidate : *extra)
        {
            if (candidate == meta.gid)
            {
                in_group = true;
                break;
            }
        }
    }

    if (in_group) return (meta.mode & S_IXGRP) != 0;
    return (meta.mode & S_IXOTH) != 0;
}
// FIXME: Check all three permissions even if the UID or GID match.
// Reports whether `process` may write to `inode`. Exactly one permission class is consulted: owner if the
// effective UID matches the inode's UID, otherwise group if the effective GID or any of the process's extra
// groups matches the inode's GID, otherwise other. An effective UID of 0 is always granted access.
bool can_write(SharedPtr<Inode> inode, Process* process)
{
    const auto& meta = inode->metadata();

    // With a null process the credentials stay as initialized here.
    // NOTE(review): this appears to make a null process pass the euid == 0 check below - confirm against
    // the definition of Credentials.
    Credentials creds { 0 };
    if (process) creds = process->credentials();

    if (creds.euid == 0) return true;

    if (creds.euid == meta.uid) return (meta.mode & S_IWUSR) != 0;

    bool in_group = (creds.egid == meta.gid);
    if (!in_group && process)
    {
        // The extra group list is only read while its lock guard is alive.
        auto extra = process->extra_groups.lock();
        for (gid_t candidate : *extra)
        {
            if (candidate == meta.gid)
            {
                in_group = true;
                break;
            }
        }
    }

    if (in_group) return (meta.mode & S_IWGRP) != 0;
    return (meta.mode & S_IWOTH) != 0;
}
// FIXME: Check all three permissions even if the UID or GID match.
// Reports whether `process` may read `inode`. Exactly one permission class is consulted: owner if the
// effective UID matches the inode's UID, otherwise group if the effective GID or any of the process's extra
// groups matches the inode's GID, otherwise other. An effective UID of 0 is always granted access.
bool can_read(SharedPtr<Inode> inode, Process* process)
{
    const auto& meta = inode->metadata();

    // With a null process the credentials stay as initialized here.
    // NOTE(review): this appears to make a null process pass the euid == 0 check below - confirm against
    // the definition of Credentials.
    Credentials creds { 0 };
    if (process) creds = process->credentials();

    if (creds.euid == 0) return true;

    if (creds.euid == meta.uid) return (meta.mode & S_IRUSR) != 0;

    bool in_group = (creds.egid == meta.gid);
    if (!in_group && process)
    {
        // The extra group list is only read while its lock guard is alive.
        auto extra = process->extra_groups.lock();
        for (gid_t candidate : *extra)
        {
            if (candidate == meta.gid)
            {
                in_group = true;
                break;
            }
        }
    }

    if (in_group) return (meta.mode & S_IRGRP) != 0;
    return (meta.mode & S_IROTH) != 0;
}
// FIXME: Check all three permissions even if the UID or GID match.
bool can_execute(SharedPtr<Inode> inode, Credentials auth, const Vector<gid_t>* extra_groups)
{
@ -232,8 +305,7 @@ namespace VFS
auto new_root_parent = TRY(PathParser::dirname(new_root));
auto new_root_path = TRY(PathParser::basename(new_root));
auto new_root_parent_inode =
TRY(VFS::resolve_path(new_root_parent.chars(), Credentials {}, nullptr, working_directory));
auto new_root_parent_inode = TRY(VFS::resolve_path(new_root_parent.chars(), nullptr, working_directory));
auto new_root_inode = TRY(new_root_parent_inode->find(new_root_path.chars()));
if (new_root_inode->type() != VFS::InodeType::Directory) return err(ENOTDIR);
@ -245,7 +317,7 @@ namespace VFS
kdbgln("vfs: Pivoting root from / to %s, using %s as new root", put_old, new_root);
auto parent_inode = TRY(resolve_path(parent_path.chars(), Credentials {}, nullptr, working_directory));
auto parent_inode = TRY(resolve_path(parent_path.chars(), nullptr, working_directory));
auto inode = TRY(parent_inode->find(child.chars()));
if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR);
@ -265,8 +337,8 @@ namespace VFS
return {};
}
Result<void> mount(const char* path, SharedPtr<VFS::FileSystem> fs, Credentials auth,
const Vector<gid_t>* extra_groups, SharedPtr<VFS::Inode> working_directory)
Result<void> mount(const char* path, SharedPtr<VFS::FileSystem> fs, Process* process,
SharedPtr<VFS::Inode> working_directory)
{
auto parent_path = TRY(PathParser::dirname(path));
auto child = TRY(PathParser::basename(path));
@ -275,7 +347,7 @@ namespace VFS
kdbgln("vfs: Mounting filesystem on target %s", path);
#endif
auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory));
auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory));
auto inode = TRY(parent_inode->find(child.chars()));
if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR);
@ -290,8 +362,7 @@ namespace VFS
return {};
}
Result<void> umount(const char* path, Credentials auth, const Vector<gid_t>* extra_groups,
SharedPtr<VFS::Inode> working_directory)
Result<void> umount(const char* path, Process* process, SharedPtr<VFS::Inode> working_directory)
{
auto parent_path = TRY(PathParser::dirname(path));
auto child = TRY(PathParser::basename(path));
@ -300,7 +371,7 @@ namespace VFS
kinfoln("vfs: Unmounting filesystem on target %s", path);
auto parent_inode = TRY(resolve_path(parent_path.chars(), auth, extra_groups, working_directory));
auto parent_inode = TRY(resolve_path(parent_path.chars(), process, working_directory));
auto inode = TRY(parent_inode->find(child.chars()));
if (!inode->is_mountpoint()) return err(EINVAL);

View File

@ -7,6 +7,7 @@
#include <luna/StringView.h>
#include <sys/types.h>
struct Process;
struct Credentials;
namespace VFS
@ -319,20 +320,21 @@ namespace VFS
virtual ~DeviceInode() = default;
};
Result<SharedPtr<Inode>> resolve_path(const char* path, Credentials auth, const Vector<gid_t>* extra_groups,
Result<SharedPtr<Inode>> resolve_path(const char* path, Process* process,
SharedPtr<VFS::Inode> working_directory = {},
bool follow_last_symlink = true);
Result<SharedPtr<Inode>> create_directory(const char* path, mode_t mode, Credentials auth,
const Vector<gid_t>* extra_groups,
Result<SharedPtr<Inode>> create_directory(const char* path, mode_t mode, Process* process,
SharedPtr<VFS::Inode> working_directory = {});
Result<SharedPtr<Inode>> create_file(const char* path, mode_t mode, Credentials auth,
const Vector<gid_t>* extra_groups,
Result<SharedPtr<Inode>> create_file(const char* path, mode_t mode, Process* process,
SharedPtr<VFS::Inode> working_directory = {});
Result<void> validate_filename(StringView name);
bool can_execute(SharedPtr<Inode> inode, Process* process);
bool can_read(SharedPtr<Inode> inode, Process* process);
bool can_write(SharedPtr<Inode> inode, Process* process);
bool can_execute(SharedPtr<Inode> inode, Credentials auth, const Vector<gid_t>* extra_groups);
bool can_read(SharedPtr<Inode> inode, Credentials auth, const Vector<gid_t>* extra_groups);
bool can_write(SharedPtr<Inode> inode, Credentials auth, const Vector<gid_t>* extra_groups);
@ -346,9 +348,8 @@ namespace VFS
Result<void> mount_root(SharedPtr<VFS::FileSystem> fs);
Result<void> pivot_root(const char* new_root, const char* put_old, SharedPtr<VFS::Inode> working_directory);
Result<void> mount(const char* path, SharedPtr<VFS::FileSystem> fs, Credentials auth,
const Vector<gid_t>* extra_groups, SharedPtr<Inode> working_directory = {});
Result<void> mount(const char* path, SharedPtr<VFS::FileSystem> fs, Process* process,
SharedPtr<Inode> working_directory = {});
Result<void> umount(const char* path, Credentials auth, const Vector<gid_t>* extra_groups,
SharedPtr<Inode> working_directory = {});
Result<void> umount(const char* path, Process* process, SharedPtr<Inode> working_directory = {});
}

View File

@ -31,8 +31,9 @@ Result<SharedPtr<VFS::Inode>> MasterPTY::create_pair(int index)
slave->m_master = master.ptr();
slave->m_metadata.devid = luna_dev_makedev(DeviceRegistry::Terminal, index + 2);
slave->m_metadata.uid = Scheduler::current()->auth.euid;
slave->m_metadata.gid = Scheduler::current()->auth.egid;
auto credentials = Process::current()->credentials();
slave->m_metadata.uid = credentials.euid;
slave->m_metadata.gid = credentials.egid;
slave->m_metadata.mode = 0620;
slave->m_metadata.initialize_times();
@ -46,7 +47,7 @@ Result<void> MasterPTY::handle_background_process_group(bool can_succeed, int si
auto foreground_pgrp = m_foreground_process_group.value();
auto* current = Scheduler::current();
if (current->pgid == foreground_pgrp) return {};
if (current->process->pgid == foreground_pgrp) return {};
if ((current->signal_mask.get(signo - 1)) || (current->signal_handlers[signo - 1].sa_handler == SIG_IGN))
{
@ -112,8 +113,8 @@ Result<void> MasterPTY::handle_input(u8 key)
if (!(m_settings.c_lflag & NOFLSH)) m_current_line_buffer.clear();
if (m_foreground_process_group.has_value())
Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Thread* thread) {
thread->send_signal(SIGINT);
Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Process* p) {
p->send_signal(SIGINT);
return true;
});
@ -125,8 +126,8 @@ Result<void> MasterPTY::handle_input(u8 key)
if (!(m_settings.c_lflag & NOFLSH)) m_current_line_buffer.clear();
if (m_foreground_process_group.has_value())
Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Thread* thread) {
thread->send_signal(SIGQUIT);
Scheduler::for_each_in_process_group(*m_foreground_process_group, [](Process* p) {
p->send_signal(SIGQUIT);
return true;
});
@ -202,7 +203,7 @@ Result<usize> MasterPTY::write(const u8* buf, usize, usize length)
Result<u64> MasterPTY::ioctl(int request, void* arg)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_tty));
switch (request)

View File

@ -47,7 +47,7 @@ bool SlavePTY::will_block_if_read() const
Result<u64> SlavePTY::ioctl(int request, void* arg)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_tty));
if (!m_master) return err(EIO);
@ -69,9 +69,9 @@ Result<u64> SlavePTY::ioctl(int request, void* arg)
bool pgid_exists = false;
pid_t sid;
Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Thread* thread) {
Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Process* p) {
pgid_exists = true;
sid = thread->sid; // should be the same for all threads in the process group
sid = p->sid.load(); // should be the same for all threads in the process group
return false;
});
if (!pgid_exists) return err(EPERM);
@ -95,13 +95,13 @@ Result<u64> SlavePTY::ioctl(int request, void* arg)
if (this->m_master->m_session.has_value()) return err(EPERM);
if (!current->is_session_leader()) return err(EPERM);
Scheduler::for_each_in_session(current->sid, [this](Thread* thread) {
thread->controlling_terminal = this;
Scheduler::for_each_in_session(current->sid, [this](Process* p) {
p->controlling_terminal = this;
return true;
});
m_master->m_session = current->sid;
m_master->m_foreground_process_group = current->pgid;
m_master->m_session = current->sid.load();
m_master->m_foreground_process_group = current->pgid.load();
return 0;
}

View File

@ -10,7 +10,7 @@ TTYLink::TTYLink()
Result<SharedPtr<VFS::Inode>> TTYLink::open()
{
if (!Scheduler::current()->controlling_terminal) return err(ENXIO);
if (!Process::current()->controlling_terminal) return err(ENXIO);
return Scheduler::current()->controlling_terminal;
return Process::current()->controlling_terminal;
}

View File

@ -6,7 +6,7 @@
void Mutex::lock()
{
auto* current = Scheduler::current();
const pid_t desired = current->id;
const pid_t desired = current->tid;
check(desired > 0); // Why the hell would the idle thread be touching a mutex?
while (true)
@ -19,7 +19,7 @@ void Mutex::lock()
{
if (expected == desired)
{
kerrorln("DEADLOCK! KMutex::lock() recursively called by the same thread (%d)", current->id);
kerrorln("DEADLOCK! KMutex::lock() recursively called by the same thread (%d)", current->tid);
fail("Mutex deadlock detected");
}
@ -40,7 +40,7 @@ void Mutex::lock()
void Mutex::unlock()
{
auto* current = Scheduler::current();
pid_t expected = current->id;
pid_t expected = current->tid;
check(expected > 0); // Why the hell would the idle thread be touching a mutex?
m_spinlock.lock();
@ -48,7 +48,7 @@ void Mutex::unlock()
if (!m_thread.compare_exchange_strong(expected, 0))
{
kerrorln("KMutex::unlock() called on a lock already locked by another thread (%d, current is %d)", expected,
current->id);
current->tid);
fail("Mutex unlock by different thread");
}
@ -70,7 +70,7 @@ void Mutex::unlock()
bool Mutex::try_lock()
{
auto* current = Scheduler::current();
const pid_t desired = current->id;
const pid_t desired = current->tid;
check(desired > 0); // Why the hell would the idle thread be touching a mutex?
// Make sure only one thread is touching the mutex at the same time.
@ -83,7 +83,7 @@ bool Mutex::try_lock()
{
kwarnln("Deadlock avoided! KMutex::try_lock() failed because it was already locked by the same thread "
"(%d), this is not supposed to happen",
current->id);
current->tid);
CPU::print_stack_trace();
}

View File

@ -1,4 +1,5 @@
#pragma once
#include <luna/Action.h>
#include <luna/CircularQueue.h>
#include <luna/Spinlock.h>
#include <sys/types.h>
@ -18,6 +19,84 @@ class Mutex
Atomic<pid_t> m_thread;
};
// Bundles a value of type T with the Mutex guarding it. The value is private; callers reach it through the
// guard returned by lock() / try_lock(), or through the callback passed to with_lock().
template <typename T> class MutexLocked
{
    // Handle handed out by lock() and try_lock(). It is movable but not copyable, and its destructor unlocks
    // the owning MutexLocked's mutex unless the guard has been moved from.
    struct MutexLockedGuard
    {
        // Intentionally implicit: lock() and try_lock() build the guard with `return { *this };`.
        MutexLockedGuard(MutexLocked& owner) : m_value_ref(&owner)
        {
        }

        MutexLockedGuard(const MutexLockedGuard& other) = delete;

        // Takes over the other guard's reference and clears it, so only one guard performs the unlock.
        MutexLockedGuard(MutexLockedGuard&& other) : m_value_ref(other.m_value_ref)
        {
            other.m_value_ref = nullptr;
        }

        ~MutexLockedGuard()
        {
            // A moved-from guard no longer refers to anything and must not unlock.
            if (!m_value_ref) return;
            m_value_ref->m_lock.unlock();
        }

        // Returns the protected value; calling this on a moved-from guard trips the expect() below.
        T& ref()
        {
            expect(m_value_ref, "MutexLockedGuard::ref() called on a moved MutexLockedGuard");
            return m_value_ref->m_value;
        }

        // Assigns a new value to the protected object.
        void set(const T& other)
        {
            T& target = ref();
            target = other;
        }

        T* operator->()
        {
            T& target = ref();
            return &target;
        }

        T& operator*()
        {
            return ref();
        }

      private:
        MutexLocked* m_value_ref;
    };

  public:
    MutexLocked() : m_value()
    {
    }

    MutexLocked(T value) : m_value(move(value))
    {
    }

    // Locks the mutex and returns a guard that unlocks it again when destroyed.
    MutexLockedGuard lock()
    {
        m_lock.lock();
        return { *this };
    }

    // Returns a guard if the mutex's try_lock() succeeds, and an empty Option otherwise.
    Option<MutexLockedGuard> try_lock()
    {
        if (!m_lock.try_lock()) return {};
        return { *this };
    }

    // Invokes `callback` on the protected value with the mutex held for the duration of the call.
    void with_lock(Function<T&> callback)
    {
        m_lock.lock();
        callback(m_value);
        m_lock.unlock();
    }

  private:
    T m_value;
    Mutex m_lock;
};
class ScopedMutexLock
{
public:

View File

@ -65,8 +65,8 @@ void oom_thread()
mark_critical(BinaryFormat::init(), "Failed to register initial binary formats");
mark_critical(FSRegistry::init(), "Failed to register initial file systems");
auto init = mark_critical(VFS::resolve_path("/bin/preinit", Credentials {}, nullptr),
"Can't find init in the initial ramfs!");
auto init =
mark_critical(VFS::resolve_path("/bin/preinit", nullptr, nullptr), "Can't find init in the initial ramfs!");
auto init_thread = mark_critical(Scheduler::create_init_process(init, "/bin/preinit"),
"Failed to create PID 1 process for init");

View File

@ -56,14 +56,14 @@ Result<usize> UnixSocket::recv(u8* buf, usize length, int) const
return m_data.dequeue_data(buf, length);
}
static Result<void> bind_socket_to_fs(const char* path, Credentials auth, const Vector<gid_t>* extra_groups,
SharedPtr<VFS::Inode> working_directory, SharedPtr<UnixSocket> socket)
static Result<void> bind_socket_to_fs(const char* path, Process* process, SharedPtr<VFS::Inode> working_directory,
SharedPtr<UnixSocket> socket)
{
auto parent_path = TRY(PathParser::dirname(path));
auto parent_inode = TRY(VFS::resolve_path(parent_path.chars(), auth, extra_groups, working_directory));
auto parent_inode = TRY(VFS::resolve_path(parent_path.chars(), process, working_directory));
if (!VFS::can_write(parent_inode, auth, extra_groups)) return err(EACCES);
if (!VFS::can_write(parent_inode, process)) return err(EACCES);
auto child_name = TRY(PathParser::basename(path));
@ -89,14 +89,14 @@ Result<void> UnixSocket::bind(struct sockaddr* addr, socklen_t addrlen)
String path = TRY(String::from_string_view(
StringView::from_fixed_size_cstring(un_address->sun_path, addrlen - sizeof(sa_family_t))));
auto* current = Scheduler::current();
auto* current = Process::current();
m_metadata.mode = 0777 & ~current->umask;
m_metadata.uid = current->auth.euid;
m_metadata.gid = current->auth.egid;
auto auth = current->credentials();
m_metadata.uid = auth.euid;
m_metadata.gid = auth.egid;
auto rc = bind_socket_to_fs(path.chars(), current->auth, &current->extra_groups, current->current_directory,
SharedPtr<Socket> { this });
auto rc = bind_socket_to_fs(path.chars(), current, current->current_directory, SharedPtr<Socket> { this });
if (rc.has_error())
{
if (rc.error() == EEXIST) return err(EADDRINUSE);
@ -126,13 +126,13 @@ Result<void> UnixSocket::connect(Registers* regs, int flags, struct sockaddr* ad
String path = TRY(String::from_string_view(
StringView::from_fixed_size_cstring(un_address->sun_path, addrlen - sizeof(sa_family_t))));
auto* current = Scheduler::current();
auto* current = Process::current();
auto* thread = Scheduler::current();
auto inode =
TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory));
auto inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory));
if (inode->type() != VFS::InodeType::Socket)
return err(ENOTSOCK); // FIXME: POSIX doesn't say what error to return here?
if (!VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES);
if (!VFS::can_write(inode, current)) return err(EACCES);
auto socket = (SharedPtr<UnixSocket>)inode;
if (socket->m_state != State::Listening) return err(ECONNREFUSED);
@ -144,14 +144,14 @@ Result<void> UnixSocket::connect(Registers* regs, int flags, struct sockaddr* ad
while (1)
{
m_blocked_thread = current;
m_blocked_thread = thread;
kernel_wait_for_event();
m_blocked_thread = nullptr;
if (current->interrupted)
if (thread->interrupted)
{
if (current->will_ignore_pending_signal())
if (thread->will_ignore_pending_signal())
{
current->process_pending_signals(regs);
thread->process_pending_signals(regs);
continue;
}
return err(EINTR);

View File

@ -8,16 +8,16 @@ Result<u64> sys_chdir(Registers*, SyscallArgs args)
{
auto path = TRY(MemoryManager::strdup_from_user(args[0]));
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_rpath));
if (PathParser::is_absolute(path.view()))
{
SharedPtr<VFS::Inode> inode = TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups));
SharedPtr<VFS::Inode> inode = TRY(VFS::resolve_path(path.chars(), current));
if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR);
if (!VFS::can_execute(inode, current->auth, &current->extra_groups)) return err(EACCES);
if (!VFS::can_execute(inode, current)) return err(EACCES);
inode->add_handle();
if (current->current_directory) current->current_directory->remove_handle();
@ -29,11 +29,10 @@ Result<u64> sys_chdir(Registers*, SyscallArgs args)
}
else
{
SharedPtr<VFS::Inode> inode =
TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory));
SharedPtr<VFS::Inode> inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory));
if (inode->type() != VFS::InodeType::Directory) return err(ENOTDIR);
if (!VFS::can_execute(inode, current->auth, &current->extra_groups)) return err(EACCES);
if (!VFS::can_execute(inode, current)) return err(EACCES);
auto old_wdir = current->current_directory_path.view();
@ -54,7 +53,7 @@ Result<u64> sys_getcwd(Registers*, SyscallArgs args)
u8* buf = (u8*)args[0];
usize size = (usize)args[1];
Thread* current = Scheduler::current();
Process* current = Process::current();
StringView cwd = current->current_directory_path.view();
if (cwd.is_empty()) cwd = "/"_sv;

View File

@ -11,7 +11,7 @@ Result<u64> sys_clock_gettime(Registers*, SyscallArgs args)
clockid_t id = (clockid_t)args[0];
struct timespec* ts = (struct timespec*)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));

View File

@ -64,14 +64,14 @@ Result<u64> sys_execve(Registers* regs, SyscallArgs args)
if ((calculate_userspace_stack_size(argv) + calculate_userspace_stack_size(envp)) > MAX_ARGV_STACK_SIZE)
return err(E2BIG);
auto current = Scheduler::current();
auto current = Process::current();
auto thread = Scheduler::current();
TRY(check_pledge(current, Promise::p_exec));
auto inode =
TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory));
auto inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory));
if (!VFS::can_execute(inode, current->auth, &current->extra_groups)) return err(EACCES);
if (!VFS::can_execute(inode, current)) return err(EACCES);
#ifdef EXEC_DEBUG
kdbgln("exec: attempting to replace current image with %s", path.chars());
@ -88,7 +88,7 @@ Result<u64> sys_execve(Registers* regs, SyscallArgs args)
kdbgln("exec: created loader for binary format %s", loader->format().chars());
#endif
auto guard = make_scope_guard([current] { MMU::switch_page_directory(current->self_directory()); });
auto guard = make_scope_guard([thread] { MMU::switch_page_directory(thread->self_directory()); });
auto image = TRY(ThreadImage::try_load_from_binary(loader));
@ -108,6 +108,14 @@ Result<u64> sys_execve(Registers* regs, SyscallArgs args)
guard.deactivate();
// Terminate all other threads.
Scheduler::for_each_thread(current, [thread](Thread* t) {
if (t != thread) t->quit();
return true;
});
current->thread_count = 1;
current->real_timer.disarm();
current->virtual_timer.disarm();
current->profiling_timer.disarm();
@ -120,32 +128,38 @@ Result<u64> sys_execve(Registers* regs, SyscallArgs args)
}
}
for (int i = 0; i < FD_MAX; i++)
{
auto& descriptor = current->fd_table[i];
if (!descriptor.has_value()) continue;
if (descriptor->flags & O_CLOEXEC) { descriptor = {}; }
auto table = current->fd_table.lock();
for (int i = 0; i < FD_MAX; i++)
{
auto& descriptor = (*table)[i];
if (!descriptor.has_value()) continue;
if (descriptor->flags & O_CLOEXEC) { descriptor = {}; }
}
}
if (is_setuid) current->auth.euid = current->auth.suid = inode->metadata().uid;
if (is_setgid) current->auth.egid = current->auth.sgid = inode->metadata().gid;
{
auto auth = current->auth.lock();
if (is_setuid) (*auth).euid = (*auth).suid = inode->metadata().uid;
if (is_setgid) (*auth).egid = (*auth).sgid = inode->metadata().gid;
}
current->cmdline = cmdline.chars();
image->apply(current);
image->apply(thread);
MMU::switch_page_directory(current->self_directory());
MMU::switch_page_directory(thread->self_directory());
current->set_arguments(user_argc, user_argv, user_envc, user_envp);
thread->set_arguments(user_argc, user_argv, user_envc, user_envp);
current->promises = current->execpromises;
current->execpromises = -1;
memcpy(regs, &current->regs, sizeof(*regs));
memcpy(regs, &thread->regs, sizeof(*regs));
for (int i = 0; i < NSIG; i++)
{
current->signal_handlers[i] = { .sa_handler = SIG_DFL, .sa_mask = 0, .sa_flags = 0 };
thread->signal_handlers[i] = { .sa_handler = SIG_DFL, .sa_mask = 0, .sa_flags = 0 };
}
current->has_called_exec = true;
@ -157,57 +171,78 @@ Result<u64> sys_execve(Registers* regs, SyscallArgs args)
Result<u64> sys_fork(Registers* regs, SyscallArgs)
{
auto current = Scheduler::current();
auto current = Process::current();
auto current_thread = Scheduler::current();
TRY(check_pledge(current, Promise::p_proc));
auto extra_groups = TRY(current->extra_groups.shallow_copy());
Vector<gid_t> extra_groups = TRY(current->copy_groups());
Credentials auth = current->credentials();
auto guard = make_scope_guard([current] { MMU::switch_page_directory(current->self_directory()); });
auto guard = make_scope_guard([current_thread] { MMU::switch_page_directory(current_thread->self_directory()); });
memcpy(&current->regs, regs, sizeof(*regs));
memcpy(&current_thread->regs, regs, sizeof(*regs));
auto current_directory_path = TRY(current->current_directory_path.clone());
auto image = TRY(ThreadImage::clone_from_thread(current));
auto image = TRY(ThreadImage::clone_from_thread(current_thread));
auto thread = TRY(new_thread());
auto process = TRY(make<Process>());
Option<FileDescriptor> fds[FD_MAX];
{
auto table = current->fd_table.lock();
for (int i = 0; i < FD_MAX; i++) { fds[i] = (*table)[i]; }
}
thread->state = ThreadState::Runnable;
thread->is_kernel = false;
thread->fp_data.save();
thread->cmdline = current->cmdline;
thread->auth = current->auth;
thread->current_directory = current->current_directory;
thread->current_directory_path = move(current_directory_path);
thread->umask = current->umask;
thread->parent = current;
thread->promises = current->promises;
thread->execpromises = current->execpromises;
thread->controlling_terminal = current->controlling_terminal;
thread->pgid = current->pgid;
thread->sid = current->sid;
thread->extra_groups = move(extra_groups);
thread->cmdline = current_thread->cmdline;
thread->process = process;
thread->virtual_clock.set_resolution(1'000'000);
thread->profiling_clock.set_resolution(1'000'000);
process->thread_count = 1;
process->id = thread->tid;
process->current_directory = current->current_directory;
process->current_directory_path = move(current_directory_path);
process->umask = current->umask;
process->parent = current;
process->promises = current->promises;
process->execpromises = current->execpromises;
process->controlling_terminal = current->controlling_terminal;
process->pgid = current->pgid;
process->sid = current->sid;
process->extra_groups = move(extra_groups);
process->cmdline = current->cmdline;
for (int i = 0; i < FD_MAX; i++) { thread->fd_table[i] = current->fd_table[i]; }
process->virtual_clock.set_resolution(1'000'000);
process->profiling_clock.set_resolution(1'000'000);
{
auto credentials = process->auth.lock();
*credentials = auth;
}
{
auto table = process->fd_table.lock();
for (int i = 0; i < FD_MAX; i++) { (*table)[i] = fds[i]; }
}
image->apply(thread);
memcpy(&thread->regs, regs, sizeof(*regs));
for (int i = 0; i < NSIG; i++) thread->signal_handlers[i] = current->signal_handlers[i];
thread->signal_mask = current->signal_mask;
for (int i = 0; i < NSIG; i++) thread->signal_handlers[i] = current_thread->signal_handlers[i];
thread->signal_mask = current_thread->signal_mask;
thread->set_return(0);
Scheduler::add_thread(thread);
Scheduler::add_process(process);
#ifdef FORK_DEBUG
kdbgln("fork: thread %d forked into child %d", current->id, thread->id);
#endif
return thread->id;
return process->id;
}

View File

@ -5,7 +5,7 @@ Result<u64> sys_exit(Registers*, SyscallArgs args)
{
u8 status = (u8)args[0];
Thread* current = Scheduler::current();
Process* current = Process::current();
current->exit_and_signal_parent(status);
current->exit(status);
}

View File

@ -25,9 +25,9 @@ Result<u64> sys_read(Registers* regs, SyscallArgs args)
Thread* current = Scheduler::current();
TRY(check_pledge(current, Promise::p_stdio));
TRY(check_pledge(current->process, Promise::p_stdio));
auto& descriptor = *TRY(current->resolve_fd(fd));
auto& descriptor = *TRY(current->process->resolve_fd(fd));
if (!descriptor.is_readable()) return err(EBADF);
@ -66,7 +66,7 @@ Result<u64> sys_write(Registers*, SyscallArgs args)
if (!MemoryManager::validate_user_read(buf, size)) return err(EFAULT);
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -90,7 +90,7 @@ Result<u64> sys_lseek(Registers*, SyscallArgs args)
off_t offset = (long)args[1];
int whence = (int)args[2];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -122,7 +122,7 @@ Result<u64> sys_fcntl(Registers*, SyscallArgs args)
int fd = (int)args[0];
int cmd = (int)args[1];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -135,13 +135,13 @@ Result<u64> sys_fcntl(Registers*, SyscallArgs args)
case F_DUPFD: is_cloexec = false; [[fallthrough]];
case F_DUPFD_CLOEXEC: {
int arg = (int)args[2];
int new_fd = TRY(current->allocate_fd(arg));
int new_fd = TRY(current->allocate_fd(arg, descriptor));
current->fd_table[new_fd] = descriptor;
auto table = current->fd_table.lock();
if (is_cloexec) current->fd_table[new_fd]->flags |= O_CLOEXEC;
if (is_cloexec) (*table)[new_fd]->flags |= O_CLOEXEC;
else
current->fd_table[new_fd]->flags &= ~O_CLOEXEC;
(*table)[new_fd]->flags &= ~O_CLOEXEC;
return (u64)new_fd;
}
@ -174,7 +174,7 @@ Result<u64> sys_ioctl(Registers*, SyscallArgs args)
int request = (int)args[1];
void* arg = (void*)args[2];
Thread* current = Scheduler::current();
Process* current = Process::current();
auto& descriptor = *TRY(current->resolve_fd(fd));
return descriptor.inode()->ioctl(request, arg);
@ -184,7 +184,7 @@ Result<u64> sys_isatty(Registers*, SyscallArgs args)
{
int fd = (int)args[0];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
auto& descriptor = *TRY(current->resolve_fd(fd));
@ -196,7 +196,7 @@ Result<u64> sys_dup2(Registers*, SyscallArgs args)
int oldfd = (int)args[0];
int newfd = (int)args[1];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -206,8 +206,10 @@ Result<u64> sys_dup2(Registers*, SyscallArgs args)
if (newfd == oldfd) return (u64)newfd;
current->fd_table[newfd] = descriptor;
current->fd_table[newfd]->flags &= ~O_CLOEXEC;
auto table = current->fd_table.lock();
(*table)[newfd] = descriptor;
(*table)[newfd]->flags &= ~O_CLOEXEC;
return (u64)newfd;
}
@ -216,23 +218,23 @@ Result<u64> sys_pipe(Registers*, SyscallArgs args)
{
int* pfds = (int*)args[0];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
int rfd = TRY(current->allocate_fd(0));
int wfd = TRY(current->allocate_fd(rfd + 1));
if (!MemoryManager::copy_to_user_typed(pfds, &rfd)) return err(EFAULT);
if (!MemoryManager::copy_to_user_typed(pfds + 1, &wfd)) return err(EFAULT);
SharedPtr<VFS::Inode> rpipe;
SharedPtr<VFS::Inode> wpipe;
TRY(Pipe::create(rpipe, wpipe));
current->fd_table[rfd] = FileDescriptor { TRY(make_shared<OpenFileDescription>(rpipe, O_RDONLY)), 0 };
current->fd_table[wfd] = FileDescriptor { TRY(make_shared<OpenFileDescription>(wpipe, O_WRONLY)), 0 };
auto rd = FileDescriptor { TRY(make_shared<OpenFileDescription>(rpipe, O_RDONLY)), 0 };
auto wd = FileDescriptor { TRY(make_shared<OpenFileDescription>(wpipe, O_WRONLY)), 0 };
int rfd = TRY(current->allocate_fd(0, rd));
int wfd = TRY(current->allocate_fd(rfd + 1, wd));
if (!MemoryManager::copy_to_user_typed(pfds, &rfd)) return err(EFAULT);
if (!MemoryManager::copy_to_user_typed(pfds + 1, &wfd)) return err(EFAULT);
return 0;
}
@ -241,7 +243,7 @@ Result<u64> sys_umask(Registers*, SyscallArgs args)
{
mode_t new_umask = (mode_t)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -257,12 +259,12 @@ Result<u64> sys_truncate(Registers*, SyscallArgs args)
auto path = TRY(MemoryManager::strdup_from_user(args[0]));
size_t length = (size_t)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_wpath));
auto inode =
TRY(VFS::resolve_path(path.chars(), current->auth, &current->extra_groups, current->current_directory));
if (!VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES);
auto inode = TRY(VFS::resolve_path(path.chars(), current, current->current_directory));
if (!VFS::can_write(inode, current)) return err(EACCES);
TRY(inode->truncate(length));
@ -274,7 +276,7 @@ Result<u64> sys_ftruncate(Registers*, SyscallArgs args)
int fd = (int)args[0];
size_t length = (size_t)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
auto description = TRY(current->resolve_fd(fd))->description;
if (!(description->flags & O_WRONLY)) return err(EBADF);
@ -291,9 +293,12 @@ Result<u64> sys_utimensat(Registers*, SyscallArgs args)
const auto* times = (const struct timespec*)args[2];
int flags = (int)args[3];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_fattr));
auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW)));
auto* process = Process::current();
auto credentials = process->credentials();
auto inode = TRY(process->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW)));
struct timespec ktimes[2];
ktimes[0].tv_sec = ktimes[1].tv_sec = 0;
@ -309,11 +314,10 @@ Result<u64> sys_utimensat(Registers*, SyscallArgs args)
if (allow_write_access)
{
if (!VFS::can_write(inode, current->auth, &current->extra_groups) &&
current->auth.euid != inode->metadata().uid && current->auth.euid != 0)
if (!VFS::can_write(inode, current) && credentials.euid != inode->metadata().uid && credentials.euid != 0)
return err(EACCES);
}
else if (current->auth.euid != inode->metadata().uid && current->auth.euid != 0)
else if (credentials.euid != inode->metadata().uid && credentials.euid != 0)
return err(EPERM);
auto metadata = inode->metadata();

View File

@ -11,7 +11,7 @@ Result<u64> sys_getdents(Registers*, SyscallArgs args)
luna_dirent* ent = (luna_dirent*)args[1];
usize count = (usize)args[2];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
auto& descriptor = *TRY(current->resolve_fd(fd));

View File

@ -6,14 +6,14 @@
Result<u64> sys_getpid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
return current->id;
}
Result<u64> sys_getppid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
auto* parent = current->parent;
return parent ? parent->id : 0;
@ -21,48 +21,48 @@ Result<u64> sys_getppid(Registers*, SyscallArgs)
Result<u64> sys_getuid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
return current->auth.uid;
return current->credentials().uid;
}
Result<u64> sys_geteuid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
return current->auth.euid;
return current->credentials().euid;
}
Result<u64> sys_getgid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
return current->auth.gid;
return current->credentials().gid;
}
Result<u64> sys_getegid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
return current->auth.egid;
return current->credentials().egid;
}
Result<u64> sys_setuid(Registers*, SyscallArgs args)
{
u32 uid = (u32)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_id));
Credentials& auth = current->auth;
auto auth = current->auth.lock();
if (auth.euid == 0)
if (auth->euid == 0)
{
auth.uid = auth.euid = auth.suid = uid;
auth->uid = auth->euid = auth->suid = uid;
return 0;
}
if (uid != auth.uid && uid != auth.suid) return err(EPERM);
auth.euid = uid;
if (uid != auth->uid && uid != auth->suid) return err(EPERM);
auth->euid = uid;
return 0;
}
@ -71,12 +71,12 @@ Result<u64> sys_seteuid(Registers*, SyscallArgs args)
{
u32 uid = (u32)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_id));
Credentials& auth = current->auth;
auto auth = current->auth.lock();
if (auth.euid != 0 && uid != auth.uid && uid != auth.suid) return err(EPERM);
auth.euid = uid;
if (auth->euid != 0 && uid != auth->uid && uid != auth->suid) return err(EPERM);
auth->euid = uid;
return 0;
}
@ -85,18 +85,18 @@ Result<u64> sys_setgid(Registers*, SyscallArgs args)
{
u32 gid = (u32)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_id));
Credentials& auth = current->auth;
auto auth = current->auth.lock();
if (auth.euid == 0)
if (auth->euid == 0)
{
auth.gid = auth.egid = auth.sgid = gid;
auth->gid = auth->egid = auth->sgid = gid;
return 0;
}
if (gid != auth.gid && gid != auth.sgid) return err(EPERM);
auth.egid = gid;
if (gid != auth->gid && gid != auth->sgid) return err(EPERM);
auth->egid = gid;
return 0;
}
@ -105,12 +105,12 @@ Result<u64> sys_setegid(Registers*, SyscallArgs args)
{
u32 gid = (u32)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_id));
Credentials& auth = current->auth;
auto auth = current->auth.lock();
if (auth.euid != 0 && gid != auth.gid && gid != auth.sgid) return err(EPERM);
auth.egid = gid;
if (auth->euid != 0 && gid != auth->gid && gid != auth->sgid) return err(EPERM);
auth->egid = gid;
return 0;
}
@ -120,34 +120,34 @@ Result<u64> sys_setpgid(Registers*, SyscallArgs args)
pid_t pid = (pid_t)args[0];
pid_t pgid = (pid_t)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_proc));
if (pid == 0) pid = current->id;
if (pgid == 0) pgid = current->id;
if (pgid < 0) return err(EINVAL);
auto* thread = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
if (thread != current && thread->parent != current) return err(ESRCH);
auto* target = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
if (target != current && target->parent != current) return err(ESRCH);
if (thread->is_session_leader() || thread->sid != current->sid) return err(EPERM);
if (target->is_session_leader() || target->sid != current->sid) return err(EPERM);
if (thread->has_called_exec) return err(EPERM);
if (target->has_called_exec) return err(EACCES);
if (pgid != current->id)
{
bool pgid_exists = false;
pid_t sid;
Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Thread* t) {
Scheduler::for_each_in_process_group(pgid, [&pgid_exists, &sid](Process* p) {
pgid_exists = true;
sid = t->sid; // this should be the same for all threads in the process group
sid = p->sid; // this should be the same for all threads in the process group
return false;
});
if (!pgid_exists) return err(EPERM);
if (sid != thread->sid) return err(EPERM);
if (sid != target->sid) return err(EPERM);
}
thread->pgid = (u64)pgid;
target->pgid = (u64)pgid;
return 0;
}
@ -156,20 +156,20 @@ Result<u64> sys_getpgid(Registers*, SyscallArgs args)
{
pid_t pid = (pid_t)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
if (pid == 0) pid = current->id;
if (pid < 0) return err(EINVAL);
auto* thread = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
auto* process = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
return (u64)thread->pgid;
return (u64)process->pgid.load();
}
Result<u64> sys_setsid(Registers*, SyscallArgs)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_proc));
if (current->pgid == current->id) return err(EPERM);
@ -177,21 +177,21 @@ Result<u64> sys_setsid(Registers*, SyscallArgs)
current->sid = current->pgid = current->id;
current->controlling_terminal = {};
return current->sid;
return current->sid.load();
}
Result<u64> sys_getsid(Registers*, SyscallArgs args)
{
pid_t pid = (pid_t)args[0];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
if (pid == 0) pid = current->id;
auto* thread = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
auto* p = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
return thread->sid;
return p->sid.load();
}
Result<u64> sys_fchmodat(Registers*, SyscallArgs args)
@ -201,12 +201,13 @@ Result<u64> sys_fchmodat(Registers*, SyscallArgs args)
mode_t mode = (mode_t)args[2];
int flags = (int)args[3];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_wpath));
auto credentials = current->credentials();
auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW)));
if (current->auth.euid != 0 && current->auth.euid != inode->metadata().uid) return err(EPERM);
if (credentials.euid != 0 && credentials.euid != inode->metadata().uid) return err(EPERM);
auto metadata = inode->metadata();
metadata.mode = mode;
@ -223,12 +224,13 @@ Result<u64> sys_fchownat(Registers*, SyscallArgs args)
gid_t gid = (u32)args[3];
int flags = (int)args[4];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_chown));
auto credentials = current->credentials();
auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW)));
if (current->auth.euid != 0) return err(EPERM);
if (credentials.euid != 0) return err(EPERM);
auto metadata = inode->metadata();
if (uid != (uid_t)-1) metadata.uid = uid;
@ -243,20 +245,20 @@ Result<u64> sys_getgroups(Registers*, SyscallArgs args)
int ngroups = (int)args[0];
gid_t* grouplist = (gid_t*)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
if (!ngroups) return current->extra_groups.size();
auto groups = current->extra_groups.lock();
if (!ngroups) return groups->size();
if (ngroups < 0) return err(EINVAL);
if (static_cast<usize>(ngroups) < current->extra_groups.size()) return err(EINVAL);
if (static_cast<usize>(ngroups) < groups->size()) return err(EINVAL);
if (!MemoryManager::copy_to_user(grouplist, current->extra_groups.data(),
current->extra_groups.size() * sizeof(gid_t)))
return err(EFAULT);
if (!MemoryManager::copy_to_user(grouplist, groups->data(), groups->size() * sizeof(gid_t))) return err(EFAULT);
return current->extra_groups.size();
return groups->size();
}
Result<u64> sys_setgroups(Registers*, SyscallArgs args)
@ -264,26 +266,27 @@ Result<u64> sys_setgroups(Registers*, SyscallArgs args)
int ngroups = (int)args[0];
const gid_t* grouplist = (const gid_t*)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_id));
Credentials& auth = current->auth;
if (auth.euid != 0) return err(EPERM);
auto credentials = current->credentials();
if (credentials.euid != 0) return err(EPERM);
auto groups = current->extra_groups.lock();
if (!ngroups)
{
current->extra_groups.clear();
groups->clear();
return 0;
}
if (ngroups < 0 || ngroups > 32) return err(EINVAL);
TRY(current->extra_groups.try_reserve(ngroups));
TRY(groups->try_reserve(ngroups));
current->extra_groups.mutate([&](gid_t* list, usize) -> usize {
groups->mutate([&](gid_t* list, usize) -> usize {
if (MemoryManager::copy_from_user(grouplist, list, ngroups * sizeof(gid_t))) return ngroups;
return current->extra_groups.size();
return groups->size();
});
return 0;

View File

@ -12,7 +12,7 @@ Result<u64> sys_unlinkat(Registers*, SyscallArgs args)
auto path = TRY(MemoryManager::strdup_from_user(args[1]));
int flags = (int)args[2];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_cpath));
auto dirname = TRY(PathParser::dirname(path.view()));
@ -23,13 +23,14 @@ Result<u64> sys_unlinkat(Registers*, SyscallArgs args)
kinfoln("unlinkat: remove %s from directory %s, dirfd is %d", basename.chars(), dirname.chars(), dirfd);
auto inode = TRY(current->resolve_atfile(dirfd, dirname, false, false));
if (!VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES);
auto auth = current->credentials();
if (!VFS::can_write(inode, current)) return err(EACCES);
auto child = TRY(inode->find(basename.chars()));
if (flags == AT_REMOVEDIR && child->type() != VFS::InodeType::Directory) return err(ENOTDIR);
if (current->auth.euid != 0 && VFS::is_sticky(inode) && current->auth.euid != inode->metadata().uid &&
current->auth.euid != child->metadata().uid)
if (auth.euid != 0 && VFS::is_sticky(inode) && auth.euid != inode->metadata().uid &&
auth.euid != child->metadata().uid)
return err(EACCES);
TRY(inode->remove_entry(basename.chars()));
@ -45,14 +46,14 @@ Result<u64> sys_symlinkat(Registers*, SyscallArgs args)
if (target.is_empty()) return err(ENOENT);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_cpath));
auto parent = TRY(PathParser::dirname(linkpath.view()));
auto parent_inode = TRY(current->resolve_atfile(dirfd, parent, false, true));
if (!VFS::can_write(parent_inode, current->auth, &current->extra_groups)) return err(EACCES);
if (!VFS::can_write(parent_inode, current)) return err(EACCES);
auto child_name = TRY(PathParser::basename(linkpath.view()));
@ -60,8 +61,9 @@ Result<u64> sys_symlinkat(Registers*, SyscallArgs args)
auto inode = TRY(parent_inode->fs()->create_symlink_inode(target.view()));
auto metadata = inode->metadata();
metadata.uid = current->auth.euid;
metadata.gid = current->auth.egid;
auto auth = current->credentials();
metadata.uid = auth.euid;
metadata.gid = auth.egid;
TRY(inode->set_metadata(metadata));
TRY(parent_inode->add_entry(inode, child_name.chars()));
@ -75,7 +77,7 @@ Result<u64> sys_readlinkat(Registers*, SyscallArgs args)
char* buf = (char*)args[2];
usize bufsiz = (usize)args[3];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_rpath));
auto symlink = TRY(current->resolve_atfile(dirfd, path, true, false));
@ -101,7 +103,7 @@ Result<u64> sys_linkat(Registers*, SyscallArgs args)
auto newpath = TRY(MemoryManager::strdup_from_user(args[3]));
int flags = (int)args[4];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_cpath));
auto parent = TRY(PathParser::dirname(newpath.view()));
@ -115,7 +117,7 @@ Result<u64> sys_linkat(Registers*, SyscallArgs args)
if (target->fs() != parent_inode->fs()) return err(EXDEV);
if (!VFS::can_write(parent_inode, current->auth, &current->extra_groups)) return err(EACCES);
if (!VFS::can_write(parent_inode, current)) return err(EACCES);
auto child_name = TRY(PathParser::basename(newpath.view()));

View File

@ -6,7 +6,7 @@
Result<u64> sys_memstat(Registers*, SyscallArgs args)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
struct membuf buf;

View File

@ -10,14 +10,14 @@ Result<u64> sys_mkdir(Registers*, SyscallArgs args)
auto path = TRY(MemoryManager::strdup_from_user(args[0]));
mode_t mode = (mode_t)args[1];
Thread* current = Scheduler::current();
Process* current = Process::current();
auto credentials = current->credentials();
TRY(check_pledge(current, Promise::p_cpath));
auto inode = TRY(VFS::create_directory(path.chars(), mode & ~current->umask, current->auth, &current->extra_groups,
current->current_directory));
auto inode = TRY(VFS::create_directory(path.chars(), mode & ~current->umask, current, current->current_directory));
auto metadata = inode->metadata();
metadata.uid = current->auth.euid;
metadata.gid = current->auth.egid;
metadata.uid = credentials.euid;
metadata.gid = credentials.egid;
TRY(inode->set_metadata(metadata));
return 0;

View File

@ -20,7 +20,7 @@ Result<u64> sys_mmap(Registers*, SyscallArgs args)
if (params.flags < 0) return err(EINVAL);
Thread* current = Scheduler::current();
Process* current = Process::current();
if (params.prot & PROT_EXEC) TRY(check_pledge(current, Promise::p_prot_exec));
TRY(check_pledge(current, Promise::p_stdio));
@ -55,15 +55,15 @@ Result<u64> sys_mmap(Registers*, SyscallArgs args)
shmem->prot |= params.prot;
}
auto space = current->address_space.lock();
u64 address;
if (!params.addr)
address = TRY(current->address_space->alloc_region(pages, params.prot, params.flags, params.offset, shmid));
if (!params.addr) address = TRY((*space)->alloc_region(pages, params.prot, params.flags, params.offset, shmid));
else
{
// FIXME: We should be more flexible if MAP_FIXED was not specified.
address = align_down<ARCH_PAGE_SIZE>((u64)params.addr);
if (!TRY(current->address_space->test_and_alloc_region(address, pages, params.prot, params.flags, params.offset,
shmid)))
if (!TRY((*space)->test_and_alloc_region(address, pages, params.prot, params.flags, params.offset, shmid)))
return err(ENOMEM);
}
@ -94,10 +94,12 @@ Result<u64> sys_munmap(Registers*, SyscallArgs args)
if (size == 0) return err(EINVAL);
if (!is_aligned<ARCH_PAGE_SIZE>(address)) return err(EINVAL);
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
bool ok = TRY(current->address_space->free_region(address, ceil_div(size, ARCH_PAGE_SIZE)));
auto space = current->address_space.lock();
bool ok = TRY((*space)->free_region(address, ceil_div(size, ARCH_PAGE_SIZE)));
// POSIX says munmap should silently do nothing if the memory was not already mapped.
if (!ok) return 0;
@ -119,10 +121,12 @@ Result<u64> sys_msync(Registers*, SyscallArgs args)
if (!size) return 0;
if (!is_aligned<ARCH_PAGE_SIZE>(address)) return err(EINVAL);
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
TRY(current->address_space->sync_regions(address, ceil_div(size, ARCH_PAGE_SIZE)));
auto space = current->address_space.lock();
TRY((*space)->sync_regions(address, ceil_div(size, ARCH_PAGE_SIZE)));
return { 0 };
}

View File

@ -14,13 +14,12 @@ Result<u64> sys_mount(Registers*, SyscallArgs args)
auto fstype = TRY(MemoryManager::strdup_from_user(args[1]));
auto source = TRY(MemoryManager::strdup_from_user(args[2]));
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_mount));
if (current->auth.euid != 0) return err(EPERM);
if (current->credentials().euid != 0) return err(EPERM);
auto get_source = [current, &source]() -> Result<SharedPtr<Device>> {
auto inode =
TRY(VFS::resolve_path(source.chars(), current->auth, &current->extra_groups, current->current_directory));
auto inode = TRY(VFS::resolve_path(source.chars(), current, current->current_directory));
if (inode->type() != VFS::InodeType::BlockDevice) return err(ENOTBLK);
dev_t device_id = inode->metadata().devid;
return TRY(DeviceRegistry::fetch_special_device(luna_dev_major(device_id), luna_dev_minor(device_id)));
@ -41,7 +40,7 @@ Result<u64> sys_mount(Registers*, SyscallArgs args)
fs = TRY(factory(device));
}
TRY(VFS::mount(target.chars(), fs, current->auth, &current->extra_groups, current->current_directory));
TRY(VFS::mount(target.chars(), fs, current, current->current_directory));
return 0;
}
@ -50,11 +49,11 @@ Result<u64> sys_umount(Registers*, SyscallArgs args)
{
auto target = TRY(MemoryManager::strdup_from_user(args[0]));
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_mount));
if (current->auth.euid != 0) return err(EPERM);
if (current->credentials().euid != 0) return err(EPERM);
TRY(VFS::umount(target.chars(), current->auth, &current->extra_groups, current->current_directory));
TRY(VFS::umount(target.chars(), current, current->current_directory));
return 0;
}
@ -64,9 +63,9 @@ Result<u64> sys_pivot_root(Registers*, SyscallArgs args)
auto new_root = TRY(MemoryManager::strdup_from_user(args[0]));
auto put_old = TRY(MemoryManager::strdup_from_user(args[1]));
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_mount));
if (current->auth.euid != 0) return err(EPERM);
if (current->credentials().euid != 0) return err(EPERM);
TRY(VFS::pivot_root(new_root.chars(), put_old.chars(), current->current_directory));

View File

@ -17,7 +17,7 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
int flags = (int)args[2];
mode_t mode = (mode_t)args[3];
Thread* current = Scheduler::current();
Process* current = Process::current();
SharedPtr<VFS::Inode> inode;
@ -44,12 +44,12 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
{
if (error == ENOENT && (flags & O_CREAT) && !path.is_empty())
{
inode = TRY(VFS::create_file(path.chars(), mode & ~current->umask, current->auth, &current->extra_groups,
parent_inode));
auto auth = current->credentials();
inode = TRY(VFS::create_file(path.chars(), mode & ~current->umask, current, parent_inode));
// FIXME: Pass these in create_file().
auto metadata = inode->metadata();
metadata.uid = current->auth.euid;
metadata.gid = current->auth.egid;
metadata.uid = auth.euid;
metadata.gid = auth.egid;
TRY(inode->set_metadata(metadata));
}
else
@ -59,8 +59,8 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
return err(EEXIST);
else
{
if ((flags & O_RDONLY) && !VFS::can_read(inode, current->auth, &current->extra_groups)) return err(EACCES);
if ((flags & O_WRONLY) && !VFS::can_write(inode, current->auth, &current->extra_groups)) return err(EACCES);
if ((flags & O_RDONLY) && !VFS::can_read(inode, current)) return err(EACCES);
if ((flags & O_WRONLY) && !VFS::can_write(inode, current)) return err(EACCES);
}
inode = TRY(inode->open());
@ -72,11 +72,12 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
if (flags & O_TMPFILE)
{
auto auth = current->credentials();
if (inode->type() != VFS::InodeType::Directory) return err(EINVAL);
inode = TRY(inode->fs()->create_file_inode(mode & current->umask));
auto metadata = inode->metadata();
metadata.uid = current->auth.euid;
metadata.gid = current->auth.egid;
metadata.uid = auth.euid;
metadata.gid = auth.egid;
TRY(inode->set_metadata(metadata));
}
@ -91,15 +92,15 @@ Result<u64> sys_openat(Registers*, SyscallArgs args)
if ((flags & O_WRONLY) && (flags & O_TRUNC)) inode->truncate(0);
int fd = TRY(current->allocate_fd(0));
auto descriptor =
FileDescriptor { TRY(make_shared<OpenFileDescription>(inode, flags & FLAGS_TO_KEEP)), 0, flags & O_CLOEXEC };
int fd = TRY(current->allocate_fd(0, descriptor));
#ifdef OPEN_DEBUG
kdbgln("openat: opening file %s from dirfd %d, flags %d, mode %#o = fd %d", path.chars(), dirfd, flags, mode, fd);
#endif
current->fd_table[fd] =
FileDescriptor { TRY(make_shared<OpenFileDescription>(inode, flags & FLAGS_TO_KEEP)), 0, flags & O_CLOEXEC };
return (u64)fd;
}
@ -108,10 +109,11 @@ Result<u64> sys_close(Registers*, SyscallArgs args)
int fd = (int)args[0];
if (fd < 0 || fd >= FD_MAX) return err(EBADF);
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
Option<FileDescriptor>& descriptor = current->fd_table[fd];
auto table = current->fd_table.lock();
Option<FileDescriptor>& descriptor = (*table)[fd];
if (!descriptor.has_value()) return err(EBADF);

View File

@ -7,7 +7,7 @@ Result<u64> sys_pledge(Registers*, SyscallArgs args)
int promises = TRY(parse_promises(args[0]));
int execpromises = TRY(parse_promises(args[1]));
auto* current = Scheduler::current();
auto* current = Process::current();
if (promises >= 0)
{

View File

@ -18,7 +18,8 @@ Result<u64> sys_poll(Registers*, SyscallArgs args)
if (!MemoryManager::copy_from_user(fds, kfds, nfds * sizeof(pollfd))) return err(EFAULT);
auto* current = Scheduler::current();
auto* current = Process::current();
auto* thread = Scheduler::current();
TRY(check_pledge(current, Promise::p_stdio));
Vector<SharedPtr<VFS::Inode>> inodes;
@ -78,8 +79,8 @@ Result<u64> sys_poll(Registers*, SyscallArgs args)
if (!fds_with_events && (timeout > 0 || infinite))
{
kernel_sleep(10);
timeout -= (10 - (int)current->sleep_ticks_left);
if (current->interrupted)
timeout -= (10 - (int)thread->sleep_ticks_left);
if (thread->interrupted)
{
guard.deactivate();
free_impl(kfds);

View File

@ -15,28 +15,29 @@ Result<u64> sys_pstat(Registers*, SyscallArgs args)
pid_t pid = (pid_t)args[0];
struct process* ps = (struct process*)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_proc));
// If pid == -1, return the PID of the last spawned process.
if (pid == -1) return g_threads.expect_last()->id;
if (pid == -1) return g_processes.expect_last()->id;
auto* thread = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
auto* target = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
struct process proc;
proc.ps_pid = thread->id;
proc.ps_ppid = thread->parent ? thread->parent->id : 0;
proc.ps_uid = thread->auth.uid;
proc.ps_gid = thread->auth.gid;
proc.ps_euid = thread->auth.euid;
proc.ps_egid = thread->auth.egid;
proc.ps_state = (int)thread->state;
proc.ps_flags = thread->is_kernel ? PS_FLAG_KRNL : 0;
set_timespec(proc.ps_time, thread->user_ticks_self + thread->kernel_ticks_self);
set_timespec(proc.ps_ktime, thread->kernel_ticks_self);
set_timespec(proc.ps_utime, thread->kernel_ticks_children);
strlcpy(proc.ps_name, thread->cmdline.chars(), sizeof(proc.ps_name));
strlcpy(proc.ps_cwd, thread->current_directory_path.is_empty() ? "/" : thread->current_directory_path.chars(),
proc.ps_pid = target->id;
proc.ps_ppid = target->parent ? target->parent->id : 0;
auto auth = target->credentials();
proc.ps_uid = auth.uid;
proc.ps_gid = auth.gid;
proc.ps_euid = auth.euid;
proc.ps_egid = auth.egid;
proc.ps_state = 0; // FIXME: this is thread-specific now
proc.ps_flags = 0; // FIXME: add flags
set_timespec(proc.ps_time, target->user_ticks_self + target->kernel_ticks_self);
set_timespec(proc.ps_ktime, target->kernel_ticks_self);
set_timespec(proc.ps_utime, target->kernel_ticks_children);
strlcpy(proc.ps_name, target->cmdline.chars(), sizeof(proc.ps_name));
strlcpy(proc.ps_cwd, target->current_directory_path.is_empty() ? "/" : target->current_directory_path.chars(),
sizeof(proc.ps_cwd));
if (!MemoryManager::copy_to_user_typed(ps, &proc)) return err(EFAULT);

View File

@ -16,7 +16,7 @@ Result<u64> sys_getrusage(Registers*, SyscallArgs args)
int who = (int)args[0];
struct rusage* ru = (struct rusage*)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
struct rusage kru;
switch (who)

View File

@ -16,7 +16,7 @@ Result<u64> sys_setitimer(Registers*, SyscallArgs args)
const struct itimerval* new_timer = (const struct itimerval*)args[1];
struct itimerval* old_timer = (struct itimerval*)args[2];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -66,7 +66,7 @@ Result<u64> sys_setitimer(Registers*, SyscallArgs args)
if (!MemoryManager::copy_from_user_typed(new_timer, &itimer)) return err(EFAULT);
timer->signo = SIGALRM; // FIXME: Also use SIGVTALRM or SIGPROF for other timer types.
timer->thread = current;
timer->process = current;
if (itimer.it_interval.tv_sec != 0 || itimer.it_interval.tv_usec != 0)
{
@ -93,7 +93,7 @@ Result<u64> sys_timer_create(Registers*, SyscallArgs args)
struct sigevent* sevp = (struct sigevent*)args[1];
timer_t* timerid = (timer_t*)args[2];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -137,7 +137,7 @@ Result<u64> sys_timer_settime(Registers*, SyscallArgs args)
if (timerid < 0 || timerid >= MAX_POSIX_TIMERS) return err(EINVAL);
if (flags > 0) return err(ENOTSUP);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -169,7 +169,7 @@ Result<u64> sys_timer_settime(Registers*, SyscallArgs args)
Clock* clock = timer->designated_clock;
check(clock);
timer->thread = current;
timer->process = current;
if (itimer.it_interval.tv_sec != 0 || itimer.it_interval.tv_nsec != 0)
{
@ -195,7 +195,7 @@ Result<u64> sys_timer_gettime(Registers*, SyscallArgs args)
struct itimerspec* value = (struct itimerspec*)args[1];
if (timerid < 0 || timerid >= MAX_POSIX_TIMERS) return err(EINVAL);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));
@ -224,7 +224,7 @@ Result<u64> sys_timer_delete(Registers*, SyscallArgs args)
timer_t timerid = (timer_t)args[0];
if (timerid < 0 || timerid >= MAX_POSIX_TIMERS) return err(EINVAL);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_stdio));

View File

@ -19,7 +19,7 @@ Result<u64> sys_sigreturn(Registers* regs, SyscallArgs)
Result<u64> sys_sigaction(Registers*, SyscallArgs args)
{
auto* current = Scheduler::current();
TRY(check_pledge(current, Promise::p_stdio));
TRY(check_pledge(current->process, Promise::p_stdio));
int signo = (int)args[0];
const struct sigaction* act = (const struct sigaction*)args[1];
@ -48,15 +48,16 @@ Result<u64> sys_sigaction(Registers*, SyscallArgs args)
Result<u64> sys_kill(Registers*, SyscallArgs args)
{
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_proc));
pid_t pid = (pid_t)args[0];
int signo = (int)args[1];
auto send_signal = [&](Thread* target) -> Result<void> {
if (current->auth.euid != 0 && current->auth.euid != target->auth.euid &&
current->auth.egid != target->auth.egid)
auto send_signal = [&](Process* target) -> Result<void> {
auto this_auth = current->credentials();
auto other_auth = target->credentials();
if (this_auth.euid != 0 && this_auth.euid != other_auth.euid && this_auth.egid != other_auth.egid)
return err(EPERM);
if (target->is_kernel) return {};
if (signo == 0) return {};
@ -68,14 +69,14 @@ Result<u64> sys_kill(Registers*, SyscallArgs args)
if (pid > 0)
{
auto* target = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
auto* target = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
TRY(send_signal(target));
}
else if (pid == 0)
{
int errno = -1;
bool pgid_exists = false;
Scheduler::for_each_in_process_group(current->pgid, [&](Thread* target) {
Scheduler::for_each_in_process_group(current->pgid, [&](Process* target) {
pgid_exists = true;
auto rc = send_signal(target);
if (rc.has_error())
@ -90,17 +91,17 @@ Result<u64> sys_kill(Registers*, SyscallArgs args)
}
else if (pid == -1)
{
for (auto* thread : g_threads)
for (auto* process : g_processes)
{
// We ignore permission errors here.
if (thread != current && thread->id != 1) send_signal(thread);
if (process != current && process->id != 1) send_signal(process);
}
}
else if (pid < -1)
{
int errno = -1;
bool pgid_exists = false;
Scheduler::for_each_in_process_group(-pid, [&](Thread* target) {
Scheduler::for_each_in_process_group(-pid, [&](Process* target) {
pgid_exists = true;
auto rc = send_signal(target);
if (rc.has_error())
@ -120,7 +121,7 @@ Result<u64> sys_kill(Registers*, SyscallArgs args)
Result<u64> sys_sigprocmask(Registers*, SyscallArgs args)
{
auto* current = Scheduler::current();
TRY(check_pledge(current, Promise::p_stdio));
TRY(check_pledge(current->process, Promise::p_stdio));
int how = (int)args[0];
const sigset_t* set = (const sigset_t*)args[1];

View File

@ -15,14 +15,13 @@ Result<u64> sys_socket(Registers*, SyscallArgs args)
if (type != SOCK_STREAM) return err(EPROTOTYPE);
if (domain != AF_UNIX) return err(EAFNOSUPPORT);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_unix));
auto socket = TRY(make_shared<UnixSocket>());
int fd = TRY(current->allocate_fd(0));
current->fd_table[fd] = FileDescriptor { TRY(make_shared<OpenFileDescription>(socket, O_RDWR)), 0 };
auto descriptor = FileDescriptor { TRY(make_shared<OpenFileDescription>(socket, O_RDWR)), 0 };
int fd = TRY(current->allocate_fd(0, descriptor));
return fd;
}
@ -37,7 +36,7 @@ Result<u64> sys_bind(Registers*, SyscallArgs args)
if ((usize)addrlen > sizeof(storage)) return err(EINVAL);
if (!MemoryManager::copy_from_user(addr, &storage, addrlen)) return err(EFAULT);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_unix));
auto inode = TRY(current->resolve_fd(sockfd))->inode();
@ -61,7 +60,7 @@ Result<u64> sys_connect(Registers* regs, SyscallArgs args)
if ((usize)addrlen > sizeof(storage)) return err(EINVAL);
if (!MemoryManager::copy_from_user(addr, &storage, addrlen)) return err(EFAULT);
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_unix));
auto description = TRY(current->resolve_fd(sockfd))->description;
@ -80,7 +79,7 @@ Result<u64> sys_listen(Registers*, SyscallArgs args)
int sockfd = (int)args[0];
int backlog = (int)args[1];
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_unix));
auto inode = TRY(current->resolve_fd(sockfd))->inode();
@ -108,7 +107,7 @@ Result<u64> sys_accept(Registers* regs, SyscallArgs args)
if (!MemoryManager::copy_from_user_typed(addrlen, &len)) return err(EFAULT);
}
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_unix));
auto description = TRY(current->resolve_fd(sockfd))->description;
@ -121,8 +120,8 @@ Result<u64> sys_accept(Registers* regs, SyscallArgs args)
socklen_t client_len;
auto new_description = TRY(socket->accept(regs, description->flags, &client, &client_len));
int fd = TRY(current->allocate_fd(0));
current->fd_table[fd] = FileDescriptor { new_description, 0 };
auto descriptor = FileDescriptor { new_description, 0 };
int fd = TRY(current->allocate_fd(0, descriptor));
if (client_len < len) len = client_len;
if (addr)

View File

@ -33,7 +33,7 @@ Result<u64> sys_fstatat(Registers*, SyscallArgs args)
stat* st = (stat*)args[2];
int flags = (int)args[3];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_rpath));
auto inode = TRY(current->resolve_atfile(dirfd, path, flags & AT_EMPTY_PATH, !(flags & AT_SYMLINK_NOFOLLOW)));
@ -68,22 +68,24 @@ Result<u64> sys_faccessat(Registers*, SyscallArgs args)
Credentials creds;
auto* current = Scheduler::current();
auto* current = Process::current();
TRY(check_pledge(current, Promise::p_rpath));
auto auth = current->credentials();
if (flags & AT_EACCESS) creds = current->auth;
if (flags & AT_EACCESS) creds = auth;
else
{
auto auth = current->auth;
creds.euid = auth.uid;
creds.egid = auth.gid;
}
auto inode = TRY(current->resolve_atfile(dirfd, path, false, true));
if ((amode & R_OK) && !VFS::can_read(inode, creds, &current->extra_groups)) return err(EACCES);
if ((amode & W_OK) && !VFS::can_write(inode, creds, &current->extra_groups)) return err(EACCES);
if ((amode & X_OK) && !VFS::can_execute(inode, creds, &current->extra_groups)) return err(EACCES);
auto groups = current->extra_groups.lock();
if ((amode & R_OK) && !VFS::can_read(inode, creds, &groups.ref())) return err(EACCES);
if ((amode & W_OK) && !VFS::can_write(inode, creds, &groups.ref())) return err(EACCES);
if ((amode & X_OK) && !VFS::can_execute(inode, creds, &groups.ref())) return err(EACCES);
// Either all checks succeeded, or amode == F_OK and the file exists, since resolve_atfile() would have failed
// otherwise.

View File

@ -40,9 +40,9 @@ Result<u64> sys_sethostname(Registers*, SyscallArgs args)
const char* buf = (const char*)args[0];
usize length = (usize)args[1];
Thread* current = Scheduler::current();
Process* current = Process::current();
TRY(check_pledge(current, Promise::p_host));
if (current->auth.euid != 0) return err(EPERM);
if (current->credentials().euid != 0) return err(EPERM);
if (length >= _UTSNAME_LENGTH) return err(EINVAL);

View File

@ -8,12 +8,12 @@ Result<u64> sys_usleep(Registers*, SyscallArgs args)
useconds_t us = (useconds_t)args[0];
auto* current = Scheduler::current();
TRY(check_pledge(current, Promise::p_stdio));
TRY(check_pledge(current->process, Promise::p_stdio));
// FIXME: Allow usleep() to use a more precise resolution.
if (us < 1000) return 0;
kernel_sleep(us / 1000);
return current->sleep_ticks_left;
return current->sleep_ticks_left.load();
}

View File

@ -12,20 +12,20 @@ Result<u64> sys_waitpid(Registers* regs, SyscallArgs args)
int options = (int)args[2];
Thread* current = Scheduler::current();
TRY(check_pledge(current, Promise::p_stdio));
TRY(check_pledge(current->process, Promise::p_stdio));
Thread* thread;
Process* target;
if (pid > 0)
{
thread = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
target = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(pid), ESRCH));
if (thread->parent && thread->parent != current) return err(ECHILD);
if (target->parent && target->parent != current->process) return err(ECHILD);
if (options & WNOHANG) return err(EAGAIN);
wait_for_child:
if (thread->state != ThreadState::Exited) kernel_wait(pid);
if (target->alive()) kernel_wait(pid);
if (current->interrupted)
{
kdbgln("signal: waitpid interrupted by signal");
@ -37,13 +37,13 @@ Result<u64> sys_waitpid(Registers* regs, SyscallArgs args)
return err(EINTR);
}
check(thread->state == ThreadState::Exited);
check(!target->alive());
}
else if (pid == -1)
{
if (!Scheduler::has_children(current)) return err(ECHILD);
if (!Scheduler::has_children(current->process)) return err(ECHILD);
auto child = Scheduler::find_exited_child(current);
auto child = Scheduler::find_exited_child(current->process);
if (!child.has_value())
{
if (options & WNOHANG) return err(EAGAIN);
@ -61,27 +61,26 @@ Result<u64> sys_waitpid(Registers* regs, SyscallArgs args)
return err(EINTR);
}
check(current->child_being_waited_for.value_or(-1) != -1);
check(current->child_being_waited_for != -1);
thread = TRY(Result<Thread*>::from_option(Scheduler::find_by_pid(*current->child_being_waited_for), ESRCH));
check(thread->state == ThreadState::Exited);
target = TRY(Result<Process*>::from_option(Scheduler::find_by_pid(current->child_being_waited_for), ESRCH));
check(!target->alive());
}
else
thread = child.value();
target = child.value();
}
else // FIXME: Now that we have process groups, implement the cases where pid = 0 and pid < -1.
return err(ENOTSUP);
current->child_being_waited_for = {};
current->child_being_waited_for = -2;
int status = (int)thread->status;
u64 id = thread->id;
int status = (int)target->status;
u64 id = target->id;
current->user_ticks_children += thread->user_ticks_self + thread->user_ticks_children;
current->kernel_ticks_children += thread->kernel_ticks_self + thread->kernel_ticks_children;
current->process->user_ticks_children += target->user_ticks_self + target->user_ticks_children;
current->process->kernel_ticks_children += target->kernel_ticks_self + target->kernel_ticks_children;
thread->state = ThreadState::Dying;
Scheduler::signal_reap_thread();
Scheduler::reap_process(target);
if (status_ptr)
if (!MemoryManager::copy_to_user_typed(status_ptr, &status)) return err(EFAULT);

View File

@ -124,7 +124,7 @@ void Clock::tick()
{
this->m_timer_queue.remove(t);
t->active_clock = nullptr;
t->thread->send_signal(t->signo);
t->process->send_signal(t->signo);
if (t->restart) timers_to_be_restarted.append(t);
return true;
}

View File

@ -11,8 +11,9 @@
#include <luna/Stack.h>
static Thread g_idle;
static Process g_idle_process;
static Thread* g_current = nullptr;
static Thread* g_init = nullptr;
static Process* g_init = nullptr;
static Thread* g_reap = nullptr;
static Thread* g_oom = nullptr;
@ -22,15 +23,20 @@ namespace Scheduler
{
void init()
{
g_idle.id = 0;
g_idle.tid = 0;
g_idle.init_regs_kernel();
g_idle.set_ip((u64)CPU::idle_loop);
g_idle.state = ThreadState::Idle;
g_idle.is_kernel = true;
g_idle.parent = nullptr;
g_idle.process = &g_idle_process;
g_idle.cmdline = "[idle]";
g_idle.active_directory = nullptr;
g_idle_process.id = 0;
g_idle_process.parent = nullptr;
g_idle_process.thread_count = 1;
g_idle_process.is_kernel = true;
g_idle.ticks_left = 1;
// Map some stack for the idle task
@ -42,7 +48,7 @@ namespace Scheduler
g_idle.stack = idle_stack;
kinfoln("Created idle thread: id %d with ip %#lx and sp %#lx", g_idle.id, g_idle.ip(), g_idle.sp());
kinfoln("Created idle thread: id %d with ip %#lx and sp %#lx", g_idle_process.id, g_idle.ip(), g_idle.sp());
g_current = &g_idle;
}
@ -57,7 +63,7 @@ namespace Scheduler
return &g_idle;
}
Thread* init_thread()
Process* init_process()
{
return g_init;
}
@ -88,30 +94,37 @@ namespace Scheduler
// If anything fails, make sure to clean up.
auto guard = make_scope_guard([&] { delete thread; });
Process* process = TRY(make<Process>());
auto guard2 = make_scope_guard([&] { delete process; });
const u64 thread_stack_vm = TRY(MemoryManager::alloc_for_kernel(4, MMU::NoExecute | MMU::ReadWrite));
guard.deactivate();
guard2.deactivate();
const Stack thread_stack { thread_stack_vm, ARCH_PAGE_SIZE * 4 };
thread->set_sp(thread_stack.top());
thread->stack = thread_stack;
thread->cmdline = name;
thread->is_kernel = true;
thread->active_directory = MMU::kernel_page_directory();
thread->virtual_clock.set_resolution(1'000'000);
thread->profiling_clock.set_resolution(1'000'000);
thread->process = process;
thread->auth = Credentials { .uid = 0, .euid = 0, .suid = 0, .gid = 0, .egid = 0, .sgid = 0 };
process->id = thread->tid;
process->parent = nullptr;
process->thread_count = 1;
process->virtual_clock.set_resolution(1'000'000);
process->profiling_clock.set_resolution(1'000'000);
process->is_kernel = true;
g_threads.append(thread);
g_processes.append(process);
thread->state = ThreadState::Runnable;
kinfoln("Created kernel thread: id %d with ip %#lx and sp %#lx", thread->id, thread->ip(), thread->sp());
kinfoln("Created kernel thread: id %d with ip %#lx and sp %#lx", process->id, thread->ip(), thread->sp());
return thread;
}
@ -149,14 +162,16 @@ namespace Scheduler
check(!g_init);
Thread* const thread = TRY(make<Thread>());
Process* const process = TRY(make<Process>());
thread->state = ThreadState::None;
thread->is_kernel = false;
thread->id = 1;
thread->pgid = 1;
thread->tid = 1;
thread->cmdline = name;
thread->auth = Credentials { .uid = 0, .euid = 0, .suid = 0, .gid = 0, .egid = 0, .sgid = 0 };
thread->extra_groups = {};
thread->process = process;
process->id = 1;
process->pgid = 1;
process->thread_count = 1;
Vector<String> args;
auto name_string = TRY(String::from_cstring(name));
@ -164,7 +179,10 @@ namespace Scheduler
Vector<String> env;
auto guard = make_scope_guard([&] { delete thread; });
auto guard = make_scope_guard([&] {
delete thread;
delete process;
});
// Contrary to other programs, which use BinaryFormat::create_loader(), init must be a native executable.
auto loader = TRY(ELFLoader::create(inode, nullptr, 0));
@ -188,11 +206,12 @@ namespace Scheduler
thread->signal_handlers[i] = { .sa_handler = SIG_DFL, .sa_mask = 0, .sa_flags = 0 };
}
kinfoln("Created userspace thread: id %d with ip %#.16lx and sp %#.16lx (ksp %#lx)", thread->id, thread->ip(),
kinfoln("Created userspace thread: id %d with ip %#.16lx and sp %#.16lx (ksp %#lx)", process->id, thread->ip(),
thread->sp(), thread->kernel_stack.top());
g_threads.append(thread);
g_init = thread;
g_processes.append(process);
g_init = process;
return thread;
}
@ -202,6 +221,25 @@ namespace Scheduler
g_threads.append(thread);
}
// Registers a process with the scheduler by appending it to the global process list (g_processes).
void add_process(Process* process)
{
g_processes.append(process);
}
// Tears down a process object: disarms its real, virtual and profiling timers, disarms every
// POSIX timer slot that currently holds a timer, and finally deletes the Process itself.
// The pointer must not be used after this call returns.
// NOTE(review): nothing in this function unlinks the process from g_processes; presumably that
// happens elsewhere (or on destruction) - confirm, otherwise the list keeps a dangling entry.
void reap_process(Process* process)
{
    // FIXME: Shouldn't all this be done when the timers' destructors are called?
    process->real_timer.disarm();
    process->virtual_timer.disarm();
    process->profiling_timer.disarm();

    for (int slot = 0; slot < MAX_POSIX_TIMERS; slot++)
    {
        auto& timer = process->posix_timers[slot];
        // Empty slots have no timer to disarm.
        if (!timer.has_value()) continue;
        timer->disarm();
    }

    delete process;
}
void reap_thread(Thread* thread)
{
CPU::disable_interrupts();
@ -221,15 +259,6 @@ namespace Scheduler
MemoryManager::unmap_owned_and_free_vm(stack.bottom(), stack.bytes() / ARCH_PAGE_SIZE).release_value();
}
// FIXME: Shouldn't all this be done when the timers' destructors are called?
thread->real_timer.disarm();
thread->virtual_timer.disarm();
thread->profiling_timer.disarm();
for (int i = 0; i < MAX_POSIX_TIMERS; i++)
{
if (thread->posix_timers[i].has_value()) thread->posix_timers[i]->disarm();
}
delete thread;
CPU::enable_interrupts();
@ -307,14 +336,19 @@ namespace Scheduler
{
CPU::disable_interrupts();
if (is_in_kernel(regs)) g_current->kernel_ticks_self++;
if (is_in_kernel(regs))
{
g_current->process->kernel_ticks_self++;
g_current->kernel_ticks_self++;
}
else
{
g_current->virtual_clock.tick();
g_current->process->virtual_clock.tick();
g_current->process->user_ticks_self++;
g_current->user_ticks_self++;
}
g_current->profiling_clock.tick();
g_current->process->profiling_clock.tick();
g_current->ticks_left--;
@ -344,21 +378,21 @@ namespace Scheduler
return result;
}
Option<Thread*> find_by_pid(pid_t pid)
Option<Process*> find_by_pid(pid_t pid)
{
for (auto* const thread : g_threads)
for (auto* const process : g_processes)
{
if (thread->id == pid && thread->state != ThreadState::Dying) return thread;
if (process->id == pid) return process;
}
return {};
}
bool has_children(Thread* thread)
bool has_children(Process* process)
{
bool result { false };
for_each_child(thread, [&](Thread*) {
for_each_child(process, [&](Process*) {
result = true;
return false;
});
@ -366,12 +400,12 @@ namespace Scheduler
return result;
}
Option<Thread*> find_exited_child(Thread* thread)
Option<Process*> find_exited_child(Process* process)
{
Option<Thread*> result;
Option<Process*> result;
for_each_child(thread, [&](Thread* child) {
if (!result.has_value() && child->state == ThreadState::Exited)
for_each_child(process, [&](Process* child) {
if (!result.has_value() && !child->alive())
{
result = child;
return false;
@ -387,16 +421,23 @@ namespace Scheduler
CPU::disable_interrupts();
kdbgln("--- BEGIN SCHEDULER DUMP ---");
kdbgln("current at %p, id = %d", g_current, g_current->id);
kdbgln("Current thread at %p, tid = %d", g_current, g_current->tid);
kdbgln("Current process at %p, pid = %d", g_current->process, g_current->process->id);
for (const auto* thread : g_threads)
{
kdbgln("%p %c [%-20s] %4d, parent = (%-18p,%d), state = %d, ticks: (k:%04zu,u:%04zu), status = "
"%d, cwd = %s",
thread, thread->is_kernel ? 'k' : 'u', thread->cmdline.chars(), thread->id, thread->parent,
thread->parent ? thread->parent->id : 0, (int)thread->state, thread->kernel_ticks_self,
thread->user_ticks_self, thread->status,
thread->current_directory_path.is_empty() ? "/" : thread->current_directory_path.chars());
kdbgln("Thread %p (belongs to pid %4d) %c [%-20s] %4d, state = %d", thread, thread->process->id,
thread->is_kernel ? 'k' : 'u', thread->cmdline.chars(), thread->tid, (int)thread->state);
}
for (const auto* process : g_processes)
{
kdbgln("Process %p (%zu threads) %4d, parent = (%-18p,%d), cwd = %s, ticks: (k:%04zu,u:%04zu), "
"status = %d",
process, process->thread_count.load(), process->id, process->parent,
process->parent ? process->parent->id : 0,
process->current_directory_path.is_empty() ? "/" : process->current_directory_path.chars(),
process->kernel_ticks_self.load(), process->user_ticks_self.load(), process->status);
}
kdbgln("--- END SCHEDULER DUMP ---");

View File

@ -8,7 +8,7 @@ namespace Scheduler
Thread* current();
Thread* idle();
Thread* init_thread();
Process* init_process();
void set_reap_thread(Thread*);
void signal_reap_thread();
@ -23,10 +23,12 @@ namespace Scheduler
Result<Thread*> create_init_process(SharedPtr<VFS::Inode> inode, const char* name);
void add_thread(Thread* thread);
void add_process(Process* process);
Thread* pick_task();
void reap_thread(Thread* thread);
void reap_process(Process* thread);
void switch_task(Registers* regs);
@ -34,13 +36,26 @@ namespace Scheduler
LinkedList<Thread> check_for_dying_threads();
Option<Thread*> find_by_pid(pid_t pid);
Option<Process*> find_by_pid(pid_t pid);
template <typename Callback> void for_each_child(Thread* thread, Callback callback)
template <typename Callback> void for_each_child(Process* process, Callback callback)
{
for (Thread* current = thread; current; current = g_threads.next(current).value_or(nullptr))
for (Process* current = process; current; current = g_processes.next(current).value_or(nullptr))
{
if (current->parent == thread)
if (current->parent == process)
{
bool should_continue = callback(current);
if (!should_continue) return;
}
}
}
template <typename Callback> void for_each_thread(Process* process, Callback callback)
{
for (Thread* current = g_threads.first().value_or(nullptr); current;
current = g_threads.next(current).value_or(nullptr))
{
if (current->process == process)
{
bool should_continue = callback(current);
if (!should_continue) return;
@ -50,8 +65,8 @@ namespace Scheduler
template <typename Callback> void for_each_in_process_group(pid_t group, Callback callback)
{
for (Thread* current = g_threads.first().value_or(nullptr); current;
current = g_threads.next(current).value_or(nullptr))
for (Process* current = g_processes.first().value_or(nullptr); current;
current = g_processes.next(current).value_or(nullptr))
{
if (current->pgid == group)
{
@ -63,8 +78,8 @@ namespace Scheduler
template <typename Callback> void for_each_in_session(pid_t sid, Callback callback)
{
for (Thread* current = g_threads.first().value_or(nullptr); current;
current = g_threads.next(current).value_or(nullptr))
for (Process* current = g_processes.first().value_or(nullptr); current;
current = g_processes.next(current).value_or(nullptr))
{
if (current->sid == sid)
{
@ -76,9 +91,9 @@ namespace Scheduler
void dump_state();
bool has_children(Thread* thread);
bool has_children(Process* thread);
Option<Thread*> find_exited_child(Thread* thread);
Option<Process*> find_exited_child(Process* thread);
}
extern "C" void kernel_yield();

View File

@ -15,6 +15,7 @@
static Atomic<pid_t> g_next_id;
LinkedList<Thread> g_threads;
LinkedList<Process> g_processes;
void Thread::init()
{
@ -25,7 +26,7 @@ Result<Thread*> new_thread()
{
Thread* const thread = TRY(make<Thread>());
thread->id = g_next_id++;
thread->tid = g_next_id++;
return thread;
}
@ -35,31 +36,49 @@ pid_t next_thread_id()
return g_next_id.load();
}
Result<int> Thread::allocate_fd(int min)
Result<int> Process::allocate_fd(int min, FileDescriptor& descriptor)
{
if (min < 0 || min >= FD_MAX) return err(EINVAL);
auto table = fd_table.lock();
for (int i = min; i < FD_MAX; i++)
{
// FIXME: Possible race condition if multiple threads share a FileDescriptorTable? Let's not worry about it for
// now, we're still a long way away from reaching that point.
if (!fd_table[i].has_value()) { return i; }
if (!(*table)[i].has_value())
{
(*table)[i] = descriptor;
return i;
}
}
return err(EMFILE);
}
Result<FileDescriptor*> Thread::resolve_fd(int fd)
Result<FileDescriptor*> Process::resolve_fd(int fd)
{
if (fd < 0 || fd >= FD_MAX) return err(EBADF);
Option<FileDescriptor>& maybe_descriptor = fd_table[fd];
auto table = fd_table.lock();
Option<FileDescriptor>& maybe_descriptor = (*table)[fd];
if (!maybe_descriptor.has_value()) return err(EBADF);
return maybe_descriptor.value_ptr();
}
Result<int> Thread::allocate_timerid()
// Returns a copy of this process's credentials. The copy is made from the value obtained
// through auth.lock(), so callers get a snapshot rather than a reference into the process.
Credentials Process::credentials()
{
    auto locked_auth = auth.lock();
    Credentials snapshot = *locked_auth;
    return snapshot;
}
// Returns a copy of this process's extra group list, produced by calling shallow_copy() on the
// value obtained through extra_groups.lock(). Whatever shallow_copy() returns (including an
// error) is passed straight back to the caller.
Result<Vector<gid_t>> Process::copy_groups()
{
    auto locked_groups = extra_groups.lock();

    return locked_groups->shallow_copy();
}
Result<int> Process::allocate_timerid()
{
ScopedMutexLock lock(posix_timer_mutex);
@ -75,28 +94,25 @@ Result<int> Thread::allocate_timerid()
return err(EMFILE);
}
Result<Timer*> Thread::resolve_timerid(int tid)
Result<Timer*> Process::resolve_timerid(int _tid)
{
if (tid < 0 || tid >= MAX_POSIX_TIMERS) return err(EBADF);
if (_tid < 0 || _tid >= MAX_POSIX_TIMERS) return err(EBADF);
Option<Timer>& maybe_timer = posix_timers[tid];
Option<Timer>& maybe_timer = posix_timers[_tid];
if (!maybe_timer.has_value()) return err(EINVAL);
return maybe_timer.value_ptr();
}
Result<SharedPtr<VFS::Inode>> Thread::resolve_atfile(int dirfd, const String& path, bool allow_empty_path,
bool follow_last_symlink, SharedPtr<VFS::Inode>* parent_inode)
Result<SharedPtr<VFS::Inode>> Process::resolve_atfile(int dirfd, const String& path, bool allow_empty_path,
bool follow_last_symlink, SharedPtr<VFS::Inode>* parent_inode)
{
if (parent_inode) *parent_inode = this->current_directory;
if (PathParser::is_absolute(path.view()))
return VFS::resolve_path(path.chars(), this->auth, &this->extra_groups, {}, follow_last_symlink);
if (PathParser::is_absolute(path.view())) return VFS::resolve_path(path.chars(), this, {}, follow_last_symlink);
if (dirfd == AT_FDCWD)
return VFS::resolve_path(path.chars(), this->auth, &this->extra_groups, this->current_directory,
follow_last_symlink);
if (dirfd == AT_FDCWD) return VFS::resolve_path(path.chars(), this, this->current_directory, follow_last_symlink);
auto descriptor = TRY(resolve_fd(dirfd));
@ -104,60 +120,91 @@ Result<SharedPtr<VFS::Inode>> Thread::resolve_atfile(int dirfd, const String& pa
if (path.is_empty() && allow_empty_path) return descriptor->inode();
return VFS::resolve_path(path.chars(), this->auth, &this->extra_groups, descriptor->inode(), follow_last_symlink);
return VFS::resolve_path(path.chars(), this, descriptor->inode(), follow_last_symlink);
}
[[noreturn]] void Thread::exit_and_signal_parent(int _status)
[[noreturn]] void Process::exit(int _status)
{
check(!is_kernel);
check(this == Process::current()); // Process::exit() should only be called by the process itself.
#ifndef MOON_ENABLE_TESTING_FEATURES
if (this->id == 1) fail("the init process exited");
if (id == 1) fail("the init process exited");
#else
if (this->id == 1) CPU::magic_exit(_status);
if (id == 1) CPU::magic_exit(_status);
#endif
Scheduler::for_each_child(this, [](Thread* child) {
child->parent = Scheduler::init_thread();
Scheduler::for_each_thread(this, [](Thread* thread) {
thread->quit();
return true;
});
thread_count = 0;
status = _status;
Scheduler::for_each_child(this, [](Process* child) {
child->parent = Scheduler::init_process();
return true;
});
if (is_session_leader())
{
kinfoln("thread %d is exiting as a session leader, sending signals to session", id);
kinfoln("process %d is exiting as a session leader, sending signals to session", id);
// FIXME: Send SIGHUP only to the foreground process group if the session has a controlling terminal.
Scheduler::for_each_in_session(sid, [this](Thread* thread) {
if (thread == this) return true;
thread->sid = 0;
thread->controlling_terminal = {};
thread->send_signal(SIGHUP);
kinfoln("reparenting and sending SIGHUP to %d", thread->id);
Scheduler::for_each_in_session(sid, [this](Process* p) {
if (p == this) return true;
p->sid = 0;
p->controlling_terminal = {};
p->send_signal(SIGHUP);
kinfoln("reparenting and sending SIGHUP to %d", p->id);
return true;
});
}
if (parent)
{
if (parent->state == ThreadState::Waiting)
{
auto child = *parent->child_being_waited_for;
if (child == -1 || child == id)
Scheduler::for_each_thread(parent, [&](Thread* t) {
if (t->state == ThreadState::Waiting)
{
parent->child_being_waited_for = id;
parent->wake_up();
pid_t expected = -1;
if (t->child_being_waited_for.compare_exchange_strong(expected, id))
{
t->wake_up();
return false;
}
expected = id;
if (t->child_being_waited_for.compare_exchange_strong(expected, id))
{
t->wake_up();
return false;
}
}
}
while (parent->pending_signals.get(SIGCHLD - 1)) kernel_yield();
return true;
});
parent->send_signal(SIGCHLD);
}
state = ThreadState::Exited;
status = _status;
kernel_yield();
unreachable();
}
// Marks this thread as Dying. Only the state field is changed here; no other bookkeeping
// (thread counts, yielding) is done by this function.
void Thread::quit()
{
state = ThreadState::Dying;
}
// Terminates this thread: marks it Dying via quit(), decrements the owning process's thread
// count and, if the count is then zero, exits the whole process with status 0.
// Process::exit() is declared [[noreturn]], so in that case nothing below it runs.
// Otherwise, when `yield` is true, kernel_yield() is called before returning.
void Thread::exit(bool yield)
{
    quit();

    // Decrement first, then test the count - same order as the check that decides process exit.
    process->thread_count--;
    const bool was_last_thread = process->thread_count == 0;
    if (was_last_thread) process->exit(0);

    if (!yield) return;
    kernel_yield();
}
enum class DefaultSignalAction
{
Ignore,
@ -202,7 +249,7 @@ void Thread::process_pending_signals(Registers* current_regs)
if (handler.sa_handler == SIG_DFL || signo == SIGKILL || signo == SIGSTOP)
{
default_signal:
if (id == 1)
if (process->id == 1)
{
kwarnln("signal: init got a signal it has no handler for, ignoring");
return;
@ -213,9 +260,10 @@ void Thread::process_pending_signals(Registers* current_regs)
{
case DefaultSignalAction::Ignore: return;
case DefaultSignalAction::Terminate:
kwarnln("Terminating thread %d with signal %d", id, signo);
kwarnln("Terminating thread %d with signal %d", tid, signo);
CPU::print_stack_trace_at(current_regs);
exit_and_signal_parent(signo | _SIGBIT);
process->exit(signo | _SIGBIT);
unreachable();
case DefaultSignalAction::Stop: stop();
default: return;
}
@ -246,6 +294,14 @@ bool Thread::will_ignore_pending_signal()
return false;
}
void Process::send_signal(int signo)
{
Scheduler::for_each_thread(this, [signo](Thread* t) {
t->send_signal(signo);
return false;
});
}
void Thread::send_signal(int signo)
{
if (is_kernel) return;
@ -299,9 +355,11 @@ bool Thread::check_stack_on_exception(u64 stack_pointer)
return false;
}
auto address_space = process->address_space.lock();
// If we can, we'll add 2 more pages of buffer space, otherwise we use whatever we can.
usize bytes_to_grow = min(stack_space_remaining, exceeded_bytes + 2 * ARCH_PAGE_SIZE);
auto maybe_base = address_space->grow_region(stack.bottom(), bytes_to_grow / ARCH_PAGE_SIZE, true);
auto maybe_base = (*address_space)->grow_region(stack.bottom(), bytes_to_grow / ARCH_PAGE_SIZE, true);
if (maybe_base.has_error())
{
kwarnln("Failed to grow stack: could not allocate virtual memory space (%s)", maybe_base.error_string());
@ -313,7 +371,7 @@ bool Thread::check_stack_on_exception(u64 stack_pointer)
MMU::ReadWrite | MMU::NoExecute | MMU::User);
if (result.has_error())
{
address_space->free_region(base, bytes_to_grow / ARCH_PAGE_SIZE);
(*address_space)->free_region(base, bytes_to_grow / ARCH_PAGE_SIZE);
kwarnln("Failed to grow stack: could not allocate physical pages (%s)", result.error_string());
return false;
}
@ -333,3 +391,8 @@ void Thread::stop()
state = ThreadState::Stopped;
kernel_yield();
}
// Returns the process that owns the thread reported by Scheduler::current().
Process* Process::current()
{
    auto* const running_thread = Scheduler::current();
    return running_thread->process;
}

View File

@ -1,5 +1,4 @@
#pragma once
#include "arch/MMU.h"
#include "fs/OpenFileDescription.h"
#include "fs/VFS.h"
@ -48,59 +47,33 @@ struct Credentials
u32 sgid { 0 };
};
struct Thread : public LinkedListNode<Thread>
struct Process : public LinkedListNode<Process>
{
Registers regs;
Atomic<usize> thread_count;
pid_t id;
pid_t pgid { 0 };
pid_t sid { 0 };
Atomic<pid_t> pgid { 0 };
Atomic<pid_t> sid { 0 };
Credentials auth;
Vector<gid_t> extra_groups;
bool has_called_exec { false };
u64 user_ticks_self = 0;
u64 kernel_ticks_self = 0;
u64 user_ticks_children = 0;
u64 kernel_ticks_children = 0;
u64 ticks_left;
u64 sleep_ticks_left;
mode_t umask { 0 };
int promises { -1 };
int execpromises { -1 };
Stack stack;
Stack kernel_stack;
Process* parent { nullptr };
OwnedPtr<AddressSpace> address_space;
Option<FileDescriptor> fd_table[FD_MAX] = {};
MutexLocked<Credentials> auth { Credentials { 0, 0, 0, 0, 0, 0 } };
Result<int> allocate_fd(int min);
Result<FileDescriptor*> resolve_fd(int fd);
Result<SharedPtr<VFS::Inode>> resolve_atfile(int dirfd, const String& path, bool allow_empty_path,
bool follow_last_symlink,
SharedPtr<VFS::Inode>* parent_inode = nullptr);
MutexLocked<Vector<gid_t>> extra_groups { {} };
struct sigaction signal_handlers[NSIG];
Bitset<sigset_t> signal_mask { 0 };
Bitset<sigset_t> pending_signals { 0 };
bool interrupted { false };
Credentials credentials();
Result<Vector<gid_t>> copy_groups();
SharedPtr<VFS::Inode> controlling_terminal;
MutexLocked<OwnedPtr<AddressSpace>> address_space;
bool unrestricted_task { false };
FPData fp_data;
ThreadState state = ThreadState::Runnable;
bool is_kernel { true };
bool has_called_exec { false };
int status { 0 };
mode_t umask { 0 };
MutexLocked<Option<FileDescriptor>[FD_MAX]> fd_table = {};
Timer real_timer;
Timer virtual_timer;
@ -109,28 +82,99 @@ struct Thread : public LinkedListNode<Thread>
Clock virtual_clock;
Clock profiling_clock;
bool is_kernel { false };
Option<Timer> posix_timers[MAX_POSIX_TIMERS];
Mutex posix_timer_mutex;
StaticString<128> cmdline;
Atomic<u64> user_ticks_self = 0;
Atomic<u64> kernel_ticks_self = 0;
Atomic<u64> user_ticks_children = 0;
Atomic<u64> kernel_ticks_children = 0;
Result<int> allocate_timerid();
Result<Timer*> resolve_timerid(int id);
StaticString<128> cmdline;
Result<int> allocate_fd(int min, FileDescriptor& descriptor);
Result<FileDescriptor*> resolve_fd(int fd);
Result<SharedPtr<VFS::Inode>> resolve_atfile(int dirfd, const String& path, bool allow_empty_path,
bool follow_last_symlink,
SharedPtr<VFS::Inode>* parent_inode = nullptr);
String current_directory_path = {};
SharedPtr<VFS::Inode> current_directory = {};
Thread* parent { nullptr };
Option<pid_t> child_being_waited_for = {};
SharedPtr<VFS::Inode> controlling_terminal;
int status { 0 };
void send_signal(int signo);
// Returns true when this process's own id equals its session id.
bool is_session_leader()
{
    const bool leads_session = (id == sid);
    return leads_session;
}
// Returns true while thread_count is still above zero.
bool alive()
{
    const bool has_threads = thread_count > 0;
    return has_threads;
}
static Process* current();
[[noreturn]] void exit(int status);
};
struct Thread : public LinkedListNode<Thread>
{
Process* process;
pid_t tid;
Registers regs;
Atomic<u64> ticks_left;
Atomic<u64> sleep_ticks_left;
Atomic<u64> user_ticks_self = 0;
Atomic<u64> kernel_ticks_self = 0;
Stack stack;
Stack kernel_stack;
struct sigaction signal_handlers[NSIG];
Bitset<sigset_t> signal_mask { 0 };
Bitset<sigset_t> pending_signals { 0 };
bool interrupted { false };
Atomic<pid_t> child_being_waited_for = -2;
bool unrestricted_task { false };
FPData fp_data;
ThreadState state = ThreadState::Runnable;
bool is_kernel { false };
StaticString<128> cmdline;
PageDirectory* self_directory() const
{
return address_space->page_directory();
PageDirectory* result;
auto lambda = Function<OwnedPtr<AddressSpace>&>::wrap([&](OwnedPtr<AddressSpace>& space) {
result = space->page_directory();
}).release_value();
process->address_space.with_lock(move(lambda));
return result;
}
PageDirectory* active_directory { nullptr };
[[noreturn]] void exit_and_signal_parent(int status);
void quit();
void exit(bool yield = true);
bool is_idle()
{
@ -142,11 +186,6 @@ struct Thread : public LinkedListNode<Thread>
state = ThreadState::Runnable;
}
bool is_session_leader()
{
return id == sid;
}
void init_regs_kernel();
void init_regs_user();
@ -190,3 +229,4 @@ Result<Thread*> new_thread();
pid_t next_thread_id();
extern LinkedList<Thread> g_threads;
extern LinkedList<Process> g_processes;

View File

@ -51,7 +51,9 @@ Result<OwnedPtr<ThreadImage>> ThreadImage::clone_from_thread(Thread* parent)
{
auto image = TRY(make_owned<ThreadImage>());
auto address_space = TRY(parent->address_space->clone());
auto space = parent->process->address_space.lock();
auto address_space = TRY((*space)->clone());
const u64 kernel_stack_base = TRY(MemoryManager::alloc_for_kernel(4, MMU::ReadWrite | MMU::NoExecute));
Stack kernel_stack { kernel_stack_base, 4 * ARCH_PAGE_SIZE };
@ -98,5 +100,6 @@ void ThreadImage::apply(Thread* thread)
thread->active_directory = m_address_space->page_directory();
thread->address_space = move(m_address_space);
auto space = thread->process->address_space.lock();
*space = move(m_address_space);
}

View File

@ -2,7 +2,7 @@
#include <bits/signal.h>
#include <luna/LinkedList.h>
struct Thread;
struct Process;
struct Clock;
class Timer : public LinkedListNode<Timer>
@ -10,7 +10,7 @@ class Timer : public LinkedListNode<Timer>
public:
u64 delta_ticks { 0 };
u64 interval_ticks { 0 };
Thread* thread;
Process* process;
int signo { SIGALRM };
bool restart { false };