Luna/kernel/src/thread/Scheduler.cpp
apio c604c074a1 Kernel: Rename ASSERT() to ensure()
Doesn't get stripped on release builds, so it shouldn't be named assert.
2022-11-02 19:38:15 +01:00

669 lines
20 KiB
C++

#define MODULE "sched"
#include "thread/Scheduler.h"
#include "interrupts/Interrupts.h"
#include "log/Log.h"
#include "memory/MemoryManager.h"
#include "memory/PMM.h"
#include "memory/VMM.h"
#include "misc/hang.h"
#include "misc/reboot.h"
#include "misc/utils.h"
#include "panic/Panic.h"
#include "std/assert.h"
#include "std/errno.h"
#include "std/stdlib.h"
#include "std/string.h"
#include "sys/UserMemory.h"
#include "sys/elf/ELFLoader.h"
#include "thread/PIT.h"
#include "thread/Task.h"
#include "utils/Addresses.h"
#include "utils/Registers.h"
// Number of tasks currently tracked by the scheduler (incremented on spawn,
// decremented in reap_task).
static uint64_t task_num = 0;
// Statically-allocated idle task (PID 0); runs when nothing else is Running.
static Task idle_task;
// Next PID to hand out; monotonically increasing, except that the PID of the
// most recently spawned task is reclaimed if that task dies or fails to spawn.
static uint64_t free_pid = 0;
// The task whose context is currently on the CPU.
static Task* sched_current_task;
// Head and tail of the circular doubly-linked list of schedulable tasks.
// The idle task is NOT part of this list.
static Task* base_task;
static Task* end_task;
// Entry point of the idle loop; implemented elsewhere (assembly, per the C linkage).
extern "C" void idle_task_function();
// Milliseconds per scheduler tick; computed in Scheduler::init from the PIT rate.
static uint64_t frequency;
template <typename Callback> void sched_for_each_task(Callback callback)
{
Task* task = base_task;
if (!task) return;
do {
bool will_continue = callback(task);
if (!will_continue) break;
task = task->next_task;
} while (task != base_task);
}
// Invokes `callback` for every task whose parent is `task`. Tasks that are
// not children are skipped; the callback's return value controls early exit
// (false stops the underlying queue walk).
template <typename Callback> void sched_for_each_child(Task* task, Callback callback)
{
    sched_for_each_task([&](Task* candidate) {
        if (candidate->ppid != task->id) return true; // not our child, keep scanning
        return callback(candidate);
    });
}
// Looks up a task in the run queue by PID.
// Returns nullptr when no task with that PID exists.
Task* Scheduler::find_by_pid(uint64_t pid)
{
    Task* found = nullptr;
    sched_for_each_task([&](Task* candidate) {
        if (candidate->id != pid) return true; // no match, keep searching
        found = candidate;
        return false; // match found, stop the walk
    });
    return found;
}
// Inserts `task` at the tail of the circular doubly-linked run queue,
// keeping base_task (head) and end_task (tail) consistent.
void Scheduler::append_task(Task* task)
{
    if (base_task)
    {
        // Splice the new task in between the current tail and the head.
        task->prev_task = end_task;
        task->next_task = base_task;
        end_task->next_task = task;
        base_task->prev_task = task;
        end_task = task;
    }
    else
    {
        // Empty queue: the new task becomes the only element, linked to itself.
        ensure(!end_task);
        base_task = end_task = task;
        task->next_task = task->prev_task = task;
    }
}
// Initialises the scheduler: builds the statically-allocated idle task
// (PID 0, running idle_task_function with interrupts enabled) and caches
// the tick period derived from the PIT. Must run before any other
// Scheduler function.
void Scheduler::init()
{
memset(&idle_task, 0, sizeof(Task));
idle_task.id = free_pid++; // idle task always claims PID 0
idle_task.regs.rip = (uint64_t)idle_task_function;
idle_task.regs.rsp = get_top_of_stack((uint64_t)MemoryManager::get_page(), 1); // single-page stack
idle_task.regs.cs = 0x08; // kernel code segment
idle_task.regs.ss = 0x10; // kernel data segment
idle_task.regs.rflags = (1 << 21) | (1 << 9); // ID flag + IF (interrupts enabled)
idle_task.task_sleep = 1000;
idle_task.user_task = false;
idle_task.block_reason = BlockReason::None;
idle_task.state = idle_task.Idle;
strlcpy(idle_task.name, "[cpu-idle]", sizeof(idle_task.name));
sched_current_task = &idle_task;
// Milliseconds per PIT tick. NOTE(review): integer division — a PIT rate
// above 1000 Hz would make this 0; confirm PIT::frequency() <= 1000.
frequency = 1000 / PIT::frequency();
}
// Spawns a kernel-mode task executing `task` on a freshly allocated
// TASK_PAGES_IN_STACK-page kernel stack. The task is appended to the run
// queue in the Running state and will be picked up on the next yield/tick.
void Scheduler::add_kernel_task(const char* taskname, void (*task)(void))
{
Task* new_task = new Task;
ensure(new_task);
new_task->user_task = false;
new_task->id = free_pid++;
new_task->ppid = 0; // kernel tasks are parented to PID 0
new_task->uid = new_task->euid = new_task->gid = new_task->egid = 0; // root credentials
new_task->regs.rip = (uint64_t)task;
// NOTE(review): get_pages() result is not checked; a failed allocation
// would leave a null stack pointer here — confirm it cannot fail this early.
new_task->allocated_stack =
(uint64_t)MemoryManager::get_pages(TASK_PAGES_IN_STACK); // 16 KB is enough for everyone, right?
new_task->regs.rsp = get_top_of_stack(new_task->allocated_stack, TASK_PAGES_IN_STACK);
// Kernel segment selectors.
new_task->regs.cs = 0x08;
new_task->regs.ss = 0x10;
new_task->regs.ds = 0x10;
new_task->regs.rflags = read_rflags() | 0x200; // enable interrupts
new_task->task_sleep = 0;
new_task->task_time = 0;
new_task->cpu_time = 0;
strlcpy(new_task->name, taskname, sizeof(new_task->name));
append_task(new_task);
new_task->block_reason = BlockReason::None;
new_task->state = new_task->Running;
task_num++;
kinfoln("Adding kernel task: %s, starts at %lx, PID %ld, stack at %lx, total tasks: %ld", new_task->name,
new_task->regs.rip, new_task->id, new_task->regs.rsp, task_num);
}
// Allocates and registers a bare user-task shell: zeroed registers, a fresh
// PID, and a spot in the run queue. The caller is responsible for filling in
// registers, stack, address space and name before the task may run.
// Returns nullptr when allocation fails.
Task* Scheduler::create_user_task()
{
    Task* task = new Task;
    if (!task) return nullptr;
    memset(&task->regs, 0, sizeof(Context));
    task->user_task = true;
    task->id = free_pid++;
    task->ppid = 0;
    task->task_sleep = task->task_time = task->cpu_time = 0;
    task->block_reason = BlockReason::None;
    append_task(task);
    task_num++;
    return task;
}
// Loads an ELF executable from the filesystem and spawns it as a new user
// task (ring 3, own address space, TASK_PAGES_IN_STACK-page stack at 1 MiB).
// Returns the new task's PID on success or a negative errno on failure.
// Interrupts are disabled for the duration; every path restores the previous
// interrupt state and leaves the CPU on the kernel address space.
long Scheduler::load_user_task(const char* filename)
{
    kinfoln("Loading user task: %s", filename);
    Interrupts::push_and_disable();
    Task* new_task = new Task;
    ensure(new_task);
    memset(&new_task->regs, 0, sizeof(Context));
    new_task->id = free_pid++;
    new_task->ppid = 0;
    new_task->uid = new_task->euid = new_task->gid = new_task->egid = 0; // root credentials
    if (!new_task->allocator.init())
    {
        delete new_task;
        free_pid--; // the task never spawned, so its PID can be reused
        Interrupts::pop();
        return -ENOMEM;
    }
    new_task->address_space = AddressSpace::create();
    VMM::switch_to_user_address_space(new_task->address_space);
    long result;
    if ((result = ELFLoader::check_elf_image_from_filesystem(filename)) < 0)
    {
        kerrorln("Failed to load %s from initrd", filename);
        // FIX: this path used to leak the freshly created address space and
        // leave the CPU on it; switch back and tear it down before bailing.
        VMM::switch_back_to_kernel_address_space();
        new_task->address_space.destroy();
        delete new_task;
        free_pid--;
        Interrupts::pop();
        return result;
    }
    if ((uint64_t)result > PMM::get_free())
    {
        kerrorln("Not enough memory for task %s", filename);
        // FIX: same leak as above — destroy the address space and switch back.
        VMM::switch_back_to_kernel_address_space();
        new_task->address_space.destroy();
        delete new_task;
        free_pid--;
        Interrupts::pop();
        return -ENOMEM;
    }
    ELFImage* image = ELFLoader::load_elf_from_filesystem(filename);
    ensure(image);
    new_task->user_task = true;
    new_task->regs.rip = image->entry;
    new_task->image = image;
    // User stack, owned by the task so it dies with the address space.
    new_task->allocated_stack = (uint64_t)MemoryManager::get_pages_at(
        0x100000, TASK_PAGES_IN_STACK, MAP_READ_WRITE | MAP_USER | MAP_AS_OWNED_BY_TASK); // 16 KB is enough for everyone, right?
    if (!new_task->allocated_stack)
    {
        new_task->address_space.destroy();
        delete new_task;
        free_pid--;
        ELFLoader::release_elf_image(image);
        VMM::switch_back_to_kernel_address_space();
        Interrupts::pop();
        return -ENOMEM;
    }
    new_task->regs.rsp = get_top_of_stack(new_task->allocated_stack, TASK_PAGES_IN_STACK);
    // Ring-3 segment selectors (RPL = 3).
    new_task->regs.cs = 0x18 | 0x03;
    new_task->regs.ss = 0x20 | 0x03;
    new_task->regs.ds = 0x20 | 0x03;
    new_task->regs.rflags = (1 << 21) | (1 << 9); // enable interrupts
    new_task->task_sleep = 0;
    new_task->task_time = 0;
    new_task->cpu_time = 0;
    strlcpy(new_task->name, filename, sizeof(new_task->name));
    append_task(new_task);
    new_task->block_reason = BlockReason::None;
    new_task->state = new_task->Running;
    task_num++;
    kinfoln("Adding user task: %s, loaded at %lx, PID %ld, stack at %lx, total tasks: %ld", new_task->name,
            new_task->regs.rip, new_task->id, new_task->regs.rsp, task_num);
    VMM::switch_back_to_kernel_address_space();
    Interrupts::pop();
    return (long)new_task->id;
}
// Reinitialises an existing task to run a freshly loaded ELF image (the
// exec-over-existing-process path). The existing stack allocation is reused;
// registers and accounting are reset so execution starts at the new entry.
void Scheduler::reset_task(Task* task, ELFImage* new_image)
{
    memset(&task->regs, 0, sizeof(Context));
    task->state = task->Running;
    task->image = new_image;
    task->regs.rip = new_image->entry;
    task->regs.rsp = get_top_of_stack(task->allocated_stack, TASK_PAGES_IN_STACK);
    // Ring-3 segment selectors (RPL = 3).
    task->regs.cs = 0x18 | 0x03;
    task->regs.ss = 0x20 | 0x03;
    task->regs.ds = 0x20 | 0x03;
    task->regs.rflags = (1 << 21) | (1 << 9); // enable interrupts
    task->task_sleep = 0;
    task->cpu_time = 0;
    task->block_reason = BlockReason::None;
    kinfoln("Resetting task: %s, loaded at %lx, PID %ld, stack at %lx, total tasks: %ld", task->name, task->regs.rip,
            task->id, task->regs.rsp, task_num);
}
// Frees all resources owned by an already-exited (and already-unlinked) task
// and destroys it. Must be called outside an interrupt handler, since the
// address-space teardown temporarily re-enables interrupts. Never called on
// the idle task (PID 0).
void Scheduler::reap_task(Task* task)
{
ensure(!Interrupts::is_in_handler());
task_num--;
Task* exiting_task = task;
ensure(task->id != 0); // WHY IN THE WORLD WOULD WE BE REAPING THE IDLE TASK?
// For user tasks, activate the task's own address space before touching its
// per-task state. NOTE(review): the switch-back/apply pair before switching
// appears to flush the current mapping first — confirm against VMM semantics.
if (exiting_task->is_user_task())
{
VMM::switch_back_to_kernel_address_space();
VMM::apply_address_space();
VMM::switch_to_user_address_space(exiting_task->address_space);
}
kinfoln("reaping task %s, PID %ld, exited with code %ld", exiting_task->name, exiting_task->id,
exiting_task->exit_status);
if (exiting_task->id == (free_pid - 1)) free_pid--; // If we are the last spawned thread, free our PID.
// Only kernel stacks are released here; user stacks were mapped with
// MAP_AS_OWNED_BY_TASK and are presumably freed with the address space.
if (exiting_task->allocated_stack && !exiting_task->is_user_task())
MemoryManager::release_pages((void*)exiting_task->allocated_stack, TASK_PAGES_IN_STACK);
if (exiting_task->image) kfree(exiting_task->image);
if (exiting_task->is_user_task())
{
exiting_task->allocator.free();
VMM::switch_back_to_kernel_address_space();
VMM::apply_address_space();
// Address-space destruction runs with interrupts enabled.
Interrupts::push_and_enable();
exiting_task->address_space.destroy();
Interrupts::pop();
}
// Close any file descriptors the task still had open.
for (int i = 0; i < TASK_MAX_FDS; i++) { exiting_task->files[i].close(); }
delete exiting_task;
}
// Shared tail of task_exit()/task_misbehave(): marks the current task dead,
// reparents its children to init (PID 1), and yields so it never runs again.
// If init itself exits, the system reboots (or hangs in test builds).
void sched_common_exit(Context* context, int64_t status)
{
// init is marked Exited (reaped directly); everyone else becomes Dying so a
// parent can still collect its status via waitpid().
if (sched_current_task->id == 1) sched_current_task->state = sched_current_task->Exited;
else
sched_current_task->state = sched_current_task->Dying;
sched_current_task->exit_status = status;
if (sched_current_task->id != 1)
{
// Orphaned (still-live) children are adopted by init.
sched_for_each_child(sched_current_task, [](Task* child) {
if (child->state != child->Exited) child->ppid = 1;
return true;
});
}
else
{
#ifndef RUN_TEST_AS_INIT
reboot();
#else
hang();
#endif
}
// Hand the CPU to the next runnable task; this task is no longer Running.
Scheduler::task_yield(context);
}
// Syscall backend for a normal exit: logs CPU-time accounting, then runs the
// shared exit sequence for the current task. Must run in an interrupt handler.
void Scheduler::task_exit(Context* context, int64_t status)
{
    ensure(Interrupts::is_in_handler());
    Task* task = sched_current_task;
    kdbgln("exit: task %ld finished running, used %ld ms of cpu time", task->id, task->cpu_time);
    sched_common_exit(context, status);
}
// Terminates a task that did something illegal (e.g. faulted); identical to
// task_exit except for the log message. Must run in an interrupt handler.
void Scheduler::task_misbehave(Context* context, int64_t status)
{
    ensure(Interrupts::is_in_handler());
    Task* task = sched_current_task;
    kdbgln("exit: task %ld misbehaved, used %ld ms of cpu time", task->id, task->cpu_time);
    sched_common_exit(context, status);
}
// Walks the run queue once, unlinks every Exited task into a private
// singly-linked "reap list", then frees them all via reap_task(). The scan
// runs with interrupts disabled so the queue cannot change underneath us.
void Scheduler::reap_tasks()
{
Interrupts::disable();
ensure(!Interrupts::is_in_handler());
Task* reap_base = nullptr; // head of the detached reap list
Task* reap_end = nullptr; // tail of the detached reap list
Task* task = base_task; // NOTE(review): assumes base_task is non-null here — confirm
Task* task_reaping;
uint64_t iter_index = 0;
do {
if (task->state == task->Exited)
{
// Keep base_task/end_task valid while unlinking this node.
if (task == base_task && task == end_task) { panic("Last task exited"); }
else if (task == base_task) { base_task = task->next_task; }
else if (task == end_task) { end_task = task->prev_task; }
if (!reap_base)
{
// First dead task found: it starts the reap list.
reap_base = task;
reap_end = task;
task->prev_task->next_task = task->next_task;
task->next_task->prev_task = task->prev_task;
task->prev_task = nullptr;
task_reaping = task;
task = task->next_task; // advance BEFORE severing next_task below
task_reaping->next_task = nullptr;
}
else
{
// Subsequent dead tasks are appended at the reap list's tail.
reap_end->next_task = task;
task->prev_task->next_task = task->next_task;
task->next_task->prev_task = task->prev_task;
task->prev_task = nullptr;
reap_end = task;
task_reaping = task;
task = task->next_task;
task_reaping->next_task = nullptr;
}
}
else { task = task->next_task; }
iter_index++;
} while (iter_index < task_num); // visit exactly task_num nodes
// Free everything on the reap list; reap_task() decrements task_num itself.
task = reap_base;
while (task)
{
Task* reaped_task = task;
task = task->next_task;
reap_task(reaped_task);
}
Interrupts::enable();
}
// Called once per tick: advances every task's sleep countdown by one tick
// period and wakes any Sleeping task whose countdown has expired.
static void sched_decrement_sleep_times()
{
    sched_for_each_task([](Task* task) {
        if (task->task_sleep > 0)
        {
            task->task_sleep -= frequency;
            if (task->task_sleep < 0) task->task_sleep = 0; // clamp, don't go negative
        }
        bool done_sleeping = task->task_sleep == 0 && task->state == task->Sleeping;
        if (done_sleeping) task->state = task->Running;
        return true; // always visit every task
    });
}
// PIT tick handler: updates sleep and CPU-time accounting, then preempts the
// current task once its time slice (task_time) is exhausted.
void Scheduler::task_tick(Context* context)
{
ensure(Interrupts::is_in_handler());
Interrupts::disable();
sched_decrement_sleep_times();
sched_current_task->task_time -= frequency;
sched_current_task->cpu_time += frequency;
// The idle task (PID 0) gives up the CPU on every tick.
if (sched_current_task->id == 0) return task_yield(context);
if (sched_current_task->task_time <= 0)
{
sched_current_task->task_time = 0;
task_yield(context);
}
// NOTE(review): on the yield paths above, interrupts are not re-enabled
// here — presumably restored by the interrupt-return path. Confirm.
Interrupts::enable();
}
// Core scheduling decision: saves the outgoing task's context, round-robins
// over the run queue for the next Running task (re-checking and resuming any
// Blocking tasks whose condition has cleared), and restores the chosen task's
// context into `context`. Falls back to the idle task if nothing is runnable.
void Scheduler::task_yield(Context* context)
{
ensure(Interrupts::is_in_handler());
Interrupts::disable();
sched_current_task->save_context(context);
bool was_idle = false;
// The idle task is not in the run queue; reposition to the queue tail so the
// scan below starts at base_task (the tail's successor).
if (sched_current_task->state == sched_current_task->Idle)
{
sched_current_task = end_task;
was_idle = true;
}
Task* original_task = sched_current_task;
do {
sched_current_task = sched_current_task->next_task;
if (sched_current_task->state == sched_current_task->Blocking)
{
// Block condition cleared? resume() puts the task back in Running.
if (!sched_current_task->is_still_blocking()) sched_current_task->resume();
}
if (sched_current_task->state == sched_current_task->Running)
{
// Only do the expensive state switching when actually changing tasks.
if (sched_current_task->id != original_task->id || was_idle)
{
// Save FPU/SSE state of the task we are leaving (if it's a live user task).
if (!was_idle && original_task->is_user_task() && !original_task->has_died())
{
original_task->save_floating();
}
if (sched_current_task->is_user_task())
{
sched_current_task->switch_to_address_space();
sched_current_task->restore_floating();
}
// user -> kernel transition: go back to the kernel's mappings.
else if (!was_idle && original_task->is_user_task() && !sched_current_task->is_user_task())
{
VMM::switch_back_to_kernel_address_space();
VMM::apply_address_space();
}
}
sched_current_task->task_time = 20; // fresh time slice (same unit as frequency: ms)
sched_current_task->restore_context(context);
return;
}
} while (sched_current_task != original_task);
// Full lap with nothing runnable: park the CPU in the idle task.
if (!was_idle && original_task->is_user_task() && original_task->state != original_task->Exited)
{
original_task->save_floating();
}
sched_current_task = &idle_task;
sched_current_task->task_time = frequency; // idle runs for exactly one tick
// If we were already idle, the saved context is the idle task's own — no
// restore needed.
if (!was_idle) { sched_current_task->restore_context(context); }
return;
}
// Voluntarily gives up the CPU: raises the scheduler software interrupt
// (vector 0x42) with function code 1 (yield) in rax.
void Scheduler::yield()
{
asm volatile("int $0x42" : : "a"(1));
}
// Requests termination of the calling task: scheduler interrupt 0x42 with
// function code 0 (exit) in rax and the exit status in rdi.
void Scheduler::exit(int status)
{
asm volatile("int $0x42" : : "a"(0), "D"(status));
}
// Puts the calling task to sleep for `ms` milliseconds: scheduler interrupt
// 0x42 with function code 2 (sleep) in rax and the duration in rdi.
void Scheduler::sleep(unsigned long ms)
{
asm volatile("int $0x42" : : "D"(ms), "a"(2));
}
// Returns the task currently executing on the CPU (may be the idle task).
Task* Scheduler::current_task()
{
return sched_current_task;
}
#define WNOHANG 1
// Syscall backend for waitpid(). pid == -1 waits for any child; a positive
// pid waits for that specific child. With WNOHANG set, returns 0 immediately
// when no child has exited; otherwise the caller blocks (BlockReason::Waiting)
// until Task::is_wait_still_blocking()/resume_wait() complete the wait.
// On success rax holds the collected child's PID; errors are negative errno
// values (-ECHILD, -EFAULT).
void sys_waitpid(Context* context, long pid, int* wstatus,
int options) // FIXME: only allow waiting for child processes when specifying a PID.
{
Task* child = nullptr;
if (pid == -1)
{
// Wait-for-any: look for a child that has already died (Dying = waiting
// to be collected).
sched_for_each_child(sched_current_task, [&](Task* task) {
if (task->state == task->Dying)
{
child = task;
return false;
}
return true;
});
if (!child)
{
if (options & WNOHANG)
{
context->rax = 0; // No child has exited, let's return 0.
return;
}
// Pin the user's wstatus pointer now so resume_wait() can write through
// it later without re-validating.
int* kwstatus;
if (wstatus)
{
kwstatus = obtain_user_ref(wstatus);
if (!kwstatus)
{
context->rax = -EFAULT;
return;
}
}
kdbgln("blocking wait on any child");
// Record what we're waiting for and block until a child dies.
sched_current_task->state = sched_current_task->Blocking;
sched_current_task->block_reason = BlockReason::Waiting;
sched_current_task->blocking_wait_info.pid = -1;
if (wstatus) sched_current_task->blocking_wait_info.wstatus = kwstatus;
else
sched_current_task->blocking_wait_info.wstatus = nullptr;
return Scheduler::task_yield(context);
}
}
else
{
child = Scheduler::find_by_pid(pid);
if (!child)
{
context->rax = -ECHILD;
return;
}
}
if (child->ppid != sched_current_task->id)
{
// We are trying to call waitpid() on a task that isn't a child of ours. This is not allowed.
context->rax = -ECHILD;
return;
}
// Child exists but hasn't died yet: either bail out (WNOHANG) or block on it.
if (child->state != child->Dying)
{
if (options & WNOHANG)
{
context->rax = 0; // No child has exited, let's return 0.
return;
}
int* kwstatus;
if (wstatus)
{
kwstatus = obtain_user_ref(wstatus);
if (!kwstatus)
{
context->rax = -EFAULT;
return;
}
}
sched_current_task->state = sched_current_task->Blocking;
sched_current_task->block_reason = BlockReason::Waiting;
sched_current_task->blocking_wait_info.pid = pid;
if (wstatus) sched_current_task->blocking_wait_info.wstatus = kwstatus;
else
sched_current_task->blocking_wait_info.wstatus = nullptr;
return Scheduler::task_yield(context);
}
// Fast path: the child is already dead — copy its status out and mark it
// Exited so the reaper can free it.
if (wstatus)
{
int* kwstatus = obtain_user_ref(wstatus);
if (kwstatus)
{
*kwstatus = (int)(child->exit_status & 0xff);
release_user_ref(kwstatus);
}
else
{
kinfoln("wstatus ptr is invalid: %p", (void*)wstatus);
// NOTE(review): the child is still marked Exited even though we return
// -EFAULT, so its status is lost — confirm this is intended.
child->state = child->Exited;
context->rax = -EFAULT;
return;
}
}
child->state = child->Exited;
context->rax = (long)child->id;
}
// Part of the Blocking-state machinery: returns true while a waitpid()-blocked
// task must stay blocked, false once a matching child has died and can be
// collected by resume_wait().
// NOTE(review): this scans the children of sched_current_task rather than
// `this`; the caller (task_yield) sets sched_current_task to this task before
// asking, so the two coincide — confirm before adding other callers.
bool Task::is_wait_still_blocking()
{
Task* child = nullptr;
if (blocking_wait_info.pid == -1)
{
// Waiting for any child: pick the first one found in the Dying state.
sched_for_each_child(sched_current_task, [&](Task* task) {
if (task->state == task->Dying)
{
child = task;
return false;
}
return true;
});
if (!child) return true;
else
{
blocking_wait_info.pid = child->id; // We're committed to this child now.
return false;
}
}
else
{
// Waiting for a specific PID.
child = Scheduler::find_by_pid(blocking_wait_info.pid);
ensure(child); // since sys_waitpid should have validated this child, and the only way for it to disappear from
// the process list is for someone to wait for it, this should be pretty safe.
if (child->state != child->Dying) return true;
else
return false;
}
}
// Completes a blocked waitpid() after is_wait_still_blocking() returned false:
// writes the child's exit status to the user buffer (if one was pinned in
// sys_waitpid), marks the child Exited so the reaper can free it, and places
// the syscall return value (the child's PID) in this task's saved registers.
void Task::resume_wait()
{
ensure(blocking_wait_info.pid != -1); // is_wait_still_blocking should have chosen a child for us if the user
// process told us to wait for any child.
Task* child = Scheduler::find_by_pid(blocking_wait_info.pid);
ensure(child); // This should also already have been validated.
if (blocking_wait_info.wstatus)
{
// wstatus was pinned with obtain_user_ref() back in sys_waitpid.
*blocking_wait_info.wstatus = (int)(child->exit_status & 0xff);
release_user_ref(blocking_wait_info.wstatus);
}
child->state = child->Exited;
regs.rax = (long)child->id;
}
// Process-status record copied out to userspace by sys_pstat().
// Layout must match the userspace definition of struct pstat.
struct pstat
{
long pt_pid; // process ID
long pt_ppid; // parent process ID
char pt_name[128]; // task name (NUL-terminated, truncated to fit)
int pt_state; // scheduler state, cast from the Task state enum
long pt_time; // accumulated CPU time in milliseconds
uid_t pt_uid; // real user ID
gid_t pt_gid; // real group ID
};
// Syscall backend for pstat(): copies scheduling information about a task
// into the user-supplied buffer. pid == 0 selects the idle task, pid == -1
// the most recently spawned task, anything else is a direct PID lookup.
// rax receives the task's PID on success, -ESRCH or -EFAULT on failure.
void sys_pstat(Context* context, long pid, struct pstat* buf)
{
    Task* target;
    if (pid == 0) target = &idle_task;
    else if (pid == -1)
        target = Scheduler::find_by_pid(free_pid - 1);
    else
        target = Scheduler::find_by_pid(pid);
    // A missing task, or one that has exited and is only awaiting the reaper,
    // is reported as "no such process".
    if (!target || target->state == target->Exited)
    {
        context->rax = -ESRCH;
        return;
    }
    if (buf)
    {
        struct pstat* kpstat = obtain_user_ref(buf);
        if (!kpstat)
        {
            context->rax = -EFAULT;
            return;
        }
        kpstat->pt_pid = target->id;
        kpstat->pt_ppid = target->ppid;
        kpstat->pt_state = (int)target->state;
        kpstat->pt_time = (long)target->cpu_time;
        kpstat->pt_uid = target->uid;
        kpstat->pt_gid = target->gid;
        strlcpy(kpstat->pt_name, target->name, sizeof(kpstat->pt_name));
        release_user_ref(kpstat);
    }
    context->rax = target->id;
}