#define MODULE "sched"

#include "thread/Scheduler.h"
#include "interrupts/Interrupts.h"
#include "log/Log.h"
#include "memory/MemoryManager.h"
#include "memory/PMM.h"
#include "memory/VMM.h"
#include "misc/hang.h"
#include "misc/reboot.h"
#include "misc/utils.h"
#include "panic/Panic.h"
#include "std/assert.h"
#include "std/errno.h"
#include "std/stdlib.h"
#include "std/string.h"
#include "sys/UserMemory.h"
#include "sys/elf/ELFLoader.h"
#include "thread/PIT.h"
#include "thread/Task.h"
#include "utils/Addresses.h"
#include "utils/Registers.h"

static uint64_t task_num = 0;
static Task idle_task;
static uint64_t free_pid = 0;

static Task* sched_current_task;
static Task* base_task;
static Task* end_task;

extern "C" void idle_task_function();

static uint64_t frequency;

template <typename Callback> void sched_for_each_task(Callback callback)
{
    Task* task = base_task;
    if (!task) return;
    do {
        bool will_continue = callback(task);
        if (!will_continue) break;
        task = task->next_task;
    } while (task != base_task);
}

template <typename Callback> void sched_for_each_child(Task* task, Callback callback)
{
    sched_for_each_task([&](Task* child) {
        if (child->ppid == task->id) { return callback(child); }
        return true;
    });
}

Task* Scheduler::find_by_pid(uint64_t pid)
{
    Task* result = nullptr;
    sched_for_each_task([&](Task* task) {
        if (task->id == pid)
        {
            result = task;
            return false;
        }
        return true;
    });
    return result;
}

void Scheduler::append_task(Task* task)
{
    if (!base_task)
    {
        ensure(!end_task);
        base_task = task;
        end_task = base_task;
        task->next_task = task;
        task->prev_task = task;
    }
    else
    {
        end_task->next_task = task;
        task->prev_task = end_task;
        base_task->prev_task = task;
        task->next_task = base_task;
        end_task = task;
    }
}

void Scheduler::init()
{
    memset(&idle_task, 0, sizeof(Task));
    idle_task.id = free_pid++;
    idle_task.regs.rip = (uint64_t)idle_task_function;
    idle_task.regs.rsp = get_top_of_stack((uint64_t)MemoryManager::get_page(), 1);
    idle_task.regs.cs = 0x08;
    idle_task.regs.ss = 0x10;
    idle_task.regs.rflags = (1 << 21) | (1 << 9);
    idle_task.task_sleep = 1000;
    idle_task.user_task = false;
    idle_task.block_reason = BlockReason::None;
    idle_task.state = idle_task.Idle;
    strlcpy(idle_task.name, "[cpu-idle]", sizeof(idle_task.name));
    sched_current_task = &idle_task;
    frequency = 1000 / PIT::frequency();
}

void Scheduler::add_kernel_task(const char* taskname, void (*task)(void))
{
    Task* new_task = new Task;
    ensure(new_task);
    new_task->user_task = false;
    new_task->id = free_pid++;
    new_task->ppid = 0;
    new_task->uid = new_task->euid = new_task->gid = new_task->egid = 0;
    new_task->regs.rip = (uint64_t)task;
    new_task->allocated_stack =
        (uint64_t)MemoryManager::get_pages(TASK_PAGES_IN_STACK); // 16 KB is enough for everyone, right?
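    // The x86_64 stack grows downwards, so rsp starts at the address returned by get_top_of_stack()
    // and moves towards allocated_stack as the task pushes data.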
    new_task->regs.rsp = get_top_of_stack(new_task->allocated_stack, TASK_PAGES_IN_STACK);
    new_task->regs.cs = 0x08;
    new_task->regs.ss = 0x10;
    new_task->regs.ds = 0x10;
    new_task->regs.rflags = read_rflags() | 0x200; // enable interrupts
    new_task->task_sleep = 0;
    new_task->task_time = 0;
    new_task->cpu_time = 0;
    strlcpy(new_task->name, taskname, sizeof(new_task->name));
    append_task(new_task);
    new_task->block_reason = BlockReason::None;
    new_task->state = new_task->Running;
    task_num++;
    kinfoln("Adding kernel task: %s, starts at %lx, PID %ld, stack at %lx, total tasks: %ld", new_task->name,
            new_task->regs.rip, new_task->id, new_task->regs.rsp, task_num);
}

Task* Scheduler::create_user_task()
{
    Task* new_task = new Task;
    if (!new_task) return nullptr;
    memset(&new_task->regs, 0, sizeof(Context));
    new_task->user_task = true;
    new_task->id = free_pid++;
    new_task->ppid = 0;
    new_task->task_sleep = 0;
    new_task->task_time = 0;
    new_task->cpu_time = 0;
    new_task->block_reason = BlockReason::None;
    append_task(new_task);
    task_num++;
    return new_task;
}

long Scheduler::load_user_task(const char* filename)
{
    kinfoln("Loading user task: %s", filename);
    Interrupts::push_and_disable();
    Task* new_task = new Task;
    ensure(new_task);
    memset(&new_task->regs, 0, sizeof(Context));
    new_task->id = free_pid++;
    new_task->ppid = 0;
    new_task->uid = new_task->euid = new_task->gid = new_task->egid = 0;
    if (!new_task->allocator.init())
    {
        delete new_task;
        free_pid--;
        Interrupts::pop();
        return -ENOMEM;
    }
    new_task->address_space = AddressSpace::create();
    VMM::switch_to_user_address_space(new_task->address_space);
    long result;
    if ((result = ELFLoader::check_elf_image_from_filesystem(filename)) < 0)
    {
        delete new_task;
        free_pid--;
        kerrorln("Failed to load %s from initrd", filename);
        Interrupts::pop();
        return result;
    }
    if ((uint64_t)result > PMM::get_free())
    {
        delete new_task;
        free_pid--;
        kerrorln("Not enough memory for task %s", filename);
        Interrupts::pop();
        return -ENOMEM;
    }
    ELFImage* image = ELFLoader::load_elf_from_filesystem(filename);
    ensure(image);
    new_task->user_task = true;
    new_task->regs.rip = image->entry;
    new_task->image = image;
    new_task->allocated_stack = (uint64_t)MemoryManager::get_pages_at(
        0x100000, TASK_PAGES_IN_STACK,
        MAP_READ_WRITE | MAP_USER | MAP_AS_OWNED_BY_TASK); // 16 KB is enough for everyone, right?
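    // If the user stack could not be mapped at the fixed address, unwind everything set up so far:
    // the address space, the Task object, the PID we claimed and the loaded ELF image.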
    if (!new_task->allocated_stack)
    {
        new_task->address_space.destroy();
        delete new_task;
        free_pid--;
        ELFLoader::release_elf_image(image);
        VMM::switch_back_to_kernel_address_space();
        Interrupts::pop();
        return -ENOMEM;
    }
    new_task->regs.rsp = get_top_of_stack(new_task->allocated_stack, TASK_PAGES_IN_STACK);
    new_task->regs.cs = 0x18 | 0x03;
    new_task->regs.ss = 0x20 | 0x03;
    new_task->regs.ds = 0x20 | 0x03;
    new_task->regs.rflags = (1 << 21) | (1 << 9); // enable interrupts
    new_task->task_sleep = 0;
    new_task->task_time = 0;
    new_task->cpu_time = 0;
    strlcpy(new_task->name, filename, sizeof(new_task->name));
    append_task(new_task);
    new_task->block_reason = BlockReason::None;
    new_task->state = new_task->Running;
    task_num++;
    kinfoln("Adding user task: %s, loaded at %lx, PID %ld, stack at %lx, total tasks: %ld", new_task->name,
            new_task->regs.rip, new_task->id, new_task->regs.rsp, task_num);
    VMM::switch_back_to_kernel_address_space();
    Interrupts::pop();
    return (long)new_task->id;
}

void Scheduler::reset_task(Task* task, ELFImage* new_image)
{
    memset(&task->regs, 0, sizeof(Context));
    task->state = task->Running;
    task->regs.rip = new_image->entry;
    task->image = new_image;
    task->regs.rsp = get_top_of_stack(task->allocated_stack, TASK_PAGES_IN_STACK);
    task->regs.cs = 0x18 | 0x03;
    task->regs.ss = 0x20 | 0x03;
    task->regs.ds = 0x20 | 0x03;
    task->regs.rflags = (1 << 21) | (1 << 9); // enable interrupts
    task->task_sleep = 0;
    task->cpu_time = 0;
    task->block_reason = BlockReason::None;
    kinfoln("Resetting task: %s, loaded at %lx, PID %ld, stack at %lx, total tasks: %ld", task->name, task->regs.rip,
            task->id, task->regs.rsp, task_num);
}

void Scheduler::reap_task(Task* task)
{
    ensure(!Interrupts::is_in_handler());
    task_num--;
    Task* exiting_task = task;
    ensure(task->id != 0); // WHY IN THE WORLD WOULD WE BE REAPING THE IDLE TASK?
    if (exiting_task->is_user_task())
    {
        VMM::switch_back_to_kernel_address_space();
        VMM::apply_address_space();
        VMM::switch_to_user_address_space(exiting_task->address_space);
    }
    kinfoln("reaping task %s, PID %ld, exited with code %ld", exiting_task->name, exiting_task->id,
            exiting_task->exit_status);
    if (exiting_task->id == (free_pid - 1)) free_pid--; // If we are the last spawned thread, free our PID.
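    // Release the task's resources: the kernel stack is freed directly (a user task's stack lives
    // inside its address space, which is destroyed below), then the ELF image, the userspace
    // allocator, the address space itself, and any file descriptors the task still holds open.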
    if (exiting_task->allocated_stack && !exiting_task->is_user_task())
        MemoryManager::release_pages((void*)exiting_task->allocated_stack, TASK_PAGES_IN_STACK);
    if (exiting_task->image) kfree(exiting_task->image);
    if (exiting_task->is_user_task())
    {
        exiting_task->allocator.free();
        VMM::switch_back_to_kernel_address_space();
        VMM::apply_address_space();
        Interrupts::push_and_enable();
        exiting_task->address_space.destroy();
        Interrupts::pop();
    }
    for (int i = 0; i < TASK_MAX_FDS; i++) { exiting_task->files[i].close(); }
    delete exiting_task;
}

void sched_common_exit(Context* context, int64_t status)
{
    if (sched_current_task->id == 1) sched_current_task->state = sched_current_task->Exited;
    else sched_current_task->state = sched_current_task->Dying;
    sched_current_task->exit_status = status;
    if (sched_current_task->id != 1)
    {
        sched_for_each_child(sched_current_task, [](Task* child) {
            if (child->state != child->Exited) child->ppid = 1;
            return true;
        });
    }
    else
    {
#ifndef RUN_TEST_AS_INIT
        reboot();
#else
        hang();
#endif
    }
    Scheduler::task_yield(context);
}

void Scheduler::task_exit(Context* context, int64_t status)
{
    ensure(Interrupts::is_in_handler());
    kdbgln("exit: task %ld finished running, used %ld ms of cpu time", sched_current_task->id,
           sched_current_task->cpu_time);
    sched_common_exit(context, status);
}

void Scheduler::task_misbehave(Context* context, int64_t status)
{
    ensure(Interrupts::is_in_handler());
    kdbgln("exit: task %ld misbehaved, used %ld ms of cpu time", sched_current_task->id, sched_current_task->cpu_time);
    sched_common_exit(context, status);
}

void Scheduler::reap_tasks()
{
    Interrupts::disable();
    ensure(!Interrupts::is_in_handler());
    Task* reap_base = nullptr;
    Task* reap_end = nullptr;
    Task* task = base_task;
    Task* task_reaping;
    uint64_t iter_index = 0;
    do {
        if (task->state == task->Exited)
        {
            if (task == base_task && task == end_task) { panic("Last task exited"); }
            else if (task == base_task) { base_task = task->next_task; }
            else if (task == end_task) { end_task = task->prev_task; }
            if (!reap_base)
            {
                reap_base = task;
                reap_end = task;
                task->prev_task->next_task = task->next_task;
                task->next_task->prev_task = task->prev_task;
                task->prev_task = nullptr;
                task_reaping = task;
                task = task->next_task;
                task_reaping->next_task = nullptr;
            }
            else
            {
                reap_end->next_task = task;
                task->prev_task->next_task = task->next_task;
                task->next_task->prev_task = task->prev_task;
                task->prev_task = nullptr;
                reap_end = task;
                task_reaping = task;
                task = task->next_task;
                task_reaping->next_task = nullptr;
            }
        }
        else { task = task->next_task; }
        iter_index++;
    } while (iter_index < task_num);
    task = reap_base;
    while (task)
    {
        Task* reaped_task = task;
        task = task->next_task;
        reap_task(reaped_task);
    }
    Interrupts::enable();
}

static void sched_decrement_sleep_times()
{
    sched_for_each_task([](Task* task) {
        if (task->task_sleep > 0)
        {
            task->task_sleep -= frequency;
            if (task->task_sleep < 0) task->task_sleep = 0;
        }
        if (task->task_sleep == 0 && task->state == task->Sleeping) task->state = task->Running;
        return true;
    });
}

void Scheduler::task_tick(Context* context)
{
    ensure(Interrupts::is_in_handler());
    Interrupts::disable();
    sched_decrement_sleep_times();
    sched_current_task->task_time -= frequency;
    sched_current_task->cpu_time += frequency;
    if (sched_current_task->id == 0) return task_yield(context);
    if (sched_current_task->task_time <= 0)
    {
        sched_current_task->task_time = 0;
        task_yield(context);
    }
    Interrupts::enable();
}

void Scheduler::task_yield(Context* context)
{
    ensure(Interrupts::is_in_handler());
    Interrupts::disable();
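    // Round-robin selection: save the outgoing task's context, then walk the circular task list
    // starting after the current task. Blocked tasks whose wait condition has cleared are resumed;
    // the first Running task found gets a fresh time slice and its context is restored. If no task
    // is runnable, fall back to the idle task.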
    sched_current_task->save_context(context);
    bool was_idle = false;
    if (sched_current_task->state == sched_current_task->Idle)
    {
        sched_current_task = end_task;
        was_idle = true;
    }
    Task* original_task = sched_current_task;
    do {
        sched_current_task = sched_current_task->next_task;
        if (sched_current_task->state == sched_current_task->Blocking)
        {
            if (!sched_current_task->is_still_blocking()) sched_current_task->resume();
        }
        if (sched_current_task->state == sched_current_task->Running)
        {
            if (sched_current_task->id != original_task->id || was_idle)
            {
                if (!was_idle && original_task->is_user_task() && !original_task->has_died())
                {
                    original_task->save_floating();
                }
                if (sched_current_task->is_user_task())
                {
                    sched_current_task->switch_to_address_space();
                    sched_current_task->restore_floating();
                }
                else if (!was_idle && original_task->is_user_task() && !sched_current_task->is_user_task())
                {
                    VMM::switch_back_to_kernel_address_space();
                    VMM::apply_address_space();
                }
            }
            sched_current_task->task_time = 20;
            sched_current_task->restore_context(context);
            return;
        }
    } while (sched_current_task != original_task);
    if (!was_idle && original_task->is_user_task() && original_task->state != original_task->Exited)
    {
        original_task->save_floating();
    }
    sched_current_task = &idle_task;
    sched_current_task->task_time = frequency;
    if (!was_idle) { sched_current_task->restore_context(context); }
    return;
}

void Scheduler::yield()
{
    asm volatile("int $0x42" : : "a"(1));
}

void Scheduler::exit(int status)
{
    asm volatile("int $0x42" : : "a"(0), "D"(status));
}

void Scheduler::sleep(unsigned long ms)
{
    asm volatile("int $0x42" : : "D"(ms), "a"(2));
}

Task* Scheduler::current_task()
{
    return sched_current_task;
}

#define WNOHANG 1

// FIXME: only allow waiting for child processes when specifying a PID.
void sys_waitpid(Context* context, long pid, int* wstatus, int options)
{
    Task* child = nullptr;
    if (pid == -1)
    {
        sched_for_each_child(sched_current_task, [&](Task* task) {
            if (task->state == task->Dying)
            {
                child = task;
                return false;
            }
            return true;
        });
        if (!child)
        {
            if (options & WNOHANG)
            {
                context->rax = 0; // No child has exited, let's return 0.
                return;
            }
            int* kwstatus;
            if (wstatus)
            {
                kwstatus = obtain_user_ref(wstatus);
                if (!kwstatus)
                {
                    context->rax = -EFAULT;
                    return;
                }
            }
            kdbgln("blocking wait on any child");
            sched_current_task->state = sched_current_task->Blocking;
            sched_current_task->block_reason = BlockReason::Waiting;
            sched_current_task->blocking_wait_info.pid = -1;
            if (wstatus) sched_current_task->blocking_wait_info.wstatus = kwstatus;
            else sched_current_task->blocking_wait_info.wstatus = nullptr;
            return Scheduler::task_yield(context);
        }
    }
    else
    {
        child = Scheduler::find_by_pid(pid);
        if (!child)
        {
            context->rax = -ECHILD;
            return;
        }
    }
    if (child->ppid != sched_current_task->id)
    {
        // We are trying to call waitpid() on a task that isn't a child of ours. This is not allowed.
        context->rax = -ECHILD;
        return;
    }
    if (child->state != child->Dying)
    {
        if (options & WNOHANG)
        {
            context->rax = 0; // No child has exited, let's return 0.
            return;
        }
        int* kwstatus;
        if (wstatus)
        {
            kwstatus = obtain_user_ref(wstatus);
            if (!kwstatus)
            {
                context->rax = -EFAULT;
                return;
            }
        }
        sched_current_task->state = sched_current_task->Blocking;
        sched_current_task->block_reason = BlockReason::Waiting;
        sched_current_task->blocking_wait_info.pid = pid;
        if (wstatus) sched_current_task->blocking_wait_info.wstatus = kwstatus;
        else sched_current_task->blocking_wait_info.wstatus = nullptr;
        return Scheduler::task_yield(context);
    }
    if (wstatus)
    {
        int* kwstatus = obtain_user_ref(wstatus);
        if (kwstatus)
        {
            *kwstatus = (int)(child->exit_status & 0xff);
            release_user_ref(kwstatus);
        }
        else
        {
            kinfoln("wstatus ptr is invalid: %p", (void*)wstatus);
            child->state = child->Exited;
            context->rax = -EFAULT;
            return;
        }
    }
    child->state = child->Exited;
    context->rax = (long)child->id;
}

bool Task::is_wait_still_blocking()
{
    Task* child = nullptr;
    if (blocking_wait_info.pid == -1)
    {
        sched_for_each_child(sched_current_task, [&](Task* task) {
            if (task->state == task->Dying)
            {
                child = task;
                return false;
            }
            return true;
        });
        if (!child) return true;
        else
        {
            blocking_wait_info.pid = child->id; // We're committed to this child now.
            return false;
        }
    }
    else
    {
        child = Scheduler::find_by_pid(blocking_wait_info.pid);
        // Since sys_waitpid should have validated this child, and the only way for it to disappear
        // from the process list is for someone to wait for it, this should be pretty safe.
        ensure(child);
        if (child->state != child->Dying) return true;
        else return false;
    }
}

void Task::resume_wait()
{
    // is_wait_still_blocking should have chosen a child for us if the user process told us to wait
    // for any child.
    ensure(blocking_wait_info.pid != -1);
    Task* child = Scheduler::find_by_pid(blocking_wait_info.pid);
    ensure(child); // This should also already have been validated.
    if (blocking_wait_info.wstatus)
    {
        *blocking_wait_info.wstatus = (int)(child->exit_status & 0xff);
        release_user_ref(blocking_wait_info.wstatus);
    }
    child->state = child->Exited;
    regs.rax = (long)child->id;
}

struct pstat
{
    long pt_pid;
    long pt_ppid;
    char pt_name[128];
    int pt_state;
    long pt_time;
    uid_t pt_uid;
    gid_t pt_gid;
};

void sys_pstat(Context* context, long pid, struct pstat* buf)
{
    Task* task;
    if (pid == -1) task = Scheduler::find_by_pid(free_pid - 1);
    else if (pid == 0) task = &idle_task;
    else task = Scheduler::find_by_pid(pid);
    if (!task)
    {
        context->rax = -ESRCH;
        return;
    }
    if (task->state == task->Exited) // we're just waiting for the reaper to reap it
    {
        context->rax = -ESRCH;
        return;
    }
    if (buf)
    {
        struct pstat* kpstat = obtain_user_ref(buf);
        if (!kpstat)
        {
            context->rax = -EFAULT;
            return;
        }
        kpstat->pt_pid = task->id;
        kpstat->pt_ppid = task->ppid;
        kpstat->pt_state = (int)task->state;
        kpstat->pt_time = (long)task->cpu_time;
        kpstat->pt_uid = task->uid;
        kpstat->pt_gid = task->gid;
        strlcpy(kpstat->pt_name, task->name, sizeof(kpstat->pt_name));
        release_user_ref(kpstat);
    }
    context->rax = task->id;
    return;
}