// Luna/kernel/src/thread/Scheduler.cpp

#define MODULE "sched"

#include "thread/Scheduler.h"
#include "errno.h"
#include "interrupts/Interrupts.h"
#include "kassert.h"
#include "log/Log.h"
#include "memory/MemoryManager.h"
#include "memory/PMM.h"
#include "memory/VMM.h"
#include "misc/hang.h"
#include "misc/utils.h"
#include "panic/Panic.h"
#include "std/stdlib.h"
#include "std/string.h"
#include "sys/elf/ELFLoader.h"
#include "thread/PIT.h"
#include "thread/Task.h"
#include "utils/Addresses.h"
#include "utils/Registers.h"

// Number of tasks currently linked into the task ring. The idle task is not counted:
// init() only increments this for the first ring task.
static uint64_t task_num = 0;

// Task that task_yield() switches to when no task in the ring is in the Running state.
// It is never linked into the ring.
static Task idle_task;

// Next task id to hand out; post-incremented every time a task is created.
static uint64_t free_tid = 0;

// Task the scheduler currently considers to be executing, followed by the head and the
// tail of the circular, doubly-linked task ring (see append_task()).
static Task* sched_current_task;
static Task* base_task;
static Task* end_task;

// Entry point of the idle task; defined outside this file.
extern "C" void idle_task_function();

// Amount subtracted from the per-task time/sleep counters on every tick. Computed in init()
// as 1000 / PIT::frequency() (presumably milliseconds per timer tick - confirm against PIT).
static uint64_t frequency;

// Links `task` at the tail of the circular, doubly-linked task ring.
// An empty ring (no base_task) is turned into a one-element ring pointing at itself.
void Scheduler::append_task(Task* task)
{
    if (base_task)
    {
        // Splice the newcomer between the current tail and the head, then make it the tail.
        end_task->next_task = task;
        task->prev_task = end_task;
        base_task->prev_task = task;
        task->next_task = base_task;
        end_task = task;
        return;
    }

    // No head means there must be no tail either.
    ASSERT(!end_task);
    base_task = task;
    end_task = task;
    task->next_task = task;
    task->prev_task = task;
}

void Scheduler::init()
{
    memset(&idle_task, 0, sizeof(Task));
    idle_task.id = free_tid++;
    idle_task.regs.rip = (uint64_t)idle_task_function;
    idle_task.regs.rsp = get_top_of_stack((uint64_t)MemoryManager::get_page(), 1);
    idle_task.regs.cs = 0x08;
    idle_task.regs.ss = 0x10;
    idle_task.regs.rflags = (1 << 21) | (1 << 9);
    idle_task.task_sleep = 1000;
    idle_task.user_task = false;
    idle_task.state = idle_task.Idle;

    base_task = new Task;
    end_task = base_task;
    sched_current_task = base_task;
    sched_current_task->id = free_tid++;
    sched_current_task->task_time = 20; // gets 20 ms of cpu time before next switch
    sched_current_task->next_task = sched_current_task;
    sched_current_task->prev_task = sched_current_task;
    sched_current_task->state = sched_current_task->Running;
    sched_current_task->user_task = false;
    task_num++;
    // the other registers will be saved next task switch

    frequency = 1000 / PIT::frequency();
}

void Scheduler::add_kernel_task(void (*task)(void))
{
    Task* new_task = new Task;
    ASSERT(new_task);
    new_task->user_task = false;
    new_task->id = free_tid++;
    new_task->regs.rip = (uint64_t)task;
    new_task->allocated_stack =
        (uint64_t)MemoryManager::get_pages(TASK_PAGES_IN_STACK); // 16 KB is enough for everyone, right?
    new_task->regs.rsp = get_top_of_stack(new_task->allocated_stack, TASK_PAGES_IN_STACK);
    new_task->regs.cs = 0x08;
    new_task->regs.ss = 0x10;
    new_task->regs.ds = 0x10;
    new_task->regs.rflags = read_rflags() | 0x200; // enable interrupts
    new_task->task_sleep = 0;
    new_task->task_time = 0;
    new_task->cpu_time = 0;
    append_task(new_task);
    new_task->state = new_task->Running;
    task_num++;
    kinfoln("Adding kernel task: starts at %lx, tid %ld, stack at %lx, total tasks: %ld", new_task->regs.rip,
            new_task->id, new_task->regs.rsp, task_num);
}

// Allocates a blank user task with zeroed registers and counters, gives it a fresh id and
// appends it to the task ring. The task's state, stack and address space are not set here.
// Returns the new task, or nullptr if the allocation failed.
Task* Scheduler::create_user_task()
{
    Task* user_task = new Task;
    if (user_task == nullptr) { return nullptr; }

    memset(&user_task->regs, 0, sizeof(Context));
    user_task->user_task = true;
    user_task->id = free_tid++;
    user_task->task_sleep = 0;
    user_task->task_time = 0;
    user_task->cpu_time = 0;

    append_task(user_task);
    task_num++;
    return user_task;
}

// Loads the ELF executable `filename` from the filesystem into a new user address space and
// appends a Running user task for it to the task ring.
//
// Interrupts are disabled (push_and_disable) for the whole operation, and every exit path
// restores the previous interrupt state with Interrupts::pop().
//
// Returns the id of the new task on success, or a negative value on failure: the error
// returned by ELFLoader::check_elf_image_from_filesystem(), or -ENOMEM when there is not
// enough free memory, the task allocator cannot be initialized, or the stack cannot be mapped.
long Scheduler::load_user_task(const char* filename)
{
    kinfoln("Loading user task: %s", filename);
    Interrupts::push_and_disable();
    long result;
    if ((result = ELFLoader::check_elf_image_from_filesystem(filename)) < 0)
    {
        kerrorln("Failed to load %s from initrd", filename);
        Interrupts::pop();
        return result;
    }
    // A non-negative check result is compared against the free physical memory.
    if ((uint64_t)result > PMM::get_free())
    {
        // This path used to return without undoing push_and_disable(), leaking the saved
        // interrupt state.
        Interrupts::pop();
        return -ENOMEM;
    }
    Task* new_task = new Task;
    ASSERT(new_task);
    memset(&new_task->regs, 0, sizeof(Context));
    new_task->id = free_tid++;
    if (!new_task->allocator.init())
    {
        delete new_task;
        Interrupts::pop();
        return -ENOMEM;
    }
    new_task->address_space = AddressSpace::create();
    VMM::switch_to_user_address_space(new_task->address_space);
    ELFImage* image = ELFLoader::load_elf_from_filesystem(
        filename); // FIXME: TOCTOU? Right now, impossible, since interrupts are disabled and SMP is not a thing. But in
                   // the future, it might be possible.
    ASSERT(image);
    new_task->user_task = true;
    new_task->regs.rip = image->entry;
    new_task->image = image;
    new_task->allocated_stack = (uint64_t)MemoryManager::get_pages_at(
        0x100000, TASK_PAGES_IN_STACK, MAP_READ_WRITE | MAP_USER); // 16 KB is enough for everyone, right?
    if (!new_task->allocated_stack)
    {
        new_task->address_space.destroy();
        delete new_task;
        ELFLoader::release_elf_image(image);
        VMM::switch_back_to_kernel_address_space();
        Interrupts::pop();
        return -ENOMEM;
    }
    new_task->regs.rsp = get_top_of_stack(new_task->allocated_stack, TASK_PAGES_IN_STACK);
    // The low two bits (0x03) of each selector request privilege level 3.
    new_task->regs.cs = 0x18 | 0x03;
    new_task->regs.ss = 0x20 | 0x03;
    new_task->regs.ds = 0x20 | 0x03;
    new_task->regs.rflags = (1 << 21) | (1 << 9); // enable interrupts
    new_task->task_sleep = 0;
    new_task->task_time = 0;
    new_task->cpu_time = 0;
    append_task(new_task);
    new_task->state = new_task->Running;
    task_num++;
    kinfoln("Adding user task: loaded at %lx, tid %ld, stack at %lx, total tasks: %ld", new_task->regs.rip,
            new_task->id, new_task->regs.rsp, task_num);
    // Read the id while interrupts are still disabled: the task is already in the ring and
    // Running, and this function holds nothing that would keep it alive once it can be scheduled.
    const long new_tid = (long)new_task->id;
    VMM::switch_back_to_kernel_address_space();
    Interrupts::pop();
    return new_tid;
}

// Re-arms an existing user task so that it starts over at the entry point of `new_image`:
// registers are wiped, the stack pointer goes back to the top of the task's existing stack,
// and the task is marked Running with its sleep and cpu-time counters cleared.
void Scheduler::reset_task(Task* task, ELFImage* new_image)
{
    // Start from a clean register set, then fill in what a user task needs.
    memset(&task->regs, 0, sizeof(Context));
    task->regs.rip = new_image->entry;
    task->regs.rsp = get_top_of_stack(task->allocated_stack, TASK_PAGES_IN_STACK);
    task->regs.cs = 0x18 | 0x03;
    task->regs.ss = 0x20 | 0x03;
    task->regs.ds = 0x20 | 0x03;
    task->regs.rflags = (1 << 21) | (1 << 9); // enable interrupts

    task->image = new_image;
    task->state = task->Running;
    task->task_sleep = 0;
    task->cpu_time = 0;

    kinfoln("Resetting task: loaded at %lx, tid %ld, stack at %lx, total tasks: %ld", task->regs.rip, task->id,
            task->regs.rsp, task_num);
}

// Releases the resources held by `task` and deletes it, decrementing the task count.
// This function does not unlink the task from the task ring; reap_tasks() does that before
// calling it. Must not be called from inside an interrupt handler.
void Scheduler::reap_task(Task* task)
{
    ASSERT(!Interrupts::is_in_handler());
    task_num--;
    Task* exiting_task = task;
    ASSERT(task->id != 0); // WHY IN THE WORLD WOULD WE BE REAPING THE IDLE TASK?
    if (exiting_task->is_user_task())
    {
        // Go through the kernel address space, then select the dying task's address space.
        // NOTE(review): presumably so the releases below act on the task's own mappings - confirm.
        VMM::switch_back_to_kernel_address_space();
        VMM::apply_address_space();
        VMM::switch_to_user_address_space(exiting_task->address_space);
    }
    kinfoln("reaping task %ld, exited with code %ld", exiting_task->id, exiting_task->exit_status);
    // Only kernel task stacks are released here; user task stacks are not released explicitly.
    if (exiting_task->allocated_stack && !exiting_task->is_user_task())
        MemoryManager::release_pages((void*)exiting_task->allocated_stack, TASK_PAGES_IN_STACK);
    if (exiting_task->image) // FIXME: Also free pages the task has mmap-ed but not munmap-ed.
    {
        // Only the image descriptor itself is freed; the release_elf_image() call is disabled.
        // ELFLoader::release_elf_image(exiting_task->image);
        kfree(exiting_task->image);
    }
    if (exiting_task->is_user_task())
    {
        exiting_task->allocator.free();
        // Return to the kernel address space before tearing down the task's own one.
        VMM::switch_back_to_kernel_address_space();
        VMM::apply_address_space();
        // The address space is destroyed with interrupts enabled; the previous state is restored after.
        Interrupts::push_and_enable();
        exiting_task->address_space.destroy();
        Interrupts::pop();
    }
    // Close every file descriptor slot of the task.
    for (int i = 0; i < TASK_MAX_FDS; i++) { exiting_task->files[i].close(); }
    delete exiting_task;
}

// Marks the current task as Exited with the given status and yields to another task.
// Must be called from inside an interrupt handler. The task is only flagged here, not freed.
void Scheduler::task_exit(Context* context, int64_t status)
{
    ASSERT(Interrupts::is_in_handler());

    Task* const finished = sched_current_task;
    kdbgln("exit: task %ld finished running, used %ld ms of cpu time", finished->id, finished->cpu_time);

    finished->state = finished->Exited;
    finished->exit_status = status;

    task_yield(context);
}

// Terminates the current task after it misbehaved: marks it Exited with the given status and
// yields to another task. Must be called from inside an interrupt handler.
void Scheduler::task_misbehave(Context* context, int64_t status)
{
    ASSERT(Interrupts::is_in_handler());

    Task* const offender = sched_current_task;
    kdbgln("exit: task %ld misbehaved, used %ld ms of cpu time", offender->id, offender->cpu_time);

    offender->state = offender->Exited;
    offender->exit_status = status;

    task_yield(context);
}

void Scheduler::reap_tasks()
{
    Interrupts::disable();
    ASSERT(!Interrupts::is_in_handler());
    Task* reap_base = nullptr;
    Task* reap_end = nullptr;
    Task* task = base_task;
    Task* task_reaping;
    uint64_t iter_index = 0;
    do {
        if (task->state == task->Exited)
        {
            if (task == base_task && task == end_task) { panic("Last task exited"); }
            else if (task == base_task) { base_task = task->next_task; }
            else if (task == end_task) { end_task = task->prev_task; }
            if (!reap_base)
            {
                reap_base = task;
                reap_end = task;
                task->prev_task->next_task = task->next_task;
                task->next_task->prev_task = task->prev_task;
                task->prev_task = nullptr;
                task_reaping = task;
                task = task->next_task;
                task_reaping->next_task = nullptr;
            }
            else
            {
                reap_end->next_task = task;
                task->prev_task->next_task = task->next_task;
                task->next_task->prev_task = task->prev_task;
                task->prev_task = nullptr;
                reap_end = task;
                task_reaping = task;
                task = task->next_task;
                task_reaping->next_task = nullptr;
            }
        }
        else { task = task->next_task; }
        iter_index++;
    } while (iter_index < task_num);
    task = reap_base;
    while (task)
    {
        Task* reaped_task = task;
        task = task->next_task;
        reap_task(reaped_task);
    }
    Interrupts::enable();
}

static void sched_decrement_sleep_times()
{
    Task* task = base_task;
    if (!task) return;
    do {
        if (task->task_sleep > 0)
        {
            task->task_sleep -= frequency;
            if (task->task_sleep < 0) task->task_sleep = 0;
        }
        if (task->task_sleep == 0 && task->state == task->Sleeping) task->state = task->Running;
        task = task->next_task;
    } while (task != base_task);
}

// Per-tick scheduler entry point; must be called from inside an interrupt handler.
// Updates all sleep timers, then either yields immediately (when the task with id 0 is
// current) or charges one tick to the current task and yields once its time slice is used up.
void Scheduler::task_tick(Context* context)
{
    ASSERT(Interrupts::is_in_handler());
    Interrupts::disable();
    sched_decrement_sleep_times();

    Task* const running = sched_current_task;

    // Id 0 is the first id handed out, which init() gives to the idle task: always try to
    // leave it. This path returns without re-enabling interrupts.
    if (running->id == 0)
    {
        task_yield(context);
        return;
    }

    running->task_time -= frequency;
    running->cpu_time += frequency;

    const bool slice_used_up = running->task_time < 0;
    if (slice_used_up)
    {
        running->task_time = 0;
        task_yield(context);
    }

    Interrupts::enable();
}

// Switches away from the current task; must be called from inside an interrupt handler.
// Saves the current task's context from `context`, searches the task ring for the next task
// in the Running state, and writes that task's context back into `context`. If no task in the
// ring is Running, the idle task is selected instead.
void Scheduler::task_yield(Context* context)
{
    ASSERT(Interrupts::is_in_handler());
    Interrupts::disable();
    sched_current_task->save_context(context);
    bool was_idle = false;
    if (sched_current_task->state == sched_current_task->Idle)
    {
        // The idle task is not part of the ring, so start the search from the tail: the first
        // candidate examined below is then end_task->next_task.
        sched_current_task = end_task;
        was_idle = true;
    }
    Task* original_task = sched_current_task;
    do {
        sched_current_task = sched_current_task->next_task;
        if (sched_current_task->state == sched_current_task->Running)
        {
            // Floating-point state and address space are only touched when the chosen task
            // differs from the one being left, or when coming from the idle task.
            if (sched_current_task->id != original_task->id || was_idle)
            {
                // Preserve the floating-point state of a user task that is still alive.
                if (!was_idle && original_task->is_user_task() && !original_task->has_died())
                {
                    original_task->save_floating();
                }
                if (sched_current_task->is_user_task())
                {
                    sched_current_task->switch_to_address_space();
                    sched_current_task->restore_floating();
                }
                // Leaving a user task for a kernel task: go back to the kernel address space.
                else if (!was_idle && original_task->is_user_task() && !sched_current_task->is_user_task())
                {
                    VMM::switch_back_to_kernel_address_space();
                    VMM::apply_address_space();
                }
            }
            sched_current_task->task_time = 20; // fresh time slice, same value init() uses
            sched_current_task->restore_context(context);
            return;
        }
    } while (sched_current_task != original_task);
    // The search came back to the starting task without finding a Running one.
    if (!was_idle && original_task->is_user_task() && original_task->state != original_task->Exited)
    {
        original_task->save_floating();
    }
    sched_current_task = &idle_task;
    sched_current_task->task_time = frequency;
    // When the idle task was already current, `context` still holds its own state (saved at
    // the top of this function), so it is only restored when coming from another task.
    if (!was_idle) { sched_current_task->restore_context(context); }
    return;
}

// Raises software interrupt 0x42 with the value 1 in the "a" (accumulator) register.
// NOTE(review): presumably 1 selects the yield operation in the 0x42 handler - the handler is
// not part of this file, confirm there.
void Scheduler::yield()
{
    asm volatile("int $0x42" : : "a"(1));
}

// Raises software interrupt 0x42 with the value 0 in the "a" (accumulator) register and
// `status` in the "D" (destination index) register.
// NOTE(review): presumably 0 selects the exit operation and `status` becomes the exit code -
// the handler is not part of this file, confirm there.
void Scheduler::exit(int status)
{
    asm volatile("int $0x42" : : "a"(0), "D"(status));
}

// Raises software interrupt 0x42 with the value 2 in the "a" (accumulator) register and `ms`
// in the "D" (destination index) register.
// NOTE(review): presumably 2 selects the sleep operation and `ms` is a duration in
// milliseconds - the handler is not part of this file, confirm there.
void Scheduler::sleep(unsigned long ms)
{
    asm volatile("int $0x42" : : "D"(ms), "a"(2));
}

// Returns the task the scheduler currently considers to be executing. This is the idle task
// when task_yield() found nothing Running in the ring.
Task* Scheduler::current_task()
{
    Task* const running = sched_current_task;
    return running;
}