Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add FPU support in scheduler #20

Merged
merged 1 commit into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions include/scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,21 @@ namespace edge {
extern "C" {
void PendSV_Handler();
void SVC_Handler();
void restore();
}

struct saved_registers {
struct stack_registers {
unsigned R0{};
unsigned R1{};
unsigned R2{};
unsigned R3{};
unsigned R12{};
unsigned LR{};
unsigned RETURN_ADDR{};
unsigned FLAG{};
unsigned CTRL{0x01000000};
unsigned FP_REGS[16]{};
unsigned FPSCR{};

saved_registers(unsigned return_addr, unsigned flag = 0x01000000) :
RETURN_ADDR(return_addr), FLAG(flag)
{}
stack_registers(unsigned return_addr) : RETURN_ADDR(return_addr) {}
};

class Scheduler {
Expand All @@ -41,7 +40,7 @@ class Scheduler {

// ===== DO NOT REARRANGE THESE =====
etl::array<unsigned, STACK_SIZE_IN_UNSIGNED> stack{};
saved_registers first_stack_frame;
stack_registers first_stack_frame;
// ==================================

public:
Expand All @@ -51,7 +50,7 @@ class Scheduler {
// This is useful if we want to adjust the ratio of driver to process runtime
uint8_t consecutive_quantums_to_run;

Task(const saved_registers& initial_stack_frame, uint8_t initial_priority) :
Task(const stack_registers& initial_stack_frame, uint8_t initial_priority) :
first_stack_frame(initial_stack_frame),
consecutive_quantums_to_run(initial_priority)
{}
Expand All @@ -74,7 +73,6 @@ class Scheduler {

friend void PendSV_Handler(void);
friend void SVC_Handler(void);
friend void restore(void);
};

extern Scheduler scheduler;
Expand Down
88 changes: 58 additions & 30 deletions src/scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void Scheduler::start_scheduler()
void Scheduler::add_task(void (*function)(void), uint8_t priority)
{
task_stack.emplace_back(
saved_registers{reinterpret_cast<unsigned>(function)}, priority
stack_registers{reinterpret_cast<unsigned>(function)}, priority
);
}

Expand Down Expand Up @@ -61,8 +61,9 @@ __attribute__((naked, used)) void PendSV_Handler()
asm volatile("CPSID I");
if (--scheduler.slices_remaining == 0) {
asm volatile("mrs r0,psp\n"
"sub r0,#32\n"
"stm r0!,{r4-r11}");
"sub r0,#96\n"
"stm r0!,{r4-r11}\n"
"vstm r0!, {s16-s31}");

// This function will dirty registers. That's okay
scheduler.task_stack[scheduler.current_task_index].stack_ptr_loc =
Expand All @@ -81,16 +82,18 @@ __attribute__((naked, used)) void PendSV_Handler()
));

asm volatile("mrs r0,psp\n"
"sub r0,#32\n"
"ldm r0!,{r4-r11}\n");
"sub r0,#96\n"
"ldm r0!,{r4-r11}\n"
"vldm r0!, {s16-s31}");
}

asm volatile("CPSIE I");

// Always want to call drivers on context switch
drivers::do_async_work();

asm volatile("ldr r0,=0xfffffffd\n"
// Return in thumb/process mode and restore using extended stack frame
asm volatile("ldr r0,=0xffffffed\n"
"bx r0");
}

Expand All @@ -101,24 +104,38 @@ __attribute__((used)) void SysTick_Handler()
}

// Runs in userspace after async callback has finished
// I don't think there's any way to make this cleaner lol
__attribute__((used, naked)) void restore_regs()
{
asm volatile("pop {r0, r1, r2, r3, r12, lr}\npop {pc}");
}

// Returns old flag
unsigned move_registers_up(saved_registers* registers)
{
auto old_flag = registers->FLAG;
registers->FLAG = registers->RETURN_ADDR;
registers->RETURN_ADDR = registers->LR;
registers->LR = registers->R12;
registers->R12 = registers->R3;
registers->R3 = registers->R2;
registers->R2 = registers->R1;
registers->R1 = registers->R0;
registers->R0 = 0;
return old_flag;
// Load fpscr first so we can avoid dirtying r0 after it's popped
asm volatile("ldr r0, [sp, #96]\n"
"vmsr fpscr, r0\n");

// Pop regs as usual
asm volatile("pop {r0, r1, r2, r3, r12, lr}");

// Skip the return address (PC) and RETPSR. PC will be loaded last
// RETPSR should be ignored because it was already popped when returning from the exception
asm volatile("add sp, #8");

// Pop caller saved FP registers
asm volatile("vpop {s0-s15}");

// *sigh*
// We need to account for whether sp is 4- or 8-byte aligned
// diagram: https://shorturl.at/85lyY
asm volatile("push {r0}\n"
"mrs r0, psp\n"
"tst r0, #0x4\n"
"pop {r0}\n"
"ite eq\n"
"ADDEQ SP, #8\n"
"ADDNE SP, #4");

// Skip FPSCR (already loaded)
asm volatile("add sp, #4");

asm volatile("ldr pc, [sp, #-84]");
}

void Scheduler::yield_current_task()
Expand All @@ -135,21 +152,32 @@ void Scheduler::yield_current_task()

// This stack frame, originally created by the exception handler, will be popped
// by restore_regs()
t.stack_ptr_loc = reinterpret_cast<unsigned*>(__get_PSP());
auto stored_registers = reinterpret_cast<saved_registers*>(t.stack_ptr_loc);
stored_registers->RETURN_ADDR++;

unsigned old_flag = move_registers_up(stored_registers);
t.stack_ptr_loc = (unsigned*)__get_PSP();
auto stored_registers = reinterpret_cast<stack_registers*>(t.stack_ptr_loc);

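// Account for stack pointer alignment
// diagram: https://shorturl.at/85lyY
// NOTE: despite its name, `eight_byte_aligned` below is true when the low three
// address bits are non-zero, i.e. when the stack pointer is NOT 8-byte aligned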
bool eight_byte_aligned = ((unsigned)t.stack_ptr_loc) & 0x7;
if (eight_byte_aligned) {
t.stack_ptr_loc -= 1;
}
else {
t.stack_ptr_loc -= 2;
}

// "Push" registers, create a fake stack frame
// This will be popped by the exception handler
t.stack_ptr_loc -= 7;
auto new_registers = reinterpret_cast<saved_registers*>(t.stack_ptr_loc);
t.stack_ptr_loc -= (sizeof(stack_registers) / sizeof(unsigned));
auto new_registers = reinterpret_cast<stack_registers*>(t.stack_ptr_loc);
new_registers->R0 = static_cast<unsigned>(arg1);
new_registers->R1 = static_cast<unsigned>(arg2);

// Return to our restore_regs function so we can pop the caller-saved registers of
// the saved/previous execution path
new_registers->LR = reinterpret_cast<unsigned>(&restore_regs);
new_registers->RETURN_ADDR = reinterpret_cast<unsigned>(callback_address);
new_registers->FLAG = old_flag;

new_registers->CTRL = stored_registers->CTRL;
__set_PSP(reinterpret_cast<unsigned>(t.stack_ptr_loc));
}

Expand Down