Commit da783a85 authored by Isaac Oscar Gariano

Reimplemented the new-thread and new-stack handling for aarch64

parent 7d018fc8
@@ -96,7 +96,8 @@ impl Instruction {
match self.v {
ExprCall { .. } | ExprCCall { .. } | Load { .. } | Store { .. } | CmpXchg { .. } | AtomicRMW { .. } |
New(_) | AllocA(_) | NewHybrid(_, _) | AllocAHybrid(_, _) | NewStack(_) | NewThread(_, _) |
NewThreadExn(_, _) | NewFrameCursor(_) | Fence(_) | Return(_) | ThreadExit | Throw(_) |
NewThreadExn(_, _) | NewFrameCursor(_) | Fence(_) | Return(_) | ThreadExit | KillStack(_) |
Throw(_) |
TailCall(_) | Branch1(_) | Branch2 { .. } | Watchpoint { .. } | WPBranch { .. } |
Call { .. } | CCall { .. }| SwapStackExpr{..}| SwapStackExc { .. } | SwapStackKill { .. } | Switch { .. } | ExnInstruction { .. } |
CommonInst_GetThreadLocal | CommonInst_SetThreadLocal(_) | CommonInst_Pin(_) | CommonInst_Unpin(_) |
@@ -246,6 +247,12 @@ pub enum Instruction_ {
/// args: functionref of the entry function
NewStack(OpIndex),
/// Kills the given Mu stack
/// args: stackref to kill
KillStack(OpIndex),
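/// gets a stackref for the current stack
/// args: none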
CurrentStack,
/// create a new Mu thread, yields thread reference
/// args: stackref of a Mu stack, a list of arguments
NewThread(OpIndex, Vec<OpIndex>), // stack, args
@@ -511,7 +518,7 @@ impl Instruction_ {
&Instruction_::AllocA(ref ty) => format!("ALLOCA {}", ty),
&Instruction_::NewHybrid(ref ty, len) => format!("NEWHYBRID {} {}", ty, ops[len]),
&Instruction_::AllocAHybrid(ref ty, len) => format!("ALLOCAHYBRID {} {}", ty, ops[len]),
&Instruction_::NewStack(func) => format!("NEWSTACK {}", ops[func]),
&Instruction_::NewStack(func) => format!("NEW_STACK {}", ops[func]),
&Instruction_::NewThread(stack, ref args) => {
format!(
"NEWTHREAD {} PASS_VALUES {}",
@@ -557,6 +564,8 @@ impl Instruction_ {
&Instruction_::Return(ref vals) => format!("RET {}", op_vector_str(vals, ops)),
&Instruction_::ThreadExit => "THREADEXIT".to_string(),
&Instruction_::CurrentStack => "CURRENT_STACK".to_string(),
&Instruction_::KillStack(s) => format!("KILL_STACK {}", ops[s]),
&Instruction_::Throw(exn_obj) => format!("THROW {}", ops[exn_obj]),
&Instruction_::TailCall(ref call) => format!("TAILCALL {}", call.debug_str(ops)),
&Instruction_::Branch1(ref dest) => format!("BRANCH {}", dest.debug_str(ops)),
@@ -1415,11 +1415,17 @@ impl<'a> InstructionSelection {
// get thread local and add offset to get sp_loc
let tl = self.emit_get_threadlocal(f_context, vm);
self.backend
.emit_add_imm(&tl, &tl, *thread::NATIVE_SP_LOC_OFFSET as u16, false);
emit_load_base_offset(
self.backend.as_mut(),
&tl,
&tl,
*thread::NATIVE_SP_LOC_OFFSET as i64,
f_context,
vm
);
self.emit_runtime_entry(
&entrypoints::SWAP_BACK_TO_NATIVE_STACK,
&entrypoints::MUENTRY_THREAD_EXIT,
vec![tl.clone()],
None,
Some(node),
@@ -1428,6 +1434,31 @@ impl<'a> InstructionSelection {
);
}
Instruction_::NewStack(func) => {
trace!("instsel on NEWSTACK");
let ref ops = inst.ops;
let ref func = ops[func];
let tmp_res = self.get_result_value(node, 0);
let tmp_func = self.emit_ireg(func, f_content, f_context, vm);
let sig = match tmp_func.ty.v {
MuType_::FuncRef(ref sig) => sig.clone(),
_ => panic!("expected funcref")
};
let (_, _, stack_arg_size) = compute_argument_locations(&sig.arg_tys, &SP, 0, &vm);
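// call the runtime's new-stack entrypoint with the entry function and the size of its stack-passed arguments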
self.emit_runtime_entry(
&entrypoints::MUENTRY_NEW_STACK,
vec![tmp_func, make_value_int_const(stack_arg_size as u64, vm)],
None,
Some(node),
f_context,
vm
);
}
Instruction_::CommonInst_GetThreadLocal => {
trace!("instsel on GETTHREADLOCAL");
// get thread local
@@ -3717,181 +3748,6 @@ impl<'a> InstructionSelection {
}
}
// TODO: Thoroughly test this
// (compare with code generated by GCC for various types?)
// The algorithm presented here is derived from the ARM AAPCS64 reference
// Returns a vector indicating whether each argument should be passed as an IRef (and not directly),
// a vector giving the location of each argument (in memory or a register) and
// the amount of stack space used
// NOTE: It currently does not support vectors/SIMD types (or aggregates of such types)
fn compute_argument_locations(
&mut self,
arg_types: &Vec<P<MuType>>,
stack: &P<Value>,
offset: i64,
vm: &VM
) -> (Vec<bool>, Vec<P<Value>>, usize) {
if arg_types.len() == 0 {
// nothing to do
return (vec![], vec![], 0);
}
let mut ngrn = 0 as usize; // The Next General-purpose Register Number
let mut nsrn = 0 as usize; // The Next SIMD and Floating-point Register Number
let mut nsaa = 0 as usize; // The next stacked argument address (an offset from the SP)
use ast::types::MuType_::*;
// reference[i] = true indicates the argument is passed as an IRef to a location on the stack
let mut reference: Vec<bool> = vec![];
for t in arg_types {
reference.push(
hfa_length(t) == 0 && // HFAs aren't converted to IRefs
match t.v {
// size can't be statically determined
Hybrid(_) => panic!("Hybrid argument not supported"),
// type is too large
Struct(_) | Array(_, _) if vm.get_backend_type_size(t.id()) > 16 => true,
Vector(_, _) => unimplemented!(),
_ => false
}
);
}
// TODO: How does passing arguments by reference affect the stack size?
let mut locations: Vec<P<Value>> = vec![];
for i in 0..arg_types.len() {
let i = i as usize;
let t = if reference[i] {
P(MuType::new(
new_internal_id(),
MuType_::IRef(arg_types[i].clone())
))
} else {
arg_types[i].clone()
};
let size = align_up(vm.get_backend_type_size(t.id()), 8);
let align = get_type_alignment(&t, vm);
match t.v {
Hybrid(_) => panic!("hybrid argument not supported"),
Vector(_, _) => unimplemented!(),
Float | Double => {
if nsrn < 8 {
locations.push(get_alias_for_length(
ARGUMENT_FPRS[nsrn].id(),
get_bit_size(&t, vm)
));
nsrn += 1;
} else {
nsrn = 8;
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
}
Struct(_) | Array(_, _) => {
let hfa_n = hfa_length(&t);
if hfa_n > 0 {
if nsrn + hfa_n <= 8 {
// Note: the argument will occupy successive registers
// (one for each element)
locations.push(get_alias_for_length(
ARGUMENT_FPRS[nsrn].id(),
get_bit_size(&t, vm) / hfa_n
));
nsrn += hfa_n;
} else {
nsrn = 8;
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
} else {
if align == 16 {
ngrn = align_up(ngrn, 2); // align NGRN to the next even number
}
if size <= 8 * (8 - ngrn) {
// The struct should be packed, starting here
// (note: this may result in multiple struct fields in the same register
// or even floating-point values in a GPR)
locations.push(ARGUMENT_GPRS[ngrn].clone());
// How many GPRS are taken up by t
ngrn += if size % 8 != 0 {
size / 8 + 1
} else {
size / 8
};
} else {
ngrn = 8;
nsaa = align_up(nsaa, align_up(align, 8));
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
}
}
Void => panic!("void argument not supported"),
// Integral or pointer type
_ => {
if size <= 8 {
if ngrn < 8 {
locations.push(get_alias_for_length(
ARGUMENT_GPRS[ngrn].id(),
get_bit_size(&t, vm)
));
ngrn += 1;
} else {
nsaa = align_up(nsaa, align_up(align, 8));
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
} else if size == 16 {
ngrn = align_up(ngrn, 2); // align NGRN to the next even number
if ngrn < 7 {
locations.push(ARGUMENT_GPRS[ngrn].clone());
ngrn += 2;
} else {
ngrn = 8;
nsaa = align_up(nsaa, 16);
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += 16;
}
} else {
unimplemented!(); // Integer type is too large
}
}
}
}
(reference, locations, align_up(nsaa, 16) as usize)
}
// returns the stack arg offset - we will need this to collapse stack after the call
// as well as a list of argument registers
@@ -3909,7 +3765,7 @@ impl<'a> InstructionSelection {
// If we're tail calling, use the current frame's argument location instead
let mut arg_regs = Vec::<P<Value>>::new();
let (_, locations, stack_size) =
self.compute_argument_locations(&arg_tys, arg_base, arg_offset as i64, &vm);
compute_argument_locations(&arg_tys, arg_base, arg_offset as i64, &vm);
if is_tail {
if stack_size > self.current_stack_arg_size {
@@ -4480,7 +4336,7 @@ impl<'a> InstructionSelection {
None => vec![]
};
let (_, res_locs, res_stack_size) = self.compute_argument_locations(&res_tys, &SP, 0, &vm);
let (_, res_locs, res_stack_size) = compute_argument_locations(&res_tys, &SP, 0, &vm);
if !is_kill {
// Reserve space on the stack for the return values of the swap stack
@@ -4854,7 +4710,7 @@ impl<'a> InstructionSelection {
}
let (_, locations, stack_arg_size) =
self.compute_argument_locations(&sig.arg_tys, &FP, 16, &vm);
compute_argument_locations(&sig.arg_tys, &FP, 16, &vm);
self.current_stack_arg_size = stack_arg_size;
self.emit_unload_arguments(args, locations, f_context, vm);
self.finish_block();
@@ -849,7 +849,6 @@ pub fn get_callee_saved_offset(reg: MuID) -> isize {
}*/
pub fn is_callee_saved(reg_id: MuID) -> bool {
for reg in CALLEE_SAVED_GPRS.iter() {
if reg_id == reg.extract_ssa_id().unwrap() {
return true;
@@ -864,6 +863,10 @@ pub fn is_callee_saved(reg_id: MuID) -> bool {
false
}
// The stack size needed for a call to the given function signature
pub fn call_stack_size(sig: P<MuFuncSig>, vm: &VM) -> usize {
compute_argument_locations(&sig.ret_tys, &SP, 0, &vm).2
}
// TODO: Check that these numbers are reasonable (THEY ARE ONLY AN ESTIMATE)
use ast::inst::*;
pub fn estimate_insts_for_ir(inst: &Instruction) -> usize {
@@ -907,7 +910,7 @@ pub fn estimate_insts_for_ir(inst: &Instruction) -> usize {
// runtime
New(_) | NewHybrid(_, _) => 10,
NewStack(_) | NewThread(_, _) | NewThreadExn(_, _) | NewFrameCursor(_) => 10,
ThreadExit => 10,
ThreadExit => 10,
CurrentStack => 10,
KillStack(_) => 10,
Throw(_) => 10,
SwapStackExpr { .. } | SwapStackExc { .. } | SwapStackKill { .. } => 10,
CommonInst_GetThreadLocal | CommonInst_SetThreadLocal(_) => 10,
@@ -2830,3 +2833,177 @@ fn is_int_ex_reg(val: &P<Value>) -> bool {
fn is_fp_reg(val: &P<Value>) -> bool {
RegGroup::get_from_value(&val) == RegGroup::FPR && (val.is_reg() || val.is_const())
}
// TODO: Thoroughly test this
// (compare with code generated by GCC for various types?)
// The algorithm presented here is derived from the ARM AAPCS64 reference
// Returns a vector indicating whether each argument should be passed as an IRef (and not directly),
// a vector giving the location of each argument (in memory or a register) and
// the amount of stack space used
// NOTE: It currently does not support vectors/SIMD types (or aggregates of such types)
fn compute_argument_locations(
arg_types: &Vec<P<MuType>>,
stack: &P<Value>,
offset: i64,
vm: &VM
) -> (Vec<bool>, Vec<P<Value>>, usize) {
if arg_types.len() == 0 {
// nothing to do
return (vec![], vec![], 0);
}
let mut ngrn = 0 as usize; // The Next General-purpose Register Number
let mut nsrn = 0 as usize; // The Next SIMD and Floating-point Register Number
let mut nsaa = 0 as usize; // The next stacked argument address (an offset from the SP)
use ast::types::MuType_::*;
// reference[i] = true indicates the argument is passed as an IRef to a location on the stack
let mut reference: Vec<bool> = vec![];
for t in arg_types {
reference.push(
hfa_length(t) == 0 && // HFAs aren't converted to IRefs
match t.v {
// size can't be statically determined
Hybrid(_) => panic!("Hybrid argument not supported"),
// type is too large
Struct(_) | Array(_, _) if vm.get_backend_type_size(t.id()) > 16 => true,
Vector(_, _) => unimplemented!(),
_ => false
}
);
}
// TODO: How does passing arguments by reference affect the stack size?
let mut locations: Vec<P<Value>> = vec![];
for i in 0..arg_types.len() {
let i = i as usize;
let t = if reference[i] {
P(MuType::new(
new_internal_id(),
MuType_::IRef(arg_types[i].clone())
))
} else {
arg_types[i].clone()
};
let size = align_up(vm.get_backend_type_size(t.id()), 8);
let align = get_type_alignment(&t, vm);
match t.v {
Hybrid(_) => panic!("hybrid argument not supported"),
Vector(_, _) => unimplemented!(),
Float | Double => {
if nsrn < 8 {
locations.push(get_alias_for_length(
ARGUMENT_FPRS[nsrn].id(),
get_bit_size(&t, vm)
));
nsrn += 1;
} else {
nsrn = 8;
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
}
Struct(_) | Array(_, _) => {
let hfa_n = hfa_length(&t);
if hfa_n > 0 {
if nsrn + hfa_n <= 8 {
// Note: the argument will occupy successive registers
// (one for each element)
locations.push(get_alias_for_length(
ARGUMENT_FPRS[nsrn].id(),
get_bit_size(&t, vm) / hfa_n
));
nsrn += hfa_n;
} else {
nsrn = 8;
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
} else {
if align == 16 {
ngrn = align_up(ngrn, 2); // align NGRN to the next even number
}
if size <= 8 * (8 - ngrn) {
// The struct should be packed, starting here
// (note: this may result in multiple struct fields in the same register
// or even floating-point values in a GPR)
locations.push(ARGUMENT_GPRS[ngrn].clone());
// How many GPRS are taken up by t
ngrn += if size % 8 != 0 {
size / 8 + 1
} else {
size / 8
};
} else {
ngrn = 8;
nsaa = align_up(nsaa, align_up(align, 8));
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
}
}
Void => panic!("void argument not supported"),
// Integral or pointer type
_ => {
if size <= 8 {
if ngrn < 8 {
locations.push(get_alias_for_length(
ARGUMENT_GPRS[ngrn].id(),
get_bit_size(&t, vm)
));
ngrn += 1;
} else {
nsaa = align_up(nsaa, align_up(align, 8));
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += size;
}
} else if size == 16 {
ngrn = align_up(ngrn, 2); // align NGRN to the next even number
if ngrn < 7 {
locations.push(ARGUMENT_GPRS[ngrn].clone());
ngrn += 2;
} else {
ngrn = 8;
nsaa = align_up(nsaa, 16);
locations.push(make_value_base_offset(
&stack,
offset + (nsaa as i64),
&t,
vm
));
nsaa += 16;
}
} else {
unimplemented!(); // Integer type is too large
}
}
}
}
(reference, locations, align_up(nsaa, 16) as usize)
}
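The register/stack assignment above can be hard to follow in full, so here is a minimal, self-contained sketch (not part of the commit) of the NGRN/NSRN/NSAA bookkeeping, restricted to integer/pointer and float/double scalars of at most 8 bytes; the aggregate, HFA and by-reference handling of compute_argument_locations is omitted, and the names ArgClass, ArgLoc and locate_scalars are illustrative only.

// Simplified model of the AAPCS64 bookkeeping used above: scalar arguments only.
#[derive(Clone, Copy)]
enum ArgClass { Int, Float } // integer/pointer vs. floating-point scalar (<= 8 bytes)

#[derive(Debug)]
enum ArgLoc { Gpr(usize), Fpr(usize), Stack(i64) } // X/D register index, or offset from SP

fn locate_scalars(args: &[ArgClass]) -> (Vec<ArgLoc>, usize) {
    let (mut ngrn, mut nsrn, mut nsaa) = (0usize, 0usize, 0usize);
    let mut locs = Vec::new();
    for &a in args {
        match a {
            ArgClass::Int if ngrn < 8 => { locs.push(ArgLoc::Gpr(ngrn)); ngrn += 1; }
            ArgClass::Float if nsrn < 8 => { locs.push(ArgLoc::Fpr(nsrn)); nsrn += 1; }
            // once the relevant register class is exhausted the argument spills to the stack
            _ => { locs.push(ArgLoc::Stack(nsaa as i64)); nsaa += 8; }
        }
    }
    // the total stack-argument size is rounded up to 16 bytes, as in the real function
    (locs, (nsaa + 15) & !15)
}

fn main() {
    // ten integer arguments: the first eight go to X0-X7, the last two to [SP] and [SP + 8]
    let (locs, size) = locate_scalars(&[ArgClass::Int; 10]);
    println!("{:?}, stack bytes = {}", locs, size);
}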
@@ -654,7 +654,7 @@ pub fn estimate_insts_for_ir(inst: &Instruction) -> usize {
// runtime call
New(_) | NewHybrid(_, _) => 10,
NewStack(_) | NewThread(_, _) | NewThreadExn(_, _) | NewFrameCursor(_) => 10,
ThreadExit => 10,
ThreadExit => 10,
CurrentStack => 10,
KillStack(_) => 10,
Throw(_) => 10,
SwapStackExpr { .. } | SwapStackExc { .. } | SwapStackKill { .. } => 10,
CommonInst_GetThreadLocal | CommonInst_SetThreadLocal(_) => 10,
@@ -668,3 +668,7 @@ pub fn estimate_insts_for_ir(inst: &Instruction) -> usize {
_ => unimplemented!()
}
}
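// The stack size needed for a call to the given function signature
// (x86_64 backend: currently a stub that always returns 0)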
pub fn call_stack_size(sig: P<MuFuncSig>, vm: &VM) -> usize {
0
}
@@ -116,6 +116,8 @@ pub use compiler::backend::x86_64::spill_rewrite;
pub use compiler::backend::x86_64::ARGUMENT_GPRS;
#[cfg(target_arch = "x86_64")]
pub use compiler::backend::x86_64::ARGUMENT_FPRS;
#[cfg(target_arch = "x86_64")]
pub use compiler::backend::x86_64::call_stack_size;
/// --- aarch64 backend ---
#[cfg(target_arch = "aarch64")]
@@ -184,6 +186,8 @@ pub use compiler::backend::aarch64::spill_rewrite;
pub use compiler::backend::aarch64::ARGUMENT_GPRS;
#[cfg(target_arch = "aarch64")]
pub use compiler::backend::aarch64::ARGUMENT_FPRS;
#[cfg(target_arch = "aarch64")]
pub use compiler::backend::aarch64::call_stack_size;
use vm::VM;
use ast::types::*;
@@ -94,6 +94,17 @@ pop_pair d3, d2, \stack
pop_pair d1, d0, \stack
.endm
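// load_arguments: reloads the argument registers D0-D7 and X0-X7 from a 128-byte
// save area immediately below the given stack pointer (the D registers occupy the
// upper 64 bytes, the X registers the lower 64 bytes)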
.macro load_arguments stack=SP
LDP D0, D1, [\stack, #-2*8]
LDP D2, D3, [\stack, #-4*8]
LDP D4, D5, [\stack, #-6*8]
LDP D6, D7, [\stack, #-8*8]
LDP X0, X1, [\stack, #-10*8]
LDP X2, X3, [\stack, #-12*8]
LDP X4, X5, [\stack, #-14*8]
LDP X6, X7, [\stack, #-16*8]
.endm
.macro mov_args_to_callee_saved
MOV X19, X0
MOV X20, X1
@@ -57,14 +57,26 @@ lazy_static! {
// impl: runtime_asm_ARCH_OS.s
// decl: thread.rs
pub static ref SWAP_BACK_TO_NATIVE_STACK : RuntimeEntrypoint = RuntimeEntrypoint {
pub static ref MUENTRY_THREAD_EXIT : RuntimeEntrypoint = RuntimeEntrypoint {
sig: P(MuFuncSig{
hdr: MuEntityHeader::unnamed(ir::new_internal_id()),
ret_tys: vec![],
arg_tys: vec![ADDRESS_TYPE.clone()]
}),
aot: ValueLocation::Relocatable(RegGroup::GPR,
String::from("muentry_swap_back_to_native_stack")),
String::from("muentry_thread_exit")),
jit: RwLock::new(None),
};
}
lazy_static! {
// impl/decl: thread.rs
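// args: the new stack's entry function, and the size (in bytes) of its stack-passed arguments
// ret: stackref for the newly created stack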
pub static ref MUENTRY_NEW_STACK: RuntimeEntrypoint = RuntimeEntrypoint {
sig: P(MuFuncSig{
hdr: MuEntityHeader::unnamed(ir::new_internal_id()),
ret_tys: vec![STACKREF_TYPE.clone()],
arg_tys: vec![ADDRESS_TYPE.clone(), ADDRESS_TYPE.clone()]
}),
aot: ValueLocation::Relocatable(RegGroup::GPR, String::from("muentry_new_stack")),
jit: RwLock::new(None),
};