Commit c313f4d7 authored by Isaac Oscar Gariano's avatar Isaac Oscar Gariano

Fixed all failing test cases on both AArch64 and x86-64.

parent 4e35ae29
Pipeline #439 passed with stage
in 74 minutes and 54 seconds
......@@ -16,6 +16,7 @@ pub trait CodeGenerator {
fn print_cur_code(&self);
fn start_block(&mut self, block_name: MuName);
fn block_exists(&self, block_name: MuName) -> bool;
fn start_exception_block(&mut self, block_name: MuName) -> ValueLocation;
fn set_block_livein(&mut self, block_name: MuName, live_in: &Vec<P<Value>>);
fn set_block_liveout(&mut self, block_name: MuName, live_out: &Vec<P<Value>>);
......@@ -38,6 +39,10 @@ pub trait CodeGenerator {
fn emit_push_pair(&mut self, src1: Reg, src2: Reg, stack: Reg); // Emits a STP
fn emit_pop_pair(&mut self, dest1: Reg, dest2: Reg, stack: Reg); // Emits a LDP
// For callee saved loads and stores (flags them so that only they are removed)
fn emit_ldr_callee_saved(&mut self, dest: Reg, src: Mem);
fn emit_str_callee_saved(&mut self, dest: Mem, src: Reg);
/* DON'T IMPLEMENT
SIMD instructions (unless they operate solely on GPRs or the Dn and Sn registers)
......@@ -266,6 +271,9 @@ TODO:
fn emit_orr_shift(&mut self, dest: Reg, src1: Reg, src2: Reg, shift: &str, amount: u8);
// binary ops with immediates
// The 'str' will be patched by the linker (used to access global variables)
fn emit_add_str(&mut self, dest: Reg, src1: Reg, src2: &str);
fn emit_add_imm(&mut self, dest: Reg, src1: Reg, src2: u16, shift: bool);
fn emit_adds_imm(&mut self, dest: Reg, src1: Reg, src2: u16, shift: bool);
fn emit_sub_imm(&mut self, dest: Reg, src1: Reg, src2: u16, shift: bool);
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -335,7 +335,7 @@ lazy_static! {
X27.clone(),
X28.clone(),
// Note: These two are technically CALEE saved but need to be dealt with specially
// Note: These two are technically CALLEE saved but need to be dealt with specially
//X29.clone(), // Frame Pointer
//X30.clone() // Link Register
];
......@@ -608,6 +608,33 @@ lazy_static! {
map
};
// Callee-saved registers per the AAPCS64 calling convention:
// X19-X28 plus the lower 64 bits of V8-V15 (D8-D15) — 18 entries total.
pub static ref CALLEE_SAVED_REGs : [P<Value>; 18] = [
X19.clone(),
X20.clone(),
X21.clone(),
X22.clone(),
X23.clone(),
X24.clone(),
X25.clone(),
X26.clone(),
X27.clone(),
X28.clone(),
// Note: These two are technically CALLEE saved but need to be dealt with specially
//X29.clone(), // Frame Pointer
//X30.clone() // Link Register
D8.clone(),
D9.clone(),
D10.clone(),
D11.clone(),
D12.clone(),
D13.clone(),
D14.clone(),
D15.clone()
];
// put caller-saved regs first (they impose no overhead if there is no call instruction)
pub static ref ALL_USABLE_MACHINE_REGs : Vec<P<Value>> = vec![
X19.clone(),
......@@ -789,7 +816,7 @@ pub fn estimate_insts_for_ir(inst: &Instruction) -> usize {
// Splits an integer immediate into four 16-bit segments (returns the least significant first)
pub fn split_aarch64_iimm(val: u64) -> (u16, u16, u16, u16) {
pub fn split_aarch64_imm_u64(val: u64) -> (u16, u16, u16, u16) {
(val as u16, (val >> 16) as u16, (val >> 32) as u16, (val >> 48) as u16)
}
......@@ -852,10 +879,10 @@ pub fn get_bit(x: u64, i: usize) -> bool {
(x & ((1 as u64) << i) ) != 0
}
// Returns true if val = A << S, from some A < 4096, and S = 0 or S = 12
// Returns true if val = A << S, for some 0 <= A < 4096, and S = 0 or S = 12
// Note: Even though '0' is a valid arithmetic immediate, the Zero register should be used instead
pub fn is_valid_arithmetic_imm(val : u64) -> bool {
val < 4096 || ((val & 0b111111111111) == 0 && val < (4096 << 12))
val > 0 && val < 4096 || ((val & 0b111111111111) == 0 && val < (4096 << 12))
}
// aarch64 instructions only operate on 32 and 64-bit registers
......@@ -1107,15 +1134,15 @@ pub fn round_up(n: usize, d: usize) -> usize { ((n + d - 1)/d)*d }
// TODO: Implement this more efficiently?
pub fn log2(val: u64) -> u64 {
debug_assert!(val.is_power_of_two());
/*debug_assert!(val != 0);
debug_assert!(val != 0);
let mut ret = 0;
for i in 0..63 {
if val & (1 << i) != 0 {
ret = i;
}
}*/
}
// WARNING: This will only work for val < 2^31
let ret = (val as f64).log2() as u64;
//let ret = (val as f64).log2() as u64;
debug_assert!(val == 1 << ret);
ret
}
......
......@@ -25,6 +25,7 @@ use std::str;
use std::usize;
use std::slice::Iter;
use std::ops;
use std::collections::HashSet;
struct ASMCode {
name: MuName,
......@@ -655,51 +656,41 @@ impl MachineCode for ASMCode {
// self.code.insert(index, ASMInst::nop());
}
fn remove_unnecessary_callee_saved(&mut self, used_callee_saved: Vec<MuID>) -> (Vec<MuID>, usize) {
fn remove_unnecessary_callee_saved(&mut self, used_callee_saved: Vec<MuID>) -> HashSet<MuID> {
// we always save rbp
let rbp = x86_64::RBP.extract_ssa_id().unwrap();
// every push/pop will use/define rsp
let rsp = x86_64::RSP.extract_ssa_id().unwrap();
let find_op_other_than_rsp = |inst: &ASMInst| -> Option<MuID> {
let find_op_other_than_rbp = |inst: &ASMInst| -> MuID {
for id in inst.defines.keys() {
if *id != rsp && *id != rbp {
return Some(*id);
if *id != rbp {
return *id;
}
}
for id in inst.uses.keys() {
if *id != rsp && *id != rbp {
return Some(*id);
if *id != rbp {
return *id;
}
}
None
panic!("Expected to find a used register other than the rbp");
};
let mut inst_to_remove = vec![];
let mut regs_to_remove = vec![];
let mut kept_callee_saved = 0;
let mut regs_to_remove = HashSet::new();
for i in 0..self.number_of_insts() {
let ref inst = self.code[i];
if inst.code.contains("push") || inst.code.contains("pop") {
match find_op_other_than_rsp(inst) {
Some(op) => {
// if this push/pop instruction is about a callee saved register
// and the register is not used, we set the instruction as nop
if x86_64::is_callee_saved(op) {
if used_callee_saved.contains(&op) {
kept_callee_saved += 1;
} else {
trace!("removing instruction {:?} for save/restore unnecessary callee saved regs", inst);
regs_to_remove.push(op);
inst_to_remove.push(i);
}
}
match inst.spill_info {
Some(SpillMemInfo::CalleeSaved) => {
let reg = find_op_other_than_rbp(inst);
if !used_callee_saved.contains(&reg) {
trace!("removing instruction {:?} for save/restore unnecessary callee saved regs", inst);
regs_to_remove.insert(reg);
inst_to_remove.push(i);
}
None => {}
}
_ => {}
}
}
......@@ -707,11 +698,10 @@ impl MachineCode for ASMCode {
self.set_inst_nop(i);
}
(regs_to_remove, kept_callee_saved)
regs_to_remove
}
#[allow(unused_variables)]
fn patch_frame_size(&mut self, size: usize, size_used: usize) {
fn patch_frame_size(&mut self, size: usize) {
let size = size.to_string();
debug_assert!(size.len() <= FRAME_SIZE_PLACEHOLDER_LEN);
......@@ -841,7 +831,8 @@ enum ASMBranchTarget {
#[derive(Clone, Debug)]
enum SpillMemInfo {
Load(P<Value>),
Store(P<Value>)
Store(P<Value>),
CalleeSaved, // Callee saved record
}
#[derive(Clone, Debug)]
......@@ -1080,6 +1071,16 @@ impl ASMCodeGen {
self.add_asm_inst_internal(code, defines, uses, is_using_mem_op, ASMBranchTarget::None, None)
}
/// Adds an assembly instruction tagged as a callee-saved save/restore
/// (`SpillMemInfo::CalleeSaved`), so that `remove_unnecessary_callee_saved`
/// can later identify and delete it when the register turns out to be unused.
fn add_asm_inst_with_callee_saved(
&mut self,
code: String,
defines: LinkedHashMap<MuID, Vec<ASMLocation>>,
uses: LinkedHashMap<MuID, Vec<ASMLocation>>,
is_using_mem_op: bool,
) {
self.add_asm_inst_internal(code, defines, uses, is_using_mem_op, ASMBranchTarget::None, Some(SpillMemInfo::CalleeSaved))
}
fn add_asm_inst_with_spill(
&mut self,
code: String,
......@@ -1624,7 +1625,7 @@ impl ASMCodeGen {
}
fn internal_mov_r_mem(&mut self, inst: &str, dest: Reg, src: Mem,
is_spill_related: bool
is_spill_related: bool, is_callee_saved: bool
) {
let len = check_op_len(dest);
......@@ -1636,7 +1637,16 @@ impl ASMCodeGen {
let asm = format!("{} {},{}", inst, mem, reg);
if is_spill_related {
if is_callee_saved {
self.add_asm_inst_with_callee_saved(
asm,
linked_hashmap!{
id2 => vec![loc2]
},
uses,
true,
)
} else if is_spill_related {
self.add_asm_inst_with_spill(
asm,
linked_hashmap!{
......@@ -1659,7 +1669,7 @@ impl ASMCodeGen {
}
fn internal_mov_mem_r(&mut self, inst: &str, dest: Mem, src: Reg,
is_spill_related: bool)
is_spill_related: bool, is_callee_saved: bool)
{
let len = check_op_len(src);
......@@ -1680,7 +1690,14 @@ impl ASMCodeGen {
let asm = format!("{} {},{}", inst, reg, mem);
if is_spill_related {
if is_callee_saved {
self.add_asm_inst_with_callee_saved(
asm,
linked_hashmap! {},
uses,
true,
)
} else if is_spill_related {
self.add_asm_inst_with_spill(
asm,
linked_hashmap!{},
......@@ -1843,11 +1860,11 @@ impl ASMCodeGen {
}
fn emit_spill_store_gpr(&mut self, dest: Mem, src: Reg) {
self.internal_mov_mem_r("mov", dest, src, true)
self.internal_mov_mem_r("mov", dest, src, true, false)
}
fn emit_spill_load_gpr(&mut self, dest: Reg, src: Mem) {
self.internal_mov_r_mem("mov", dest, src, true)
self.internal_mov_r_mem("mov", dest, src, true, false)
}
fn emit_spill_store_fpr(&mut self, dest: Mem, src: Reg) {
......@@ -2127,18 +2144,24 @@ impl CodeGenerator for ASMCodeGen {
self.internal_mov_r_imm("mov", dest, src)
}
fn emit_mov_r_mem (&mut self, dest: &P<Value>, src: &P<Value>) {
self.internal_mov_r_mem("mov", dest, src, false)
self.internal_mov_r_mem("mov", dest, src, false, false)
}
fn emit_mov_r_r (&mut self, dest: &P<Value>, src: &P<Value>) {
self.internal_mov_r_r("mov", dest, src)
}
fn emit_mov_mem_r (&mut self, dest: &P<Value>, src: &P<Value>) {
self.internal_mov_mem_r("mov", dest, src, false)
self.internal_mov_mem_r("mov", dest, src, false, false)
}
fn emit_mov_mem_imm(&mut self, dest: &P<Value>, src: i32, oplen: usize) {
self.internal_mov_mem_imm("mov", dest, src, oplen)
}
fn emit_mov_r_mem_callee_saved (&mut self, dest: &P<Value>, src: &P<Value>) {
self.internal_mov_r_mem("mov", dest, src, false, true)
}
fn emit_mov_mem_r_callee_saved (&mut self, dest: &P<Value>, src: &P<Value>) {
self.internal_mov_mem_r("mov", dest, src, false, true)
}
// zero/sign extend mov
fn emit_movs_r_r (&mut self, dest: Reg, src: Reg) {
......@@ -2328,7 +2351,7 @@ impl CodeGenerator for ASMCodeGen {
// lea
fn emit_lea_r64(&mut self, dest: &P<Value>, src: &P<Value>) {
self.internal_mov_r_mem("lea", dest, src, false)
self.internal_mov_r_mem("lea", dest, src, false, false)
}
// and
......
......@@ -54,6 +54,10 @@ pub trait CodeGenerator {
// because mem may only have type as ADDRESS_TYPE
fn emit_mov_mem_imm(&mut self, dest: Mem, src: i32, oplen: usize); // store
fn emit_mov_mem_r_callee_saved (&mut self, dest: Mem, src: Reg); // store callee saved register
fn emit_mov_r_mem_callee_saved (&mut self, dest: Reg, src: Mem); // load callee saved register
// zero/sign extend mov
fn emit_movs_r_r (&mut self, dest: Reg, src: Reg);
fn emit_movz_r_r (&mut self, dest: Reg, src: Reg);
......
......@@ -1768,7 +1768,7 @@ impl <'a> InstructionSelection {
let reg_tmp = self.get_result_value(node);
self.emit_runtime_entry(&entrypoints::FREM, vec![reg_op1.clone(), reg_op2.clone()], Some(vec![reg_tmp.clone()]), Some(node), f_content, f_context, vm);
self.emit_runtime_entry(&entrypoints::FREM64, vec![reg_op1.clone(), reg_op2.clone()], Some(vec![reg_tmp.clone()]), Some(node), f_content, f_context, vm);
} else {
panic!("unexpected fdiv: {}", node)
}
......@@ -2688,6 +2688,10 @@ impl <'a> InstructionSelection {
self.backend.add_cfi_def_cfa_register(&x86_64::RBP);
}
// reserve spaces for current frame
// add x, rbp -> rbp (x is negative, however we do not know x now)
self.backend.emit_frame_grow();
// push all callee-saved registers
{
let frame = self.current_frame.as_mut().unwrap();
......@@ -2698,16 +2702,13 @@ impl <'a> InstructionSelection {
// not pushing rbp (as we have done that)
if reg.extract_ssa_id().unwrap() != rbp {
trace!("allocate frame slot for reg {}", reg);
self.backend.emit_push_r64(&reg);
frame.alloc_slot_for_callee_saved_reg(reg.clone(), vm);
let loc = frame.alloc_slot_for_callee_saved_reg(reg.clone(), vm);
self.backend.emit_mov_mem_r_callee_saved(&loc, &reg);
}
}
}
// reserve spaces for current frame
// add x, rbp -> rbp (x is negative, however we do not know x now)
self.backend.emit_frame_grow();
// unload arguments by registers
let mut gpr_arg_count = 0;
let mut fpr_arg_count = 0;
......@@ -2825,17 +2826,21 @@ impl <'a> InstructionSelection {
}
}
// frame shrink
self.backend.emit_frame_shrink();
// pop all callee-saved registers - reverse order
for i in (0..x86_64::CALLEE_SAVED_GPRs.len()).rev() {
let ref reg = x86_64::CALLEE_SAVED_GPRs[i];
if reg.extract_ssa_id().unwrap() != x86_64::RBP.extract_ssa_id().unwrap() {
self.backend.emit_pop_r64(&reg);
{
let frame = self.current_frame.as_mut().unwrap();
for i in (0..x86_64::CALLEE_SAVED_GPRs.len()).rev() {
let ref reg = x86_64::CALLEE_SAVED_GPRs[i];
let reg_id = reg.extract_ssa_id().unwrap();
if reg_id != x86_64::RBP.extract_ssa_id().unwrap() {
let loc = frame.allocated.get(&reg_id).unwrap().make_memory_op(reg.ty.clone(), vm);
self.backend.emit_mov_r_mem_callee_saved(&reg, &loc);
}
}
}
// frame shrink
self.backend.emit_frame_shrink();
// pop rbp
self.backend.emit_pop_r64(&x86_64::RBP);
}
......
......@@ -68,26 +68,16 @@ impl RegisterAllocation {
.collect();
let used_callee_saved: Vec<MuID> = used_callee_saved.into_iter().collect();
let n_used_callee_saved = used_callee_saved.len();
let (removed_callee_saved, n_kept_callee_saved) = coloring.cf.mc_mut().remove_unnecessary_callee_saved(used_callee_saved);
let removed_callee_saved = coloring.cf.mc_mut().remove_unnecessary_callee_saved(used_callee_saved);
for reg in removed_callee_saved {
coloring.cf.frame.remove_record_for_callee_saved_reg(reg);
}
// patch frame size
// size for callee saved regs
let size_for_callee_saved_regs = n_kept_callee_saved * POINTER_SIZE;
trace!("callee saved registers used {} bytes", size_for_callee_saved_regs);
let total_frame_size = coloring.cf.frame.cur_size();
trace!("frame reserved for {} bytes", total_frame_size);
let size_to_patch = total_frame_size - size_for_callee_saved_regs;
trace!("patching the code to grow/shrink size of {} bytes", size_to_patch);
coloring.cf.mc_mut().patch_frame_size(size_to_patch, size_for_callee_saved_regs);
let frame_size = coloring.cf.frame.cur_size();
trace!("patching the code to grow/shrink size of {} bytes", frame_size);
coloring.cf.mc_mut().patch_frame_size(frame_size);
}
coloring.cf.mc().trace_mc();
......
......@@ -15,12 +15,13 @@ use vm::VM;
// | spilled
// |---------------
// | alloca area
// Total size for all callee saved registers
#[derive(RustcEncodable, RustcDecodable, Clone)]
pub struct Frame {
func_ver_id: MuID,
cur_offset: isize, // offset to frame base pointer
pub argument_by_reg: HashMap<MuID, P<Value>>,
pub argument_by_stack: HashMap<MuID, P<Value>>,
......@@ -50,7 +51,6 @@ impl Frame {
Frame {
func_ver_id: func_ver_id,
cur_offset: 0,
argument_by_reg: HashMap::new(),
argument_by_stack: HashMap::new(),
......@@ -105,7 +105,7 @@ impl Frame {
}
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
fn alloc_slot(&mut self, val: &P<Value>, vm: &VM) -> &FrameSlot {
pub fn alloc_slot(&mut self, val: &P<Value>, vm: &VM) -> &FrameSlot {
// RBP/FP is 16 bytes aligned, we are offsetting from RBP/FP
// every value should be properly aligned
......@@ -131,9 +131,9 @@ impl Frame {
let id = val.id();
let ret = FrameSlot {
offset: self.cur_offset,
value: val.clone()
value: val.clone(),
};
self.allocated.insert(id, ret);
self.allocated.get(&id).unwrap()
}
......@@ -142,13 +142,19 @@ impl Frame {
#[derive(RustcEncodable, RustcDecodable, Clone)]
pub struct FrameSlot {
pub offset: isize,
pub value: P<Value>
pub value: P<Value>,
}
impl fmt::Display for FrameSlot {
// x86-64: AT&T-style base-relative operand, e.g. "-16(RBP): v1"
#[cfg(target_arch = "x86_64")]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}(RBP): {}", self.offset, self.value)
}
// aarch64: bracketed FP-relative operand, e.g. "[FP, #-16]: v1"
#[cfg(target_arch = "aarch64")]
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[FP, #{}]: {}", self.offset, self.value)
}
}
impl FrameSlot {
......@@ -177,10 +183,10 @@ impl FrameSlot {
hdr: MuEntityHeader::unnamed(vm.next_id()),
ty: ty.clone(),
v: Value_::Memory(
MemoryLocation::Address{
MemoryLocation::VirtualAddress{
base: aarch64::FP.clone(),
offset: Some(Value::make_int_const(vm.next_id(), self.offset as u64)),
shift: 0,
scale: 1,
signed: false
}
)
......
......@@ -5,6 +5,7 @@ use runtime::ValueLocation;
use std::ops;
use std::collections::HashMap;
use std::collections::HashSet;
use rustc_serialize::{Encodable, Encoder, Decodable, Decoder};
......@@ -191,9 +192,9 @@ pub trait MachineCode {
/// remove unnecessary save/restore instructions if the callee saved register is not used
/// returns the set of callee saved registers whose save/restore
/// instructions have been deleted
fn remove_unnecessary_callee_saved(&mut self, used_callee_saved: Vec<MuID>) -> (Vec<MuID>, usize);
fn remove_unnecessary_callee_saved(&mut self, used_callee_saved: Vec<MuID>) -> HashSet<MuID>;
/// patch frame size
fn patch_frame_size(&mut self, size: usize, size_used: usize);
fn patch_frame_size(&mut self, size: usize);
fn as_any(&self) -> &Any;
}
......
......@@ -9,12 +9,12 @@ fn main() {
#[cfg(target_os = "linux")]
#[cfg(target_arch = "aarch64")]
fn main() {
gcc::compile_library("libgc_clib_aarch64.a", &["src/heap/gc/clib_aarch64.c"]);
gcc::compile_library("libgc_clib_aarch64.a", &["src/heap/gc/clib_aarch64.S"]);
}
// This is here to enable cross compiling from windows/x86_64 to linux/aarch64
#[cfg(target_os = "windows")]
#[cfg(target_arch = "x86_64")]
fn main() {
gcc::compile_library("libgc_clib_aarch64.a", &["src/heap/gc/clib_aarch64.c"]);
gcc::compile_library("libgc_clib_aarch64.a", &["src/heap/gc/clib_aarch64.S"]);
}
.arch armv8-a
#include "../../../../runtime/asm_common_aarch64.S.inc"
//__thread uintptr_t low_water_mark
// Thread-local storage slot recording the stack's low-water mark
// (read/written by get_low_water_mark / set_low_water_mark below).
// NOTE(review): the section is named "tbss" with no leading dot; the
// conventional ELF TLS bss section is ".tbss" — confirm the assembler
// still places this symbol in TLS with this spelling.
.type low_water_mark,@object
.section tbss,"awT",@nobits
.globl low_water_mark
.balign 8
low_water_mark:
.xword 0
.size low_water_mark, 8
#malloc_zero(size_t size) -> void*
# X0 X0
// Returns `size` bytes of zero-initialised heap memory by tail-calling
// calloc(1, size); calloc's return value (X0) becomes ours directly.
begin_func malloc_zero
MOV X1, X0 // X1 = size (second calloc argument)
MOV X0, #1 // first calloc argument: one element of `size` bytes
B calloc // tail call calloc(1, size)
end_func malloc_zero
#immmix_get_stack_ptr() -> uintptr_t
# X0
// Returns the current stack pointer. This is a leaf function that never
// adjusts SP, so the value returned is SP as seen at the call site.
begin_func immmix_get_stack_ptr
MOV X0, SP
RET
end_func immmix_get_stack_ptr
#get_registers_count() -> int
# W0
// Returns the number of general-purpose registers captured by
// get_registers (X0-X30 = 31).
begin_func get_registers_count
MOV X0, 31 // writing X0 also sets the W0 return value
RET        // BUG FIX: RET was missing; without it execution fell through into get_registers
end_func get_registers_count
# get_registers() -> uintptr_t*
# X0
// Spills all 31 general-purpose registers (X0-X30, with XZR padding the
// X0 slot's partner) onto the stack, copies them in ascending order
// (buffer[0] = X0 ... buffer[30] = X30) into a malloc'd buffer, and
// returns that buffer in X0.
begin_func get_registers
push_pair X30, X29
mov x29, sp
# push registers onto the stack
push_pair X28, X27
push_pair X26, X25
push_pair X24, X23
push_pair X22, X21
push_pair X20, X19
push_pair X18, X17
push_pair X16, X15
push_pair X14, X13
push_pair X12, X11
push_pair X10, X9
push_pair X8, X7
push_pair X6, X5
push_pair X4, X3
push_pair X2, X1
push_pair XZR, X0
# sizeof(uintptr_t) * 31 = 248
MOV X0, #248 // BUG FIX: was #244 — the copy sequence below stores 8 + 15*16 = 248 bytes (31 regs * 8), so a 244-byte buffer was overflowed by 4 bytes
BL malloc
# Now X0 contains the value returned by malloc
# NOTE(review): malloc may return NULL; the stores below would then fault — consider aborting on NULL
# ret[0] = x0; (use X2 and X3 as temporaries)
MOV X1, X0 // Make a copy of X0, that can be freely modified (X0 will be returned)
LDP XZR, X2, [SP],#16 // X2 = original value of X0
STR X2, [X1],#8 // X1[0] = original value of X0
# Pop the top two registers from the stack, and store them in X1, and increment X1
# (do this 15 times, once for each remaining pair of registers; X0 was popped above)
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
LDP X2, X3, [SP],#16
STP X2, X3, [X1],#16
# BUG FIX: the final pair (caller's FP and LR) was popped into X2/X3 and
# stored into the buffer, but X29/X30 themselves were never restored —
# and BL malloc clobbered X30, so the old "RET X30" returned to the wrong
# address. Peek the pair for the buffer, then pop it back into X29/X30.
LDP X2, X3, [SP]       // read caller's FP/LR pair without popping
STP X2, X3, [X1],#16   // store them as the last two buffer entries
pop_pair X29, X30      // restore caller's FP and LR (inverse of push_pair X30, X29)
# X0 contains the value returned from malloc
RET
end_func get_registers
# set_low_water_mark()
// Stores the current stack pointer into the thread-local low_water_mark.
begin_func set_low_water_mark
// low_water_mark = SP
MRS x8, TPIDR_EL0                      // X8 = TLS base for this thread
ADD X8, X8, :tprel_hi12:low_water_mark
ADD X8, X8, :tprel_lo12_nc:low_water_mark // X8 = &low_water_mark
MOV X0, SP // BUG FIX: was "MOV SP, X0", which wrote the (undefined) X0 into SP, corrupting the stack; the intent is to copy SP into X0 since SP cannot be stored directly
STR X0, [x8]
RET
end_func set_low_water_mark
# get_low_water_mark() -> uintptr_t
# X0
// Returns the thread-local low_water_mark value (set by set_low_water_mark).
begin_func get_low_water_mark
MRS x8, TPIDR_EL0 // X8 = TLS base for this thread
ADD X8, X8, :tprel_hi12:low_water_mark
ADD X8, X8, :tprel_lo12_nc:low_water_mark // X8 = &low_water_mark
LDR X0, [x8]
RET
end_func get_low_water_mark
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
/* Allocates `size` bytes of zero-initialised heap memory.
 * Returns NULL on allocation failure.
 * BUG FIX: the original called memset(ret, 0, size) without checking
 * whether malloc returned NULL, which is undefined behaviour when the
 * allocation fails. calloc zeroes the memory and reports failure safely. */
void* malloc_zero(size_t size) {
    return calloc(1, size);
}
//uintptr_t immmix_get_stack_ptr()
__asm__(".text\n"
".global immmix_get_stack_ptr\n"
".type immmix_get_stack_ptr,@function\n"
".balign 16\n"
"immmix_get_stack_ptr:\n"
"MOV X0, SP\n"
"RET\n"