#![warn(unused_imports)]
#![warn(unreachable_code)]

use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::*;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::mm::objectmodel::OBJECT_HEADER_SIZE;

use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;

use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::EPILOGUE_BLOCK_NAME;

use compiler::backend::aarch64::*;
use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

const INLINE_FASTPATH : bool = false;

pub struct InstructionSelection {
    name: &'static str,
    backend: Box<CodeGenerator>,

    current_fv_id: MuID,
    current_callsite_id: usize,
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_block_in_ir: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsites that name the block as their exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>,
    current_xr_value: Option<P<Value>>, // A temporary that holds the saved XR value (if needed)
    current_constants: HashMap<MuID, P<Value>>,
    current_constants_locs: HashMap<MuID, P<Value>>
}

// TODO: Move all functions in here that don't need access to 'self' (or that only call functions which don't need 'self', even if called on self) into mod.rs
impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection {
            name: "Instruction Selection (aarch64)",
            backend: Box::new(ASMCodeGen::new()),

            current_fv_id: 0,
            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_block_in_ir: None,  // it is possible the block is newly created in instruction selection
                                        // but sometimes we want to know its control flow
                                        // so we need to track what block it is from the IR

                                        // FIXME: ideally we should not create new blocks in instruction selection
                                        // see Issue #6
            current_func_start: None,
            // key: block id, val: callsites that name the block as their exception block
            current_exn_callsites: HashMap::new(),
            current_exn_blocks: HashMap::new(),
            current_xr_value: None,
            current_constants: HashMap::new(),
            current_constants_locs: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }

    // in this pass, we assume that
    // * we do not need to back up/restore caller-saved registers
    // if any of these assumptions breaks, we will need to re-emit the code
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node#{} {}", node.id(), node);

        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    // TODO: Optimise if cond is a flag from a binary operation?
                    Instruction_::Branch2 { cond, ref true_dest, ref false_dest, .. } => {
                        trace!("instsel on BRANCH2");
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            let cur_block = f_content.get_block_by_name(self.current_block_in_ir.as_ref().unwrap().clone());
                            let next_block_in_trace = cur_block.control_flow.get_hottest_succ().unwrap();

                            if next_block_in_trace == true_dest.target {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };

                        let ref ops = inst.ops;

                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);

                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();

                        let ref cond = ops[cond];

                        if self.match_cmp_res(cond) {
                            trace!("emit cmp_res-branch2");
                            // Emit a CBNZ for 128-bit comparisons that are not symmetric
                            let use_cbnz = self.is_int128_asym_cmp(cond);
                            let tmp_cond =
                                if use_cbnz { Some(make_temporary(f_context, UINT1_TYPE.clone(), vm)) }
                                else { None };
                            let cond_box =
                                if use_cbnz { Some(Box::new(tmp_cond.as_ref().unwrap().clone())) }
                                else { None };

                            let mut cmpop = self.emit_cmp_res(cond, cond_box, f_content, f_context, vm);
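                            // For an asymmetric 128-bit comparison, emit_cmp_res is expected to leave the
                            // boolean outcome in tmp_cond and we branch on that register directly; the
                            // emitted sequence is roughly (illustrative):
                            //     ... cmp/ccmp on the two halves ...
                            //     cset  w_tmp, <cond>
                            //     cbnz  w_tmp, <branch_target>   // or cbz when branching on the false edge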

                            if use_cbnz {
                                if !branch_if_true {
                                    self.backend.emit_cbz(tmp_cond.as_ref().unwrap(), branch_target);
                                } else {
                                    self.backend.emit_cbnz(tmp_cond.as_ref().unwrap(), branch_target);
                                }
                            } else {
                                if !branch_if_true {
                                    cmpop = cmpop.invert();
                                }
                                let cond = get_condition_codes(cmpop);
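                                // Some FP comparisons (e.g. an ordered-and-not-equal style predicate, assumed
                                // here purely for illustration) cannot be tested with a single AArch64 condition
                                // code, so two conditional branches to the same target are emitted, roughly:
                                //     b.mi <target>
                                //     b.gt <target>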

                                if cmpop == op::CmpOp::FFALSE {
                                    ; // Do nothing
                                } else if cmpop == op::CmpOp::FTRUE {
                                    self.backend.emit_b(branch_target);
                                } else {
                                    self.backend.emit_b_cond(cond[0], branch_target.clone());

                                    if cond.len() == 2 {
                                        self.backend.emit_b_cond(cond[1], branch_target);
                                    }
                                }
                            }
                        } else {
                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);
                            if branch_if_true {
                                self.backend.emit_tbnz(&cond_reg, 0, branch_target.clone());
                            } else {
                                self.backend.emit_tbz(&cond_reg, 0, branch_target.clone());
                            }
                        };

                        // it is possible that the fallthrough block is scheduled somewhere else
                        // we need to explicitly jump to it
                        self.finish_block();
                        let fallthrough_temp_block = format!("{}_{}_branch_fallthrough", self.current_fv_id, node.id());
                        self.start_block(fallthrough_temp_block, &vec![]);

                        let fallthrough_target = f_content.get_block(fallthrough_dest.target).name().unwrap();
                        self.backend.emit_b(fallthrough_target);
                    },

                    Instruction_::Select { cond, true_val, false_val } => {
                        use ast::op::CmpOp::*;

                        trace!("instsel on SELECT");
                        let ref ops = inst.ops;

                        let ref cond = ops[cond];
                        let ref true_val = ops[true_val];
                        let ref false_val = ops[false_val];

                        let tmp_res = self.get_result_value(node, 0);

                        // moving integers/pointers
                        // generate compare
                        let cmpop = if self.match_cmp_res(cond) {
                            self.emit_cmp_res(cond, None, f_content, f_context, vm)
                        } else if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);
                            self.backend.emit_cmp_imm(&tmp_cond, 0, false);
                            NE
                        } else {
                            panic!("expected ireg, found {}", cond)
                        };

                        let tmp_true = self.emit_reg(true_val, f_content, f_context, vm);
                        let tmp_false = self.emit_reg(false_val, f_content, f_context, vm);

                        let cond = get_condition_codes(cmpop);
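                        // When the comparison maps to two condition codes, we select under the first
                        // code and then select again under the second, reusing the previous result as
                        // the "false" operand, roughly (illustrative):
                        //     csel x_res, x_true, x_false, <cond0>
                        //     csel x_res, x_true, x_res,   <cond1>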

                        if self.match_ireg(true_val) {
                            if cmpop == FFALSE {
                                self.backend.emit_mov(&tmp_res, &tmp_false);
                            } else if cmpop == FTRUE {
                                self.backend.emit_mov(&tmp_res, &tmp_true);
                            } else {
                                self.backend.emit_csel(&tmp_res, &tmp_true, &tmp_false, cond[0]);

                                if cond.len() == 2 {
                                    self.backend.emit_csel(&tmp_res, &tmp_true, &tmp_res, cond[1]);
                                }
                            }
                        } else if self.match_fpreg(true_val) {
                            if cmpop == FFALSE {
                                self.backend.emit_fmov(&tmp_res, &tmp_false);
                            } else if cmpop == FTRUE {
                                self.backend.emit_fmov(&tmp_res, &tmp_true);
                            } else {
                                self.backend.emit_fcsel(&tmp_res, &tmp_true, &tmp_false, cond[0]);

                                if cond.len() == 2 {
                                    self.backend.emit_fcsel(&tmp_res, &tmp_true, &tmp_res, cond[1]);
                                }
                            }
                        } else {
                            // moving vectors
                            unimplemented!()
                        }
                    },

                    Instruction_::CmpOp(op, op1, op2) => {
                        use ast::op::CmpOp::*;

                        trace!("instsel on CMPOP");
                        let ref ops = inst.ops;
                        let ref op1 = ops[op1];
                        let ref op2 = ops[op2];

                        let tmp_res = self.get_result_value(node, 0);

                        debug_assert!(tmp_res.ty.get_int_length().is_some());
                        debug_assert!(tmp_res.ty.get_int_length().unwrap() == 1);

                        let cmpop = self.emit_cmp_res_op(op, Some(Box::new(tmp_res.clone())), &op1, &op2, f_content, f_context, vm);
                        let cond = get_condition_codes(cmpop);

                        // emit_cmp_res_op will set tmp_res for 128-bit asymmetric comparisons
                        if !self.is_int128_asym_cmp(node) {
                            if cmpop == FFALSE {
                                emit_mov_u64(self.backend.as_mut(), &tmp_res, 0);
                            } else if cmpop == FTRUE {
                                emit_mov_u64(self.backend.as_mut(), &tmp_res, 1);
                            } else {
                                self.backend.emit_cset(&tmp_res, cond[0]);
                                // Note: some comparisons can't be computed based on a single aarch64 flag;
                                // instead they are computed as a condition OR NOT another condition.
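                                // The emitted pair is roughly (illustrative):
                                //     cset  w_res, <cond0>
                                //     csinc w_res, w_res, wzr, <!cond1>   // res = if !cond1 { res } else { 1 }, i.e. res |= cond1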
                                if cond.len() == 2 {
                                    self.backend.emit_csinc(&tmp_res, &tmp_res, &WZR, invert_condition_code(cond[1]));
                                }
                            }
                        }
                    }

                    Instruction_::Branch1(ref dest) => {
                        trace!("instsel on BRANCH1");
                        let ref ops = inst.ops;

                        self.process_dest(&ops, dest, f_content, f_context, vm);

                        let target = f_content.get_block(dest.target).name().unwrap();

                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_b(target);
                    },

                    Instruction_::Switch { cond, ref default, ref branches } => {
                        trace!("instsel on SWITCH");
                        let ref ops = inst.ops;

                        let ref cond = ops[cond];

                        if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);
                            emit_zext(self.backend.as_mut(), &tmp_cond);

                            // emit each branch
                            for &(case_op_index, ref case_dest) in branches {
                                let ref case_op = ops[case_op_index];

                                // process dest
                                self.process_dest(&ops, case_dest, f_content, f_context, vm);

                                let target = f_content.get_block(case_dest.target).name().unwrap();

                                let mut imm_val = 0 as u64;
                                // Is one of the arguments a valid immediate?
                                let emit_imm = if match_node_int_imm(&case_op) {
                                    imm_val = node_imm_to_u64(&case_op);
                                    is_valid_arithmetic_imm(imm_val)
                                } else {
                                    false
                                };
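                                // CMP (immediate) encodes a 12-bit unsigned immediate, optionally shifted
                                // left by 12, e.g. (illustrative):
                                //     cmp x_cond, #0x123
                                //     cmp x_cond, #0x123, lsl #12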

                                if emit_imm {
                                    // values >= 4096 must use the shifted (LSL #12) immediate form
                                    let imm_shift = imm_val >= 4096;
                                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };
                                    self.backend.emit_cmp_imm(&tmp_cond, imm_op2 as u16, imm_shift);
                                } else {
                                    let tmp_case_op = self.emit_ireg(case_op, f_content, f_context, vm);
                                    emit_zext(self.backend.as_mut(), &tmp_case_op);
                                    self.backend.emit_cmp(&tmp_cond, &tmp_case_op);
                                }

                                self.backend.emit_b_cond("EQ", target);

                                self.finish_block();
                                self.start_block(format!("{}_switch_not_met_case_{}", node.id(), case_op_index), &vec![]);
                            }

                            // emit default
                            self.process_dest(&ops, default, f_content, f_context, vm);

                            let default_target = f_content.get_block(default.target).name().unwrap();
                            self.backend.emit_b(default_target);
                        } else {
                            panic!("expecting cond in switch to be ireg: {}", cond);
                        }
                    }

                    Instruction_::ExprCall { ref data, is_abort } => {
                        trace!("instsel on EXPRCALL");

                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode, 
                            f_content, f_context, vm);
                    },

                    Instruction_::Call { ref data, ref resume } => {
                        trace!("instsel on CALL");

                        self.emit_mu_call(
                            inst,
                            data,
                            Some(resume),
                            node,
                            f_content, f_context, vm);
                    },

                    Instruction_::ExprCCall { ref data, is_abort } => {
                        trace!("instsel on EXPRCCALL");

                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_c_call_ir(inst, data, None, node, f_content, f_context, vm);
                    }

                    Instruction_::CCall { ref data, ref resume } => {
                        trace!("instsel on CCALL");

                        self.emit_c_call_ir(inst, data, Some(resume), node, f_content, f_context, vm);
                    }

                    Instruction_::Return(ref vals) => {
                        trace!("instsel on RETURN");

                        // prepare return regs
                        let ref ops = inst.ops;
                        // TODO: Are vals in the same order as the return types in the function's signature?
                        let ret_tys = vals.iter().map(|i| node_type(&ops[*i])).collect();
                        let ret_type = self.combine_return_types(&ret_tys);
                        let n = ret_tys.len(); // number of return values
                        let xr_value = self.current_xr_value.as_ref().unwrap().clone();

                        if n == 0 {
                            // Do nothing
                        } else if n == 1 {
                            let ret_loc = self.compute_return_locations(&ret_type, &xr_value, &vm);
                            let ret_val = self.emit_node_value(&ops[vals[0]], f_content, f_context, vm);
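                            // A 128-bit integer returned in machine registers is split into its two
                            // 64-bit halves; the high half is assumed to live in the 64-bit register
                            // following ret_loc (e.g. the X0/X1 pair).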

                            if is_machine_reg(&ret_loc) && is_int_ex_reg(&ret_val) {
                                let (val_l, val_h) = split_int128(&ret_val, f_context, vm);
                                let ret_loc_h = get_register_from_id(ret_loc.id() + 2);
                                // nothing special needs to be done
                                emit_move_value_to_value(self.backend.as_mut(), &ret_loc, &val_l, f_context, vm);
                                emit_move_value_to_value(self.backend.as_mut(), &ret_loc_h, &val_h, f_context, vm);
                            } else {
                                emit_move_value_to_value(self.backend.as_mut(), &ret_loc, &ret_val, f_context, vm);
                            }
                        } else {
                            let ret_loc = self.compute_return_locations(&ret_type, &xr_value, &vm);
                            let mut i = 0;
                            for ret_index in vals {
                                let ret_val = self.emit_node_value(&ops[*ret_index], f_content, f_context, vm);
                                let ref ty = ret_val.ty;
                                let offset = self.get_field_offset(&ret_type, i, &vm);

                                match ty.v {
                                    MuType_::Vector(_, _) | MuType_::Tagref64 => unimplemented!(),
                                    MuType_::Void => panic!("Unexpected void"),
                                    MuType_::Struct(_) | MuType_::Array(_, _) => unimplemented!(),
                                    MuType_::Hybrid(_) => panic!("Can't return a hybrid"),
                                    // Integral, pointer or floating point type
                                    _ => self.insert_bytes(&ret_loc, &ret_val, offset as i64, f_context, vm),
                                }

                                i += 1;
                            }
                        }
                        self.backend.emit_b(EPILOGUE_BLOCK_NAME.to_string());
                    },

                    Instruction_::BinOp(op, op1, op2) => {
                        trace!("instsel on BINOP");
                        self.emit_binop(node, inst, op, BinOpStatus { flag_n: false, flag_z: false, flag_c: false, flag_v: false }, op1, op2, f_content, f_context, vm);
                    },

                    Instruction_::BinOpWithStatus(op, status, op1, op2) => {
                        trace!("instsel on BINOP_STATUS");
                        self.emit_binop(node, inst, op, status, op1, op2, f_content, f_context, vm);
                    }

                    Instruction_::ConvOp { operation, ref from_ty, ref to_ty, operand } => {
                        trace!("instsel on CONVOP");

                        let ref ops = inst.ops;

                        let ref op = ops[operand];

                        let tmp_res = self.get_result_value(node, 0);
                        let tmp_op = self.emit_reg(op, f_content, f_context, vm);

                        let from_ty_size = get_bit_size(&from_ty, vm);
                        let to_ty_size = get_bit_size(&to_ty, vm);

                        match operation {
                            op::ConvOp::TRUNC => {
                                // src is in one register
                                if self.match_ireg(op) {
                                    self.backend.emit_mov(&tmp_res, &cast_value(&tmp_op, &to_ty));
                                } else if self.match_ireg_ex(op) {
                                    // Move the lower word
                                    if from_ty_size != to_ty_size {
                                        let (op_l, _) = self.emit_ireg_ex(op, f_content, f_context, vm);
                                        self.backend.emit_mov(&tmp_res, &cast_value(&op_l, &to_ty));
                                    } else {
                                        self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                                    }
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }

                            },
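                            // ZEXT/SEXT between differently sized integers are lowered to bitfield
                            // extracts; e.g. zero-extending a 17-bit value to 64 bits is roughly
                            // (illustrative): ubfx x_res, x_src, #0, #17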
                            op::ConvOp::ZEXT => {
                                if from_ty_size != to_ty_size {
                                    if to_ty_size <= 64 {
                                        self.backend.emit_ubfx(&tmp_res, &cast_value(&tmp_op, &to_ty), 0, from_ty_size as u8);
                                    } else if to_ty_size == 128 {
                                        let (res_l, res_h) = split_int128(&tmp_res, f_context, vm);

                                        // res_l = ZEXT src
                                        self.backend.emit_ubfx(&res_l, &cast_value(&tmp_op, &UINT64_TYPE), 0, from_ty_size as u8);
                                        self.backend.emit_mov(&res_h, &XZR); // res_h = 0

                                    } else {
                                        panic!("unexpected int length {}", to_ty_size);
                                    }
                                } else {
                                    // Trivial, just do a move
                                    emit_move_value_to_value(self.backend.as_mut(), &tmp_res, &tmp_op, f_context, vm);
                                }
                            },
                            op::ConvOp::SEXT => {
                                if from_ty_size != to_ty_size {
                                    if to_ty_size <= 64 {
                                        self.backend.emit_sbfx(&tmp_res, &cast_value(&tmp_op, &to_ty), 0, from_ty_size as u8);
                                    } else if to_ty_size == 128 {
                                        let (res_l, res_h) = split_int128(&tmp_res, f_context, vm);

                                        // res_l = SEXT src
                                        self.backend.emit_sbfx(&res_l, &cast_value(&tmp_op, &UINT64_TYPE), 0, from_ty_size as u8);
                                        self.backend.emit_asr_imm(&res_h, &tmp_op, 63); // res_h = ASHR src, 63

                                    } else {
                                        panic!("unexpected int length {}", to_ty_size);
                                    }

                                } else {
                                    // Trivial, just do a move
                                    emit_move_value_to_value(self.backend.as_mut(), &tmp_res, &tmp_op, f_context, vm);
                                }
                            },
                            op::ConvOp::REFCAST | op::ConvOp::PTRCAST => {
                                // just a mov (and hopefully reg alloc will coalesce it)
                                self.backend.emit_mov(&tmp_res, &tmp_op);
                            },
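                            // 128-bit <-> FP conversions have no single AArch64 instruction, so they
                            // are lowered to calls into runtime helpers; smaller widths use the
                            // ucvtf/scvtf/fcvtzu/fcvtzs instructions directly.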

                            op::ConvOp::UITOFP => {
                                if from_ty_size == 128 {
                                    if to_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::UITOFP_U128_DOUBLE,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::UITOFP_U128_FLOAT,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_ucvtf(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::SITOFP => {
                                if from_ty_size == 128 {
                                    if to_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::SITOFP_I128_DOUBLE,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::SITOFP_I128_FLOAT,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_scvtf(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::FPTOUI => {
                                if to_ty_size == 128 {
                                    if from_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::FPTOUI_DOUBLE_U128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::FPTOUI_FLOAT_U128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_fcvtzu(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::FPTOSI => {
                                if to_ty_size == 128 {
                                    if from_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::FPTOSI_DOUBLE_I128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::FPTOSI_FLOAT_I128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_fcvtzs(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::BITCAST => {
                                self.backend.emit_fmov(&tmp_res, &tmp_op);
                            },
                            op::ConvOp::FPTRUNC | op::ConvOp::FPEXT => {
                                self.backend.emit_fcvt(&tmp_res, &tmp_op);
                            },
                        }
                    }

                    Instruction_::Load { order, mem_loc, .. } => {
                        trace!("instsel on LOAD");
                        let ref ops = inst.ops;
                        let ref loc_op = ops[mem_loc];

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        let res = self.get_result_value(node, 0);
                        if self.match_ireg(node) || self.match_fpreg(node) {
                            // Whether to use a load acquire
                            let use_acquire = match order {
                                MemoryOrder::Relaxed | MemoryOrder::NotAtomic => false,
                                MemoryOrder::Consume | MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                                _ => panic!("didnt expect order {:?} with load inst", order)
                            };
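                            // There is no FP load-acquire, so for Float/Double the value is acquired
                            // into a GPR and then moved across, roughly (illustrative):
                            //     ldar w_tmp, [x_base]
                            //     fmov s_res, w_tmp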


                            if use_acquire {
                                // Can only have a base for a LDAR
                                let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);
                                match res.ty.v {
                                    // Have to load a temporary GPR first
                                    MuType_::Float => {
                                        let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                        self.backend.emit_ldar(&temp, &temp_loc);
                                        self.backend.emit_fmov(&res, &temp);
                                    }
                                    MuType_::Double => {
                                        let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                        self.backend.emit_ldar(&temp, &temp_loc);
                                        self.backend.emit_fmov(&res, &temp);
                                    }
                                    // Can load the register directly
                                    _ => self.backend.emit_ldar(&res, &temp_loc)
                                };
                            } else {
                                let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, get_type_alignment(&res.ty, vm), f_context, vm);
                                self.backend.emit_ldr(&res, &temp_loc, false);
                            }
                        } else if self.match_ireg_ex(node) {
                            let (res_l, res_h) = split_int128(&res, f_context, vm);

                            match order {
                                MemoryOrder::NotAtomic => {
                                    let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, get_type_alignment(&res.ty, vm), f_context, vm);
                                    self.backend.emit_ldp(&res_l, &res_h, &temp_loc);
                                }

                                // AArch64 doesn't have a load-acquire pair instruction,
                                // so instead we emit a loop using load/store exclusive pairs
                                _ => {
                                    // Whether to use a load exclusive acquire
                                    let use_acquire = match order {
                                        MemoryOrder::Relaxed  => false,
                                        MemoryOrder::Consume | MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic load inst", order)
                                    };
                                    // Whether to use a store exclusive release
                                    let use_release = match order {
                                        MemoryOrder::Relaxed | MemoryOrder::Consume | MemoryOrder::Acquire  => false,
                                        MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic load inst", order)
                                    };

                                    // Exclusive loads/stores, only supports a base address
                                    let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);
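                                    // The emitted loop is roughly (illustrative):
                                    //   <id>_load_start:
                                    //     ldaxp x_lo, x_hi, [x_base]          // ldxp if no acquire
                                    //     stxp  w_ok, x_lo, x_hi, [x_base]    // stlxp if release
                                    //     cbnz  w_ok, <id>_load_start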

                                    self.finish_block();

                                    let blk_load_start = format!("{}_load_start", node.id());

                                    // load_start:
                                    self.start_block(blk_load_start.clone(), &vec![temp_loc.clone()]);


                                    // Load the value:
                                    if use_acquire {
                                        self.backend.emit_ldaxp(&res_l, &res_h, &temp_loc);
                                    } else {
                                        self.backend.emit_ldxp(&res_l, &res_h, &temp_loc);
                                    }

                                    let success = make_temporary(f_context, UINT1_TYPE.clone(), vm);

                                    // Store the value we just read back to memory
                                    if use_release {
                                        self.backend.emit_stlxp(&temp_loc, &success, &res_l, &res_h);
                                    } else {
                                        self.backend.emit_stxp(&temp_loc, &success, &res_l, &res_h);
                                    }

                                    // If the store failed, then branch back to 'load_start:'
                                    self.backend.emit_cbnz(&success, blk_load_start.clone())
                                }
                            }
                        } else {
                            unimplemented!();
                        }
                    }

                    Instruction_::Store { order, mem_loc, value, .. } => {
                        trace!("instsel on STORE");
                        let ref ops = inst.ops;
                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        if self.match_ireg(val_op) || self.match_fpreg(val_op) {
                            // Whether to use a store release or not
                            let use_release = match order {
                                MemoryOrder::Relaxed | MemoryOrder::NotAtomic => false,
                                MemoryOrder::Release | MemoryOrder::SeqCst => true,
                                _ => panic!("didn't expect order {:?} with store inst", order)
                            };
                            let val = self.emit_reg(val_op, f_content, f_context, vm);

                            if use_release {
                                // Can only have a base for a STLR
                                let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);

                                match val.ty.v {
                                    // Have to store a temporary GPR
                                    MuType_::Float => {
                                        let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                        self.backend.emit_fmov(&temp, &val);
                                        self.backend.emit_stlr(&temp_loc, &temp);
                                    }
                                    MuType_::Double => {
                                        let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                        self.backend.emit_fmov(&temp, &val);
                                        self.backend.emit_stlr(&temp_loc, &temp);
                                    }
                                    // Can store from the register directly
                                    _ => self.backend.emit_stlr(&temp_loc, &val)
                                };
                            } else {
                                let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, get_type_alignment(&val.ty, vm), f_context, vm);
                                self.backend.emit_str(&temp_loc, &val);
                            }
                        } else if self.match_ireg_ex(val_op) {
                            let (val_l, val_h) = self.emit_ireg_ex(val_op, f_content, f_context, vm);

                            match order {
                                MemoryOrder::NotAtomic => {
                                    let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, 16, f_context, vm);
                                    self.backend.emit_stp(&temp_loc, &val_l, &val_h);
                                }

                                // AArch64 doesn't have a store-release pair instruction,
                                // so instead we emit a loop using load/store exclusive pairs
                                _ => {
                                    // Whether to use a load exclusive acquire
                                    let use_acquire = match order {
                                        MemoryOrder::Relaxed | MemoryOrder::Release => false,
                                        MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic store inst", order)
                                    };
                                    // Whether to use a store exclusive release
                                    let use_release = match order {
                                        MemoryOrder::Relaxed  => false,
                                        MemoryOrder::Release | MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic store inst", order)
                                    };

                                    // Exclusive loads/stores, only supports a base address
                                    let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);
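                                    // As for the atomic load pair, this emits a loop that first loads
                                    // (and discards) the old value to gain exclusive access, then tries
                                    // to store the new pair, retrying on failure (illustrative):
                                    //   <id>_store_start:
                                    //     ldxp  xzr, x_discard, [x_base]      // ldaxp if acquire
                                    //     stxp  w_ok, x_lo, x_hi, [x_base]    // stlxp if release
                                    //     cbnz  w_ok, <id>_store_start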

                                    self.finish_block();

                                    let blk_store_start = format!("{}_store_start", node.id());

                                    // store_start:
                                    self.start_block(blk_store_start.clone(), &vec![temp_loc.clone()]);

                                    let success = make_temporary(f_context, UINT1_TYPE.clone(), vm);
                                    let discard_reg = cast_value(&success, &UINT64_TYPE);
                                    // Load a value (discard it)
                                    if use_acquire {
                                        self.backend.emit_ldaxp(&XZR, &discard_reg, &temp_loc);
                                    } else {
                                        self.backend.emit_ldxp(&XZR, &discard_reg, &temp_loc);
                                    }

                                    // Store the value
                                    if use_release {
                                        self.backend.emit_stlxp(&temp_loc, &success, &val_l, &val_h);
                                    } else {
                                        self.backend.emit_stxp(&temp_loc, &success, &val_l, &val_h);
                                    }

                                    // If the store failed, then branch back to 'store_start:'
                                    self.backend.emit_cbnz(&success, blk_store_start.clone())
                                }
                            }
                        } else {
                            unimplemented!();
                        }
                    }

                    Instruction_::CmpXchg{is_weak, success_order, fail_order, mem_loc, expected_value, desired_value, ..} => {
                        // Note: this uses the same operations as GCC (for the C++ atomic cmpxchg)
                        // Clang is slightly different and ignores the 'fail_order'
                        let use_acquire = match fail_order {
                            MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                            MemoryOrder::Relaxed => match success_order {
                                MemoryOrder::Acquire | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Relaxed | MemoryOrder::Release => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            _ => panic!("didnt expect fail order {:?} for cmpxchg", fail_order)
                        };
                        let use_release = match fail_order {
                            MemoryOrder::Acquire => match success_order {
                                MemoryOrder::Relaxed | MemoryOrder::Release | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Acquire => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            MemoryOrder::SeqCst => true,
                            MemoryOrder::Relaxed => match success_order {
                                MemoryOrder::Release | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Relaxed | MemoryOrder::Acquire => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            _ => panic!("didnt expect fail order {:?} for cmpxchg", fail_order)
                        };
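                        // For example (assumed typical usage), success_order = SeqCst with
                        // fail_order = Relaxed yields use_acquire = true and use_release = true,
                        // i.e. an LDAXR/STLXR loop below.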


                        let ref ops = inst.ops;
                        let loc = self.emit_node_addr_to_value(&ops[mem_loc], f_content, f_context, vm);
                        let expected = self.emit_reg(&ops[expected_value], f_content, f_context, vm);
                        let desired = self.emit_reg(&ops[desired_value], f_content, f_context, vm);

                        let res_value = self.get_result_value(node, 0);
                        let res_success = self.get_result_value(node, 1);


                        let blk_cmpxchg_start = format!("{}_cmpxchg_start", node.id());
                        let blk_cmpxchg_failed = format!("{}_cmpxchg_failed", node.id());
                        let blk_cmpxchg_succeded = format!("{}_cmpxchg_succeded", node.id());
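                        // Overall shape of the emitted code (illustrative):
                        //   <id>_cmpxchg_start:
                        //     ld(a)xr res, [loc]
                        //     cmp     res, expected            // fcmp for FP values
                        //     b.ne    <id>_cmpxchg_failed
                        //     st(l)xr ok, desired, [loc]
                        //     cbnz    ok, <id>_cmpxchg_start   // only for the strong variant
                        //     b       <id>_cmpxchg_succeded
                        //   <id>_cmpxchg_failed:   clrex; mov ok, #1
                        //   <id>_cmpxchg_succeded: eor ok, ok, #1   // flip so that 1 means success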

                        self.finish_block();
                        // cmpxchg_start:
                        self.start_block(blk_cmpxchg_start.clone(), &vec![loc.clone(),expected.clone(), desired.clone()]);

                        if use_acquire {
                            match res_value.ty.v {
                                // Have to load a temporary GPR first
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_ldaxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_ldaxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                // Can load the register directly
                                _ => self.backend.emit_ldaxr(&res_value, &loc)
                            };
                        } else {
                            match res_value.ty.v {
                                // Have to load a temporary GPR first
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_ldxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_ldxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                // Can load the register directly
                                _ => self.backend.emit_ldxr(&res_value, &loc)
                            };
                        }

                        if is_int_reg(&expected) {
                            self.backend.emit_cmp(&res_value, &expected);
                        } else {
                            self.backend.emit_fcmp(&res_value, &expected);
                        }
                        self.backend.emit_b_cond("NE", blk_cmpxchg_failed.clone());

                        if use_release {
                            match desired.ty.v {
                                // Have to store a temporary GPR
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stlxr(&loc, &res_success, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stlxr(&loc, &res_success, &temp);
                                }
                                // Can store from the register directly
                                _ => self.backend.emit_stlxr(&loc, &res_success, &desired)
                            };
                        } else {
                            match desired.ty.v {
                                // Have to store a temporary GPR
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stxr(&loc, &res_success, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stxr(&loc, &res_success, &temp);
                                }
                                // Can store from the register directly
                                _ => self.backend.emit_stxr(&loc, &res_success, &desired)
                            };
                        }

                        if !is_weak {
                            // Store failed, try again
                            self.backend.emit_cbnz(&res_success, blk_cmpxchg_start.clone());
                        }

                        self.backend.emit_b(blk_cmpxchg_succeded.clone());

                        self.finish_block();
                        // cmpxchg_failed:
                        self.start_block(blk_cmpxchg_failed.clone(), &vec![res_success.clone(), res_value.clone()]);

                        self.backend.emit_clrex();
                        // Set res_success to 1 (the same value STXR/STLXR uses to indicate failure)
                        self.backend.emit_mov_imm(&res_success, 1);

                        self.finish_block();
                        // cmpxchg_succeded:
                        self.start_block(blk_cmpxchg_succeded.clone(), &vec![res_success.clone(), res_value.clone()]);
                        // this NOT is needed as STXR/STLXR returns success as '0', whereas the Mu spec says it should be 1
                        self.backend.emit_eor_imm(&res_success, &res_success, 1);
                    }
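                    // The IRef family all reduce to address arithmetic on a base iref;
                    // e.g. GETFIELDIREF is roughly (illustrative): add x_res, x_base, #field_offset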
                    Instruction_::GetIRef(_)
                    | Instruction_::GetFieldIRef { .. }
                    | Instruction_::GetElementIRef{..}
                    | Instruction_::GetVarPartIRef { .. }
                    | Instruction_::ShiftIRef { .. } => {
                        trace!("instsel on GET/FIELD/VARPARTIREF, SHIFTIREF");
                        let mem_addr = self.emit_get_mem_from_inst(node, f_content, f_context, vm);
                        let tmp_res = self.get_result_value(node, 0);
                        emit_calculate_address(self.backend.as_mut(), &tmp_res, &mem_addr, f_context, vm);
                    }

                    Instruction_::Fence(order) => {
                        trace!("instsel on FENCE");

                        // Whether to emit a load fence or a normal one
                        let use_load = match order {
                            MemoryOrder::Release | MemoryOrder::SeqCst | MemoryOrder::AcqRel => false,
                            MemoryOrder::Acquire => true,
                            _ => panic!("didn't expect order {:?} with fence inst", order)
                        };
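                        // e.g. an Acquire fence lowers to `dmb ishld`, while SeqCst/AcqRel/Release
                        // fences lower to a full `dmb ish` (illustrative).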

                        if use_load {
                            // Data Memory Barrier for the Inner Shareable domain (load accesses only)
                            self.backend.emit_dmb("ISHLD");
                        } else {
                            // Data Memory Barrier for the Inner Shareable domain
                            self.backend.emit_dmb("ISH");
                        }
                    }
                    // TODO: Implement this similarly to a return (where there's a common exit block)
                    // and change SWAP_BACK_TO_NATIVE_STACK and swap_to_mu_stack so they don't handle the callee-saved registers
                    // (this instruction should then guarantee that they are restored, in the same way a Return does)
                    Instruction_::ThreadExit => {
                        trace!("instsel on THREADEXIT");
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)

                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(f_context, vm);
                        self.backend.emit_add_imm(&tl, &tl, *thread::NATIVE_SP_LOC_OFFSET as u16, false);

                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_context, vm);
                    }

                    Instruction_::CommonInst_GetThreadLocal => {
                        trace!("instsel on GETTHREADLOCAL");
                        // get thread local
                        let tl = self.emit_get_threadlocal(f_context, vm);

                        let tmp_res = self.get_result_value(node, 0);

                        // load [tl + USER_TLS_OFFSET] -> tmp_res
                        emit_load_base_offset(self.backend.as_mut(), &tmp_res, &tl, *thread::USER_TLS_OFFSET as i64, f_context, vm);
                    }


                    Instruction_::CommonInst_SetThreadLocal(op) => {
                        trace!("instsel on SETTHREADLOCAL");
                        let ref ops = inst.ops;
                        let ref op = ops[op];

                        debug_assert!(self.match_ireg(op));

                        let tmp_op = self.emit_ireg(op, f_content, f_context, vm);

                        // get thread local
                        let tl = self.emit_get_threadlocal(f_context, vm);

                        // store tmp_op -> [tl + USER_TLS_OFFSET]
                        emit_store_base_offset(self.backend.as_mut(), &tl, *thread::USER_TLS_OFFSET as i64, &tmp_op, f_context, vm);
                    }

                    Instruction_::CommonInst_Pin(op) => {
                        trace!("instsel on PIN");
                        if !mm::GC_MOVES_OBJECT {
                            // non-moving GC: pin is a nop (move from op to result)
                            let ref ops = inst.ops;
                            let ref op = ops[op];

                            let tmp_res = self.get_result_value(node, 0);

                            self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                        } else {
                            unimplemented!()
                        }
                    }

                    Instruction_::CommonInst_Unpin(_) => {
                        trace!("instsel on UNPIN");
                        if !mm::GC_MOVES_OBJECT {
                            // do nothing
                        } else {
                            unimplemented!()
                        }
                    }

                    Instruction_::Move(op) => {
                        trace!("instsel on MOVE (internal IR)");
                        let ref ops = inst.ops;
                        let ref op = ops[op];

                        let tmp_res = self.get_result_value(node, 0);

                        self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                    }

                    Instruction_::New(ref ty) => {
                        trace!("instsel on NEW");
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => panic!("cannot use NEW for hybrid, use NEWHYBRID instead"),
                                _ => {}
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let size = ty_info.size;
                        let ty_align = ty_info.alignment;

                        let const_size = make_value_int_const(size as u64, vm);

                        let tmp_allocator = self.emit_get_allocator(f_context, vm);
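                        // allocate 'size' bytes with the thread's allocator
                        // (emit_alloc_sequence may emit an inline fastpath or fall back to a runtime call)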
                        let tmp_res = self.emit_alloc_sequence(tmp_allocator.clone(), const_size, ty_align, node, f_context, vm);

                        // ASM: call muentry_init_object(%allocator, %tmp_res, %encode)
                        let encode = make_value_int_const(mm::get_gc_type_encode(ty_info.gc_type.id), vm);
                        self.emit_runtime_entry(
                            &entrypoints::INIT_OBJ,
                            vec![tmp_allocator.clone(), tmp_res.clone(), encode],
                            None,
                            Some(node), f_context, vm
                        );
                    }

                    Instruction_::NewHybrid(ref ty, var_len) => {
                        trace!("instsel on NEWHYBRID");
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => {},
                                _ => panic!("NEWHYBRID is only for allocating hybrid types, use NEW for others")
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_align = ty_info.alignment;
                        let fix_part_size = ty_info.size;
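                        // look up the hybrid's variable-part element type to get the size of one element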
                        let var_ty_size = match ty.v {
                            MuType_::Hybrid(ref name) => {
                                let map_lock = HYBRID_TAG_MAP.read().unwrap();
                                let hybrid_ty_ = map_lock.get(name).unwrap();
                                let var_ty = hybrid_ty_.get_var_ty();

                                vm.get_backend_type_info(var_ty.id()).size
                            },
                            _ => panic!("only expect HYBRID type here")
                        };

                        // actual size = fix_part_size + var_ty_size * len
                        let (actual_size, length) = {
                            let ref ops = inst.ops;
                            let ref var_len = ops[var_len];

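                            // a compile-time constant length lets the total size be folded into a constant;
                            // otherwise the size is computed at runtime below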
                            if match_node_int_imm(var_len) {
                                let var_len = node_imm_to_u64(var_len);
                                let actual_size = fix_part_size + var_ty_size * (var_len as usize);
                                (
                                    make_value_int_const(actual_size as u64, vm),
                                    make_value_int_const(var_len as u64, vm)
                                )
                            } else {
                                let tmp_actual_size = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                let tmp_var_len = self.emit_ireg(var_len, f_content, f_context, vm);

                                // tmp_actual_size = tmp_var_len*var_ty_size
                                emit_mul_u64(self.backend.as_mut(), &tmp_actual_size, &tmp_var_len, f_context, vm, var_ty_size as u64);
                                // tmp_actual_size = tmp_var_len*var_ty_size + fix_part_size
                                emit_add_u64(self.backend.as_mut(), &tmp_actual_size, &tmp_actual_size, f_context, vm, fix_part_size as u64);
                                (tmp_actual_size, tmp_var_len)
                            }
                        };

                        let tmp_allocator = self.emit_get_allocator(f_context, vm);
                        let tmp_res = self.emit_alloc_sequence(tmp_allocator.clone(), actual_size, ty_align, node, f_context, vm);

                        // ASM: call muentry_init_object(%allocator, %tmp_res, %encode)
                        let encode = make_value_int_const(mm::get_gc_type_encode(ty_info.gc_type.id), vm);
                        self.emit_runtime_entry(
                            &entrypoints::INIT_HYBRID,
                            vec![tmp_allocator.clone(), tmp_res.clone(), encode, length],
                            None,
                            Some(node), f_context, vm
                        );
                    }

                    // Runtime Entry
                    Instruction_::Throw(op_index) => {
                        trace!("instsel on THROW");
                        let ref ops = inst.ops;
                        let ref exception_obj = ops[op_index];

                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION,
                            vec![exception_obj.clone_value()],
                            None,
                            Some(node), f_context, vm);
                    }

                    // Runtime Entry
                    Instruction_::PrintHex(index) => {
                        trace!("instsel on PRINTHEX");
                        let ref ops = inst.ops;
                        let ref op = ops[index];

                        self.emit_runtime_entry(
                            &entrypoints::PRINT_HEX,
                            vec![op.clone_value()],
                            None,
                            Some(node), f_context, vm
                        );
                    }

                    _ => unimplemented!()
                } // main switch
            },

            TreeNode_::Value(_) => {}
        }
    }

    // Emits code for a binary operation, along with any requested status-flag results
    // TODO: If the RHS of an ADD is negative change it to a SUB (and vice versa)
    // TODO: Treat XOR 1....1, arg and XOR arg, 1....1 specially (1....1 is an invalid logical immediate, but the operation is non-trivial so it should be optimised to res = MVN arg)
    // Note: Assume that trivial operations are to be optimised by the Mu IR compiler (but this function still needs to work correctly if they aren't optimised away)
    // TODO: Use a shift when dividing or multiplying by a power of two
    fn emit_binop(&mut self, node: &TreeNode, inst: &Instruction, op: BinOp, status: BinOpStatus, op1: OpIndex, op2: OpIndex, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        use std;
        let mut op1 = op1;
        let mut op2 = op2;
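        // op1 and op2 are mutable so that a commutative operation whose first operand is a
        // valid immediate can swap its operands and use the immediate form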
        let ref ops = inst.ops;
        let res = self.get_result_value(node, 0);

        // Get the size (in bits) of the type the operation is on
        let n = get_bit_size(&res.ty, vm);
        let output_status = status.flag_n || status.flag_z || status.flag_c || status.flag_v;
        let mut status_value_index = 0;
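        // Each requested status flag gets its own result value, following the main result
        // in the order N, Z, C, V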
        // NOTE: XZR is just a dummy value here (it will not be used)
        let tmp_status_n = if status.flag_n {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_z = if status.flag_z {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_c = if status.flag_c {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_v = if status.flag_v {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };

        // TODO: Division by zero exception (note: must explicitly check for this, ARM doesn't do it)
        match op {
            // The lower n bits of the result will be correct, and will not depend
            // on the > n bits of op1 or op2
            op::BinOp::Add => {
                let mut imm_val = 0 as u64;
                // Is one of the arguments a valid immediate?
                let emit_imm = if match_node_int_imm(&ops[op2]) {
                    imm_val = node_imm_to_u64(&ops[op2]);
                    is_valid_arithmetic_imm(imm_val)
                } else if match_node_int_imm(&ops[op1]) {
                    imm_val = node_imm_to_u64(&ops[op1]);
                    // if op1 is a valid immediate, swap it with op2
                    if is_valid_arithmetic_imm(imm_val) {
                        std::mem::swap(&mut op1, &mut op2);
                        true
                    } else {
                        false
                    }
                } else {
                    false
                };

                if emit_imm {
                    trace!("emit add-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
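                    // AArch64 arithmetic immediates are 12 bits, optionally shifted left by 12;
                    // values that don't fit in the low 12 bits are encoded with the shift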
                    let imm_shift = imm_val >= 4096;
                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };

                    if output_status {
                        emit_zext(self.backend.as_mut(), &reg_op1);
                        self.backend.emit_adds_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);
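                        // For result types narrower than 32 bits the hardware NZCV flags reflect the
                        // full 32-bit ADDS, so the V and C flags are recomputed below from the relevant
                        // bits of the (zero-extended) result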

                        if status.flag_v {
                            if n < 32 {
                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs

                                // Sign bit of op2 is 0
                                if !get_bit(imm_val, n - 1) {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 0)
                                    self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &reg_op1);
                                } else {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 1)
                                    self.backend.emit_and(&tmp_status_v, &tmp_status_v, &reg_op1);
                                }

                                // Check the sign bit of tmp_status (i.e. tmp_status[n-1])
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }
                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_add_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);
                    }
                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                    trace!("emit add-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    if output_status {
                        emit_zext(self.backend.as_mut(), &reg_op1);
                        if n == 1 {
                            // adds_ext doesn't support extending 1-bit numbers
                            emit_zext(self.backend.as_mut(), &reg_op2);