// Copyright 2017 The Australian National University
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//     http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![warn(unused_imports)]
#![warn(unreachable_code)]
use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::*;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::mm::objectmodel::OBJECT_HEADER_SIZE;

use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;

use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::EPILOGUE_BLOCK_NAME;

use compiler::backend::aarch64::*;
use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

const INLINE_FASTPATH : bool = false;

pub struct InstructionSelection {
    name: &'static str,
    backend: Box<CodeGenerator>,

    current_fv_id: MuID,
    current_callsite_id: usize,
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_block_in_ir: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsite that names the block as exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>,
    current_xr_value: Option<P<Value>>, // A temporary that holds the saved XR value (if needed)
    current_constants: HashMap<MuID, P<Value>>,
    current_constants_locs: HashMap<MuID, P<Value>>
}

// TODO: Move all functions in here that don't need access to 'self' (or that only call functions which don't, even if called on self) to mod.rs
impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection {
            name: "Instruction Selection (aarch64)",
            backend: Box::new(ASMCodeGen::new()),

            current_fv_id: 0,
            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_block_in_ir: None,  // it is possible the block is newly created in instruction selection
                                        // but sometimes we want to know its control flow
                                        // so we need to track what block it is from the IR

                                        // FIXME: ideally we should not create new blocks in instruction selection
                                        // see Issue #6
            current_func_start: None,
            // key: block id, val: callsite that names the block as exception block
            current_exn_callsites: HashMap::new(),
            current_exn_blocks: HashMap::new(),
            current_xr_value: None,
            current_constants: HashMap::new(),
            current_constants_locs: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }

    // in this pass, we assume that
    // * we do not need to backup/restore caller-saved registers
    // if any of these assumptions breaks, we will need to re-emit the code
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node#{} {}", node.id(), node);

        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    // TODO: Optimise if cond is a flag from a binary operation?
                    Instruction_::Branch2 { cond, ref true_dest, ref false_dest, .. } => {
                        trace!("instsel on BRANCH2");
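                        // The hotter successor (per the block's control-flow info) becomes the
                        // fallthrough block; the emitted conditional branch targets the colder one.
                        // branch_if_true records whether that branch is taken when cond is true.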
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            let cur_block = f_content.get_block_by_name(self.current_block_in_ir.as_ref().unwrap().clone());
                            let next_block_in_trace = cur_block.control_flow.get_hottest_succ().unwrap();

                            if next_block_in_trace == true_dest.target {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };

                        let ref ops = inst.ops;

                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);

                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();

                        let ref cond = ops[cond];

                        if self.match_cmp_res(cond) {
                            trace!("emit cmp_res-branch2");
                            // Emit a CBNZ for 128-bit comparisons that are not symmetric
                            let use_cbnz = self.is_int128_asym_cmp(cond);
                            let tmp_cond =
                                if use_cbnz { Some(make_temporary(f_context, UINT1_TYPE.clone(), vm)) }
                                else { None };
                            let cond_box =
                                if use_cbnz { Some(Box::new(tmp_cond.as_ref().unwrap().clone())) }
                                else { None };

                            let mut cmpop = self.emit_cmp_res(cond, cond_box, f_content, f_context, vm);

                            if use_cbnz {
                                if !branch_if_true {
                                    self.backend.emit_cbz(tmp_cond.as_ref().unwrap(), branch_target);
                                } else {
                                    self.backend.emit_cbnz(tmp_cond.as_ref().unwrap(), branch_target);
                                }
                            } else {
                                if !branch_if_true {
                                    cmpop = cmpop.invert();
                                }
                                let cond = get_condition_codes(cmpop);

                                if cmpop == op::CmpOp::FFALSE {
                                    ; // Do nothing
                                } else if cmpop == op::CmpOp::FTRUE {
                                    self.backend.emit_b(branch_target);
                                } else {
                                    self.backend.emit_b_cond(cond[0], branch_target.clone());

                                    if cond.len() == 2 {
                                        self.backend.emit_b_cond(cond[1], branch_target);
                                    }
                                }
                            }
                        } else {
                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);
                            if branch_if_true {
                                self.backend.emit_tbnz(&cond_reg, 0, branch_target.clone());
                            } else {
                                self.backend.emit_tbz(&cond_reg, 0, branch_target.clone());
                            }
                        };

                        // it is possible that the fallthrough block is scheduled somewhere else
                        // we need to explicitly jump to it
                        self.finish_block();
                        let fallthrough_temp_block = format!("{}_{}_branch_fallthrough", self.current_fv_id, node.id());
                        self.start_block(fallthrough_temp_block, &vec![]);

                        let fallthrough_target = f_content.get_block(fallthrough_dest.target).name().unwrap();
                        self.backend.emit_b(fallthrough_target);
                    },

                    Instruction_::Select { cond, true_val, false_val } => {
                        use ast::op::CmpOp::*;

                        trace!("instsel on SELECT");
                        let ref ops = inst.ops;

                        let ref cond = ops[cond];
                        let ref true_val = ops[true_val];
                        let ref false_val = ops[false_val];

                        let tmp_res = self.get_result_value(node, 0);

                        // moving integers/pointers
                        // generate compare
                        let cmpop = if self.match_cmp_res(cond) {
                            self.emit_cmp_res(cond, None, f_content, f_context, vm)
                        } else if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);
                            self.backend.emit_cmp_imm(&tmp_cond, 0, false);
                            NE
                        } else {
                            panic!("expected ireg, found {}", cond)
                        };

                        let tmp_true = self.emit_reg(true_val, f_content, f_context, vm);
                        let tmp_false = self.emit_reg(false_val, f_content, f_context, vm);

                        let cond = get_condition_codes(cmpop);
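                        // Some comparisons map to two AArch64 condition codes (result = cond[0] OR cond[1]);
                        // the second CSEL/FCSEL below folds the second condition into the already-selected value.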

                        if self.match_ireg(true_val) {
                            if cmpop == FFALSE {
                                self.backend.emit_mov(&tmp_res, &tmp_false);
                            } else if cmpop == FTRUE {
                                self.backend.emit_mov(&tmp_res, &tmp_true);
                            } else {
                                self.backend.emit_csel(&tmp_res, &tmp_true, &tmp_false, cond[0]);

                                if cond.len() == 2 {
                                    self.backend.emit_csel(&tmp_res, &tmp_true, &tmp_res, cond[1]);
                                }
                            }
                        } else if self.match_fpreg(true_val) {
                            if cmpop == FFALSE {
                                self.backend.emit_fmov(&tmp_res, &tmp_false);
                            } else if cmpop == FTRUE {
                                self.backend.emit_fmov(&tmp_res, &tmp_true);
                            } else {
                                self.backend.emit_fcsel(&tmp_res, &tmp_true, &tmp_false, cond[0]);

                                if cond.len() == 2 {
                                    self.backend.emit_fcsel(&tmp_res, &tmp_true, &tmp_res, cond[1]);
                                }
                            }
                        } else {
                            // moving vectors
                            unimplemented!()
                        }
                    },

                    Instruction_::CmpOp(op, op1, op2) => {
                        use ast::op::CmpOp::*;

                        trace!("instsel on CMPOP");
                        let ref ops = inst.ops;
                        let ref op1 = ops[op1];
                        let ref op2 = ops[op2];

                        let tmp_res = self.get_result_value(node, 0);

                        debug_assert!(tmp_res.ty.get_int_length().is_some());
                        debug_assert!(tmp_res.ty.get_int_length().unwrap() == 1);

                        let cmpop = self.emit_cmp_res_op(op, Some(Box::new(tmp_res.clone())), &op1, &op2, f_content, f_context, vm);
                        let cond = get_condition_codes(cmpop);

                        // emit_cmp_res_op will set tmp_res for 128-bit asymmetric comparisons
                        if !self.is_int128_asym_cmp(node) {
                            if cmpop == FFALSE {
                                emit_mov_u64(self.backend.as_mut(), &tmp_res, 0);
                            } else if cmpop == FTRUE {
                                emit_mov_u64(self.backend.as_mut(), &tmp_res, 1);
                            } else {
                                self.backend.emit_cset(&tmp_res, cond[0]);
                                // Note: some comparisons can't be computed based on a single aarch64 flag
                                // instead they are computed as a condition OR NOT another condition.
                                if cond.len() == 2 {
                                    self.backend.emit_csinc(&tmp_res, &tmp_res, &WZR, invert_condition_code(cond[1]));
                                }
                            }
                        }
                    }

                    Instruction_::Branch1(ref dest) => {
                        trace!("instsel on BRANCH1");
                        let ref ops = inst.ops;

                        self.process_dest(&ops, dest, f_content, f_context, vm);

                        let target = f_content.get_block(dest.target).name().unwrap();

                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_b(target);
                    },

                    Instruction_::Switch { cond, ref default, ref branches } => {
                        trace!("instsel on SWITCH");
                        let ref ops = inst.ops;

                        let ref cond = ops[cond];

                        if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);
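                            // Zero-extend the condition so the full-width register compares below
                            // are not affected by garbage in the upper bits.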
                            emit_zext(self.backend.as_mut(), &tmp_cond);

                            // emit each branch
                            for &(case_op_index, ref case_dest) in branches {
                                let ref case_op = ops[case_op_index];

                                // process dest
                                self.process_dest(&ops, case_dest, f_content, f_context, vm);

                                let target = f_content.get_block(case_dest.target).name().unwrap();

                                let mut imm_val = 0 as u64;
                                // Is one of the arguments a valid immediate?
                                let emit_imm = if match_node_int_imm(&case_op) {
                                    imm_val = node_imm_to_u64(&case_op);
                                    is_valid_arithmetic_imm(imm_val)
                                } else {
                                    false
                                };

                                if emit_imm {
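                                    // AArch64 arithmetic/compare immediates are 12 bits, optionally
                                    // shifted left by 12; use the shifted encoding for larger values.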
                                    let imm_shift = imm_val > 4096;
                                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };
                                    self.backend.emit_cmp_imm(&tmp_cond, imm_op2 as u16, imm_shift);
                                } else {
                                    let tmp_case_op = self.emit_ireg(case_op, f_content, f_context, vm);
                                    emit_zext(self.backend.as_mut(), &tmp_case_op);
                                    self.backend.emit_cmp(&tmp_cond, &tmp_case_op);
                                }

                                self.backend.emit_b_cond("EQ", target);

                                self.finish_block();
                                self.start_block(format!("{}_switch_not_met_case_{}", node.id(), case_op_index), &vec![]);
                            }

                            // emit default
                            self.process_dest(&ops, default, f_content, f_context, vm);

                            let default_target = f_content.get_block(default.target).name().unwrap();
                            self.backend.emit_b(default_target);
                        } else {
                            panic!("expecting cond in switch to be ireg: {}", cond);
                        }
                    }

                    Instruction_::ExprCall { ref data, is_abort } => {
                        trace!("instsel on EXPRCALL");

                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode, 
                            f_content, f_context, vm);
                    },

                    Instruction_::Call { ref data, ref resume } => {
                        trace!("instsel on CALL");

                        self.emit_mu_call(
                            inst,
                            data,
                            Some(resume),
                            node,
                            f_content, f_context, vm);
                    },

                    Instruction_::ExprCCall { ref data, is_abort } => {
                        trace!("instsel on EXPRCCALL");

                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_c_call_ir(inst, data, None, node, f_content, f_context, vm);
                    }

                    Instruction_::CCall { ref data, ref resume } => {
                        trace!("instsel on CCALL");

                        self.emit_c_call_ir(inst, data, Some(resume), node, f_content, f_context, vm);
                    }

                    Instruction_::Return(ref vals) => {
                        trace!("instsel on RETURN");

                        // prepare return regs
                        let ref ops = inst.ops;
                        // TODO: Are vals in the same order as the return types in the function's signature?
                        let ret_tys = vals.iter().map(|i| node_type(&ops[*i])).collect();
                        let ret_type = self.combine_return_types(&ret_tys);
                        let n = ret_tys.len(); // number of return values
                        let xr_value = self.current_xr_value.as_ref().unwrap().clone();

                        if n == 0 {
                            // Do nothing
                        } else if n == 1 {
                            let ret_loc = self.compute_return_locations(&ret_type, &xr_value, &vm);
                            let ret_val = self.emit_node_value(&ops[vals[0]], f_content, f_context, vm);
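                            // A 128-bit integer returned in machine registers is split: the low half
                            // goes in ret_loc and the high half in the register two ids above it.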

                            if is_machine_reg(&ret_loc) && is_int_ex_reg(&ret_val) {
                                let (val_l, val_h) = split_int128(&ret_val, f_context, vm);
                                let ret_loc_h = get_register_from_id(ret_loc.id() + 2);
                                // nothing special needs to be done
                                emit_move_value_to_value(self.backend.as_mut(), &ret_loc, &val_l, f_context, vm);
                                emit_move_value_to_value(self.backend.as_mut(), &ret_loc_h, &val_h, f_context, vm);
                            } else {
                                emit_move_value_to_value(self.backend.as_mut(), &ret_loc, &ret_val, f_context, vm);
                            }
                        } else {
                            let ret_loc = self.compute_return_locations(&ret_type, &xr_value, &vm);
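                            // Multiple return values are packed into a combined struct type; each
                            // value is written at its field offset within the return location.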
                            let mut i = 0;
                            for ret_index in vals {
                                let ret_val = self.emit_node_value(&ops[*ret_index], f_content, f_context, vm);
                                let ref ty = ret_val.ty;
                                let offset = self.get_field_offset(&ret_type, i, &vm);

                                match ty.v {
                                    MuType_::Vector(_, _) | MuType_::Tagref64 => unimplemented!(),
                                    MuType_::Void => panic!("Unexpected void"),
                                    MuType_::Struct(_) | MuType_::Array(_, _) => unimplemented!(),
                                    MuType_::Hybrid(_) => panic!("Can't return a hybrid"),
                                    // Integral, pointer or floating point type
                                    _ => self.insert_bytes(&ret_loc, &ret_val, offset as i64, f_context, vm),
                                }

                                i += 1;
                            }
                        }
                        self.backend.emit_b(EPILOGUE_BLOCK_NAME.to_string());
                    },

                    Instruction_::BinOp(op, op1, op2) => {
                        trace!("instsel on BINOP");
                        self.emit_binop(node, inst, op, BinOpStatus { flag_n: false, flag_z: false, flag_c: false, flag_v: false }, op1, op2, f_content, f_context, vm);
                    },

                    Instruction_::BinOpWithStatus(op, status, op1, op2) => {
                        trace!("instsel on BINOP_STATUS");
                        self.emit_binop(node, inst, op, status, op1, op2, f_content, f_context, vm);
                    }

                    Instruction_::ConvOp { operation, ref from_ty, ref to_ty, operand } => {
                        trace!("instsel on CONVOP");

                        let ref ops = inst.ops;

                        let ref op = ops[operand];

                        let tmp_res = self.get_result_value(node, 0);
                        let tmp_op = self.emit_reg(op, f_content, f_context, vm);

                        let from_ty_size = get_bit_size(&from_ty, vm);
                        let to_ty_size = get_bit_size(&to_ty, vm);

                        match operation {
                            op::ConvOp::TRUNC => {
                                // src is in one register
                                if self.match_ireg(op) {
                                    self.backend.emit_mov(&tmp_res, &cast_value(&tmp_op, &to_ty));
                                } else if self.match_ireg_ex(op) {
                                    // Move the lower word
                                    if from_ty_size != to_ty_size {
                                        let (op_l, _) = self.emit_ireg_ex(op, f_content, f_context, vm);
                                        self.backend.emit_mov(&tmp_res, &cast_value(&op_l, &to_ty));
                                    } else {
                                        self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                                    }
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }

                            },
                            op::ConvOp::ZEXT => {
                                if from_ty_size != to_ty_size {
                                    if to_ty_size <= 64 {
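                                        // UBFX extracts from_ty_size bits starting at bit 0 and
                                        // zero-extends them into the destination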
                                        self.backend.emit_ubfx(&tmp_res, &cast_value(&tmp_op, &to_ty), 0, from_ty_size as u8);
                                    } else if to_ty_size == 128 {
                                        let (res_l, res_h) = split_int128(&tmp_res, f_context, vm);

                                        // res_l = ZEXT src
                                        self.backend.emit_ubfx(&res_l, &cast_value(&tmp_op, &UINT64_TYPE), 0, from_ty_size as u8);
                                        self.backend.emit_mov(&res_h, &XZR); // res_h = 0

                                    } else {
                                        panic!("unexpected int length {}", to_ty_size);
                                    }
                                } else {
                                    // Trivial, just do a move
                                    emit_move_value_to_value(self.backend.as_mut(), &tmp_res, &tmp_op, f_context, vm);
                                }
                            },
                            op::ConvOp::SEXT => {
                                if from_ty_size != to_ty_size {
                                    if to_ty_size <= 64 {
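                                        // SBFX extracts from_ty_size bits starting at bit 0 and
                                        // sign-extends them into the destination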
                                        self.backend.emit_sbfx(&tmp_res, &cast_value(&tmp_op, &to_ty), 0, from_ty_size as u8);
                                    } else if to_ty_size == 128 {
                                        let (res_l, res_h) = split_int128(&tmp_res, f_context, vm);

                                        // res_l = SEXT src
                                        self.backend.emit_sbfx(&res_l, &cast_value(&tmp_op, &UINT64_TYPE), 0, from_ty_size as u8);
                                        self.backend.emit_asr_imm(&res_h, &tmp_op, 63); // res_h = ASHR src, 63

                                    } else {
                                        panic!("unexpected int length {}", to_ty_size);
                                    }

                                } else {
                                    // Trivial, just do a move
                                    emit_move_value_to_value(self.backend.as_mut(), &tmp_res, &tmp_op, f_context, vm);
                                }
                            },
                            op::ConvOp::REFCAST | op::ConvOp::PTRCAST => {
                                // just a mov (and hopefully reg alloc will coalesce it)
                                self.backend.emit_mov(&tmp_res, &tmp_op);
                            },

                            op::ConvOp::UITOFP => {
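                                // There is no single AArch64 instruction for converting a 128-bit
                                // integer to a float, so those cases go through runtime helpers.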
                                if from_ty_size == 128 {
                                    if to_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::UITOFP_U128_DOUBLE,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::UITOFP_U128_FLOAT,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_ucvtf(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::SITOFP => {
                                if from_ty_size == 128 {
                                    if to_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::SITOFP_I128_DOUBLE,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::SITOFP_I128_FLOAT,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_scvtf(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::FPTOUI => {
                                if to_ty_size == 128 {
                                    if from_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::FPTOUI_DOUBLE_U128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::FPTOUI_FLOAT_U128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_fcvtzu(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::FPTOSI => {
                                if to_ty_size == 128 {
                                    if from_ty_size == 64 {
                                        self.emit_runtime_entry(&entrypoints::FPTOSI_DOUBLE_I128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    } else {
                                        self.emit_runtime_entry(&entrypoints::FPTOSI_FLOAT_I128,
                                            vec![tmp_op.clone()],
                                            Some(vec![tmp_res.clone()]),
                                            Some(node), f_context, vm);
                                    }
                                } else {
                                    self.backend.emit_fcvtzs(&tmp_res, &tmp_op);
                                }
                            },

                            op::ConvOp::BITCAST => {
                                self.backend.emit_fmov(&tmp_res, &tmp_op);
                            },
                            op::ConvOp::FPTRUNC | op::ConvOp::FPEXT => {
                                self.backend.emit_fcvt(&tmp_res, &tmp_op);
                            },
                        }
                    }

                    Instruction_::Load { order, mem_loc, .. } => {
                        trace!("instsel on LOAD");
                        let ref ops = inst.ops;
                        let ref loc_op = ops[mem_loc];

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        let res = self.get_result_value(node, 0);
                        if self.match_ireg(node) || self.match_fpreg(node) {
                            // Whether to use a load acquire
                            let use_acquire = match order {
                                MemoryOrder::Relaxed | MemoryOrder::NotAtomic => false,
                                MemoryOrder::Consume | MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                                _ => panic!("didnt expect order {:?} with load inst", order)
                            };


                            if use_acquire {
                                // Can only have a base for a LDAR
                                let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);
                                match res.ty.v {
                                    // Have to load a temporary GPR first
                                    MuType_::Float => {
                                        let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                        self.backend.emit_ldar(&temp, &temp_loc);
                                        self.backend.emit_fmov(&res, &temp);
                                    }
                                    MuType_::Double => {
                                        let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                        self.backend.emit_ldar(&temp, &temp_loc);
                                        self.backend.emit_fmov(&res, &temp);
                                    }
                                    // Can load the register directly
                                    _ => self.backend.emit_ldar(&res, &temp_loc)
                                };
                            } else {
                                let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, get_type_alignment(&res.ty, vm), f_context, vm);
                                self.backend.emit_ldr(&res, &temp_loc, false);
                            }
                        } else if self.match_ireg_ex(node) {
                            let (res_l, res_h) = split_int128(&res, f_context, vm);

                            match order {
                                MemoryOrder::NotAtomic => {
                                    let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, get_type_alignment(&res.ty, vm), f_context, vm);
                                    self.backend.emit_ldp(&res_l, &res_h, &temp_loc);
                                }

                                // AArch64 doesn't have a load acquire pair instruction
                                // So instead we have to write a loop using load/store exclusive pairs
                                _ => {
                                    // Whether to use a load exclusive acquire
                                    let use_acquire = match order {
                                        MemoryOrder::Relaxed  => false,
                                        MemoryOrder::Consume | MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic load inst", order)
                                    };
                                    // Whether to use a store exclusive release
                                    let use_release = match order {
                                        MemoryOrder::Relaxed | MemoryOrder::Consume | MemoryOrder::Acquire  => false,
                                        MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic load inst", order)
                                    };

                                    // Exclusive loads/stores only support a base address
                                    let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);

                                    self.finish_block();

                                    let blk_load_start = format!("{}_load_start", node.id());

                                    // load_start:
                                    self.start_block(blk_load_start.clone(), &vec![temp_loc.clone()]);


                                    // Load the value:
                                    if use_acquire {
                                        self.backend.emit_ldaxp(&res_l, &res_h, &temp_loc);
                                    } else {
                                        self.backend.emit_ldxp(&res_l, &res_h, &temp_loc);
                                    }

                                    let success = make_temporary(f_context, UINT1_TYPE.clone(), vm);
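                                    // LDXP on its own is not guaranteed to be a single-copy atomic
                                    // 128-bit read, so the pair is stored straight back with STXP/STLXP
                                    // and the load is retried until that store succeeds.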

                                    // Store the value we just read back to memory
                                    if use_release {
                                        self.backend.emit_stlxp(&temp_loc, &success, &res_l, &res_h);
                                    } else {
                                        self.backend.emit_stxp(&temp_loc, &success, &res_l, &res_h);
                                    }

                                    // If the store failed, then branch back to 'load_start:'
                                    self.backend.emit_cbnz(&success, blk_load_start.clone())
                                }
                            }
                        } else {
                            unimplemented!();
                        }
                    }

                    Instruction_::Store { order, mem_loc, value, .. } => {
                        trace!("instsel on STORE");
                        let ref ops = inst.ops;
                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        if self.match_ireg(val_op) || self.match_fpreg(val_op) {
                            // Whether to use a store release or not
                            let use_release = match order {
                                MemoryOrder::Relaxed | MemoryOrder::NotAtomic => false,
                                MemoryOrder::Release | MemoryOrder::SeqCst => true,
                                _ => panic!("didnt expect order {:?} with store inst", order)
                            };
                            let val = self.emit_reg(val_op, f_content, f_context, vm);

                            if use_release {
                                // Can only have a base for a STLR
                                let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);

                                match val.ty.v {
                                    // Have to store a temporary GPR
                                    MuType_::Float => {
                                        let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                        self.backend.emit_fmov(&temp, &val);
                                        self.backend.emit_stlr(&temp_loc, &temp);
                                    }
                                    MuType_::Double => {
                                        let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                        self.backend.emit_fmov(&temp, &val);
                                        self.backend.emit_stlr(&temp_loc, &temp);
                                    }
                                    // Can store the register directly
                                    _ => self.backend.emit_stlr(&temp_loc, &val)
                                };
                            } else {
                                let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, get_type_alignment(&val.ty, vm), f_context, vm);
                                self.backend.emit_str(&temp_loc, &val);
                            }
                        } else if self.match_ireg_ex(val_op) {
                            let (val_l, val_h) = self.emit_ireg_ex(val_op, f_content, f_context, vm);

                            match order {
                                MemoryOrder::NotAtomic => {
                                    let temp_loc = emit_mem(self.backend.as_mut(), &resolved_loc, 16, f_context, vm);
                                    self.backend.emit_stp(&temp_loc, &val_l, &val_h);
                                }

                                // AArch64 doesn't have a store release pair instruction
                                // So instead we have to write a loop using load/store exclusive pairs
                                _ => {
                                    // Whether to use a load exclusive acquire
                                    let use_acquire = match order {
                                        MemoryOrder::Relaxed | MemoryOrder::Release => false,
                                        MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic store inst", order)
                                    };
                                    // Whether to use a store exclusive release
                                    let use_release = match order {
                                        MemoryOrder::Relaxed  => false,
                                        MemoryOrder::Release | MemoryOrder::SeqCst => true,
                                        _ => panic!("didnt expect order {:?} with atomic store inst", order)
                                    };

                                    // Exclusive loads/stores only support a base address
                                    let temp_loc = emit_mem_base(self.backend.as_mut(), &resolved_loc, f_context, vm);

                                    self.finish_block();

                                    let blk_store_start = format!("{}_store_start", node.id());

                                    // store_start:
                                    self.start_block(blk_store_start.clone(), &vec![temp_loc.clone()]);

                                    let success = make_temporary(f_context, UINT1_TYPE.clone(), vm);
                                    let discard_reg = cast_value(&success, &UINT64_TYPE);
                                    // Load a value (discard it)
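                                    // (the exclusive load also claims the exclusive monitor, which the
                                    // STXP/STLXP below needs in order to succeed)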
                                    if use_acquire {
                                        self.backend.emit_ldaxp(&XZR, &discard_reg, &temp_loc);
                                    } else {
                                        self.backend.emit_ldxp(&XZR, &discard_reg, &temp_loc);
                                    }

                                    // Store the value
                                    if use_release {
                                        self.backend.emit_stlxp(&temp_loc, &success, &val_l, &val_h);
                                    } else {
                                        self.backend.emit_stxp(&temp_loc, &success, &val_l, &val_h);
                                    }

                                    // If the store failed, then branch back to 'store_start:'
                                    self.backend.emit_cbnz(&success, blk_store_start.clone())
                                }
                            }
                        } else {
                            unimplemented!();
                        }
                    }

                    Instruction_::CmpXchg{is_weak, success_order, fail_order, mem_loc, expected_value, desired_value, ..} => {
                        // Note: this uses the same operations as GCC (for the C++ atomic cmpxchg)
                        // Clang is slightly different and ignores the 'fail_order'
                        let use_acquire = match fail_order {
                            MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                            MemoryOrder::Relaxed => match success_order {
                                MemoryOrder::Acquire | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Relaxed | MemoryOrder::Release => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            _ => panic!("didnt expect fail order {:?} for cmpxchg", fail_order)
                        };
                        let use_release = match fail_order {
                            MemoryOrder::Acquire => match success_order {
                                MemoryOrder::Relaxed | MemoryOrder::Release | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Acquire => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            MemoryOrder::SeqCst => true,
                            MemoryOrder::Relaxed => match success_order {
                                MemoryOrder::Release | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Relaxed | MemoryOrder::Acquire => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            _ => panic!("didnt expect fail order {:?} for cmpxchg", fail_order)
                        };


                        let ref ops = inst.ops;
                        let loc = self.emit_node_addr_to_value(&ops[mem_loc], f_content, f_context, vm);
                        let expected = self.emit_reg(&ops[expected_value], f_content, f_context, vm);
                        let desired = self.emit_reg(&ops[desired_value], f_content, f_context, vm);

                        let res_value = self.get_result_value(node, 0);
                        let res_success = self.get_result_value(node, 1);


                        let blk_cmpxchg_start = format!("{}_cmpxchg_start", node.id());
                        let blk_cmpxchg_failed = format!("{}_cmpxchg_failed", node.id());
                        let blk_cmpxchg_succeded = format!("{}_cmpxchg_succeded", node.id());

                        self.finish_block();
                        // cmpxchg_start:
                        self.start_block(blk_cmpxchg_start.clone(), &vec![loc.clone(),expected.clone(), desired.clone()]);

                        if use_acquire {
                            match res_value.ty.v {
                                // Have to load a temporary GPR first
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_ldaxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_ldaxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                // Can load the register directly
                                _ => self.backend.emit_ldaxr(&res_value, &loc)
                            };
                        } else {
                            match res_value.ty.v {
                                // Have to load a temporary GPR first
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_ldxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_ldxr(&temp, &loc);
                                    self.backend.emit_fmov(&res_value, &temp);
                                }
                                // Can load the register directly
                                _ => self.backend.emit_ldxr(&res_value, &loc)
                            };
                        }

                        if is_int_reg(&expected) {
                            self.backend.emit_cmp(&res_value, &expected);
                        } else {
                            self.backend.emit_fcmp(&res_value, &expected);
                        }
                        self.backend.emit_b_cond("NE", blk_cmpxchg_failed.clone());

                        if use_release {
                            match desired.ty.v {
                                // Have to store a temporary GPR
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stlxr(&loc, &res_success, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stlxr(&loc, &res_success, &temp);
                                }
                                // Can store the register directly
                                _ => self.backend.emit_stlxr(&loc, &res_success, &desired)
                            };
                        } else {
                            match desired.ty.v {
                                // Have to store a temporary GPR
                                MuType_::Float => {
                                    let temp = make_temporary(f_context, UINT32_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stxr(&loc, &res_success, &temp);
                                }
                                MuType_::Double => {
                                    let temp = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                    self.backend.emit_fmov(&temp, &desired);
                                    self.backend.emit_stxr(&loc, &res_success, &temp);
                                }
                                // Can store the register directly
                                _ => self.backend.emit_stxr(&loc, &res_success, &desired)
                            };
                        }
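                        // A strong cmpxchg must retry if the store-exclusive fails spuriously;
                        // a weak cmpxchg is allowed to simply report failure instead.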

                        if !is_weak {
                            // Store failed, try again
                            self.backend.emit_cbnz(&res_success, blk_cmpxchg_start.clone());
                        }

                        self.backend.emit_b(blk_cmpxchg_succeded.clone());

                        self.finish_block();
                        // cmpxchg_failed:
                        self.start_block(blk_cmpxchg_failed.clone(), &vec![res_success.clone(), res_value.clone()]);

                        self.backend.emit_clrex();
                        // Set res_success to 1 (the same value STXR/STLXR uses to indicate failure)
                        self.backend.emit_mov_imm(&res_success, 1);

                        self.finish_block();
                        // cmpxchg_succeded:
                        self.start_block(blk_cmpxchg_succeded.clone(), &vec![res_success.clone(), res_value.clone()]);
                        // this NOT is needed because STXR/STLXR returns success as '0', whereas the Mu spec says it should be 1
                        self.backend.emit_eor_imm(&res_success, &res_success, 1);
                    }
                    Instruction_::GetIRef(_)
                    | Instruction_::GetFieldIRef { .. }
                    | Instruction_::GetElementIRef{..}
                    | Instruction_::GetVarPartIRef { .. }
                    | Instruction_::ShiftIRef { .. } => {
                        trace!("instsel on GET/FIELD/VARPARTIREF, SHIFTIREF");
                        let mem_addr = self.emit_get_mem_from_inst(node, f_content, f_context, vm);
                        let tmp_res = self.get_result_value(node, 0);
                        emit_calculate_address(self.backend.as_mut(), &tmp_res, &mem_addr, f_context, vm);
                    }

                    Instruction_::Fence(order) => {
                        trace!("instsel on FENCE");

                        // Whether to emit a load fence or a normal one
                        let use_load = match order {
                            MemoryOrder::Release | MemoryOrder::SeqCst | MemoryOrder::AcqRel => false,
                            MemoryOrder::Acquire => true,
                            _ => panic!("didnt expect order {:?} with load inst", order)
                        };

                        if use_load {
                            // Data Memory Barrier for Inner Shareable Domain (for load accesses only)
                            self.backend.emit_dmb("ISHLD");
                        } else {
                            // Data Memory Barrier for Inner Shareable Domain
                            self.backend.emit_dmb("ISH");
                        }
                    }
                    // TODO: Implement this similar to a return (where there's a common exit block)
                    // and change SWAP_BACK_TO_NATIVE_STACK and swap_to_mu_stack so they don't handle the callee-saved registers
                    // (this instruction should then guarantee that they are restored, in the same way a Return does)
                    Instruction_::ThreadExit => {
                        trace!("instsel on THREADEXIT");
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)
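                        // Roughly, the emitted sequence is:
                        //   tl <- current thread-local pointer
                        //   tl <- tl + NATIVE_SP_LOC_OFFSET   (address of the saved native SP)
                        //   call the SWAP_BACK_TO_NATIVE_STACK runtime entry with tl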

                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(f_context, vm);
                        self.backend.emit_add_imm(&tl, &tl, *thread::NATIVE_SP_LOC_OFFSET as u16, false);

                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_context, vm);
                    }

                    Instruction_::CommonInst_GetThreadLocal => {
                        trace!("instsel on GETTHREADLOCAL");
                        // get thread local
                        let tl = self.emit_get_threadlocal(f_context, vm);

                        let tmp_res = self.get_result_value(node, 0);

                        // load [tl + USER_TLS_OFFSET] -> tmp_res
                        emit_load_base_offset(self.backend.as_mut(), &tmp_res, &tl, *thread::USER_TLS_OFFSET as i64, f_context, vm);
                    }

                    Instruction_::CommonInst_SetThreadLocal(op) => {
                        trace!("instsel on SETTHREADLOCAL");
                        let ref ops = inst.ops;
                        let ref op = ops[op];

                        debug_assert!(self.match_ireg(op));

                        let tmp_op = self.emit_ireg(op, f_content, f_context, vm);

                        // get thread local
                        let tl = self.emit_get_threadlocal(f_context, vm);

                        // store tmp_op -> [tl + USER_TLS_OFFSET]
                        emit_store_base_offset(self.backend.as_mut(), &tl, *thread::USER_TLS_OFFSET as i64, &tmp_op, f_context, vm);
                    }

                    Instruction_::CommonInst_Pin(op) => {
                        trace!("instsel on PIN");
                        if !mm::GC_MOVES_OBJECT {
                            // non-moving GC: pin is a nop (move from op to result)
                            let ref ops = inst.ops;
                            let ref op = ops[op];

                            let tmp_res = self.get_result_value(node, 0);

                            self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                        } else {
                            unimplemented!()
                        }
                    }

                    Instruction_::CommonInst_Unpin(_) => {
                        trace!("instsel on UNPIN");
                        if !mm::GC_MOVES_OBJECT {
                            // do nothing
                        } else {
                            unimplemented!()
                        }
                    }

                    Instruction_::Move(op) => {
                        trace!("instsel on MOVE (internal IR)");
                        let ref ops = inst.ops;
                        let ref op = ops[op];

                        let tmp_res = self.get_result_value(node, 0);

                        self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                    }

                    Instruction_::New(ref ty) => {
                        trace!("instsel on NEW");
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => panic!("cannot use NEW for hybrid, use NEWHYBRID instead"),
                                _ => {}
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let size = ty_info.size;
                        let ty_align = ty_info.alignment;

                        let const_size = make_value_int_const(size as u64, vm);

                        let tmp_allocator = self.emit_get_allocator(f_context, vm);
                        let tmp_res = self.emit_alloc_sequence(tmp_allocator.clone(), const_size, ty_align, node, f_context, vm);

                        // ASM: call muentry_init_object(%allocator, %tmp_res, %encode)
                        let encode = make_value_int_const(mm::get_gc_type_encode(ty_info.gc_type.id), vm);
                        self.emit_runtime_entry(
                            &entrypoints::INIT_OBJ,
                            vec![tmp_allocator.clone(), tmp_res.clone(), encode],
                            None,
                            Some(node), f_context, vm
                        );
                    }

                    Instruction_::NewHybrid(ref ty, var_len) => {
                        trace!("instsel on NEWHYBRID");
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => {},
                                _ => panic!("NEWHYBRID is only for allocating hybrid types, use NEW for others")
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_align = ty_info.alignment;
                        let fix_part_size = ty_info.size;
                        let var_ty_size = match ty.v {
                            MuType_::Hybrid(ref name) => {
                                let map_lock = HYBRID_TAG_MAP.read().unwrap();
                                let hybrid_ty_ = map_lock.get(name).unwrap();
                                let var_ty = hybrid_ty_.get_var_ty();

                                vm.get_backend_type_info(var_ty.id()).size
                            },
                            _ => panic!("only expect HYBRID type here")
                        };

                        // actual size = fix_part_size + var_ty_size * len
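                        // e.g. (hypothetical sizes) for a hybrid with a 16-byte fixed part and an
                        // 8-byte var-part element, an allocation with len = 3 needs 16 + 8*3 = 40 bytes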
                        let (actual_size, length) = {
                            let ref ops = inst.ops;
                            let ref var_len = ops[var_len];

                            if match_node_int_imm(var_len) {
                                let var_len = node_imm_to_u64(var_len);
                                let actual_size = fix_part_size + var_ty_size * (var_len as usize);
                                (
                                    make_value_int_const(actual_size as u64, vm),
                                    make_value_int_const(var_len as u64, vm)
                                )
                            } else {
                                let tmp_actual_size = make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                let tmp_var_len = self.emit_ireg(var_len, f_content, f_context, vm);

                                // tmp_actual_size = tmp_var_len*var_ty_size
                                emit_mul_u64(self.backend.as_mut(), &tmp_actual_size, &tmp_var_len, f_context, vm, var_ty_size as u64);
                                // tmp_actual_size = tmp_var_len*var_ty_size + fix_part_size
                                emit_add_u64(self.backend.as_mut(), &tmp_actual_size, &tmp_actual_size, f_context, vm, fix_part_size as u64);
                                (tmp_actual_size, tmp_var_len)
                            }
                        };

                        let tmp_allocator = self.emit_get_allocator(f_context, vm);
                        let tmp_res = self.emit_alloc_sequence(tmp_allocator.clone(), actual_size, ty_align, node, f_context, vm);

                        // ASM: call the INIT_HYBRID runtime entry(%allocator, %tmp_res, %encode, %length)
                        let encode = make_value_int_const(mm::get_gc_type_encode(ty_info.gc_type.id), vm);
                        self.emit_runtime_entry(
                            &entrypoints::INIT_HYBRID,
                            vec![tmp_allocator.clone(), tmp_res.clone(), encode, length],
                            None,
                            Some(node), f_context, vm
                        );
                    }

                    // Runtime Entry
                    Instruction_::Throw(op_index) => {
                        trace!("instsel on THROW");
                        let ref ops = inst.ops;
                        let ref exception_obj = ops[op_index];

                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION,
                            vec![exception_obj.clone_value()],
                            None,
                            Some(node), f_context, vm);
                    }

                    // Runtime Entry
                    Instruction_::PrintHex(index) => {
                        trace!("instsel on PRINTHEX");
                        let ref ops = inst.ops;
                        let ref op = ops[index];

                        self.emit_runtime_entry(
                            &entrypoints::PRINT_HEX,
                            vec![op.clone_value()],
                            None,
                            Some(node), f_context, vm
                        );
                    }

                    _ => unimplemented!()
                } // main switch
            },

            TreeNode_::Value(_) => {}
        }
    }

    // Emits code for a binary operation on the given operands, writing the result
    // (and any requested status flags) to the node's result values
    // TODO: If the RHS of an ADD is negative change it to a SUB (and vice versa)
    // TODO: Treat XOR 1....1, arg and XOR arg, 1....1 specially (1....1 is an invalid logical immediate, but the operation is non-trivial so it should be optimised to res = MVN arg)
    // Note: Assume that trivial operations are to be optimised by the Mu IR compiler (but this function still needs to work correctly if they aren't optimised away)
    // TODO: Use a shift when dividing or multiplying by a power of two
    fn emit_binop(&mut self, node: &TreeNode, inst: &Instruction, op: BinOp, status: BinOpStatus, op1: OpIndex, op2: OpIndex, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        use std;
        let mut op1 = op1;
        let mut op2 = op2;
        let ref ops = inst.ops;
        let res = self.get_result_value(node, 0);

        // Get the size (in bits) of the type the operation is on
        let n = get_bit_size(&res.ty, vm);
        let output_status = status.flag_n || status.flag_z || status.flag_c || status.flag_v;
        let mut status_value_index = 0;
        // NOTE: XZR is just a dummy value here (it will not be used)
        let tmp_status_n = if status.flag_n {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_z = if status.flag_z {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_c = if status.flag_c {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_v = if status.flag_v {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };

        // TODO: Division by zero exception (note: must explicitly check for this, ARM doesn't do it)
        match op {
            // The lower n bits of the result will be correct, and will not depend
            // on any bits of op1 or op2 above bit n-1
            op::BinOp::Add => {
                let mut imm_val = 0 as u64;
                // Is one of the arguments a valid immediate?
                let emit_imm = if match_node_int_imm(&ops[op2]) {
                    imm_val = node_imm_to_u64(&ops[op2]);
                    is_valid_arithmetic_imm(imm_val)
                } else if match_node_int_imm(&ops[op1]) {
                    imm_val = node_imm_to_u64(&ops[op1]);
                    // if op1 is a valid immediate, swap it with op2
                    if is_valid_arithmetic_imm(imm_val) {
                        std::mem::swap(&mut op1, &mut op2);
                        true
                    } else {
                        false
                    }
                } else {
                    false
                };

                if emit_imm {
                    trace!("emit add-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let imm_shift = imm_val >= 4096;
                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };
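                    // (AArch64 ADD/SUB immediates are 12 bits, optionally shifted left by 12;
                    // values of 4096 and above that passed is_valid_arithmetic_imm are
                    // emitted using the shifted form)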

                    if output_status {
                        emit_zext(self.backend.as_mut(), &reg_op1);
                        self.backend.emit_adds_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);

                        if status.flag_v {
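                            // Signed overflow on an add occurs iff op1 and op2 have the same sign
                            // and the result's sign differs from it; the bit operations below
                            // compute exactly that condition in bit n-1 of tmp_status_v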
                            if n < 32 {
                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs

                                // Sign bit of op2 is 0
                                if !get_bit(imm_val, n - 1) {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 0)
                                    self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &reg_op1);
                                } else {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 1)
                                    self.backend.emit_and(&tmp_status_v, &tmp_status_v, &reg_op1);
                                }

                                // Check the sign bit of tmp_status (i.e. tmp_status[n-1])
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }
                        if status.flag_c {
                            if n < 32 {
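                                // Both operands are zero-extended n-bit values, so the unsigned
                                // carry out of the n-bit add appears as bit n of the full-width result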
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_add_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);
                    }
                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                    trace!("emit add-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    if output_status {
                        emit_zext(self.backend.as_mut(), &reg_op1);
                        if n == 1 {
                            // adds_ext doesn't support extending 1-bit numbers
                            emit_zext(self.backend.as_mut(), &reg_op2);
                            self.backend.emit_adds(&res, &reg_op1, &reg_op2);
                        } else {
                            // Emit an adds that zero extends op2
                            self.backend.emit_adds_ext(&res, &reg_op1, &reg_op2, false, 0);
                        }

                        if status.flag_v {
                            if n < 32 {
                                let tmp = make_temporary(f_context, UINT32_TYPE.clone(), vm);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs
                                self.backend.emit_eor(&tmp, &reg_op1, &reg_op2);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                //      and op1 and op2 have the same sign
                                self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &tmp);

                                // Check tmp_status[n-1]
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }

                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_add(&res, &reg_op1, &reg_op2);
                    }
                } else if self.match_ireg_ex(&ops[op1]) && self.match_ireg_ex(&ops[op2]) {
                    trace!("emit add-iregex-iregex");

                    let (op1_l, op1_h) = self.emit_ireg_ex(&ops[op1], f_content, f_context, vm);
                    let (op2_l, op2_h) = self.emit_ireg_ex(&ops[op2], f_content, f_context, vm);
                    let (res_l, res_h) = split_int128(&res, f_context, vm);
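                    // 128-bit add: ADDS the low 64-bit halves first, then add the high halves with
                    // ADC(S) so the carry out of the low word propagates into the high word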

                    // Add the lower halves (whilst setting flags)
                    self.backend.emit_adds(&res_l, &op1_l, &op2_l);
                    if output_status {
                        if status.flag_z {
                            // tmp_status_z = '1' IFF res_l == 0 (the result of the last operation)
                            self.backend.emit_cset(&tmp_status_z, "EQ")
                        }
                        // Add the upper halves, with carry and setting flags
                        self.backend.emit_adcs(&res_h, &op1_h, &op2_h);

                        if status.flag_z {
                            // tmp_status_z = (res_h == 0 ? tmp_status_z : 0)
                            // (i.e. tmp_status_z = '1' IFF tmp_status_z == '1' && res_h == 0)
                            self.backend.emit_csel(&tmp_status_z, &tmp_status_z, &WZR, "EQ")
                        }
                        if status.flag_n {
                            // tmp_status_n = '1' IFF res_h < 0
                            self.backend.emit_cset(&tmp_status_n, "MI")
                        }

                        if status.flag_c {
                            self.backend.emit_cset(&tmp_status_c, "CS");
                        }

                        if status.flag_v {
                            let tmp1 = cast_value(&tmp_status_v, &UINT64_TYPE);
                            let tmp2 = make_temporary(f_context, UINT64_TYPE.clone(), vm);


                            // tmp1[63] = 1 IFF res and op1 have different signs
                            self.backend.emit_eor(&tmp1, &res_h, &op1_h);


                            // tmp2[63] =  1 IFF op1 and op2 have different signs
                            self.backend.emit_eor(&tmp2, &op1_h, &op2_h);

                            // tmp1[63] = 1 iff res and op1 have different signs
                            //      and op1 and op2 have the same sign
                            self.backend.emit_bic(&tmp1, &tmp1, &tmp2);

                            // Check tmp1[63]
                            self.backend.emit_tst_imm(&tmp1, 1 << 63);
                            // Set tmp_status_v to 1 IFF tmp1[63] == 1
                            self.backend.emit_cset(&tmp_status_v, "NE");
                        }
                    } else {
                        // Add the upper halves, with carry
                        self.backend.emit_adc(&res_h, &op1_h, &op2_h);
                    }
                } else {
                    unimplemented!();
                }
            },
            op::BinOp::Sub => {
                if match_node_int_imm(&ops[op2]) &&
                    is_valid_arithmetic_imm(node_imm_to_u64(&ops[op2])) &&

                    // If this were true, the immediate would need to be one-extended,
                    // which would result in an immediate with too many bits to encode
                    !(status.flag_c && n < 32) {
                    // (in that case the carry couldn't be computed when using a subs_imm instruction)
                    trace!("emit sub-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let imm_val = node_imm_to_u64(&ops[op2]);
                    let imm_shift = imm_val >= 4096;
                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };

                    if output_status {
                        emit_zext(self.backend.as_mut(), &reg_op1);
                        self.backend.emit_subs_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);

                        if status.flag_v {
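                            // Signed overflow on a subtraction occurs iff op1 and op2 have different
                            // signs and the result's sign differs from op1's; the bit operations
                            // below compute that condition in bit n-1 of tmp_status_v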
                            if n < 32 {
                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs

                                // Sign bit of op2 is 0
                                if imm_val & (1 << (n - 1)) == 0 {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as -op2 (which is 1)
                                    self.backend.emit_and(&tmp_status_v, &tmp_status_v, &reg_op1);
                                } else {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as -op2 (which is 0)
                                    self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &reg_op1);
                                }

                                // Check the sign bit of tmp_status (i.e. tmp_status[n-1])
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }

                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_sub_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);
                    }
                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                    trace!("emit sub-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    if output_status {
                        emit_zext(self.backend.as_mut(), &reg_op1);

                        if status.flag_c {
                            // Note: reg_op2 is 'one'-extended so that SUB res, zext(reg_op1), oext(reg_op2)
                            // is equivalent to: ADD res, zext(reg_op1), zext(~reg_op2), +1
                            // (this allows the carry flag to be computed as the 'n'th bit of res)
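                            // i.e. res = 2^n + (op1 - op2) for n-bit operands, so bit n of res is set
                            // exactly when there is no borrow, matching the AArch64 carry convention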

                            emit_oext(self.backend.as_mut(), &reg_op2);
                            self.backend.emit_subs(&res, &reg_op1, &reg_op2);
                        } else if n == 1 {
                            // if the carry flag isn't being computed, just zero-extend op2
                            emit_zext(self.backend.as_mut(), &reg_op2);
                            self.backend.emit_subs(&res, &reg_op1, &reg_op2);
                        } else {
                            // Emit an subs that zero extends op2
                            self.backend.emit_subs_ext(&res, &reg_op1, &reg_op2, false, 0);
                        }


                        if status.flag_v {
                            if n < 32 {
                                let tmp = make_temporary(f_context, UINT32_TYPE.clone(), vm);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and -op2 have different signs
                                self.backend.emit_eon(&tmp, &reg_op1, &reg_op2);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                //      and op1 and -op2 have the same sign (i.e. op1 and op2 have different signs)
                                self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &tmp);

                                // Check tmp_status[n-1]
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }

                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_sub(&res, &reg_op1, &reg_op2);
                    }