#![allow(unused_variables)]
#![warn(unused_imports)]
#![warn(unreachable_code)]
#![warn(dead_code)]
use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::*;
use ast::types;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::mm::objectmodel::OBJECT_HEADER_SIZE;

use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;

use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::EPILOGUE_BLOCK_NAME;

use compiler::backend::aarch64::*;
use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

const INLINE_FASTPATH : bool = false;

pub struct InstructionSelection {
    name: &'static str,
    backend: Box<CodeGenerator>,

    current_fv_id: MuID,
    current_callsite_id: usize,
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsite that names the block as exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>,

    current_constants: HashMap<MuID, P<Value>>,
    current_constants_locs: HashMap<MuID, P<Value>>
}

impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection {
            name: "Instruction Selection (aarch64)",
            backend: Box::new(ASMCodeGen::new()),

            current_fv_id: 0,
            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_func_start: None,
            // key: block id, val: callsite that names the block as exception block
            current_exn_callsites: HashMap::new(),
            current_exn_blocks: HashMap::new(),

            current_constants: HashMap::new(),
            current_constants_locs: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }

    // in this pass, we assume that
    // * we do not need to backup/restore caller-saved registers
    // if any of these assumptions breaks, we will need to re-emit the code
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node#{} {}", node.id(), node);

        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    // TODO: Optimise if cond is a flag from a binary operation?
                    Instruction_::Branch2 { cond, ref true_dest, ref false_dest, true_prob } => {
                        trace!("instsel on BRANCH2");
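                        // Lay out the more likely destination as the fallthrough block and branch to the
                        // less likely one; branch_if_true records whether the emitted branch tests the
                        // original condition or its inverse.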
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            if true_prob > 0.5f32 {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };

                        let ops = inst.ops.read().unwrap();

                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);

                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();

                        let ref cond = ops[cond];

                        if self.match_cmp_res(cond) {
                            trace!("emit cmp_res-branch2");
                            let mut cmpop = self.emit_cmp_res(cond, f_content, f_context, vm);
                            if !branch_if_true {
                                cmpop = cmpop.invert();
                            }

                            let cond = get_condition_codes(cmpop);
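                            // Some Mu comparisons map to two AArch64 condition codes (e.g. an
                            // unordered-or-equal FP compare); in that case branch if either code holds.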

                            if cmpop == op::CmpOp::FFALSE {
                                ; // Do nothing
                            } else if cmpop == op::CmpOp::FTRUE {
                                self.backend.emit_b(branch_target);
                            } else {
                                self.backend.emit_b_cond(cond[0], branch_target.clone());

                                if cond.len() == 2 {
                                    self.backend.emit_b_cond(cond[1], branch_target);
                                }
                            }
                        } else {
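                            // The condition is a plain 1-bit value in a register: test bit 0 and
                            // branch with TBNZ/TBZ.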
                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);

                            if branch_if_true {
                                self.backend.emit_tbnz(&cond_reg, 0, branch_target.clone());
                            } else {
                                self.backend.emit_tbz(&cond_reg, 0, branch_target.clone());
                            }
                        };
                    },

                    Instruction_::Select { cond, true_val, false_val } => {
                        use ast::op::CmpOp::*;

                        trace!("instsel on SELECT");
                        let ops = inst.ops.read().unwrap();

                        let ref cond = ops[cond];
                        let ref true_val = ops[true_val];
                        let ref false_val = ops[false_val];

                        let tmp_res = self.get_result_value(node, 0);

                        // moving integers/pointers
                        // generate compare
                        let cmpop = if self.match_cmp_res(cond) {
                            self.emit_cmp_res(cond, f_content, f_context, vm)
                        } else if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);
                            self.backend.emit_cmp_imm(&tmp_cond, 0, false);
                            NE
                        } else {
                            panic!("expected ireg, found {}", cond)
                        };

                        let tmp_true = self.emit_reg(true_val, f_content, f_context, vm);
                        let tmp_false = self.emit_reg(false_val, f_content, f_context, vm);

                        let cond = get_condition_codes(cmpop);
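                        // When the comparison needs two condition codes, the second CSEL overwrites the
                        // result with the true value if the extra condition also holds, so the true value
                        // is selected when either condition is met.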

                        if self.match_ireg(true_val) {
                            if cmpop == FFALSE {
                                self.backend.emit_mov(&tmp_res, &tmp_false);
                            } else if cmpop == FTRUE {
                                self.backend.emit_mov(&tmp_res, &tmp_true);
                            } else {
                                self.backend.emit_csel(&tmp_res, &tmp_true, &tmp_false, cond[0]);

                                if cond.len() == 2 {
                                    self.backend.emit_csel(&tmp_res, &tmp_true, &tmp_res, cond[1]);
                                }
                            }
                        } else if self.match_fpreg(true_val) {
                            if cmpop == FFALSE {
                                self.backend.emit_fmov(&tmp_res, &tmp_false);
                            } else if cmpop == FTRUE {
                                self.backend.emit_fmov(&tmp_res, &tmp_true);
                            } else {
                                self.backend.emit_fcsel(&tmp_res, &tmp_true, &tmp_false, cond[0]);

                                if cond.len() == 2 {
                                    self.backend.emit_fcsel(&tmp_res, &tmp_true, &tmp_res, cond[1]);
                                }
                            }
                        } else {
                            // moving vectors, floating points
                            unimplemented!()
                        }
                    },

                    Instruction_::CmpOp(op, op1, op2) => {
                        use ast::op::CmpOp::*;

                        trace!("instsel on CMPOP");
                        let ops = inst.ops.read().unwrap();
                        let ref op1 = ops[op1];
                        let ref op2 = ops[op2];

                        let tmp_res = self.get_result_value(node, 0);

                        debug_assert!(tmp_res.ty.get_int_length().is_some());
                        debug_assert!(tmp_res.ty.get_int_length().unwrap() == 1);

                        let cmpop = self.emit_cmp_res_op(op, &op1, &op2, f_content, f_context, vm);
                        let cond = get_condition_codes(cmpop);

                        if cmpop == FFALSE {
                            self.emit_mov_u64(&tmp_res, 0);
                        } else if cmpop == FTRUE {
                            self.emit_mov_u64(&tmp_res, 1);
                        } else {
                            self.backend.emit_cset(&tmp_res, cond[0]);

                            // Note: some comparisons can't be computed based on a single aarch64 flag
                            // instead they are computed as a condition OR NOT another condition.
                            if cond.len() == 2 {
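                                // CSINC with the inverted condition keeps tmp_res when cond[1] fails,
                                // and sets it to WZR + 1 = 1 when cond[1] holds.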
                                self.backend.emit_csinc(&tmp_res, &tmp_res, &WZR, invert_condition_code(cond[1]));
                            }
                        }
                    }

                    Instruction_::Branch1(ref dest) => {
                        trace!("instsel on BRANCH1");
                        let ops = inst.ops.read().unwrap();

                        self.process_dest(&ops, dest, f_content, f_context, vm);

                        let target = f_content.get_block(dest.target).name().unwrap();

                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_b(target);
                    },

                    Instruction_::Switch { cond, ref default, ref branches } => {
                        trace!("instsel on SWITCH");
                        let ops = inst.ops.read().unwrap();

                        let ref cond = ops[cond];

                        if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);
                            self.emit_zext(&tmp_cond);

                            // emit each branch
                            for &(case_op_index, ref case_dest) in branches {
                                let ref case_op = ops[case_op_index];

                                // process dest
                                self.process_dest(&ops, case_dest, f_content, f_context, vm);

                                let target = f_content.get_block(case_dest.target).name().unwrap();

                                let mut imm_val = 0 as u64;
                                // Is one of the arguments a valid immediate?
                                let emit_imm = if self.match_node_int_imm(&case_op) {
                                    imm_val = self.node_imm_to_u64(&case_op);
                                    is_valid_arithmetic_imm(imm_val)
                                } else {
                                    false
                                };

                                if emit_imm {
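                                    // Arithmetic immediates are 12 bits wide, optionally shifted left
                                    // by 12; larger constants are passed with the shift flag set.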
                                    let imm_shift = imm_val > 4096;
                                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };

                                    self.backend.emit_cmp_imm(&tmp_cond, imm_op2 as u16, imm_shift);
                                } else {
                                    let tmp_case_op = self.emit_ireg(case_op, f_content, f_context, vm);
                                    self.emit_zext(&tmp_case_op);
                                    self.backend.emit_cmp(&tmp_cond, &tmp_case_op);
                                }

                                self.backend.emit_b_cond("EQ", target);

                                self.finish_block(&vec![]);
                                self.start_block(format!("{}_switch_not_met_case_{}", node.id(), case_op_index), &vec![]);
                            }

                            // emit default
                            self.process_dest(&ops, default, f_content, f_context, vm);

                            let default_target = f_content.get_block(default.target).name().unwrap();
                            self.backend.emit_b(default_target);
                        } else {
                            panic!("expecting cond in switch to be ireg: {}", cond);
                        }
                    }

                    Instruction_::ExprCall { ref data, is_abort } => {
                        trace!("instsel on EXPRCALL");

                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode, 
                            f_content, f_context, vm);
                    },

                    Instruction_::Call { ref data, ref resume } => {
                        trace!("instsel on CALL");

                        self.emit_mu_call(
                            inst,
                            data,
                            Some(resume),
                            node,
                            f_content, f_context, vm);
                    },

                    Instruction_::ExprCCall { ref data, is_abort } => {
                        trace!("instsel on EXPRCCALL");

                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_c_call_ir(inst, data, None, node, f_content, f_context, vm);
                    }

                    Instruction_::CCall { ref data, ref resume } => {
                        trace!("instsel on CCALL");

                        self.emit_c_call_ir(inst, data, Some(resume), node, f_content, f_context, vm);
                    }

                    Instruction_::Return(_) => {
                        trace!("instsel on RETURN");

                        // prepare return regs
                        let ref ops = inst.ops.read().unwrap();
                        // TODO: Are ret_val_indices in the same order as the return types in the function's signature?
                        let ret_val_indices = match inst.v {
                            Instruction_::Return(ref vals) => vals,
                            _ => panic!("expected ret inst")
                        };

                        let ret_tys = ret_val_indices.iter().map(|i| self.node_type(&ops[*i])).collect();
                        let ret_type = self.combine_return_types(&ret_tys);
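                        // If the combined return value cannot be passed in registers, it is written
                        // through XR; the saved value of XR is reloaded from [FP - 8] below.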
                        // Note: this shouldn't cause any overhead in the generated code if the register is never used
                        let temp_xr = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);

                        if self.compute_return_allocation(&ret_type, &vm) > 0 {
                            // Load the saved value of XR into temp_xr
                            self.emit_load_base_offset(&temp_xr, &FP, -8, f_context, vm);
                        }

                        let n = ret_tys.len(); // number of return values
                        if n == 0 {
                            // Do nothing
                        } else if n == 1 {
                            let ret_loc = self.compute_return_locations(&ret_type, &temp_xr, &vm);
                            self.emit_move_node_to_value(&ret_loc, &ops[ret_val_indices[0]], f_content, f_context, vm);
                        } else {
                            let ret_loc = self.compute_return_locations(&ret_type, &temp_xr, &vm);

                            let mut i = 0;
                            for ret_index in ret_val_indices {
                                let ret_val = self.emit_node_value(&ops[*ret_index], f_content, f_context, vm);
                                let ref ty = ret_val.ty;
                                let offset = self.get_field_offset(&ret_type, i, &vm);

                                match ty.v {
                                    MuType_::Vector(_, _) | MuType_::Tagref64 => unimplemented!(),
                                    MuType_::Void => panic!("Unexpected void"),
                                    MuType_::Struct(_) | MuType_::Array(_, _) => unimplemented!(),

                                    // Integral, pointer or floating point type
                                    _ => self.insert_bytes(&ret_loc, &ret_val, offset as i64, f_context, vm),
                                }

                                i += 1;
                            }
                        }
                        
                        self.backend.emit_b(EPILOGUE_BLOCK_NAME.to_string());
                    },

                    Instruction_::BinOp(op, op1, op2) => {
                        trace!("instsel on BINOP");
                        self.emit_binop(node, inst, op, BinOpStatus { flag_n: false, flag_z: false, flag_c: false, flag_v: false }, op1, op2, f_content, f_context, vm);
                    },

                    Instruction_::BinOpWithStatus(op, status, op1, op2) => {
                        trace!("instsel on BINOP_STATUS");
                        self.emit_binop(node, inst, op, status, op1, op2, f_content, f_context, vm);
                    }

                    Instruction_::ConvOp { operation, ref from_ty, ref to_ty, operand } => {
                        trace!("instsel on CONVOP");

                        let ops = inst.ops.read().unwrap();

                        let ref op = ops[operand];

                        let tmp_res = self.get_result_value(node, 0);
                        let tmp_op = self.emit_reg(op, f_content, f_context, vm);

                        let from_ty_size = get_bit_size(&from_ty, vm);
                        let to_ty_size = get_bit_size(&to_ty, vm);

                        match operation {
                            op::ConvOp::TRUNC => {
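                                // Truncation is just a move: the operand is re-typed at the narrower
                                // result width and the upper bits are ignored.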
                                self.backend.emit_mov(&tmp_res, unsafe { &tmp_op.as_type(tmp_res.ty.clone()) });
                            },
                            op::ConvOp::ZEXT => {
                                if from_ty_size != to_ty_size {
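                                    // UBFX extracts the low from_ty_size bits of the operand and
                                    // zero-extends them into the result.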
                                    self.backend.emit_ubfx(&tmp_res, unsafe { &tmp_op.as_type(tmp_res.ty.clone()) }, 0, from_ty_size as u8);
                                } else {
                                    self.backend.emit_mov(&tmp_res, &tmp_op);
                                }
                            },
                            op::ConvOp::SEXT => {
                                if from_ty_size != to_ty_size {
                                    self.backend.emit_sbfx(&tmp_res, unsafe { &tmp_op.as_type(tmp_res.ty.clone()) }, 0, from_ty_size as u8);
                                } else {
                                    self.backend.emit_mov(&tmp_res, &tmp_op);
                                }
                            },
                            op::ConvOp::REFCAST | op::ConvOp::PTRCAST => {
                                // just a mov (and hopefully reg alloc will coalesce it)
                                self.backend.emit_mov(&tmp_res, &tmp_op);
                            },

                            op::ConvOp::UITOFP => {
                                self.backend.emit_ucvtf(&tmp_res, &tmp_op);
                            },

                            op::ConvOp::SITOFP => {
                                self.backend.emit_scvtf(&tmp_res, &tmp_op);
                            },

                            op::ConvOp::FPTOUI => {
                                self.backend.emit_fcvtzu(&tmp_res, &tmp_op);
                            },

                            op::ConvOp::FPTOSI => {
                                self.backend.emit_fcvtzs(&tmp_res, &tmp_op);
                            },

                            op::ConvOp::BITCAST => {
                                self.backend.emit_fmov(&tmp_res, &tmp_op);
                            },
                            op::ConvOp::FPTRUNC | op::ConvOp::FPEXT => {
                                self.backend.emit_fcvt(&tmp_res, &tmp_op);
                            },
                        }
                    }

                    Instruction_::Load { is_ptr, order, mem_loc } => {
                        trace!("instsel on LOAD");
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];

                        // Whether to use a load acquire
                        let use_acquire = match order {
                            MemoryOrder::Relaxed | MemoryOrder::NotAtomic => false,
                            MemoryOrder::Consume | MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                            _ => panic!("didn't expect order {:?} with load inst", order)
                        };

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        let res_temp = self.get_result_value(node, 0);

                        if use_acquire {
                            // Can only have a base for a LDAR
                            let temp_loc = self.emit_mem_base(&resolved_loc, f_context, vm);
                            self.backend.emit_ldar(&res_temp, &temp_loc);
                        } else {
                            let temp_loc = self.emit_mem(&resolved_loc, f_context, vm);
                            self.backend.emit_ldr(&res_temp, &temp_loc, false);
                        }
                    }

                    Instruction_::Store { is_ptr, order, mem_loc, value } => {
                        trace!("instsel on STORE");
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];

                        // Whether to use a store release or not
                        let use_release = match order {
                            MemoryOrder::Relaxed | MemoryOrder::NotAtomic => false,
                            MemoryOrder::Release | MemoryOrder::SeqCst => true,
                            _ => panic!("didn't expect order {:?} with store inst", order)
                        };

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        let val = self.emit_reg(val_op, f_content, f_context, vm);

                        if use_release {
                            // Can only have a base for a STLR
                            let temp_loc = self.emit_mem_base(&resolved_loc, f_context, vm);
                            self.backend.emit_stlr(&temp_loc, &val);
                        } else {
                            let temp_loc = self.emit_mem(&resolved_loc, f_context, vm);
                            self.backend.emit_str(&temp_loc, &val);
                        }
                    }

                    Instruction_::CmpXchg{is_ptr, is_weak, success_order, fail_order, mem_loc, expected_value, desired_value} => {
                        // Note: this uses the same operations as GCC (for the C++ atomic cmpxchg)
                        // Clang is slightly different and ignores the 'fail_order'
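                        // Implemented as a load-exclusive/store-exclusive loop: a strong CMPXCHG retries
                        // until the store-exclusive succeeds, while a weak one may fail spuriously.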
                        let use_acquire = match fail_order {
                            MemoryOrder::Acquire | MemoryOrder::SeqCst => true,
                            MemoryOrder::Relaxed => match success_order {
                                MemoryOrder::Acquire | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Relaxed | MemoryOrder::Release => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            _ => panic!("didnt expect fail order {:?} for cmpxchg", fail_order)
                        };
                        let use_release = match fail_order {
                            MemoryOrder::Acquire => match success_order {
                                MemoryOrder::Relaxed | MemoryOrder::Release | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Acquire => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            MemoryOrder::SeqCst => true,
                            MemoryOrder::Relaxed => match success_order {
                                MemoryOrder::Release | MemoryOrder::AcqRel | MemoryOrder::SeqCst => true,
                                MemoryOrder::Relaxed | MemoryOrder::Acquire => false,
                                _ => panic!("didnt expect success order {:?} for cmpxchg", success_order)
                            },
                            _ => panic!("didnt expect fail order {:?} for cmpxchg", fail_order)
                        };


                        let ops = inst.ops.read().unwrap();
                        let loc = self.emit_node_addr_to_value(&ops[mem_loc], f_content, f_context, vm);
                        let expected = self.emit_reg(&ops[expected_value], f_content, f_context, vm);
                        let desired = self.emit_reg(&ops[desired_value], f_content, f_context, vm);

                        let res_value = self.get_result_value(node, 0);
                        let res_success = self.get_result_value(node, 1);


                        let blk_cmpxchg_start = format!("{}_cmpxchg_start", node.id());
                        let blk_cmpxchg_failed = format!("{}_cmpxchg_failed", node.id());
                        let blk_cmpxchg_succeded = format!("{}_cmpxchg_succeded", node.id());

                        self.finish_block(&vec![loc.clone(),expected.clone(), desired.clone()]);

                    // cmpxchg_start:
                        self.start_block(blk_cmpxchg_start.clone(), &vec![loc.clone(),expected.clone(), desired.clone()]);

                        if use_acquire {
                            self.backend.emit_ldaxr(&res_value, &loc);
                        } else {
                            self.backend.emit_ldxr(&res_value, &loc);
                        }

                        if expected.is_int_reg() {
                            self.backend.emit_cmp(&res_value, &expected);
                        } else {
                            self.backend.emit_fcmp(&res_value, &expected);
                        }
                        self.backend.emit_b_cond("NE", blk_cmpxchg_failed.clone());

                        if use_release {
                            self.backend.emit_stlxr(&loc, &res_success, &desired);
                        } else {
                            self.backend.emit_stxr(&loc, &res_success, &desired);
                        }

                        if !is_weak {
                            // Store failed, try again
                            self.backend.emit_cbnz(&res_success, blk_cmpxchg_start.clone());
                        }

                        self.backend.emit_b(blk_cmpxchg_succeded.clone());

                        self.finish_block(&vec![res_success.clone(), res_value.clone()]);

                    // cmpxchg_failed:
                        self.start_block(blk_cmpxchg_failed.clone(), &vec![res_success.clone(), res_value.clone()]);

                        self.backend.emit_clrex();
                        // Set res_success to 1 (the same value STXR/STLXR uses to indicate failure)
                        self.backend.emit_mov_imm(&res_success, 1);

                        self.finish_block(&vec![res_success.clone(), res_value.clone()]);

                    // cmpxchg_succeded:
                        self.start_block(blk_cmpxchg_succeded.clone(), &vec![res_success.clone(), res_value.clone()]);
                        // this NOT is needed as STXR/STLXR returns success as '0', whereas the Mu spec says it should be 1
                        self.backend.emit_eor_imm(&res_success, &res_success, 1);
                    }
                    Instruction_::GetIRef(_)
                    | Instruction_::GetFieldIRef { .. }
                    | Instruction_::GetElementIRef{..}
                    | Instruction_::GetVarPartIRef { .. }
                    | Instruction_::ShiftIRef { .. } => {
                        trace!("instsel on GET/FIELD/VARPARTIREF, SHIFTIREF");
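                        // All of these reduce to an address computation: build the memory operand for
                        // the IRef and materialise the resulting address into the result register.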
                        let mem_addr = self.emit_get_mem_from_inst(node, f_content, f_context, vm);
                        let tmp_res = self.get_result_value(node, 0);
                        self.emit_calculate_address(&tmp_res, &mem_addr, f_context, vm);
                    }

                    Instruction_::Fence(order) => {
                        trace!("instsel on FENCE");

                        // Whether to emit a load fence or a normal one
                        let use_load = match order {
                            MemoryOrder::Release | MemoryOrder::SeqCst | MemoryOrder::AcqRel => false,
                            MemoryOrder::Acquire => true,
                            _ => panic!("didn't expect order {:?} with fence inst", order)
                        };

                        if use_load {
                            // Data Memory Barrier for the Inner Shareable domain (for load accesses only)
                            self.backend.emit_dmb("ISHLD");
                        } else {
                            // Data Memory Barrier for the Inner Shareable domain
                            self.backend.emit_dmb("ISH");
                        }
                    }

                    Instruction_::ThreadExit => {
                        trace!("instsel on THREADEXIT");
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)

                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                        self.backend.emit_add_imm(&tl, &tl, *thread::NATIVE_SP_LOC_OFFSET as u16, false);

                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_content, f_context, vm);
                    }


                    Instruction_::CommonInst_GetThreadLocal => {
                        trace!("instsel on GETTHREADLOCAL");
                        // get thread local
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);

                        let tmp_res = self.get_result_value(node, 0);

                        // load [tl + USER_TLS_OFFSET] -> tmp_res
                        self.emit_load_base_offset(&tmp_res, &tl, *thread::USER_TLS_OFFSET as i64, f_context, vm);
                    }


                    Instruction_::CommonInst_SetThreadLocal(op) => {
                        trace!("instsel on SETTHREADLOCAL");
                        let ops = inst.ops.read().unwrap();
                        let ref op = ops[op];

                        debug_assert!(self.match_ireg(op));

                        let tmp_op = self.emit_ireg(op, f_content, f_context, vm);

                        // get thread local
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);

                        // store tmp_op -> [tl + USER_TLS_OFFSET]
                        self.emit_store_base_offset(&tl, *thread::USER_TLS_OFFSET as i64, &tmp_op, f_context, vm);
                    }

                    Instruction_::CommonInst_Pin(op) => {
                        trace!("instsel on PIN");
                        if !mm::GC_MOVES_OBJECT {
                            // non-moving GC: pin is a nop (move from op to result)
                            let ops = inst.ops.read().unwrap();
                            let ref op = ops[op];

                            let tmp_res = self.get_result_value(node, 0);

                            self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                        } else {
                            unimplemented!()
                        }
                    }

                    Instruction_::CommonInst_Unpin(_) => {
                        trace!("instsel on UNPIN");
                        if !mm::GC_MOVES_OBJECT {
                            // do nothing
                        } else {
                            unimplemented!()
                        }
                    }

                    Instruction_::Move(op) => {
                        trace!("instsel on MOVE (internal IR)");
                        let ops = inst.ops.read().unwrap();
                        let ref op = ops[op];

                        let tmp_res = self.get_result_value(node, 0);

                        self.emit_move_node_to_value(&tmp_res, op, f_content, f_context, vm);
                    }

                    Instruction_::New(ref ty) => {
                        trace!("instsel on NEW");
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => panic!("cannot use NEW for hybrid, use NEWHYBRID instead"),
                                _ => {}
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let size = ty_info.size;
                        let ty_align = ty_info.alignment;

                        let const_size = self.make_value_int_const(size as u64, vm);

                        let tmp_allocator = self.emit_get_allocator(node, f_content, f_context, vm);
                        let tmp_res = self.emit_alloc_sequence(tmp_allocator.clone(), const_size, ty_align, node, f_content, f_context, vm);

                        // ASM: call muentry_init_object(%allocator, %tmp_res, %encode)
                        let encode = self.make_value_int_const(mm::get_gc_type_encode(ty_info.gc_type.id), vm);
                        self.emit_runtime_entry(
                            &entrypoints::INIT_OBJ,
                            vec![tmp_allocator.clone(), tmp_res.clone(), encode],
                            None,
                            Some(node), f_content, f_context, vm
                        );
                    }

                    Instruction_::NewHybrid(ref ty, var_len) => {
                        trace!("instsel on NEWHYBRID");
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => {},
                                _ => panic!("NEWHYBRID is only for allocating hybrid types, use NEW for others")
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_align = ty_info.alignment;
                        let fix_part_size = ty_info.size;
                        let var_ty_size = match ty.v {
                            MuType_::Hybrid(ref name) => {
                                let map_lock = HYBRID_TAG_MAP.read().unwrap();
                                let hybrid_ty_ = map_lock.get(name).unwrap();
                                let var_ty = hybrid_ty_.get_var_ty();

                                vm.get_backend_type_info(var_ty.id()).size
                            },
                            _ => panic!("only expect HYBRID type here")
                        };

                        // actual size = fix_part_size + var_ty_size * len
                        let (actual_size, length) = {
                            let ops = inst.ops.read().unwrap();
                            let ref var_len = ops[var_len];

                            if self.match_node_int_imm(var_len) {
                                let var_len = self.node_imm_to_u64(var_len);
                                let actual_size = fix_part_size + var_ty_size * (var_len as usize);
                                (
                                    self.make_value_int_const(actual_size as u64, vm),
                                    self.make_value_int_const(var_len as u64, vm)
                                )
                            } else {
                                let tmp_actual_size = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                let tmp_var_len = self.emit_ireg(var_len, f_content, f_context, vm);

                                // tmp_actual_size = tmp_var_len*var_ty_size
                                self.emit_mul_u64(&tmp_actual_size, &tmp_var_len, f_context, vm, var_ty_size as u64);
                                // tmp_actual_size = tmp_var_len*var_ty_size + fix_part_size
                                self.emit_add_u64(&tmp_actual_size, &tmp_actual_size, f_context, vm, fix_part_size as u64);
                                (tmp_actual_size, tmp_var_len)
                            }
                        };

                        let tmp_allocator = self.emit_get_allocator(node, f_content, f_context, vm);
                        let tmp_res = self.emit_alloc_sequence(tmp_allocator.clone(), actual_size, ty_align, node, f_content, f_context, vm);

                        // ASM: call muentry_init_object(%allocator, %tmp_res, %encode)
                        let encode = self.make_value_int_const(mm::get_gc_type_encode(ty_info.gc_type.id), vm);
                        self.emit_runtime_entry(
                            &entrypoints::INIT_HYBRID,
                            vec![tmp_allocator.clone(), tmp_res.clone(), encode, length],
                            None,
                            Some(node), f_content, f_context, vm
                        );
                    }

                    // Runtime Entry
                    Instruction_::Throw(op_index) => {
                        trace!("instsel on THROW");
                        let ops = inst.ops.read().unwrap();
                        let ref exception_obj = ops[op_index];

                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION,
                            vec![exception_obj.clone_value()],
                            None,
                            Some(node), f_content, f_context, vm);
                    }

                    // Runtime Entry
                    Instruction_::PrintHex(index) => {
                        trace!("instsel on PRINTHEX");
                        let ops = inst.ops.read().unwrap();
                        let ref op = ops[index];

                        self.emit_runtime_entry(
                            &entrypoints::PRINT_HEX,
                            vec![op.clone_value()],
                            None,
                            Some(node), f_content, f_context, vm
                        );
                    }

                    _ => unimplemented!()
                } // main switch
            },

            TreeNode_::Value(_) => {}
        }
    }

    fn make_temporary(&mut self, f_context: &mut FunctionContext, ty: P<MuType>, vm: &VM) -> P<Value> {
        f_context.make_temporary(vm.next_id(), ty).clone_value()
    }


    fn make_value_int_const(&mut self, val: u64, vm: &VM) -> P<Value> {
        P(Value {
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: UINT64_TYPE.clone(),
            v: Value_::Constant(Constant::Int(val))
        })
    }

    fn make_value_base_offset(&mut self, base: &P<Value>, offset: i64, ty: &P<MuType>, vm: &VM) -> P<Value> {
        let mem = self.make_memory_location_base_offset(base, offset, vm);
        self.make_value_from_memory(mem, ty, vm)
    }

    fn make_value_from_memory(&mut self, mem: MemoryLocation, ty: &P<MuType>, vm: &VM) -> P<Value> {
        P(Value {
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: ty.clone(),
            v: Value_::Memory(mem)
        })
    }

    fn make_memory_location_base_offset(&mut self, base: &P<Value>, offset: i64, vm: &VM) -> MemoryLocation {
        if offset == 0 {
            MemoryLocation::VirtualAddress{
                base: base.clone(),
                offset: None,
                scale: 1,
                signed: true,
            }
        } else {
            MemoryLocation::VirtualAddress{
                base: base.clone(),
                offset: Some(self.make_value_int_const(offset as u64, vm)),
                scale: 1,
                signed: true,
            }
        }
    }

    fn emit_mem(&mut self, pv: &P<Value>, f_context: &mut FunctionContext, vm: &VM) -> P<Value> {
        let n = vm.get_backend_type_info(pv.ty.id()).alignment;
        match pv.v {
            Value_::Memory(ref mem) => {
                match mem {
                    &MemoryLocation::VirtualAddress{ref base, ref offset, scale, signed} => {
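                        // Lower the abstract 'base + offset*scale' form into a concrete addressing mode:
                        // either base + immediate offset, or base + register offset shifted by log2(scale).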
                        let mut shift = 0 as u8;
                        let offset =
                            if offset.is_some() {
                                let offset = offset.as_ref().unwrap();
                                if self.match_value_int_imm(offset) {
                                    let mut offset_val = self.value_imm_to_i64(offset);
                                    offset_val *= scale as i64;
                                    if is_valid_immediate_offset(offset_val, n) {
                                        Some(self.make_value_int_const(offset_val as u64, vm))
                                    } else {
                                        let offset = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                        self.emit_mov_u64(&offset, offset_val as u64);
                                        Some(offset)
                                    }
                                } else {
                                    let offset = self.emit_ireg_value(offset, f_context, vm);

                                    // TODO: If scale == n*m (for some m), set shift = n, and multiply index by m
                                    if !is_valid_immediate_scale(scale, n) {
                                        let temp = self.make_temporary(f_context, offset.ty.clone(), vm);

                                        self.emit_mul_u64(&temp, &offset, f_context, vm, scale);
                                        Some(temp)
                                    } else {
                                        shift = log2(scale) as u8;
                                        Some(offset)
                                    }
                                }
                            }
                            else {
                                None
                            };

                        P(Value {
                            hdr: MuEntityHeader::unnamed(vm.next_id()),
                            ty: pv.ty.clone(),
                            v: Value_::Memory(MemoryLocation::Address {
                                base: base.clone(),
                                offset: offset,
                                shift: shift,
                                signed: signed
                            })
                        })
                    }
                    &MemoryLocation::Symbolic{ref label, is_global} => {
                        if is_global {
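                            // Globals are addressed page-relative: ADRP materialises the 4KB page address
                            // and the :lo12: relocation supplies the offset within the page.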
                            let temp = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.backend.emit_adrp(&temp, &pv);
                            // Note: The offset should always be a valid immediate offset
                            // as it is relative to a page boundary (i.e. it is < 4KB)
                            let offset = P(Value {
                                hdr: MuEntityHeader::unnamed(vm.next_id()),
                                ty: UINT64_TYPE.clone(),
                                v: Value_::Constant(Constant::ExternSym(format!(":lo12:{}", label)))
                            });

                            P(Value {
                                hdr: MuEntityHeader::unnamed(vm.next_id()),
                                ty: pv.ty.clone(),
                                v: Value_::Memory(MemoryLocation::Address {
                                    base: temp,
                                    offset: Some(offset),
                                    shift: 0,
                                    signed: false,
                                })
                            })
                        } else {
                            pv.clone()
                        }
                    }
                    _ => pv.clone()
                }
            }
            _ => panic!("expected memory")
        }
    }

    #[warn(unused_variables)] // Same as emit_mem except returns a memory location with only a base
    // NOTE: This code duplicates a lot of code in emit_mem and emit_calculate_address
    fn emit_mem_base(&mut self, pv: &P<Value>, f_context: &mut FunctionContext, vm: &VM) -> P<Value> {
        match pv.v {
            Value_::Memory(ref mem) => {
                let base = match mem {
                    &MemoryLocation::VirtualAddress{ref base, ref offset, scale, signed} => {
                        if offset.is_some() {
                            let offset = offset.as_ref().unwrap();
                            if self.match_value_int_imm(offset) {
                                let offset_val = self.value_imm_to_i64(offset);
                                if offset_val == 0 {
                                    base.clone() // trivial
                                } else {
                                    let temp = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                                    self.emit_add_u64(&temp, &base, f_context, vm, (offset_val * scale as i64) as u64);
                                    temp
                                }
                            } else {
                                let offset = self.emit_ireg_value(offset, f_context, vm);

                                // TODO: If scale == r*m (for some 0 <= m <= 4), multiply offset by r
                                // then use an add_ext(..., m)
                                if scale.is_power_of_two() && is_valid_immediate_extension(log2(scale)) {
                                    let temp = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                                    self.backend.emit_add_ext(&temp, &base, &offset, signed, log2(scale) as u8);
                                    temp
                                } else {
                                    let temp_offset = self.make_temporary(f_context, offset.ty.clone(), vm);

                                    self.emit_mul_u64(&temp_offset, &offset, f_context, vm, scale);

                                    // Don't need to create a new register, just overwrite temp_offset
                                    let temp = unsafe { temp_offset.as_type(ADDRESS_TYPE.clone()) };
                                    // Need to use add_ext, in case offset is 32-bits
                                    self.backend.emit_add_ext(&temp, &base, &temp_offset, signed, 0);
                                    temp
                                }
                            }
                        }
                        else {
                            base.clone() // trivial
                        }
                    }
                    &MemoryLocation::Address{ref base, ref offset, shift, signed} => {
                        if offset.is_some() {
                            let ref offset = offset.as_ref().unwrap();

                            if self.match_value_int_imm(&offset) {
                                let offset = self.value_imm_to_u64(&offset);
                                if offset == 0 {
                                    // Offset is 0, it can be ignored
                                    base.clone()
                                } else {
                                    let temp = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                                    self.emit_add_u64(&temp, &base, f_context, vm, offset as u64);
                                    temp
                                }
                            } else if offset.is_int_reg() {
                                let temp = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                                self.backend.emit_add_ext(&temp, &base, &offset, signed, shift);
                                temp
                            } else {
                                panic!("Offset should be an integer register or a constant")
                            }
                        } else {
                            // Simple base address
                            base.clone()
                        }
                    }
                    &MemoryLocation::Symbolic{ref label, is_global} => {
                        let temp = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                        if is_global {
                            self.backend.emit_adrp(&temp, &pv);
                            self.backend.emit_add_str(&temp, &temp, format!(":lo12:{}", label).as_str());
                        } else {
                            self.backend.emit_adr(&temp, &pv);
                        }
                        temp
                    },
                };

                P(Value {
                    hdr: MuEntityHeader::unnamed(vm.next_id()),
                    ty: pv.ty.clone(),
                    v: Value_::Memory(MemoryLocation::Address {
                        base: base.clone(),
                        offset: None,
                        shift: 0,
                        signed: false
                    })
                })
            }
            _ => panic!("expected memory")
        }
    }

    fn make_memory_location_base_offset_scale(&mut self, base: &P<Value>, offset: &P<Value>, scale: u64, signed: bool) -> MemoryLocation {
        MemoryLocation::VirtualAddress{
            base: base.clone(),
            offset: Some(offset.clone()),
            scale: scale,
            signed: signed
        }
    }

    // Returns a memory location pointing to 'base + (offset+more_offset)*scale'
    /*fn memory_location_adjust_offset(&mut self, mem: MemoryLocation, more_offset: i64, f_context: &mut FunctionContext, vm: &VM) -> MemoryLocation {
        match mem {
            MemoryLocation::VirtualAddress { base, offset, scale, signed } => {
                let offset =
                    if offset.is_some() {
                        let offset = offset.unwrap();
                        if self.match_value_int_imm(&offset) {
                            let offset = offset.extract_int_const() + (more_offset as u64);
                            self.make_value_int_const(offset as u64, vm)
                        } else {
                            let temp = self.make_temporary(f_context, offset.ty.clone(), vm);
                            let offset = self.emit_ireg_value(&offset, f_context, vm);
                            self.emit_add_u64(&temp, &offset, f_context, vm, more_offset as u64);
                            temp
                        }
                    }
                    else {
                        self.make_value_int_const(more_offset as u64, vm)
                    };
                MemoryLocation::VirtualAddress {
                    base: base.clone(),
                    offset: Some(offset),
                    scale: scale,
                    signed: signed,
                }

            },
            _ => panic!("expected a VirtualAddress memory location")
        }
    }*/
    // Returns a memory location that points to 'Base + offset*scale + more_offset'
    fn memory_location_shift(&mut self, mem: MemoryLocation, more_offset: i64, f_context: &mut FunctionContext, vm: &VM) -> MemoryLocation {
        match mem {
            MemoryLocation::VirtualAddress { base, offset, scale, signed } => {
                let mut new_scale = 1;
                let new_offset =
                    if offset.is_some() {
                        let offset = offset.unwrap();
                        if self.match_value_int_imm(&offset) {
                            let offset = offset.extract_int_const()*scale + (more_offset as u64);
                            self.make_value_int_const(offset as u64, vm)
                        } else {
                            let offset = self.emit_ireg_value(&offset, f_context, vm);
                            let temp = self.make_temporary(f_context, offset.ty.clone(), vm);

                            if more_offset % (scale as i64) == 0 {
                                // temp = offset + more_offset/scale
                                self.emit_add_u64(&temp, &offset, f_context, vm, (more_offset/(scale as i64)) as u64);
                                new_scale = scale;
                            } else {
                                // temp = offset*scale + more_offset
                                self.emit_mul_u64(&temp, &offset, f_context, vm, scale);
                                self.emit_add_u64(&temp, &temp, f_context, vm, more_offset as u64);
                            }

                            temp
                        }
                    }
                    else {
                        self.make_value_int_const(more_offset as u64, vm)
                    };

                // if offset was an immediate or more_offset % scale != 0:
                //      new_offset = offset*scale+more_offset
                //      new_scale = 1
                // otherwise:
                //      new_offset = offset + more_offset/scale
                //      new_scale = scale
                // Either way: (new_offset*new_scale) = offset*scale+more_offset
                MemoryLocation::VirtualAddress {
                    base: base.clone(),
                    offset: Some(new_offset),
                    scale: new_scale,
                    signed: signed,
                }

            },
            _ => panic!("expected a VirtualAddress memory location")
        }
    }

    // Returns a memory location that points to 'Base + offset*scale + more_offset*new_scale'
    fn memory_location_shift_scale(&mut self, mem: MemoryLocation, more_offset:  &P<Value>, new_scale: u64, f_context: &mut FunctionContext, vm: &VM) -> MemoryLocation {
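        // If 'more_offset' is a constant it can be folded in with memory_location_shift
        // and the scale then replaced with 'new_scale'; otherwise the offsets are
        // combined into a single register below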
        if self.match_value_int_imm(&more_offset) {
            let more_offset = self.value_imm_to_i64(&more_offset);
            let mem = self.memory_location_shift(mem, more_offset, f_context, vm);
            self.memory_location_append_scale(mem, new_scale)
        } else {
            let mut new_scale = new_scale;
            match mem {
                MemoryLocation::VirtualAddress { base, offset, scale, signed } => {
                    let offset =
                        if offset.is_some() {
                            let offset = offset.unwrap();
                            if self.match_value_int_imm(&offset) {
                                let temp = self.make_temporary(f_context, offset.ty.clone(), vm);
                                let offset_scaled = (offset.extract_int_const() as i64)*(scale as i64);
                                if offset_scaled % (new_scale as i64) == 0 {
                                    self.emit_add_u64(&temp, &more_offset, f_context, vm, (offset_scaled / (new_scale as i64)) as u64);
                                    // new_scale*temp = (more_offset + (offset*scale)/new_scale)
                                    //                = more_offset*new_scale + offset*scale
                                } else {
                                    // temp = more_offset*new_scale + offset*scale
                                    self.emit_mul_u64(&temp, &more_offset, f_context, vm, new_scale);
                                    self.emit_add_u64(&temp, &temp, f_context, vm, offset_scaled as u64);
                                    new_scale = 1;
                                }
                                temp
                            } else {
                                let offset = self.emit_ireg_value(&offset, f_context, vm);
                                let temp = self.make_temporary(f_context, offset.ty.clone(), vm);

                                if new_scale == scale {
                                    // just add the offsets
                                    self.backend.emit_add_ext(&temp, &more_offset, &offset, signed, 0);
                                }  else {
                                    // temp = offset * scale
                                    self.emit_mul_u64(&temp, &offset, f_context, vm, scale);

                                    if new_scale.is_power_of_two() && is_valid_immediate_extension(log2(new_scale)) {
                                        // temp = (offset * scale) + more_offset << log2(new_scale)
                                        self.backend.emit_add_ext(&temp, &temp, &more_offset, signed, log2(new_scale) as u8);
                                    } else {
                                        // temp_more = more_offset * new_scale
                                        let temp_more = self.make_temporary(f_context, offset.ty.clone(), vm);
                                        self.emit_mul_u64(&temp_more, &more_offset, f_context, vm, new_scale);

                                        // temp = (offset * scale) + (more_offset * new_scale);
                                        self.backend.emit_add_ext(&temp, &temp_more, &temp, signed, 0);
                                    }

                                    new_scale = 1;
                                }
                                temp
                            }
                        } else {
                            more_offset.clone()
                        };
                    MemoryLocation::VirtualAddress {
                        base: base.clone(),
                        offset: Some(offset),
                        scale: new_scale,
                        signed: signed,
                    }
                },
                _ => panic!("expected a VirtualAddress memory location")
            }
        }
    }


    // UNUSED
    fn memory_location_append_offset(&mut self, mem: MemoryLocation, new_offset: &P<Value>, new_signed: bool) -> MemoryLocation {
        match mem {
            MemoryLocation::VirtualAddress { base, offset, scale, signed } => {
                self.make_memory_location_base_offset_scale(&base, &new_offset, scale, new_signed)
            },
            _ => panic!("expected an address memory location")
        }
    }

    // UNUSED
    fn memory_location_append_offset_scale(&mut self, mem: MemoryLocation, new_offset: &P<Value>, new_scale: u64, new_signed: bool) -> MemoryLocation {
        match mem {
            MemoryLocation::VirtualAddress { ref base, ref offset, scale, signed } => {
                self.make_memory_location_base_offset_scale(&base, &new_offset, new_scale, new_signed)
            },
            _ => panic!("expected an address memory location")
        }
    }

    // UNUSED
    fn memory_location_append_scale(&mut self, mem: MemoryLocation, new_scale: u64) -> MemoryLocation {
        match mem {
            MemoryLocation::VirtualAddress { ref base, ref offset, scale, signed } => {
                match offset.as_ref() {
                    Some(ref offset) => self.make_memory_location_base_offset_scale(&base, &offset, new_scale, signed),
                    _ => panic!("A scale requires an offset")
                }

            },
            _ => panic!("expected an address memory location")
        }
    }

    // Emits code for a binary operation, computing any requested status flags as extra results
    // TODO: If the RHS of an ADD is negative change it to a SUB (and vice versa)
    // TODO: Treat SUB 0, Op2  and EOR 0, Op2 specially
    // Note: Assume that trivial operations will be optimised away by the Mu IR compiler
    // TODO: Use a shift when dividing or multiplying by a power of two
    fn emit_binop(&mut self, node: &TreeNode, inst: &Instruction, op: BinOp, status: BinOpStatus, op1: OpIndex, op2: OpIndex, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
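        // std is used for std::mem::swap on the operand indices below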
        use std;
        let mut op1 = op1;
        let mut op2 = op2;
        let ops = inst.ops.read().unwrap();
        let res = self.get_result_value(node, 0);

        // Get the size (in bits) of the type the operation is on
        let n = get_bit_size(&res.ty, vm);
        let output_status = status.flag_n || status.flag_z || status.flag_c || status.flag_v;
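        // Requested status flags are returned as additional result values,
        // in N, Z, C, V order after the main result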
        let mut status_value_index = 0;
        // NOTE: XZR is just a dummy value here (it will not be used)
        let tmp_status_n = if status.flag_n {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_z = if status.flag_z {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_c = if status.flag_c {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };
        let tmp_status_v = if status.flag_v {
            status_value_index += 1;
            self.get_result_value(node, status_value_index)
        } else { XZR.clone() };

        // TODO: Division by zero exception (note: must explicitly check for this, ARM doesn't do it)
        // TODO: (Unnecessary??) Check that flags aren't output for instructions that don't support them
        match op {
            // The lower n bits of the result will be correct, and will not depend
            // on the > n bits of op1 or op2
            op::BinOp::Add => {
                let mut imm_val = 0 as u64;
                // Is one of the arguments a valid immediate?
                let emit_imm = if self.match_node_int_imm(&ops[op2]) {
                    imm_val = self.node_imm_to_u64(&ops[op2]);
                    is_valid_arithmetic_imm(imm_val)
                } else if self.match_node_int_imm(&ops[op1]) {
                    imm_val = self.node_imm_to_u64(&ops[op1]);
                    // if op1 is a valid immediate, swap it with op2
                    if is_valid_arithmetic_imm(imm_val) {
                        std::mem::swap(&mut op1, &mut op2);
                        true
                    } else {
                        false
                    }
                } else {
                    false
                };

                if emit_imm {
                    trace!("emit add-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
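                    // AArch64 arithmetic immediates are 12 bits wide with an optional LSL #12,
                    // so larger values are encoded using the shifted form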
                    let imm_shift = imm_val > 4096;
                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };

                    if output_status {
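                        // zero-extend op1 so the n-bit carry/overflow can be read back
                        // out of the wider machine result below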
                        self.emit_zext(&reg_op1);
                        self.backend.emit_adds_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);

                        if status.flag_v {
                            if n < 32 {
                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs

                                // Sign bit of op2 is 0
                                if !get_bit(imm_val, n - 1) {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 0)
                                    self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &reg_op1);
                                } else {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 1)
                                    self.backend.emit_and(&tmp_status_v, &tmp_status_v, &reg_op1);
                                }

                                // Check the sign bit of tmp_status (i.e. tmp_status[n-1])
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }
                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_add_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);
                    }
                } else {
                    trace!("emit add-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    if output_status {
                        self.emit_zext(&reg_op1);
                        if n == 1 {
                            // adds_ext doesn't support extending 1-bit values
                            self.emit_zext(&reg_op2);
                            self.backend.emit_adds(&res, &reg_op1, &reg_op2);
                        } else {
                            // Emit an adds that zero extends op2
                            self.backend.emit_adds_ext(&res, &reg_op1, &reg_op2, false, 0);
                        }

                        if status.flag_v {
                            if n < 32 {
                                let tmp = self.make_temporary(f_context, UINT32_TYPE.clone(), vm);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs
                                self.backend.emit_eor(&tmp, &reg_op1, &reg_op2);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                //      and op1 and op2 have the same sign
                                self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &tmp);

                                // Check tmp_status[n-1]
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }

                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_add(&res, &reg_op1, &reg_op2);
                    }
                }
            },
            op::BinOp::Sub => {
                if self.match_node_int_imm(&ops[op2]) &&
                    is_valid_arithmetic_imm(self.node_imm_to_u64(&ops[op2])) &&

                    // If this were true, the immediate would need to be one-extended,
                    // which would result in an immediate with too many bits
                    !(status.flag_c && n < 32) {
                    // Can't compute the carry bit using a subs_imm instruction
                    trace!("emit sub-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let imm_val = self.node_imm_to_u64(&ops[op2]);
                    let imm_shift = imm_val > 4096;
                    let imm_op2 = if imm_shift { imm_val >> 12 } else { imm_val };

                    if output_status {
                        self.emit_zext(&reg_op1);
                        self.backend.emit_subs_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);

                        if status.flag_v {
                            if n < 32 {
                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and op2 have different signs

                                // Sign bit of op2 is 0
                                if imm_val & (1 << (n - 1)) == 0 {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as -op2 (which is 1)
                                    self.backend.emit_and(&tmp_status_v, &tmp_status_v, &reg_op1);
                                } else {
                                    // tmp_status[n-1] = 1 iff res and op1 have different signs
                                    //      and op1 has the same sign as op2 (which is 0)
                                    self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &reg_op1);
                                }

                                // Check the sign bit of tmp_status (i.e. tmp_status[n-1])
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }

                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_sub_imm(&res, &reg_op1, imm_op2 as u16, imm_shift);
                    }
                } else {
                    trace!("emit sub-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    if output_status {
                        self.emit_zext(&reg_op1);

                        if status.flag_c {
                            // Note: reg_op2 is 'one'-extended so that SUB res, zext(reg_op1), oext(reg_op2)
                            // is equivalent to: ADD res, zext(reg_op1), zext(~reg_op2), +1
                            // (this allows the carry flag to be computed as the 'n'th bit of res)

                            self.emit_oext(&reg_op2);
                            self.backend.emit_subs(&res, &reg_op1, &reg_op2);
                        } else if n == 1 {
                            // if the carry flag isn't being computed, just zero-extend op2
                            self.emit_zext(&reg_op2);
                            self.backend.emit_subs(&res, &reg_op1, &reg_op2);
                        } else {
                            // Emit an subs that zero extends op2
                            self.backend.emit_subs_ext(&res, &reg_op1, &reg_op2, false, 0);
                        }


                        if status.flag_v {
                            if n < 32 {
                                let tmp = self.make_temporary(f_context, UINT32_TYPE.clone(), vm);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                self.backend.emit_eor(&tmp_status_v, &res, &reg_op1);
                                // tmp[n-1] = 1 iff op1 and -op2 have different signs
                                self.backend.emit_eon(&tmp, &reg_op1, &reg_op2);

                                // tmp_status[n-1] = 1 iff res and op1 have different signs
                                //      and op1 and op2 have the same sign
                                self.backend.emit_bic(&tmp_status_v, &tmp_status_v, &tmp);

                                // Check tmp_status[n-1]
                                self.backend.emit_tst_imm(&tmp_status_v, 1 << (n - 1));
                                self.backend.emit_cset(&tmp_status_v, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_v, "VS");
                            }
                        }

                        if status.flag_c {
                            if n < 32 {
                                // Test the carry bit of res
                                self.backend.emit_tst_imm(&res, 1 << n);
                                self.backend.emit_cset(&tmp_status_c, "NE");
                            } else {
                                self.backend.emit_cset(&tmp_status_c, "CS");
                            }
                        }
                    } else {
                        self.backend.emit_sub(&res, &reg_op1, &reg_op2);
                    }
                }
            },

            op::BinOp::And => {
                let mut imm_val = 0 as u64;
                // Is one of the arguments a valid immediate?
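                // Logical (AND/ORR/EOR) immediates must encode as one of AArch64's
                // repeating bit patterns, hence the is_valid_logical_imm check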
                let emit_imm = if self.match_node_int_imm(&ops[op2]) {
                    imm_val = self.node_imm_to_u64(&ops[op2]);
                    is_valid_logical_imm(imm_val, n)
                } else if self.match_node_int_imm(&ops[op1]) {
                    imm_val = self.node_imm_to_u64(&ops[op1]);
                    // if op1 is a valid immediate, swap it with op2
                    if is_valid_logical_imm(imm_val, n) {
                        std::mem::swap(&mut op1, &mut op2);
                        true
                    } else {
                        false
                    }
                } else {
                    false
                };

                if emit_imm {
                    trace!("emit and-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);

                    if output_status {
                        self.backend.emit_ands_imm(&res, &reg_op1, replicate_logical_imm(imm_val, n));
                    } else {
                        self.backend.emit_and_imm(&res, &reg_op1, replicate_logical_imm(imm_val, n));
                    }
                } else {
                    trace!("emit and-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    if output_status {
                        self.backend.emit_ands(&res, &reg_op1, &reg_op2);
                    } else {
                        self.backend.emit_and(&res, &reg_op1, &reg_op2);
                    }
                }
            },
            op::BinOp::Or => {
                let mut imm_val = 0 as u64;
                // Is one of the arguments a valid immediate?
                let emit_imm = if self.match_node_int_imm(&ops[op2]) {
                    imm_val = self.node_imm_to_u64(&ops[op2]);
                    is_valid_logical_imm(imm_val, n)
                } else if self.match_node_int_imm(&ops[op1]) {
                    imm_val = self.node_imm_to_u64(&ops[op1]);
                    // if op1 is a valid immediate, swap it with op2
                    if is_valid_logical_imm(imm_val, n) {
                        std::mem::swap(&mut op1, &mut op2);
                        true
                    } else {
                        false
                    }
                } else {
                    false
                };

                if emit_imm {
                    trace!("emit or-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);

                    self.backend.emit_orr_imm(&res, &reg_op1, replicate_logical_imm(imm_val, n));
                } else {
                    trace!("emit or-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    self.backend.emit_orr(&res, &reg_op1, &reg_op2);
                }
            },
            op::BinOp::Xor => {
                let mut imm_val = 0 as u64;
                // Is one of the arguments a valid immediate?
                let emit_imm = if self.match_node_int_imm(&ops[op2]) {
                    imm_val = self.node_imm_to_u64(&ops[op2]);
                    is_valid_logical_imm(imm_val, n)
                } else if self.match_node_int_imm(&ops[op1]) {
                    imm_val = self.node_imm_to_u64(&ops[op1]);
                    // if op1 is a valid immediate, swap it with op2
                    if is_valid_logical_imm(imm_val, n) {
                        std::mem::swap(&mut op1, &mut op2);
                        true
                    } else {
                        false
                    }
                } else {
                    false
                };

                if emit_imm {
                    trace!("emit xor-ireg-imm");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);

                    self.backend.emit_eor_imm(&res, &reg_op1, replicate_logical_imm(imm_val, n));
                } else {
                    trace!("emit xor-ireg-ireg");

                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                    self.backend.emit_eor(&res, &reg_op1, &reg_op2);
                }
            },

            op::BinOp::Mul => {
                trace!("emit mul-ireg-ireg");

                let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

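                // zero-extend both operands so the product of narrow values is exact
                // and its upper bits can be checked for overflow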
                self.emit_zext(&reg_op1);
                self.emit_zext(&reg_op2);

                if status.flag_c || status.flag_v {
                    if n < 32 {
                        // A normal multiply will give the correct upper 'n' bits
                        self.backend.emit_mul(&res, &reg_op1, &reg_op2);
                        // Test the upper 'n' bits of the result
                        self.backend.emit_tst_imm(&res, (bits_ones(n) << n));
                    } else if n == 32 {
                        // the 64-bit register version of res
                        let res_64 = unsafe { &res.as_type(UINT64_TYPE.clone()) };
                        // Compute the full 64-bit product of reg_op1 and reg_op2
                        self.backend.emit_umull(&res_64, &reg_op1, &reg_op2);
                        // Test the upper n bits of the result
                        self.backend.emit_tst_imm(&res, 0xFFFFFFFF00000000);
                    } else if n == 64 {
                        // Compute the upper 64-bits of the true product
                        self.backend.emit_umulh(&res, &reg_op1, &reg_op2);
                        // Test the 64-bits of res
                        self.backend.emit_tst_imm(&res, 0xFFFFFFFFFFFFFFFF);
                        // Get the lower 64-bits of the true product
                        self.backend.emit_mul(&res, &reg_op1, &reg_op2);
                    } else {
                        panic!("Unexpeceded integer length {}", n);
                    }

                    // Flags C and V are the same
                    if status.flag_c {
                        self.backend.emit_cset(&tmp_status_c, "NE");
                    }

                    if status.flag_v {
                        self.backend.emit_cset(&tmp_status_v, "NE");
                    }
                } else {
                    // Just do a normal multiply
                    self.backend.emit_mul(&res, &reg_op1, &reg_op2);
                }

                if status.flag_n || status.flag_z {
                    self.emit_sext(&res);
                    self.backend.emit_cmp_imm(&res, 0, false);

                    if status.flag_n {
                        self.backend.emit_cset(&tmp_status_n, "MI");
                    }

                    if status.flag_z {
                        self.backend.emit_cset(&tmp_status_z, "EQ");
                    }
                }
            },
            op::BinOp::Udiv => {
                trace!("emit mul-ireg-ireg");

                let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                // zero extend both arguments (in case they are less than 32 bits)
                self.emit_zext(&reg_op1);
                self.emit_zext(&reg_op2);
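                // note: AArch64 UDIV yields 0 on division by zero rather than trapping
                // (see the division-by-zero TODO above)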
                self.backend.emit_udiv(&res, &reg_op1, &reg_op2);
            },
            op::BinOp::Sdiv => {
                trace!("emit mul-ireg-ireg");

                let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                // sign extend both arguments (in case they are less than 32 bits)