use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::OpCode;
use ast::types;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;
use compiler::backend;
use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::x86_64;
use compiler::backend::x86_64::CodeGenerator;
use compiler::backend::x86_64::ASMCodeGen;
use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

pub struct InstructionSelection {
    name: &'static str,
    backend: Box<CodeGenerator>,
    
    current_callsite_id: usize,
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsite that names the block as exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>     
}

impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection{
            name: "Instruction Selection (x64)",
            backend: Box::new(ASMCodeGen::new()),
            
            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_func_start: None,
            // key: block id, val: callsite that names the block as exception block
            current_exn_callsites: HashMap::new(), 
            current_exn_blocks: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }
    
    // in this pass, we assume that
    // 1. all temporaries will use 64bit registers
    // 2. we do not need to backup/restore caller-saved registers
    // 3. we need to backup/restore all the callee-saved registers
    // if any of these assumptions breaks, we will need to re-emit the code
    #[allow(unused_variables)]
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node {}", node);
        
        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    Instruction_::Branch2{cond, ref true_dest, ref false_dest, true_prob} => {
                        // if 'branch_if_true' is true,  we emit the cjmp that matches the CmpOp   (je  for EQ, jne for NE)
                        // if 'branch_if_true' is false, we emit the opposite cjmp                 (jne for EQ, je  for NE)
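                        // e.g. Branch2(EQ(a, b), %t, %f) with true_prob > 0.5 falls through to %t
                        // and emits 'jne %f', per the rule above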
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            if true_prob > 0.5f32 {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };
                        
                        let ops = inst.ops.read().unwrap();
                        
                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);
                        
                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();
    
                        let ref cond = ops[cond];
                        
                        if self.match_cmp_res(cond) {
                            trace!("emit cmp_eq-branch2");
                            match self.emit_cmp_res(cond, f_content, f_context, vm) {
                                op::CmpOp::EQ => {
                                    if branch_if_true {
                                        self.backend.emit_je(branch_target);
                                    } else {
                                        self.backend.emit_jne(branch_target);
                                    }
                                },
                                op::CmpOp::NE => {
                                    if branch_if_true {
                                        self.backend.emit_jne(branch_target);
                                    } else {
                                        self.backend.emit_je(branch_target);
                                    }
                                },
                                op::CmpOp::UGE => {
                                    if branch_if_true {
                                        self.backend.emit_jae(branch_target);
                                    } else {
                                        self.backend.emit_jb(branch_target);
                                    }
                                },
                                op::CmpOp::UGT => {
                                    if branch_if_true {
                                        self.backend.emit_ja(branch_target);
                                    } else {
                                        self.backend.emit_jbe(branch_target);
                                    }
                                },
                                op::CmpOp::ULE => {
                                    if branch_if_true {
                                        self.backend.emit_jbe(branch_target);
                                    } else {
                                        self.backend.emit_ja(branch_target);
                                    }
                                },
                                op::CmpOp::ULT => {
                                    if branch_if_true {
                                        self.backend.emit_jb(branch_target);
                                    } else {
                                        self.backend.emit_jae(branch_target);
                                    }
                                },
                                op::CmpOp::SGE => {
                                    if branch_if_true {
                                        self.backend.emit_jge(branch_target);
                                    } else {
                                        self.backend.emit_jl(branch_target);
                                    }
                                },
                                op::CmpOp::SGT => {
                                    if branch_if_true {
                                        self.backend.emit_jg(branch_target);
                                    } else {
                                        self.backend.emit_jle(branch_target);
                                    }
                                },
                                op::CmpOp::SLE => {
                                    if branch_if_true {
                                        self.backend.emit_jle(branch_target);
                                    } else {
                                        self.backend.emit_jg(branch_target);
                                    }
                                },
                                op::CmpOp::SLT => {
                                    if branch_if_true {
                                        self.backend.emit_jl(branch_target);
                                    } else {
                                        self.backend.emit_jge(branch_target);
                                    }
                                },
                                _ => unimplemented!()
                            }
                        } else if self.match_ireg(cond) {
                            trace!("emit ireg-branch2");
                            
                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);
                            
                            // emit: cmp cond_reg 1
                            self.backend.emit_cmp_imm32_r64(1, &cond_reg);
                            // emit: je #branch_dest
                            self.backend.emit_je(branch_target);
                        } else {
                            unimplemented!();
                        }
                    },
                    
                    Instruction_::Branch1(ref dest) => {
                        let ops = inst.ops.read().unwrap();
                                            
                        self.process_dest(&ops, dest, f_content, f_context, vm);
                        
                        let target = f_content.get_block(dest.target).name().unwrap();
                        
                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_jmp(target);
                    },

                    Instruction_::Switch{cond, ref default, ref branches} => {
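                        // the switch is lowered as a linear cmp/je chain (one compare per case),
                        // ending with an unconditional jmp to the default block; no jump table is built here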
                        let ops = inst.ops.read().unwrap();

                        let ref cond = ops[cond];

                        if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);

                            // emit each branch
                            for &(case_op_index, ref case_dest) in branches {
                                let ref case_op = ops[case_op_index];

                                // process dest
                                self.process_dest(&ops, case_dest, f_content, f_context, vm);

                                let target = f_content.get_block(case_dest.target).name().unwrap();

                                if self.match_iimm(case_op) {
                                    let imm = self.node_iimm_to_i32(case_op);

                                    // cmp case cond
                                    self.backend.emit_cmp_imm32_r64(imm, &tmp_cond);
                                    // je dest
                                    self.backend.emit_je(target);
                                } else if self.match_ireg(case_op) {
                                    let tmp_case_op = self.emit_ireg(case_op, f_content, f_context, vm);

                                    // cmp case cond
                                    self.backend.emit_cmp_r64_r64(&tmp_case_op, &tmp_cond);
                                    // je dest
                                    self.backend.emit_je(target);
                                } else {
                                    panic!("expecting ireg cond to be either iimm or ireg: {}", cond);
                                }
                            }

                            // emit default
                            self.process_dest(&ops, default, f_content, f_context, vm);
                            
                            let default_target = f_content.get_block(default.target).name().unwrap();
                            self.backend.emit_jmp(default_target);
                        } else {
                            panic!("expecting cond in switch to be ireg: {}", cond);
                        }
                    }
                    
                    Instruction_::ExprCall{ref data, is_abort} => {
                        if is_abort {
                            unimplemented!()
                        }
                        
                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode, 
                            f_content, f_context, vm);
                    },
                    
                    Instruction_::Call{ref data, ref resume} => {
                        self.emit_mu_call(
                            inst, 
                            data, 
                            Some(resume), 
                            node, 
                            f_content, f_context, vm);
                    }
                    
                    Instruction_::Return(_) => {
                        self.emit_common_epilogue(inst, f_content, f_context, vm);
                        
                        self.backend.emit_ret();
                    },
                    
                    Instruction_::BinOp(op, op1, op2) => {
                        let ops = inst.ops.read().unwrap();

                        let res_tmp = self.get_result_value(node);
                        
                        match op {
                            op::BinOp::Add => {
                                if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit add-ireg-imm");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.node_iimm_to_i32(&ops[op2]);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_imm32(&res_tmp, reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-ireg-mem");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_mem(&ops[op2], vm);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2 res
                                    self.backend.emit_add_r64_mem64(&res_tmp, &reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2 res
                                    self.backend.emit_add_r64_r64(&res_tmp, &reg_op2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Sub => {
                                if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit sub-ireg-imm");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(&ops[op2]);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit sub-ireg-mem");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2], vm);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2 res
                                    self.backend.emit_sub_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_r64(&res_tmp, &reg_op2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::And => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) && self.match_iimm(op2) {
                                    trace!("emit and-ireg-iimm");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // mov op1 -> res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_ireg(op1) && self.match_mem(op2) {
                                    trace!("emit and-ireg-mem");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_ireg(op1) && self.match_ireg(op2) {
                                    trace!("emit and-ireg-ireg");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_r64(&res_tmp, &tmp_op2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Xor => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) && self.match_iimm(op2) {
                                    trace!("emit xor-ireg-iimm");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // mov op1 -> res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_ireg(op1) && self.match_mem(op2) {
                                    trace!("emit xor-ireg-mem");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_ireg(op1) && self.match_ireg(op2) {
                                    trace!("emit xor-ireg-ireg");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_r64(&res_tmp, &tmp_op2);
                                } else {
                                    unimplemented!()
                                }
                            }
                            op::BinOp::Mul => {
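                                // x86 'mul r/m64' computes RDX:RAX = RAX * operand; we stage op1 in RAX,
                                // multiply by op2, then take the low 64 bits of the product from RAX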
                                // mov op1 -> rax
                                let rax = x86_64::RAX.clone();
                                let op1 = &ops[op1];
                                if self.match_iimm(op1) {
                                    let imm_op1 = self.node_iimm_to_i32(op1);
                                    
                                    self.backend.emit_mov_r64_imm32(&rax, imm_op1);
                                } else if self.match_mem(op1) {
                                    let mem_op1 = self.emit_mem(op1, vm);
                                    
                                    self.backend.emit_mov_r64_mem64(&rax, &mem_op1);
                                } else if self.match_ireg(op1) {
                                    let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    self.backend.emit_mov_r64_r64(&rax, &reg_op1);
                                } else {
                                    unimplemented!();
                                }
                                
                                // mul op2 -> rax
                                let op2 = &ops[op2];
                                if self.match_iimm(op2) {
                                    let imm_op2 = self.node_iimm_to_i32(op2);
                                    
                                    // put imm in a temporary
                                    // here we use result reg as temporary
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm_op2);
                                    
                                    self.backend.emit_mul_r64(&res_tmp);
                                } else if self.match_mem(op2) {
                                    let mem_op2 = self.emit_mem(op2, vm);
                                    
                                    self.backend.emit_mul_mem64(&mem_op2);
                                } else if self.match_ireg(op2) {
                                    let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    self.backend.emit_mul_r64(&reg_op2);
                                } else {
                                    unimplemented!();
                                }
                                
                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &rax);
                            },
                            op::BinOp::Udiv => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];
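                                // x86 div/idiv divide RDX:RAX by the operand, leaving the quotient in RAX
                                // and the remainder in RDX; Udiv/Sdiv below read RAX, Urem/Srem read RDX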

                                self.emit_udiv(op1, op2, f_content, f_context, vm);

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RAX);
                            },
                            op::BinOp::Sdiv => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_idiv(op1, op2, f_content, f_context, vm);

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RAX);
                            },
                            op::BinOp::Urem => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_udiv(op1, op2, f_content, f_context, vm);

                                // mov rdx -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RDX);
                            },
                            op::BinOp::Srem => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_idiv(op1, op2, f_content, f_context, vm);

                                // mov rdx -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RDX);
                            },

                            op::BinOp::Shl => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_mem(op1) {
                                    unimplemented!()
                                } else if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // shl op1, op2 -> op1
                                        self.backend.emit_shl_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);
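                                        // x86 variable-count shifts (shl/shr/sar r64, cl) take the count in CL,
                                        // which is why op2 is staged in RCX before the shift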

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // shl op1, cl -> op1
                                        self.backend.emit_shl_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else {
                                    panic!("unexpected op1 (not ireg not mem): {}", op1);
                                }
                            },
                            op::BinOp::Lshr => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_mem(op1) {
                                    unimplemented!()
                                } else if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // shr op1, op2 -> op1
                                        self.backend.emit_shr_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // shr op1, cl -> op1
                                        self.backend.emit_shr_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else {
                                    panic!("unexpected op1 (not ireg not mem): {}", op1);
                                }
                            },
                            op::BinOp::Ashr => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_mem(op1) {
                                    unimplemented!()
                                } else if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // sar op1, op2 -> op1
                                        self.backend.emit_sar_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // sar op1, cl -> op1
                                        self.backend.emit_sar_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else {
                                    panic!("unexpected op1 (not ireg not mem): {}", op1);
                                }
                            },


                            // floating point
                            op::BinOp::FAdd => {
                                if self.match_fpreg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-fpreg-mem");

                                    let reg_op1 = self.emit_fpreg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2], vm);

                                    // mov op1, res
                                    self.backend.emit_movsd_f64_f64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_addsd_f64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_fpreg(&ops[op1]) && self.match_fpreg(&ops[op2]) {
                                    trace!("emit add-fpreg-fpreg");

                                    let reg_op1 = self.emit_fpreg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_fpreg(&ops[op2], f_content, f_context, vm);

                                    // movsd op1, res
                                    self.backend.emit_movsd_f64_f64(&res_tmp, &reg_op1);
                                    // add op2 res
                                    self.backend.emit_addsd_f64_f64(&res_tmp, &reg_op2);
                                } else {
                                    unimplemented!()
                                }
                            }
                            
                            _ => unimplemented!()
                        }

                        // truncate result
                        if res_tmp.is_int_reg() {
                            self.emit_truncate_result(&UINT64_TYPE, &res_tmp.ty, &res_tmp, f_context, vm);
                        }
                    }

                    Instruction_::ConvOp{operation, ref from_ty, ref to_ty, operand} => {
                        let ops = inst.ops.read().unwrap();

                        let ref op = ops[operand];

                        let extract_int_len = |x: &P<MuType>| {
                            match x.v {
                                MuType_::Int(len) => len,
                                _ => panic!("only expect int types, found: {}", x)
                            }
                        };

                        match operation {
                            op::ConvOp::TRUNC => {
                                // currently only use 64bits register
                                // so only keep what is needed in the register (set others to 0)

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // truncate result
                                    self.emit_truncate_result(from_ty, to_ty, &tmp_res, f_context, vm);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }
                            }
                            op::ConvOp::ZEXT => {
                                // currently only use 64bits register
                                // so set irrelevant bits to 0
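                                // e.g. ZEXT i8 -> i64: 'and r64, 0xFF' clears bits 8..63 of the 64-bit temporary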
                                let from_ty_len = extract_int_len(from_ty);
                                let to_ty_len   = extract_int_len(to_ty);

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    let mask = match from_ty_len {
                                        8  => 0xFFi32,
                                        16 => 0xFFFFi32,
                                        32 => 0xFFFFFFFFi32,
                                        _ => unimplemented!()
                                    };

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // and mask result -> result
                                    self.backend.emit_and_r64_imm32(&tmp_res, mask);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }
                            },
                            op::ConvOp::SEXT => {
                                // currently only use 64bits register
                                // we left shift the value, then arithmetic right shift back
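                                // e.g. SEXT i8 -> i64: shift = 56, so 0x80 becomes 0x8000_0000_0000_0000 after shl
                                // and 0xFFFF_FFFF_FFFF_FF80 after sar, replicating the sign bit into the upper bits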
                                let from_ty_len = extract_int_len(from_ty);
                                let to_ty_len   = extract_int_len(to_ty);

                                let shift : i8 = (to_ty_len - from_ty_len) as i8;

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // shl result, shift -> result
                                    self.backend.emit_shl_r64_imm8(&tmp_res, shift);

                                    // sar result, shift -> result
                                    self.backend.emit_sar_r64_imm8(&tmp_res, shift);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op)
                                }
                            }

                            _ => unimplemented!()
                        }
                    }
                    
                    // load on x64 generates mov inst (no matter what order is specified)
                    // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
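                    // (under x86-64's TSO model a plain mov load already provides acquire semantics;
                    //  per the mapping above, seq_cst ordering is carried by the store side)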
                    Instruction_::Load{is_ptr, order, mem_loc} => {
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];
                        
                        // check order
                        match order {
                            MemoryOrder::Relaxed 
                            | MemoryOrder::Consume 
                            | MemoryOrder::Acquire
                            | MemoryOrder::SeqCst
                            | MemoryOrder::NotAtomic => {},
                            _ => panic!("didn't expect order {:?} with load inst", order)
                        }                        

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, vm);
                        let res_temp = self.get_result_value(node);
                        
                        if self.match_ireg(node) {
                            // emit mov(GPR)
                            self.backend.emit_mov_r64_mem64(&res_temp, &resolved_loc);
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }
                    
                    Instruction_::Store{is_ptr, order, mem_loc, value} => {
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];
                        
                        let generate_plain_mov : bool = {
                            match order {
                                MemoryOrder::Relaxed
                                | MemoryOrder::Release
                                | MemoryOrder::NotAtomic => true,
                                MemoryOrder::SeqCst => false,
                                _ => panic!("didn't expect order {:?} with store inst", order)
                            }
                        };
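                        // (per the mapping cited above, a seq_cst store would need xchg or mov+mfence;
                        //  only the plain-mov cases are handled so far)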
                        
                        let resolved_loc = self.emit_node_addr_to_value(loc_op, vm);
                        
                        if self.match_ireg(val_op) {
                            let val = self.emit_ireg(val_op, f_content, f_context, vm);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_r64(&resolved_loc, &val);
                            } else {
                                unimplemented!()
                            }
                        } else if self.match_iimm(val_op) {
                            let val = self.node_iimm_to_i32(val_op);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_imm32(&resolved_loc, val);
                            } else {
                                unimplemented!()
                            }
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }
                    
                    Instruction_::GetIRef(op_index) => {
                        let ops = inst.ops.read().unwrap();
                        
                        let ref op = ops[op_index];
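                        // an iref is the object reference offset by the header size: with a zero-sized
                        // header this is a plain move, otherwise a lea with the header offset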
                        let res_tmp = self.get_result_value(node);
                        
                        let hdr_size = mm::objectmodel::OBJECT_HEADER_SIZE;
                        if hdr_size == 0 {
                            self.emit_move_node_to_value(&res_tmp, &op, f_content, f_context, vm);
                        } else {
                            self.emit_lea_base_offset(&res_tmp, &op.clone_value(), hdr_size as i32, vm);
                        }
                    }
                    
                    Instruction_::ThreadExit => {
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)
                        
                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                        self.backend.emit_add_r64_imm32(&tl, *thread::NATIVE_SP_LOC_OFFSET as i32);
                        
                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_content, f_context, vm);
                    }
                    
                    Instruction_::New(ref ty) => {
                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_size = ty_info.size;
                        let ty_align= ty_info.alignment;
                        
                        if ty_size > mm::LARGE_OBJECT_THRESHOLD {
                            // emit large object allocation
                            unimplemented!()
                        } else {
                            // emit immix allocation fast path
                            
                            // ASM: %tl = get_thread_local()
                            let tmp_tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                            
                            // ASM: mov [%tl + allocator_offset + cursor_offset] -> %cursor
                            let cursor_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_CURSOR_OFFSET;
                            let tmp_cursor = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_load_base_offset(&tmp_cursor, &tmp_tl, cursor_offset as i32, vm);
                            
                            // alignup cursor (cursor + align - 1 & !(align - 1))
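                            // e.g. cursor = 0x1007, align = 8: lea 7(%cursor) gives 0x100e, and !(align-1) masks it to 0x1008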
                            // ASM: lea align-1(%cursor) -> %start
                            let align = ty_info.alignment as i32;
                            let tmp_start = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_start, &tmp_cursor, align - 1, vm);
                            // ASM: and %start, !(align-1) -> %start
                            self.backend.emit_and_r64_imm32(&tmp_start, !(align - 1) as i32);
                            
                            // bump cursor
                            // ASM: lea size(%start) -> %end
                            let tmp_end = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_end, &tmp_start, ty_size as i32, vm);
                            
                            // check with limit
                            // ASM: cmp %end, [%tl + allocator_offset + limit_offset]
                            let limit_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_LIMIT_OFFSET;
                            let mem_limit = self.make_memory_op_base_offset(&tmp_tl, limit_offset as i32, ADDRESS_TYPE.clone(), vm);
                            self.backend.emit_cmp_mem64_r64(&mem_limit, &tmp_end);
                            
                            // branch to slow path if end > limit (end - limit > 0)
                            // ASM: jg alloc_slow
                            let slowpath = format!("{}_allocslow", node.id());
                            self.backend.emit_jg(slowpath.clone());
                            
                            // update cursor
                            // ASM: mov %end -> [%tl + allocator_offset + cursor_offset]
                            self.emit_store_base_offset(&tmp_tl, cursor_offset as i32, &tmp_end, vm);
                            
                            // put start as result
                            // ASM: mov %start -> %result
                            let tmp_res = self.get_result_value(node);
                            self.backend.emit_mov_r64_r64(&tmp_res, &tmp_start);
                            
                            // ASM jmp alloc_end
                            let allocend = format!("{}_allocend", node.id());
                            self.backend.emit_jmp(allocend.clone());
                            
                            // finishing current block
                            self.backend.end_block(self.current_block.as_ref().unwrap().clone());
                            
                            // alloc_slow: 
                            // call alloc_slow(size, align) -> %ret
                            // new block (no livein)
                            self.current_block = Some(slowpath.clone());
                            self.backend.start_block(slowpath.clone());
                            self.backend.set_block_livein(slowpath.clone(), &vec![]); 

                            // arg1: allocator address                            
                            let allocator_offset = *thread::ALLOCATOR_OFFSET;
                            let tmp_allocator = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_allocator, &tmp_tl, allocator_offset as i32, vm);
                            // arg2: size                            
                            let const_size = self.make_value_int_const(ty_size as u64, vm);
                            // arg3: align
                            let const_align= self.make_value_int_const(ty_align as u64, vm);
                            
                            let rets = self.emit_runtime_entry(
                                &entrypoints::ALLOC_SLOW,
                                vec![tmp_allocator, const_size, const_align],
                                Some(vec![
                                    tmp_res.clone()
                                ]),
                                Some(node), f_content, f_context, vm
                            );
                            
                            // end block (no liveout other than result)
                            self.backend.end_block(slowpath.clone());
                            self.backend.set_block_liveout(slowpath.clone(), &vec![tmp_res.clone()]);
                            
                            // block: alloc_end
                            self.backend.start_block(allocend.clone());
                            self.current_block = Some(allocend.clone());
                        }
                    }
                    
                    Instruction_::Throw(op_index) => {
                        let ops = inst.ops.read().unwrap();
                        let ref exception_obj = ops[op_index];
                        
                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION, 
                            vec![exception_obj.clone_value()], 
                            None,
                            Some(node), f_content, f_context, vm);
                    }
    
                    _ => unimplemented!()
                } // main switch
            },
            
            TreeNode_::Value(ref p) => {
        
            }
        }
    }
    
    fn make_temporary(&mut self, f_context: &mut FunctionContext, ty: P<MuType>, vm: &VM) -> P<Value> {
        f_context.make_temporary(vm.next_id(), ty).clone_value()
    }
    
    fn make_memory_op_base_offset (&mut self, base: &P<Value>, offset: i32, ty: P<MuType>, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: ty.clone(),
            v: Value_::Memory(MemoryLocation::Address{
                base: base.clone(),
                offset: Some(self.make_value_int_const(offset as u64, vm)),
                index: None,
                scale: None
            })
        })
    }
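    // For illustration: with base = RSP and offset = -8 this builds the operand
    // [rsp - 8]; the displacement travels as an int constant and the index/scale
    // fields stay unused, so the code generator only has to emit a plain
    // base+displacement address.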
    
    fn make_value_int_const (&mut self, val: u64, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: UINT64_TYPE.clone(),
            v: Value_::Constant(Constant::Int(val))
        })
    } 

    fn emit_truncate_result (&mut self, from_ty: &P<MuType>, to_ty: &P<MuType>, op: &P<Value>, f_context: &mut FunctionContext, vm: &VM) {
        // currently we only use 64-bit registers,
        // so keep only the bits that are needed and zero out the rest
        let from_ty_len = match from_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", from_ty)
        };
        let to_ty_len   = match to_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", to_ty)
        };

        if from_ty_len == to_ty_len {
            return;
        } else {
            debug_assert!(from_ty_len > to_ty_len);

            if to_ty_len < 32 {
                // ignoring from_ty for now (we use 64bits register for everything)
                let mask = match to_ty_len {
                    8 => 0xFFi32,
                    16 => 0xFFFFi32,
                    _ => unimplemented!()
                };

                // and mask, result -> result
                self.backend.emit_and_r64_imm32(&op, mask);
            } else if to_ty_len == 32 {
                let tmp_mask = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);

                self.backend.emit_mov_r64_imm64(&tmp_mask, 0xFFFFFFFF as i64);

                self.backend.emit_and_r64_r64(&op, &tmp_mask);
            } else {
                unimplemented!()
            }
        }
    }
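    // A worked example of the masking above (rax stands in for whatever register
    // holds the operand):
    //   truncating to int<8>:   and rax, 0xFF        ; keeps only the low 8 bits
    //   truncating to int<32>:  mov tmp, 0xFFFFFFFF  ; mask built in a temporary,
    //                           and rax, tmp         ; because the `and r64, imm32`
    //                                                ; form sign-extends its immediate
    //                                                ; to 64 bits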

    fn emit_sign_extend_operand (&mut self, from_ty: &P<MuType>, to_ty: &P<MuType>, op: &P<Value>) {
        // currently we only use 64-bit registers
        // sign-extend by shifting the value left until its sign bit sits at the top of
        // the destination width, then arithmetic-shifting it back down
        let from_ty_len = match from_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", from_ty)
        };
        let to_ty_len   = match to_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", to_ty)
        };

        if from_ty_len == to_ty_len {
            return;
        } else {
            debug_assert!(to_ty_len > from_ty_len);

            let shift : i8 = (to_ty_len - from_ty_len) as i8;

            // shl result, shift -> result
            self.backend.emit_shl_r64_imm8(&op, shift);
            // sar result, shift -> result
            self.backend.emit_sar_r64_imm8(&op, shift);
        }
    }
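    // For example, sign-extending int<32> -> int<64> uses shift = 32:
    //   rax = 0x00000000_FFFFFFFE   (-2 as int<32>)
    //   shl rax, 32  ->  0xFFFFFFFE_00000000
    //   sar rax, 32  ->  0xFFFFFFFF_FFFFFFFE   (-2 as int<64>)
    // The 64-bit shl/sar pair propagates the sign all the way to bit 63, so the result
    // is exact when to_ty is the full register width, as in the emit_idiv calls below.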

    fn emit_load_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, dest.ty.clone(), vm);

        if dest.is_int_reg() {
            self.backend.emit_mov_r64_mem64(dest, &mem);
        } else if dest.is_fp_reg() {
            self.backend.emit_movsd_f64_mem64(dest, &mem);
        } else {
            unimplemented!();
        }
    }
    
    fn emit_store_base_offset (&mut self, base: &P<Value>, offset: i32, src: &P<Value>, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, src.ty.clone(), vm);
        
        self.backend.emit_mov_mem64_r64(&mem, src);
    }
    
    fn emit_lea_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, ADDRESS_TYPE.clone(), vm);
        
        self.backend.emit_lea_r64(dest, &mem);
    }
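    // lea computes base + offset into dest without touching memory; the allocation
    // slowpath above uses it to derive the allocator's address from the thread-local
    // pointer rather than loading from that address.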

    fn emit_udiv (
        &mut self,
        op1: &P<TreeNode>, op2: &P<TreeNode>,
        f_content: &FunctionContent,
        f_context: &mut FunctionContext,
        vm: &VM)
    {
        let rax = x86_64::RAX.clone();

        debug_assert!(self.match_ireg(op1));
        let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);
        self.emit_move_value_to_value(&rax, &reg_op1);

        // xorq rdx, rdx -> rdx
        let rdx = x86_64::RDX.clone();
        self.backend.emit_xor_r64_r64(&rdx, &rdx);

        // div op2
        if self.match_mem(op2) {
            let mem_op2 = self.emit_mem(op2, vm);

            self.backend.emit_div_mem64(&mem_op2);
        } else if self.match_iimm(op2) {
            let imm = self.node_iimm_to_i32(op2);
            // moving to a temp
            let temp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
            self.backend.emit_mov_r64_imm32(&temp, imm);

            // div tmp
            self.backend.emit_div_r64(&temp);
        } else if self.match_ireg(op2) {
            let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

            self.backend.emit_div_r64(&reg_op2);
        } else {
            unimplemented!();
        }
    }
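    // udiv sketch: x86-64 DIV divides the 128-bit value rdx:rax by its operand and
    // leaves the quotient in rax and the remainder in rdx. The dividend here is at
    // most 64 bits, hence the xor that zeroes rdx first. E.g. op1 = 7, op2 = 2:
    //   rax = 7, rdx = 0, div op2  ->  rax = 3 (quotient), rdx = 1 (remainder)
    // Whichever of rax/rdx the surrounding instruction needs can then be copied into
    // its result temporary.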

    fn emit_idiv (
        &mut self,
        op1: &P<TreeNode>, op2: &P<TreeNode>,
        f_content: &FunctionContent,
        f_context: &mut FunctionContext,
        vm: &VM)
    {
        let rax = x86_64::RAX.clone();

        debug_assert!(self.match_ireg(op1));
        let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);
        self.emit_move_value_to_value(&rax, &reg_op1);

        // sign extend rax
        self.emit_sign_extend_operand(&reg_op1.ty, &UINT64_TYPE, &rax);

        // cqo
        self.backend.emit_cqo();

        // idiv op2
        if self.match_mem(op2) {
            let mem_op2 = self.emit_mem(op2, vm);
            self.backend.emit_idiv_mem64(&mem_op2);

            // the memory operand still needs to be sign extended before the idiv;
            // this case is not handled yet
            unimplemented!()
        } else if self.match_iimm(op2) {
            let imm = self.node_iimm_to_i32(op2);
            // moving to a temp
            let temp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
            self.backend.emit_mov_r64_imm32(&temp, imm);

            // idiv temp
            self.backend.emit_idiv_r64(&temp);
        } else if self.match_ireg(op2) {
            let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

            self.emit_sign_extend_operand(&reg_op2.ty, &UINT64_TYPE, &reg_op2);

            self.backend.emit_idiv_r64(&reg_op2);
        } else {
            unimplemented!();
        }
    }
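    // idiv sketch: cqo sign-extends rax into rdx, forming the signed 128-bit dividend
    // rdx:rax, and IDIV then leaves the quotient in rax and the remainder in rdx.
    // E.g. op1 = -7, op2 = 2:
    //   rax = -7, cqo -> rdx = all ones, idiv op2 -> rax = -3, rdx = -1
    // (x86 signed division truncates toward zero, so the remainder takes the sign of
    // the dividend.)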
    
    fn emit_get_threadlocal (
        &mut self, 
        cur_node: Option<&TreeNode>,
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> P<Value> {
        let mut rets = self.emit_runtime_entry(&entrypoints::GET_THREAD_LOCAL, vec![], None, cur_node, f_content, f_context, vm);
        
        rets.pop().unwrap()
    }
    
    // rets: Option<Vec<P<Value>>>
    // if rets is Some, return values will be stored in the given temporaries;
    // otherwise temporaries are created
    // always returns the result temporaries (given or created)
    fn emit_runtime_entry (
        &mut self, 
        entry: &RuntimeEntrypoint, 
        args: Vec<P<Value>>, 
        rets: Option<Vec<P<Value>>>,
        cur_node: Option<&TreeNode>, 
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> Vec<P<Value>> {
        let sig = entry.sig.clone();
        
        let entry_name = {
            if vm.is_running() {
                unimplemented!()
            } else {
                let ref entry_loc = entry.aot;
                
                match entry_loc {
                    &ValueLocation::Relocatable(_, ref name) => name.clone(),
                    _ => panic!("expecting a relocatable value")
                }
            }
        };
        
        self.emit_c_call(entry_name, sig, args, rets, cur_node, f_content, f_context, vm)
    }
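    // Usage sketch (mirroring the ALLOC_SLOW call in the allocation slowpath above):
    //   let rets = self.emit_runtime_entry(
    //       &entrypoints::ALLOC_SLOW,
    //       vec![tmp_allocator, const_size, const_align],
    //       Some(vec![tmp_res.clone()]),
    //       Some(node), f_content, f_context, vm);
    // Under AOT the entry resolves to a relocatable symbol name, and the call is then
    // emitted as an ordinary C call to that symbol via emit_c_call.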
    
    // returns the total stack argument size (including padding) - we need it to collapse the stack after the call
    fn emit_precall_convention(
        &mut self,
        args: &Vec<P<Value>>, 
        vm: &VM) -> usize {
        // if we needed to save caller-saved registers, this is where it would happen
        // (since this is fastpath compilation, we do not have any to save)
        
        // put args into registers if we can
        // in the meantime record args that do not fit in registers
        let mut stack_args : Vec<P<Value>> = vec![];        
        let mut gpr_arg_count = 0;
        for arg in args.iter() {
            if arg.is_int_reg() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&x86_64::ARGUMENT_GPRs[gpr_arg_count], &arg);
                    gpr_arg_count += 1;
                } else {
                    // use stack to pass argument
                    stack_args.push(arg.clone());
                }
            } else if arg.is_int_const() {
                if x86_64::is_valid_x86_imm(arg) {                
                    let int_const = arg.extract_int_const() as i32;
                    
                    if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                        self.backend.emit_mov_r64_imm32(&x86_64::ARGUMENT_GPRs[gpr_arg_count], int_const);
                        gpr_arg_count += 1;
                    } else {
                        // use stack to pass argument
                        stack_args.push(arg.clone());
                    }
                } else {
                    // put the constant to memory
                    unimplemented!()
                }
            } else if arg.is_mem() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_mem64(&x86_64::ARGUMENT_GPRs[gpr_arg_count], &arg);
                    gpr_arg_count += 1;
                } else {
                    // use stack to pass argument
                    stack_args.push(arg.clone());
                }
            } else {
                // floating point
                unimplemented!()
            }
        }

        if !stack_args.is_empty() {
            // deal with stack arg, put them on stack
            // in reverse order, i.e. push the rightmost arg first to stack
            stack_args.reverse();

            // "The end of the input argument area shall be aligned on a 16
            // (32, if __m256 is passed on stack) byte boundary." - x86 ABI
            // if we need to special align the args, we do it now
            // (then the args will be put to stack following their regular alignment)
            let stack_arg_tys = stack_args.iter().map(|x| x.ty.clone()).collect();
            let (stack_arg_size, _, stack_arg_offsets) = backend::sequetial_layout(&stack_arg_tys, vm);
            let mut stack_arg_size_with_padding = stack_arg_size;
            if stack_arg_size % 16 == 0 {
                // do not need to adjust rsp
            } else if stack_arg_size % 8 == 0 {
                // adjust rsp by -8 (push a random padding value)
                self.backend.emit_push_imm32(0x7777);
                stack_arg_size_with_padding += 8;
            } else {
                panic!("expecting stack arguments to be at least 8-byte aligned, but it has size of {}", stack_arg_size);
            }

            // now, we just put all the args on the stack
            {
                let mut index = 0;
                for arg in stack_args {
                    self.emit_store_base_offset(&x86_64::RSP, - (stack_arg_offsets[index] as i32), &arg, vm);
                    index += 1;
                }

                self.backend.emit_add_r64_imm32(&x86_64::RSP, -(stack_arg_size as i32));
            }

            stack_arg_size_with_padding
        } else {
            0
        }
    }
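    // Example, assuming x86_64::ARGUMENT_GPRs holds the six SysV integer argument
    // registers (rdi, rsi, rdx, rcx, r8, r9): a call with eight 64-bit integer
    // arguments passes the first six in those registers and the last two on the stack.
    // Two 8-byte stack slots already total 16 bytes, so no padding is pushed and this
    // returns 16; with three stack arguments (24 bytes) the 0x7777 padding push brings
    // the adjustment to 32, keeping rsp 16-byte aligned at the call site.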

    fn emit_postcall_convention(
        &mut self,
        sig: &P<CFuncSig>,
        rets: &Option<Vec<P<Value>>>,
        precall_stack_arg_size: usize,
        f_context: &mut FunctionContext,
        vm: &VM
    ) -> Vec<P<Value>> {
        // deal with ret vals
        let mut return_vals = vec![];

        let mut gpr_ret_count = 0;
        for ret_index in 0..sig.ret_tys.len() {
            let ref ty = sig.ret_tys[ret_index];

            let ret_val = match rets {
                &Some(ref rets) => rets[ret_index].clone(),
                &None => {
                    let tmp_node = f_context.make_temporary(vm.next_id(), ty.clone());
                    tmp_node.clone_value()
                }
            };

            if ret_val.is_int_reg() {
                if gpr_ret_count < x86_64::RETURN_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&ret_val, &x86_64::RETURN_GPRs[gpr_ret_count]);
                    gpr_ret_count += 1;
                } else {
                    // get return value by stack
                    unimplemented!()
                }
            } else {
                // floating point register
                unimplemented!()
            }

            return_vals.push(ret_val);
        }

        // remove stack_args
        if precall_stack_arg_size != 0 {
            self.backend.emit_add_r64_imm32(&x86_64::RSP, precall_stack_arg_size as i32);
        }

        return_vals
    }
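    // After the call returns, integer results are copied out of x86_64::RETURN_GPRs
    // (rax, then rdx under SysV) into the requested or freshly created temporaries,
    // and rsp is moved back up by precall_stack_arg_size so the pushed stack arguments
    // and any padding are released.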
    
    #[allow(unused_variables)]
    // rets: Option<Vec<P<Value>>>
    // if rets is Some, return values will be stored in the given temporaries;
    // otherwise temporaries are created
    // always returns the result temporaries (given or created)
    fn emit_c_call (
        &mut self, 
        func_name: CName, 
        sig: P<CFuncSig>, 
        args: Vec<P<Value>>, 
        rets: Option<Vec<P<Value>>>,
        cur_node: Option<&TreeNode>,
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> Vec<P<Value>> 
    {