use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::OpCode;
use ast::types;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;
use compiler::backend;
use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::x86_64;
use compiler::backend::x86_64::CodeGenerator;
use compiler::backend::x86_64::ASMCodeGen;
use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

pub struct InstructionSelection {
    name: &'static str,
    backend: Box<CodeGenerator>,
    
    current_callsite_id: usize,
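    // per-function state used while emitting the current function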
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsite that names the block as exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>     
}

impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection{
            name: "Instruction Selection (x64)",
            backend: Box::new(ASMCodeGen::new()),
            
            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_func_start: None,
            // key: block id, val: callsite that names the block as exception block
            current_exn_callsites: HashMap::new(), 
            current_exn_blocks: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }
    
    // in this pass, we assume that
    // 1. all temporaries will use 64bit registers
    // 2. we do not need to backup/restore caller-saved registers
    // 3. we need to backup/restore all the callee-saved registers
    // if any of these assumption breaks, we will need to re-emit the code
    #[allow(unused_variables)]
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node {}", node);
        
        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    Instruction_::Branch2{cond, ref true_dest, ref false_dest, true_prob} => {
                        // move this to trace generation
                        // assert here
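                        // pick the more likely successor as the fallthrough block and branch to the other one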
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            if true_prob > 0.5f32 {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };
                        
                        let ops = inst.ops.read().unwrap();
                        
                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);
                        
                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();
    
                        let ref cond = ops[cond];
                        
                        if self.match_cmp_res(cond) {
                            trace!("emit cmp-branch2");
                            match self.emit_cmp_res(cond, f_content, f_context, vm) {
                                op::CmpOp::EQ => self.backend.emit_je(branch_target),
                                op::CmpOp::NE => self.backend.emit_jne(branch_target),
                                op::CmpOp::UGE => self.backend.emit_jae(branch_target),
                                op::CmpOp::UGT => self.backend.emit_ja(branch_target),
                                op::CmpOp::ULE => self.backend.emit_jbe(branch_target),
                                op::CmpOp::ULT => self.backend.emit_jb(branch_target),
                                op::CmpOp::SGE => self.backend.emit_jge(branch_target),
                                op::CmpOp::SGT => self.backend.emit_jg(branch_target),
                                op::CmpOp::SLE => self.backend.emit_jle(branch_target),
                                op::CmpOp::SLT => self.backend.emit_jl(branch_target),
                                _ => unimplemented!()
                            }
                        } else if self.match_ireg(cond) {
                            trace!("emit ireg-branch2");
                            
                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);
                            
                            // emit: cmp cond_reg 1
                            self.backend.emit_cmp_r64_imm32(&cond_reg, 1);
                            // emit: je #branch_dest
                            self.backend.emit_je(branch_target);                            
                        } else {
                            unimplemented!();
                        }
                    },
                    
                    Instruction_::Branch1(ref dest) => {
                        let ops = inst.ops.read().unwrap();
                                            
                        self.process_dest(&ops, dest, f_content, f_context, vm);
                        
                        let target = f_content.get_block(dest.target).name().unwrap();
                        
                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_jmp(target);
                    },
                    
                    Instruction_::ExprCall{ref data, is_abort} => {
                        if is_abort {
                            unimplemented!()
                        }
                        
                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode, 
                            f_content, f_context, vm);
                    },
                    
                    Instruction_::Call{ref data, ref resume} => {
                        self.emit_mu_call(
                            inst, 
                            data, 
                            Some(resume), 
                            node, 
                            f_content, f_context, vm);
                    }
                    
                    Instruction_::Return(_) => {
                        self.emit_common_epilogue(inst, f_content, f_context, vm);
                        
                        self.backend.emit_ret();
                    },
                    
                    Instruction_::BinOp(op, op1, op2) => {
                        let ops = inst.ops.read().unwrap();

                        let res_tmp = self.get_result_value(node);
                        
                        match op {
                            op::BinOp::Add => {
                                if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit add-ireg-imm");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.node_iimm_to_i32(&ops[op2]);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_imm32(&res_tmp, reg_op2);
                                } else if self.match_iimm(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-imm-ireg");
                                    unimplemented!();
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-ireg-mem");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_mem(&ops[op2], vm);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2 res
                                    self.backend.emit_add_r64_mem64(&res_tmp, &reg_op2);
                                } else if self.match_mem(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-mem-ireg");
                                    unimplemented!();
                                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2 res
                                    self.backend.emit_add_r64_r64(&res_tmp, &reg_op2);
                                } else if self.match_iimm(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit add-iimm-iimm");

                                    let imm1 = self.node_iimm_to_i32(&ops[op1]);
                                    let imm2 = self.node_iimm_to_i32(&ops[op2]);

                                    // mov imm1 -> tmp_res
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm1);

                                    // add imm2, tmp_res -> tmp_res
                                    self.backend.emit_add_r64_imm32(&res_tmp, imm2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Sub => {
                                if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-ireg-ireg");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2 res
                                    self.backend.emit_sub_r64_r64(&res_tmp, &reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit sub-ireg-imm");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(&ops[op2]);
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_iimm(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-imm-ireg");
                                    unimplemented!();
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit sub-ireg-mem");
                                    
                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2], vm);
qinsoon's avatar
qinsoon committed
251 252 253 254
                                    
                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2 res
                                    self.backend.emit_sub_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_mem(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-mem-ireg");
                                    unimplemented!();
                                } else if self.match_iimm(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit sub-iimm-iimm");

                                    let tmp_res = self.get_result_value(node);
                                    let imm1 = self.node_iimm_to_i32(&ops[op1]);
                                    let imm2 = self.node_iimm_to_i32(&ops[op2]);

                                    // mov imm1 -> tmp_res
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm1);

                                    // sub imm2, tmp_res -> tmp_res
                                    self.backend.emit_sub_r64_imm32(&res_tmp, imm2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::And => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) && self.match_iimm(op2) {
                                    trace!("emit and-ireg-iimm");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // mov op1 -> res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_iimm(op1) && self.match_ireg(op2) {
                                    trace!("emit and-iimm-ireg");
                                    unimplemented!()
                                } else if self.match_ireg(op1) && self.match_mem(op2) {
                                    trace!("emit and-ireg-mem");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_mem(op1) && self.match_ireg(op2) {
                                    trace!("emit and-mem-ireg");
                                    unimplemented!()
                                } else if self.match_ireg(op1) && self.match_ireg(op2) {
                                    trace!("emit and-ireg-ireg");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_r64(&res_tmp, &tmp_op2);
                                } else if self.match_iimm(op1) && self.match_iimm(op2) {
                                    trace!("emit and-iimm-iimm");

                                    let imm1 = self.node_iimm_to_i32(op1);
                                    let imm2 = self.node_iimm_to_i32(op2);

                                    // mov imm1 -> res_tmp
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm1);

                                    // and imm2, res_tmp -> res_tmp
                                    self.backend.emit_and_r64_imm32(&res_tmp, imm2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Xor => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) && self.match_iimm(op2) {
                                    trace!("emit xor-ireg-iimm");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // mov op1 -> res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_iimm(op1) && self.match_ireg(op2) {
                                    trace!("emit xor-iimm-ireg");
                                    unimplemented!()
                                } else if self.match_ireg(op1) && self.match_mem(op2) {
                                    trace!("emit xor-ireg-mem");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_mem(op1) && self.match_ireg(op2) {
                                    trace!("emit xor-mem-ireg");
                                    unimplemented!()
                                } else if self.match_ireg(op1) && self.match_ireg(op2) {
                                    trace!("emit xor-ireg-ireg");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_r64(&res_tmp, &tmp_op2);
                                } else if self.match_iimm(op1) && self.match_iimm(op2) {
                                    trace!("emit xor-iimm-iimm");

                                    let imm1 = self.node_iimm_to_i32(op1);
                                    let imm2 = self.node_iimm_to_i32(op2);

                                    // mov imm1 -> res_tmp
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm1);

                                    // xor imm2, res_tmp -> res_tmp
                                    self.backend.emit_xor_r64_imm32(&res_tmp, imm2);
                                } else {
                                    unimplemented!()
                                }
                            }
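                            // x86-64 MUL multiplies RAX by the operand; the low 64 bits of the product end up in RAX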
                            op::BinOp::Mul => {
                                // mov op1 -> rax
                                let rax = x86_64::RAX.clone();
                                let op1 = &ops[op1];
                                if self.match_ireg(op1) {
                                    let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    
                                    self.backend.emit_mov_r64_r64(&rax, &reg_op1);
                                } else if self.match_iimm(op1) {
                                    let imm_op1 = self.node_iimm_to_i32(op1);
                                    
                                    self.backend.emit_mov_r64_imm32(&rax, imm_op1);
                                } else if self.match_mem(op1) {
                                    let mem_op1 = self.emit_mem(op1, vm);
                                    
                                    self.backend.emit_mov_r64_mem64(&rax, &mem_op1);
                                } else {
                                    unimplemented!();
                                }
                                
                                // mul op2 -> rax
                                let op2 = &ops[op2];
                                if self.match_ireg(op2) {
                                    let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);
                                    
                                    self.backend.emit_mul_r64(&reg_op2);
                                } else if self.match_iimm(op2) {
                                    let imm_op2 = self.node_iimm_to_i32(op2);
                                    
                                    // put imm in a temporary
                                    // here we use result reg as temporary
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm_op2);
                                    
                                    self.backend.emit_mul_r64(&res_tmp);
                                } else if self.match_mem(op2) {
                                    let mem_op2 = self.emit_mem(op2, vm);
                                    
                                    self.backend.emit_mul_mem64(&mem_op2);
                                } else {
                                    unimplemented!();
                                }
                                
                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &rax);
                            },
                            op::BinOp::Udiv => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_udiv(op1, op2, f_content, f_context, vm);

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RAX);
                            },
                            op::BinOp::Sdiv => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_idiv(op1, op2, f_content, f_context, vm);

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RAX);
                            },
                            op::BinOp::Urem => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_udiv(op1, op2, f_content, f_context, vm);

                                // mov rdx -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RDX);
                            },
                            op::BinOp::Srem => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_idiv(op1, op2, f_content, f_context, vm);

                                // mov rdx -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RDX);
                            },

                            op::BinOp::Shl => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // shl op1, cl -> op1
                                        self.backend.emit_shl_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // shl op1, op2 -> op1
                                        self.backend.emit_shl_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else if self.match_mem(op1) {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Lshr => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // shr op1, cl -> op1
                                        self.backend.emit_shr_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // shr op1, op2 -> op1
                                        self.backend.emit_shr_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else if self.match_mem(op1) {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Ashr => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // sar op1, cl -> op1
                                        self.backend.emit_sar_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // sar op1, op2 -> op1
                                        self.backend.emit_sar_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else if self.match_mem(op1) {
                                    unimplemented!()
                                }
                            },


                            // floating point
                            op::BinOp::FAdd => {
                                if self.match_fpreg(&ops[op1]) && self.match_fpreg(&ops[op2]) {
                                    trace!("emit add-fpreg-fpreg");

                                    let reg_op1 = self.emit_fpreg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_fpreg(&ops[op2], f_content, f_context, vm);

                                    // movsd op1, res
                                    self.backend.emit_movsd_f64_f64(&res_tmp, &reg_op1);
                                    // add op2 res
                                    self.backend.emit_addsd_f64_f64(&res_tmp, &reg_op2);
                                } else if self.match_fpreg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-fpreg-mem");

                                    let reg_op1 = self.emit_fpreg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2], vm);

                                    // mov op1, res
                                    self.backend.emit_movsd_f64_f64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_addsd_f64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_mem(&ops[op1]) && self.match_fpreg(&ops[op2]) {
                                    trace!("emit add-mem-fpreg");
                                    unimplemented!();
                                } else {
                                    unimplemented!()
                                }
                            }
                            
                            _ => unimplemented!()
                        }

                        // truncate result
                        if res_tmp.is_int_reg() {
                            self.emit_truncate_result(&UINT64_TYPE, &res_tmp.ty, &res_tmp);
                        }
                    }

                    Instruction_::ConvOp{operation, ref from_ty, ref to_ty, operand} => {
                        let ops = inst.ops.read().unwrap();

                        let ref op = ops[operand];

                        let extract_int_len = |x: &P<MuType>| {
                            match x.v {
                                MuType_::Int(len) => len,
                                _ => panic!("only expect int types, found: {}", x)
                            }
                        };

                        match operation {
                            op::ConvOp::TRUNC => {
                                // currently only use 64bits register
                                // so only keep what is needed in the register (set others to 0)

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // truncate result
                                    self.emit_truncate_result(from_ty, to_ty, &tmp_res);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }
                            }
                            op::ConvOp::ZEXT => {
                                // currently only use 64bits register
                                // so set irrelevant bits to 0
                                let from_ty_len = extract_int_len(from_ty);
                                let to_ty_len   = extract_int_len(to_ty);

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    let mask = match from_ty_len {
                                        8  => 0xFFi32,
                                        16 => 0xFFFFi32,
                                        32 => -1i32,
                                        _ => unimplemented!()
                                    };

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // and mask result -> result
                                    self.backend.emit_and_r64_imm32(&tmp_res, mask);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }
                            },
                            op::ConvOp::SEXT => {
                                // currently only use 64bits register
                                // we left shift the value, then arithmetic right shift back
                                let from_ty_len = extract_int_len(from_ty);
                                let to_ty_len   = extract_int_len(to_ty);

                                let shift : i8 = (to_ty_len - from_ty_len) as i8;

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // shl result, shift -> result
                                    self.backend.emit_shl_r64_imm8(&tmp_res, shift);

                                    // sar result, shift -> result
                                    self.backend.emit_sar_r64_imm8(&tmp_res, shift);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op)
                                }
                            }

                            _ => unimplemented!()
                        }
                    }
                    
                    // load on x64 generates mov inst (no matter what order is specified)
                    // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
                    Instruction_::Load{is_ptr, order, mem_loc} => {
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];
                        
                        // check order
                        match order {
                            MemoryOrder::Relaxed 
                            | MemoryOrder::Consume 
                            | MemoryOrder::Acquire
                            | MemoryOrder::SeqCst => {},
                            _ => panic!("didn't expect order {:?} with load inst", order)
                        }                        

                        let resolved_loc = self.node_addr_to_value(loc_op, vm);
                        let res_temp = self.get_result_value(node);
                        
                        if self.match_ireg(node) {
                            // emit mov(GPR)
                            self.backend.emit_mov_r64_mem64(&res_temp, &resolved_loc);
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }
                    
                    Instruction_::Store{is_ptr, order, mem_loc, value} => {
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];
                        
                        let generate_plain_mov : bool = {
                            match order {
                                MemoryOrder::Relaxed | MemoryOrder::Release => true,
                                MemoryOrder::SeqCst => false,
                                _ => panic!("didn't expect order {:?} with store inst", order)
                            }
                        };
                        
                        let resolved_loc = self.node_addr_to_value(loc_op, vm);
                        
                        if self.match_ireg(val_op) {
                            let val = self.emit_ireg(val_op, f_content, f_context, vm);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_r64(&resolved_loc, &val);
                            } else {
                                unimplemented!()
                            }
                        } else if self.match_iimm(val_op) {
                            let val = self.node_iimm_to_i32(val_op);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_imm32(&resolved_loc, val);
                            } else {
                                unimplemented!()
                            }
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }
                    
                    Instruction_::GetIRef(op_index) => {
                        let ops = inst.ops.read().unwrap();
                        
                        let ref op = ops[op_index];
                        let res_tmp = self.get_result_value(node);
                        
                        let hdr_size = mm::objectmodel::OBJECT_HEADER_SIZE;
                        if hdr_size == 0 {
                            self.emit_move_node_to_value(&res_tmp, &op, f_content, f_context, vm);
                        } else {
                            self.emit_lea_base_offset(&res_tmp, &op.clone_value(), hdr_size as i32, vm);
                        }
                    }
                    
                    Instruction_::ThreadExit => {
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)
                        
                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                        self.backend.emit_add_r64_imm32(&tl, *thread::NATIVE_SP_LOC_OFFSET as i32);
                        
                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_content, f_context, vm);
                    }
                    
                    Instruction_::New(ref ty) => {
                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_size = ty_info.size;
                        let ty_align = ty_info.alignment;
                        
                        if ty_size > mm::LARGE_OBJECT_THRESHOLD {
                            // emit large object allocation
                            unimplemented!()
                        } else {
                            // emit immix allocation fast path
                            
                            // ASM: %tl = get_thread_local()
                            let tmp_tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                            
                            // ASM: mov [%tl + allocator_offset + cursor_offset] -> %cursor
                            let cursor_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_CURSOR_OFFSET;
                            let tmp_cursor = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_load_base_offset(&tmp_cursor, &tmp_tl, cursor_offset as i32, vm);
                            
                            // alignup cursor (cursor + align - 1 & !(align - 1))
                            // ASM: lea align-1(%cursor) -> %start
                            let align = ty_info.alignment as i32;
                            let tmp_start = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_start, &tmp_cursor, align - 1, vm);
                            // ASM: and %start, !(align-1) -> %start
                            self.backend.emit_and_r64_imm32(&tmp_start, !(align - 1));
                            
                            // bump cursor
                            // ASM: lea size(%start) -> %end
                            let tmp_end = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_end, &tmp_start, ty_size as i32, vm);
                            
                            // check with limit
                            // ASM: cmp %end, [%tl + allocator_offset + limit_offset]
                            let limit_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_LIMIT_OFFSET;
                            let mem_limit = self.make_memory_op_base_offset(&tmp_tl, limit_offset as i32, ADDRESS_TYPE.clone(), vm);
                            self.backend.emit_cmp_r64_mem64(&tmp_end, &mem_limit);
                            
                            // branch to slow path if end > limit
                            // ASM: jl alloc_slow
                            let slowpath = format!("{}_allocslow", node.id());
                            self.backend.emit_jl(slowpath.clone());
                            
                            // update cursor
                            // ASM: mov %end -> [%tl + allocator_offset + cursor_offset]
                            self.emit_store_base_offset(&tmp_tl, cursor_offset as i32, &tmp_end, vm);
                            
                            // put start as result
                            // ASM: mov %start -> %result
                            let tmp_res = self.get_result_value(node);
                            self.backend.emit_mov_r64_r64(&tmp_res, &tmp_start);
                            
                            // ASM jmp alloc_end
                            let allocend = format!("{}_allocend", node.id());
                            self.backend.emit_jmp(allocend.clone());
                            
                            // finishing current block
                            self.backend.end_block(self.current_block.as_ref().unwrap().clone());
                            
                            // alloc_slow: 
                            // call alloc_slow(size, align) -> %ret
                            // new block (no livein)
                            self.current_block = Some(slowpath.clone());
                            self.backend.start_block(slowpath.clone());
                            self.backend.set_block_livein(slowpath.clone(), &vec![]); 

                            // arg1: allocator address                            
                            let allocator_offset = *thread::ALLOCATOR_OFFSET;
                            let tmp_allocator = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_allocator, &tmp_tl, allocator_offset as i32, vm);
                            // arg2: size                            
                            let const_size = self.make_value_int_const(ty_size as u64, vm);
                            // arg3: align
                            let const_align = self.make_value_int_const(ty_align as u64, vm);
                            
                            let rets = self.emit_runtime_entry(
                                &entrypoints::ALLOC_SLOW,
                                vec![tmp_allocator, const_size, const_align],
                                Some(vec![
                                    tmp_res.clone()
                                ]),
                                Some(node), f_content, f_context, vm
                            );
                            
                            // end block (no liveout other than result)
                            self.backend.end_block(slowpath.clone());
                            self.backend.set_block_liveout(slowpath.clone(), &vec![tmp_res.clone()]);
                            
                            // block: alloc_end
                            self.backend.start_block(allocend.clone());
                            self.current_block = Some(allocend.clone());
                        }
                    }
                    
                    Instruction_::Throw(op_index) => {
                        let ops = inst.ops.read().unwrap();
                        let ref exception_obj = ops[op_index];
                        
                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION, 
                            vec![exception_obj.clone_value()], 
                            None,
                            Some(node), f_content, f_context, vm);
                    }
    
                    _ => unimplemented!()
                } // main switch
            },
            
            TreeNode_::Value(ref p) => {
        
            }
        }
    }
    
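    // makes a fresh unnamed SSA temporary of the given type in the current function context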
    fn make_temporary(&mut self, f_context: &mut FunctionContext, ty: P<MuType>, vm: &VM) -> P<Value> {
        f_context.make_temporary(vm.next_id(), ty).clone_value()
    }
    
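    // builds a memory operand of the form [base + offset] with the given type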
    fn make_memory_op_base_offset (&mut self, base: &P<Value>, offset: i32, ty: P<MuType>, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: ty.clone(),
            v: Value_::Memory(MemoryLocation::Address{
                base: base.clone(),
                offset: Some(self.make_value_int_const(offset as u64, vm)),
                index: None,
                scale: None
            })
        })
    }
    
    fn make_value_int_const (&mut self, val: u64, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: UINT64_TYPE.clone(),
            v: Value_::Constant(Constant::Int(val))
        })
    } 

    fn emit_truncate_result (&mut self, from_ty: &P<MuType>, to_ty: &P<MuType>, op: &P<Value>) {
        // currently only use 64bits register
        // so only keep what is needed in the register (set others to 0)
        let from_ty_len = match from_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", from_ty)
        };
        let to_ty_len   = match to_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", to_ty)
        };

        if from_ty_len == to_ty_len {
            return;
        } else {
            debug_assert!(from_ty_len > to_ty_len);

            // ignoring from_ty for now (we use 64bits register for everything)
            let mask = match to_ty_len {
                8 => 0xFFi32,
                16 => 0xFFFFi32,
                32 => -1i32,
                _ => unimplemented!()
            };

            // and mask, result -> result
            self.backend.emit_and_r64_imm32(&op, mask);
        }
    }

    fn emit_sign_extend_operand (&mut self, from_ty: &P<MuType>, to_ty: &P<MuType>, op: &P<Value>) {
        // currently we only use 64-bit registers:
        // we shift the value left, then arithmetic-shift it right back
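        //
        // sketch of the emitted code for sign-extending int<8> to int<64>,
        // assuming the operand lives in rax (shift = 64 - 8 = 56):
        //   shl rax, 56
        //   sar rax, 56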
        let from_ty_len = match from_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", from_ty)
        };
        let to_ty_len   = match to_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", to_ty)
        };

        if from_ty_len == to_ty_len {
            return;
        } else {
            debug_assert!(to_ty_len > from_ty_len);

            let shift : i8 = (to_ty_len - from_ty_len) as i8;

            // shl result, shift -> result
            self.backend.emit_shl_r64_imm8(&op, shift);
            // sar result, shift -> result
            self.backend.emit_sar_r64_imm8(&op, shift);
        }
    }

    fn emit_load_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, dest.ty.clone(), vm);

        if dest.is_int_reg() {
            self.backend.emit_mov_r64_mem64(dest, &mem);
        } else if dest.is_fp_reg() {
            self.backend.emit_movsd_f64_mem64(dest, &mem);
        } else {
            unimplemented!();
        }
    }
    
    fn emit_store_base_offset (&mut self, base: &P<Value>, offset: i32, src: &P<Value>, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, src.ty.clone(), vm);
        
        self.backend.emit_mov_mem64_r64(&mem, src);
    }
    
    fn emit_lea_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, ADDRESS_TYPE.clone(), vm);
        
        self.backend.emit_lea_r64(dest, &mem);
    }

    fn emit_udiv (
        &mut self,
        op1: &P<TreeNode>, op2: &P<TreeNode>,
        f_content: &FunctionContent,
        f_context: &mut FunctionContext,
        vm: &VM)
    {
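        // x86-64 DIV divides rdx:rax by the operand, leaving the quotient in
        // rax and the remainder in rdx, so rdx must be zeroed first.
        // A sketch of the sequence this emits for an unsigned `a / b`:
        //   mov rax, a
        //   xor rdx, rdx
        //   div b          ; quotient -> rax, remainder -> rdx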
        // mov op1 -> rax
        let rax = x86_64::RAX.clone();
        self.emit_move_value_to_value(&rax, &op1.clone_value());

        // xorq rdx, rdx -> rdx
        let rdx = x86_64::RDX.clone();
        self.backend.emit_xor_r64_r64(&rdx, &rdx);

        // div op2
        if self.match_ireg(op2) {
            let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

            self.backend.emit_div_r64(&reg_op2);
        } else if self.match_mem(op2) {
            let mem_op2 = self.emit_mem(op2, vm);

            self.backend.emit_div_mem64(&mem_op2);
        } else if self.match_iimm(op2) {
            let imm = self.node_iimm_to_i32(op2);
            // moving to a temp
            let temp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
            self.backend.emit_mov_r64_imm32(&temp, imm);

            // div tmp
            self.backend.emit_div_r64(&temp);
        } else {
            unimplemented!();
        }
    }

    fn emit_idiv (
        &mut self,
        op1: &P<TreeNode>, op2: &P<TreeNode>,
        f_content: &FunctionContent,
        f_context: &mut FunctionContext,
        vm: &VM)
    {
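        // x86-64 IDIV divides rdx:rax by the operand; CQO sign-extends rax
        // into rdx first (sub-64-bit operands are sign-extended to 64 bits
        // before that). A sketch of the sequence this emits for a signed `a / b`:
        //   mov rax, a
        //   cqo            ; rdx:rax <- sign_extend(rax)
        //   idiv b         ; quotient -> rax, remainder -> rdx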
        let rax = x86_64::RAX.clone();

        // put op1 to rax
        let value_op1 = {
            if self.match_ireg(op1) {
                let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                self.emit_move_value_to_value(&rax, &reg_op1);

                reg_op1
            } else if self.match_iimm(op1) {
                let val_imm = self.node_iimm_to_value(op1);
                self.emit_move_value_to_value(&rax, &val_imm);

                val_imm
            } else {
                unimplemented!()
            }
        };
        // sign extend rax
        self.emit_sign_extend_operand(&value_op1.ty, &UINT64_TYPE, &rax);

        // cqo
        self.backend.emit_cqo();

        // idiv op2
        if self.match_ireg(op2) {
            let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

            self.emit_sign_extend_operand(&reg_op2.ty, &UINT64_TYPE, &reg_op2);

            self.backend.emit_idiv_r64(&reg_op2);
        } else if self.match_mem(op2) {
            let mem_op2 = self.emit_mem(op2, vm);
            self.backend.emit_idiv_mem64(&mem_op2);

            // need to sign extend op2
            unimplemented!()
        } else if self.match_iimm(op2) {
            let imm = self.node_iimm_to_i32(op2);
            // moving to a temp
            let temp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
            self.backend.emit_mov_r64_imm32(&temp, imm);

            // idiv temp
            self.backend.emit_idiv_r64(&temp);
        } else {
            unimplemented!();
        }
    }
    
    fn emit_get_threadlocal (
        &mut self, 
        cur_node: Option<&TreeNode>,
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> P<Value> {
        let mut rets = self.emit_runtime_entry(&entrypoints::GET_THREAD_LOCAL, vec![], None, cur_node, f_content, f_context, vm);
        
        rets.pop().unwrap()
    }
    
    // rets: Option<Vec<P<Value>>>
    // if rets is Some, return values are stored in the given temporaries;
    // otherwise temporaries are created.
    // always returns the result temporaries (given or created)
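    //
    // usage sketch (mirrors the Throw case above): one argument, no explicit
    // result temporaries requested:
    //     self.emit_runtime_entry(&entrypoints::THROW_EXCEPTION,
    //                             vec![exception_obj.clone_value()], None,
    //                             Some(node), f_content, f_context, vm);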
    fn emit_runtime_entry (
        &mut self, 
        entry: &RuntimeEntrypoint, 
        args: Vec<P<Value>>, 
        rets: Option<Vec<P<Value>>>,
        cur_node: Option<&TreeNode>, 
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> Vec<P<Value>> {
        let sig = entry.sig.clone();
        
        let entry_name = {
            if vm.is_running() {
                unimplemented!()
            } else {
                let ref entry_loc = entry.aot;
                
                match entry_loc {
                    &ValueLocation::Relocatable(_, ref name) => name.clone(),
                    _ => panic!("expecting a relocatable value")
                }
            }
        };
        
        self.emit_c_call(entry_name, sig, args, rets, cur_node, f_content, f_context, vm)
    }
    
    // returns the total stack argument size (including padding) - we need it to collapse the stack after the call
    fn emit_precall_convention(
        &mut self,
        args: &Vec<P<Value>>, 
        vm: &VM) -> usize {
        // if we ever need to save caller-saved registers, do it here
        // (since this is fast-path compilation, we won't have any to save)
        
        // put args into registers if we can
        // in the meantime record args that do not fit in registers
        let mut stack_args : Vec<P<Value>> = vec![];        
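        // (ARGUMENT_GPRs is assumed to follow the SysV order
        //  rdi, rsi, rdx, rcx, r8, r9; anything beyond that spills to the stack)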
        let mut gpr_arg_count = 0;
        for arg in args.iter() {
            if arg.is_int_reg() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&x86_64::ARGUMENT_GPRs[gpr_arg_count], &arg);
                    gpr_arg_count += 1;
                } else {
                    // use stack to pass argument
                    stack_args.push(arg.clone());
                }
            } else if arg.is_int_const() {
                if x86_64::is_valid_x86_imm(arg) {                
                    let int_const = arg.extract_int_const() as i32;
                    
                    if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                        self.backend.emit_mov_r64_imm32(&x86_64::ARGUMENT_GPRs[gpr_arg_count], int_const);
                        gpr_arg_count += 1;
                    } else {
                        // use stack to pass argument
                        stack_args.push(arg.clone());
                    }
                } else {
                    // put the constant to memory
                    unimplemented!()
                }
            } else if arg.is_mem() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_mem64(&x86_64::ARGUMENT_GPRs[gpr_arg_count], &arg);
                    gpr_arg_count += 1;
                } else {
                    // use stack to pass argument
                    stack_args.push(arg.clone());
                }
            } else {
                // floating point
                unimplemented!()
            }
        }

        if !stack_args.is_empty() {
            // deal with stack arg, put them on stack
            // in reverse order, i.e. push the rightmost arg first to stack
            stack_args.reverse();

            // "The end of the input argument area shall be aligned on a 16
            // (32, if __m256 is passed on stack) byte boundary." - x86 ABI
            // if we need to special align the args, we do it now
            // (then the args will be put to stack following their regular alignment)
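            // e.g. three 8-byte stack args (24 bytes) get one extra 8-byte
            // padding push below, so that rsp is 16-byte aligned at the call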
            let stack_arg_tys = stack_args.iter().map(|x| x.ty.clone()).collect();
            let (stack_arg_size, _, stack_arg_offsets) = backend::sequetial_layout(&stack_arg_tys, vm);
            let mut stack_arg_size_with_padding = stack_arg_size;
            if stack_arg_size % 16 == 0 {
                // do not need to adjust rsp
            } else if stack_arg_size % 8 == 0 {
                // adjust rsp by -8 (push a random padding value)
                self.backend.emit_push_imm32(0x7777);
                stack_arg_size_with_padding += 8;
            } else {
                panic!("expecting stack arguments to be at least 8-byte aligned, but it has size of {}", stack_arg_size);
            }

            // now, we just put all the args on the stack
            {
                let mut index = 0;
                for arg in stack_args {
                    self.emit_store_base_offset(&x86_64::RSP, - (stack_arg_offsets[index] as i32), &arg, vm);
                    index += 1;
                }

                self.backend.emit_add_r64_imm32(&x86_64::RSP, - (stack_arg_size as i32));
            }

            stack_arg_size_with_padding
        } else {
            0
        }
    }

    fn emit_postcall_convention(
        &mut self,
        sig: &P<CFuncSig>,
        rets: &Option<Vec<P<Value>>>,
        precall_stack_arg_size: usize,
        f_context: &mut FunctionContext,
        vm: &VM
    ) -> Vec<P<Value>> {
        // deal with ret vals
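        // e.g. for a signature returning (int<64>, int<64>), the results are
        // taken from RETURN_GPRs[0] and RETURN_GPRs[1] in order
        // (rax and rdx, assuming the SysV return convention)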
        let mut return_vals = vec![];

        let mut gpr_ret_count = 0;
        for ret_index in 0..sig.ret_tys.len() {
            let ref ty = sig.ret_tys[ret_index];

            let ret_val = match rets {
                &Some(ref rets) => rets[ret_index].clone(),
                &None => {
                    let tmp_node = f_context.make_temporary(vm.next_id(), ty.clone());
                    tmp_node.clone_value()
                }
            };

            if ret_val.is_int_reg() {
                if gpr_ret_count < x86_64::RETURN_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&ret_val, &x86_64::RETURN_GPRs[gpr_ret_count]);
                    gpr_ret_count += 1;
                } else {
                    // get return value from the stack
                    unimplemented!()
                }
            } else {
                // floating point register
                unimplemented!()
            }

            return_vals.push(ret_val);
        }

        // remove stack_args
        if precall_stack_arg_size != 0 {
            self.backend.emit_add_r64_imm32(&x86_64::RSP, precall_stack_arg_size as i32);
        }

        return_vals
    }
    
    #[allow(unused_variables)]
    // rets: Option<Vec<P<Value>>>
    // if rets is Some, return values are stored in the given temporaries;
    // otherwise temporaries are created.
    // always returns the result temporaries (given or created)
    fn emit_c_call (
        &mut self, 
        func_name: CName, 
        sig: P<CFuncSig>, 
        args: Vec<P<Value>>, 
        rets: Option<Vec<P<Value>>>,
        cur_node: Option<&TreeNode>,
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> Vec<P<Value>> 
    {
        let stack_arg_size = self.emit_precall_convention(&args, vm);
        
        // make call
        if vm.is_running() {
            unimplemented!()
        } else {
            let callsite = self.new_callsite_label(cur_node);
            self.backend.emit_call_near_rel32(callsite, func_name);
            
            // record exception block (CCall may have an exception block)
            if cur_node.is_some() {
                let cur_node = cur_node.unwrap(); 
                if cur_node.op == OpCode::CCall {
                    unimplemented!()
                }
            }
        }
        
        self.emit_postcall_convention(&sig, &rets, stack_arg_size, f_context, vm)
    }
    
    fn emit_mu_call(
        &mut self,
        inst: &Instruction,
        calldata: &CallData,
        resumption: Option<&ResumptionData>,
        cur_node: &TreeNode, 
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) {
        trace!("deal with pre-call convention");
        
        let ops = inst.ops.read().unwrap();
        let ref func = ops[calldata.func];
        let ref func_sig = match func.v {
            TreeNode_::Value(ref pv) => {
                let ty : &MuType = &pv.ty;
                match ty.v {
                    MuType_::FuncRef(ref sig)
                    | MuType_::UFuncPtr(ref sig) => sig,
                    _ => panic!("expected funcref/ptr type")
                }
            },
            _ => panic!("expected funcref/ptr type")
        };
        
        debug_assert!(func_sig.arg_tys.len() == calldata.args.len());
        if cfg!(debug_assertions) {
            if inst.value.is_some() {
                assert!(func_sig.ret_tys.len() == inst.value.as_ref().unwrap().len());
            } else {
                assert!(func_sig.ret_tys.len() == 0, "call instruction has no result values, but its signature declares return types. value: {:?}, ret tys: {:?}", inst.value, func_sig.ret_tys);
            }
        }

        // prepare args (they could be instructions; we need to emit them and use their values)
        let mut arg_values = vec![];
        for arg_index in calldata.args.iter() {
            let ref arg = ops[*arg_index];

            if self.match_ireg(arg) {
                let arg = self.emit_ireg(arg, f_content, f_context, vm);
                arg_values.push(arg);
            } else if self.match_iimm(arg) {
                let arg = self.node_iimm_to_value(arg);
                arg_values.push(arg);
            } else {
                unimplemented!();
            }
        }
        let stack_arg_size = self.emit_precall_convention(&arg_values, vm);
        
        trace!("generating call inst");
        // check direct call or indirect
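        // direct:   call near rel32 to a known symbol (funcref constant)
        // indirect: call near through a register or memory operand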
        let callsite = {
            if self.match_funcref_const(func) {
                let target_id = self.node_funcref_const_to_id(func);
                let funcs = vm.funcs().read().unwrap();
                let target = funcs.get(&target_id).unwrap().read().unwrap();
                                            
                if vm.is_running() {
                    unimplemented!()
                } else {
                    let callsite = self.new_callsite_label(Some(cur_node));
                    self.backend.emit_call_near_rel32(callsite, target.name().unwrap())
                }
            } else if self.match_ireg(func) {
                let target = self.emit_ireg(func, f_content, f_context, vm);
                
                let callsite = self.new_callsite_label(Some(cur_node));
                self.backend.emit_call_near_r64(callsite, &target)
            } else if self.match_mem(func) {
                let target = self.emit_mem(func, vm);
                
                let callsite = self.new_callsite_label(Some(cur_node));