// inst_sel.rs

use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::OpCode;
use ast::types;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;
use compiler::backend;
use compiler::backend::x86_64;
use compiler::backend::x86_64::CodeGenerator;
use compiler::backend::x86_64::ASMCodeGen;
use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

pub struct InstructionSelection {
    name: &'static str,
    backend: Box<CodeGenerator>,

    current_callsite_id: usize,
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsite that names the block as exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>
}

impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection{
            name: "Instruction Selection (x64)",
            backend: Box::new(ASMCodeGen::new()),

            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_func_start: None,
            // key: block id, val: callsite that names the block as exception block
            current_exn_callsites: HashMap::new(),
            current_exn_blocks: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }

    // in this pass, we assume that
    // 1. all temporaries will use 64bit registers
    // 2. we do not need to backup/restore caller-saved registers
    // 3. we need to backup/restore all the callee-saved registers
    // if any of these assumptions breaks, we will need to re-emit the code
    #[allow(unused_variables)]
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node {}", node);

        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    Instruction_::Branch2{cond, ref true_dest, ref false_dest, true_prob} => {
                        // move this to trace generation
                        // assert here
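                        // fall-through heuristic: make the more likely successor the
                        // fall-through block so the hot path stays straight-line code,
                        // and emit the conditional jump for the less likely side;
                        // branch_if_true records which polarity the jump must test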
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            if true_prob > 0.5f32 {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };

                        let ops = inst.ops.read().unwrap();

                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);

                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();

                        let ref cond = ops[cond];

                        if self.match_cmp_res(cond) {
                            trace!("emit cmp_eq-branch2");
                            match self.emit_cmp_res(cond, f_content, f_context, vm) {
                                op::CmpOp::EQ => self.backend.emit_je(branch_target),
                                op::CmpOp::NE => self.backend.emit_jne(branch_target),
                                op::CmpOp::UGE => self.backend.emit_jae(branch_target),
                                op::CmpOp::UGT => self.backend.emit_ja(branch_target),
                                op::CmpOp::ULE => self.backend.emit_jbe(branch_target),
                                op::CmpOp::ULT => self.backend.emit_jb(branch_target),
                                op::CmpOp::SGE => self.backend.emit_jge(branch_target),
                                op::CmpOp::SGT => self.backend.emit_jg(branch_target),
                                op::CmpOp::SLE => self.backend.emit_jle(branch_target),
                                op::CmpOp::SLT => self.backend.emit_jl(branch_target),
                                _ => unimplemented!()
                            }
                        } else if self.match_ireg(cond) {
                            trace!("emit ireg-branch2");

                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);

                            // emit: cmp cond_reg 1
                            self.backend.emit_cmp_r64_imm32(&cond_reg, 1);
                            // emit: je #branch_dest
                            self.backend.emit_je(branch_target);
                        } else {
                            unimplemented!();
                        }
                    },

                    Instruction_::Branch1(ref dest) => {
                        let ops = inst.ops.read().unwrap();

                        self.process_dest(&ops, dest, f_content, f_context, vm);

                        let target = f_content.get_block(dest.target).name().unwrap();

                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_jmp(target);
                    },

                    Instruction_::ExprCall{ref data, is_abort} => {
                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode,
                            f_content, f_context, vm);
                    },

                    Instruction_::Call{ref data, ref resume} => {
                        self.emit_mu_call(
                            inst,
                            data,
                            Some(resume),
                            node,
                            f_content, f_context, vm);
                    }

                    Instruction_::Return(_) => {
                        self.emit_common_epilogue(inst, f_content, f_context, vm);

                        self.backend.emit_ret();
                    },

                    Instruction_::BinOp(op, op1, op2) => {
                        let ops = inst.ops.read().unwrap();

                        match op {
                            op::BinOp::Add => {
                                if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);
                                    let res_tmp = self.emit_get_result(node);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_r64(&res_tmp, &reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit add-ireg-imm");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.node_iimm_to_i32(&ops[op2]);
                                    let res_tmp = self.emit_get_result(node);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_imm32(&res_tmp, reg_op2);
                                } else if self.match_iimm(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-imm-ireg");
                                    unimplemented!();
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-ireg-mem");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_mem(&ops[op2]);
                                    let res_tmp = self.emit_get_result(node);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_mem64(&res_tmp, &reg_op2);
                                } else if self.match_mem(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-mem-ireg");
                                    unimplemented!();
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Sub => {
                                if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);
                                    let res_tmp = self.emit_get_result(node);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_r64(&res_tmp, &reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit sub-ireg-imm");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(&ops[op2]);
                                    let res_tmp = self.emit_get_result(node);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_iimm(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-imm-ireg");
                                    unimplemented!();
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit sub-ireg-mem");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2]);
                                    let res_tmp = self.emit_get_result(node);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_mem(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-mem-ireg");
                                    unimplemented!();
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Mul => {
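                                // x86-64 MUL is single-operand: it multiplies its operand by
                                // RAX and leaves the 128-bit product in RDX:RAX; this pass
                                // only keeps the low 64 bits (RAX) as the result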
                                // mov op1 -> rax
                                let rax = x86_64::RAX.clone();
                                let op1 = &ops[op1];
                                if self.match_ireg(op1) {
                                    let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    self.backend.emit_mov_r64_r64(&rax, &reg_op1);
                                } else if self.match_iimm(op1) {
                                    let imm_op1 = self.node_iimm_to_i32(op1);

                                    self.backend.emit_mov_r64_imm32(&rax, imm_op1);
                                } else if self.match_mem(op1) {
                                    let mem_op1 = self.emit_mem(op1);

                                    self.backend.emit_mov_r64_mem64(&rax, &mem_op1);
                                } else {
                                    unimplemented!();
                                }

                                // mul op2 -> rax
                                let op2 = &ops[op2];
                                if self.match_ireg(op2) {
                                    let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    self.backend.emit_mul_r64(&reg_op2);
                                } else if self.match_iimm(op2) {
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // put imm in a temporary
                                    // here we use result reg as temporary
                                    let res_tmp = self.emit_get_result(node);
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm_op2);

                                    self.backend.emit_mul_r64(&res_tmp);
                                } else if self.match_mem(op2) {
                                    let mem_op2 = self.emit_mem(op2);

                                    self.backend.emit_mul_mem64(&mem_op2);
                                } else {
                                    unimplemented!();
                                }

                                // mov rax -> result
                                let res_tmp = self.emit_get_result(node);
                                self.backend.emit_mov_r64_r64(&res_tmp, &rax);
                            },

                            _ => unimplemented!()
                        }
                    }

                    // load on x64 generates mov inst (no matter what order is specified)
                    // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
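                    // (x86-64 loads already have acquire semantics, so Relaxed, Consume,
                    // Acquire and SeqCst loads all lower to the same plain mov here)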
                    Instruction_::Load{is_ptr, order, mem_loc} => {
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];

                        // check order
                        match order {
                            MemoryOrder::Relaxed
                            | MemoryOrder::Consume
                            | MemoryOrder::Acquire
                            | MemoryOrder::SeqCst => {},
                            _ => panic!("didn't expect order {:?} with load inst", order)
                        }

                        let resolved_loc = self.node_mem_to_value(loc_op, vm);
                        let res_temp = self.emit_get_result(node);

                        if self.match_ireg(node) {
                            // emit mov(GPR)
                            self.backend.emit_mov_r64_mem64(&res_temp, &resolved_loc);
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }

                    Instruction_::Store{is_ptr, order, mem_loc, value} => {
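                        // on x86-64 a plain store already has release semantics, so Relaxed
                        // and Release stores lower to a plain mov; SeqCst needs a stronger
                        // mapping (e.g. XCHG or mov+MFENCE per the C++11 mapping linked
                        // above), which is still unimplemented here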
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];

                        let generate_plain_mov : bool = {
                            match order {
                                MemoryOrder::Relaxed | MemoryOrder::Release => true,
                                MemoryOrder::SeqCst => false,
                                _ => panic!("didn't expect order {:?} with store inst", order)
                            }
                        };

                        let resolved_loc = self.node_mem_to_value(loc_op, vm);

                        if self.match_ireg(val_op) {
                            let val = self.emit_ireg(val_op, f_content, f_context, vm);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_r64(&resolved_loc, &val);
                            } else {
                                unimplemented!()
                            }
                        } else if self.match_iimm(val_op) {
                            let val = self.node_iimm_to_i32(val_op);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_imm32(&resolved_loc, val);
                            } else {
                                unimplemented!()
                            }
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }

                    Instruction_::GetIRef(op_index) => {
                        let ops = inst.ops.read().unwrap();

                        let ref op = ops[op_index];
                        let res_tmp = self.emit_get_result(node);

                        let hdr_size = mm::objectmodel::OBJECT_HEADER_SIZE;
                        if hdr_size == 0 {
                            self.emit_general_move(&op, &res_tmp, f_content, f_context, vm);
                        } else {
                            self.emit_lea_base_offset(&res_tmp, &op.clone_value(), hdr_size as i32, vm);
                        }
                    }

                    Instruction_::ThreadExit => {
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)

                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                        self.backend.emit_add_r64_imm32(&tl, *thread::NATIVE_SP_LOC_OFFSET as i32);

                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_content, f_context, vm);
                    }

                    Instruction_::New(ref ty) => {
                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_size = ty_info.size;
                        let ty_align = ty_info.alignment;

                        if ty_size > mm::LARGE_OBJECT_THRESHOLD {
                            // emit large object allocation
                            unimplemented!()
                        } else {
                            // emit immix allocation fast path

                            // ASM: %tl = get_thread_local()
                            let tmp_tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);

                            // ASM: mov [%tl + allocator_offset + cursor_offset] -> %cursor
                            let cursor_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_CURSOR_OFFSET;
                            let tmp_cursor = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_load_base_offset(&tmp_cursor, &tmp_tl, cursor_offset as i32, vm);

                            // align up cursor: (cursor + align - 1) & !(align - 1)
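                            // e.g. cursor = 0x1009, align = 8:
                            //   0x1009 + 7 = 0x1010, and 0x1010 & !7 = 0x1010 (the next 8-byte boundary)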
                            // ASM: lea align-1(%cursor) -> %start
                            let align = ty_info.alignment as i32;
                            let tmp_start = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_start, &tmp_cursor, align - 1, vm);
                            // ASM: and %start, !(align-1) -> %start
                            self.backend.emit_and_r64_imm32(&tmp_start, !(align - 1));

                            // bump cursor
                            // ASM: lea size(%start) -> %end
                            let tmp_end = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_end, &tmp_start, ty_size as i32, vm);

                            // check with limit
                            // ASM: cmp %end, [%tl + allocator_offset + limit_offset]
                            let limit_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_LIMIT_OFFSET;
                            let mem_limit = self.make_memory_op_base_offset(&tmp_tl, limit_offset as i32, ADDRESS_TYPE.clone(), vm);
                            self.backend.emit_cmp_r64_mem64(&tmp_end, &mem_limit);

                            // branch to slow path if end > limit
                            // ASM: jl alloc_slow
                            let slowpath = format!("{}_allocslow", node.id());
                            self.backend.emit_jl(slowpath.clone());

                            // update cursor
                            // ASM: mov %end -> [%tl + allocator_offset + cursor_offset]
                            self.emit_store_base_offset(&tmp_tl, cursor_offset as i32, &tmp_end, vm);
                            
                            // put start as result
                            // ASM: mov %start -> %result
                            let tmp_res = self.emit_get_result(node);
                            self.backend.emit_mov_r64_r64(&tmp_res, &tmp_start);
                            
                            // ASM jmp alloc_end
                            let allocend = format!("{}_allocend", node.id());
                            self.backend.emit_jmp(allocend.clone());
                            
                            // finishing current block
                            self.backend.end_block(self.current_block.as_ref().unwrap().clone());
                            
                            // alloc_slow: 
                            // call alloc_slow(size, align) -> %ret
                            // new block (no livein)
                            self.current_block = Some(slowpath.clone());
                            self.backend.start_block(slowpath.clone());
                            self.backend.set_block_livein(slowpath.clone(), &vec![]); 

                            // arg1: allocator address
                            let allocator_offset = *thread::ALLOCATOR_OFFSET;
                            let tmp_allocator = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
                            self.emit_lea_base_offset(&tmp_allocator, &tmp_tl, allocator_offset as i32, vm);
                            // arg2: size
                            let const_size = self.make_value_int_const(ty_size as u64, vm);
                            // arg3: align
                            let const_align = self.make_value_int_const(ty_align as u64, vm);

                            let rets = self.emit_runtime_entry(
                                &entrypoints::ALLOC_SLOW,
                                vec![tmp_allocator, const_size, const_align],
                                Some(vec![
                                    tmp_res.clone()
                                ]),
                                Some(node), f_content, f_context, vm
                            );

                            // end block (no liveout other than result)
                            self.backend.end_block(slowpath.clone());
                            self.backend.set_block_liveout(slowpath.clone(), &vec![tmp_res.clone()]);
                            
                            // block: alloc_end
                            self.backend.start_block(allocend.clone());
                            self.current_block = Some(allocend.clone());
                        }
                    }
                    
                    Instruction_::Throw(op_index) => {
                        let ops = inst.ops.read().unwrap();
                        let ref exception_obj = ops[op_index];
                        
                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION, 
                            vec![exception_obj.clone_value()], 
                            None,
                            Some(node), f_content, f_context, vm);
                    }
    
                    _ => unimplemented!()
                } // main switch
            },
            
            TreeNode_::Value(ref p) => {
        
            }
        }
    }
    
    fn make_temporary(&mut self, f_context: &mut FunctionContext, ty: P<MuType>, vm: &VM) -> P<Value> {
        f_context.make_temporary(vm.next_id(), ty).clone_value()
    }
    
    fn make_memory_op_base_offset (&mut self, base: &P<Value>, offset: i32, ty: P<MuType>, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: ty.clone(),
            v: Value_::Memory(MemoryLocation::Address{
                base: base.clone(),
                offset: Some(self.make_value_int_const(offset as u64, vm)),
                index: None,
                scale: None
            })
        })
    }
    
    fn make_value_int_const (&mut self, val: u64, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: UINT64_TYPE.clone(),
            v: Value_::Constant(Constant::Int(val))
        })
    } 
    
    fn emit_load_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, dest.ty.clone(), vm);
        
        self.backend.emit_mov_r64_mem64(dest, &mem);
    }
    
    fn emit_store_base_offset (&mut self, base: &P<Value>, offset: i32, src: &P<Value>, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, src.ty.clone(), vm);
        
        self.backend.emit_mov_mem64_r64(&mem, src);
    }
    
    fn emit_lea_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, ADDRESS_TYPE.clone(), vm);
        
        self.backend.emit_lea_r64(dest, &mem);
    }
    
    fn emit_get_threadlocal (
        &mut self, 
        cur_node: Option<&TreeNode>,
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> P<Value> {
        let mut rets = self.emit_runtime_entry(&entrypoints::GET_THREAD_LOCAL, vec![], None, cur_node, f_content, f_context, vm);
        
        rets.pop().unwrap()
    }
    
    // rets: Option<Vec<P<Value>>>
    // if rets is Some, return values will be stored in the given temporaries
    // otherwise create temporaries
    // always returns result temporaries (given or created)
    fn emit_runtime_entry (
        &mut self, 
        entry: &RuntimeEntrypoint, 
        args: Vec<P<Value>>, 
        rets: Option<Vec<P<Value>>>,
        cur_node: Option<&TreeNode>, 
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> Vec<P<Value>> {
        let sig = entry.sig.clone();
        
        let entry_name = {
            if vm.is_running() {
                unimplemented!()
            } else {
                let ref entry_loc = entry.aot;
                
                match entry_loc {
                    &ValueLocation::Relocatable(_, ref name) => name.clone(),
                    _ => panic!("expecting a relocatable value")
                }
            }
        };
        
        self.emit_c_call(entry_name, sig, args, rets, cur_node, f_content, f_context, vm)
    }
    
    // returns the stack arg offset - we will need this to collapse stack after the call
    fn emit_precall_convention(
        &mut self,
        args: &Vec<P<Value>>, 
        vm: &VM) -> usize {
        // if we need to save caller saved regs
        // put it here (since this is fastpath compile, we won't have them)
        
        // put args into registers if we can
        // in the meantime record args that do not fit in registers
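        // integer args go into x86_64::ARGUMENT_GPRs in order; on the System V
        // AMD64 ABI these are normally RDI, RSI, RDX, RCX, R8, R9 (an assumption
        // here - the authoritative order is whatever the backend's ARGUMENT_GPRs
        // list defines), and anything beyond that spills to the stack below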
        let mut stack_args : Vec<P<Value>> = vec![];        
        let mut gpr_arg_count = 0;
        for arg in args.iter() {
            if arg.is_int_reg() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&x86_64::ARGUMENT_GPRs[gpr_arg_count], &arg);
                    gpr_arg_count += 1;
                } else {
                    // use stack to pass argument
                    stack_args.push(arg.clone());
                }
            } else if arg.is_int_const() {
                if x86_64::is_valid_x86_imm(arg) {                
                    let int_const = arg.extract_int_const() as i32;
                    
                    if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                        self.backend.emit_mov_r64_imm32(&x86_64::ARGUMENT_GPRs[gpr_arg_count], int_const);
                        gpr_arg_count += 1;
                    } else {
                        // use stack to pass argument
                        stack_args.push(arg.clone());
                    }
                } else {
                    // put the constant to memory
                    unimplemented!()
                }
            } else if arg.is_mem() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_mem64(&x86_64::ARGUMENT_GPRs[gpr_arg_count], &arg);
                    gpr_arg_count += 1;
                } else {
                    // use stack to pass argument
                    stack_args.push(arg.clone());
                }
            } else {
                // floating point
                unimplemented!()
            }
        }

        if !stack_args.is_empty() {
            // deal with stack arg, put them on stack
            // in reverse order, i.e. push the rightmost arg first to stack
            stack_args.reverse();

            // "The end of the input argument area shall be aligned on a 16
            // (32, if __m256 is passed on stack) byte boundary." - x86 ABI
            // if we need to special align the args, we do it now
            // (then the args will be put to stack following their regular alignment)
            let stack_arg_tys = stack_args.iter().map(|x| x.ty.clone()).collect();
            let (stack_arg_size, _, stack_arg_offsets) = backend::sequetial_layout(&stack_arg_tys, vm);
            let mut stack_arg_size_with_padding = stack_arg_size;
            if stack_arg_size % 16 == 0 {
                // do not need to adjust rsp
            } else if stack_arg_size % 8 == 0 {
                // adjust rsp by -8 (push a random padding value)
                self.backend.emit_push_imm32(0x7777);
                stack_arg_size_with_padding += 8;
            } else {
                panic!("expecting stack arguments to be at least 8-byte aligned, but it has size of {}", stack_arg_size);
            }

            // now, we just put all the args on the stack
            {
                let mut index = 0;
                for arg in stack_args {
                    self.emit_store_base_offset(&x86_64::RSP, - (stack_arg_offsets[index] as i32), &arg, vm);
                    index += 1;
                }

                self.backend.emit_add_r64_imm32(&x86_64::RSP, - (stack_arg_size as i32));
            }

            stack_arg_size_with_padding
        } else {
            0
        }
    }

    fn emit_postcall_convention(
        &mut self,
        sig: &P<CFuncSig>,
        rets: &Option<Vec<P<Value>>>,
        precall_stack_arg_size: usize,
        f_context: &mut FunctionContext,
        vm: &VM
    ) -> Vec<P<Value>> {
        // deal with ret vals
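        // integer return values are read from x86_64::RETURN_GPRs in order; on
        // the System V AMD64 ABI these are normally RAX then RDX (an assumption
        // here - the backend's RETURN_GPRs list is authoritative)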
        let mut return_vals = vec![];

        let mut gpr_ret_count = 0;
        for ret_index in 0..sig.ret_tys.len() {
            let ref ty = sig.ret_tys[ret_index];

            let ret_val = match rets {
                &Some(ref rets) => rets[ret_index].clone(),
                &None => {
                    let tmp_node = f_context.make_temporary(vm.next_id(), ty.clone());
                    tmp_node.clone_value()
                }
            };

            if ret_val.is_int_reg() {
                if gpr_ret_count < x86_64::RETURN_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&ret_val, &x86_64::RETURN_GPRs[gpr_ret_count]);
                    gpr_ret_count += 1;
                } else {
                    // get return value by stack
                    unimplemented!()
                }
            } else {
                // floating point register
                unimplemented!()
            }

            return_vals.push(ret_val);
        }

        // remove stack_args
        if precall_stack_arg_size != 0 {
            self.backend.emit_add_r64_imm32(&x86_64::RSP, precall_stack_arg_size as i32);
        }

        return_vals
    }
    
    #[allow(unused_variables)]
    // rets: Option<Vec<P<Value>>>
    // if rets is Some, return values will be stored in the given temporaries
    // otherwise create temporaries
    // always returns result temporaries (given or created)
    fn emit_c_call (
        &mut self, 
        func_name: CName, 
        sig: P<CFuncSig>, 
        args: Vec<P<Value>>, 
        rets: Option<Vec<P<Value>>>,
        cur_node: Option<&TreeNode>,
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) -> Vec<P<Value>> 
    {
        let stack_arg_size = self.emit_precall_convention(&args, vm);
        
        // make call
        if vm.is_running() {
            unimplemented!()
        } else {
            let callsite = self.new_callsite_label(cur_node);
            self.backend.emit_call_near_rel32(callsite, func_name);
            
            // record exception block (CCall may have an exception block)
            if cur_node.is_some() {
                let cur_node = cur_node.unwrap(); 
                if cur_node.op == OpCode::CCall {
                    unimplemented!()
                }
            }
        }
        
        self.emit_postcall_convention(&sig, &rets, stack_arg_size, f_context, vm)
    }
    
    fn emit_mu_call(
        &mut self,
        inst: &Instruction,
        calldata: &CallData,
        resumption: Option<&ResumptionData>,
        cur_node: &TreeNode, 
        f_content: &FunctionContent, 
        f_context: &mut FunctionContext, 
        vm: &VM) {
        trace!("deal with pre-call convention");
        
        let ops = inst.ops.read().unwrap();
        let ref func = ops[calldata.func];
        let ref func_sig = match func.v {
            TreeNode_::Value(ref pv) => {
                let ty : &MuType = &pv.ty;
                match ty.v {
                    MuType_::FuncRef(ref sig)
                    | MuType_::UFuncPtr(ref sig) => sig,
                    _ => panic!("expected funcref/ptr type")
                }
            },
            _ => panic!("expected funcref/ptr type")
        };
        
        debug_assert!(func_sig.arg_tys.len() == calldata.args.len());
        if cfg!(debug_assertions) {
            if inst.value.is_some() {
                assert!(func_sig.ret_tys.len() == inst.value.as_ref().unwrap().len());
            } else {
                assert!(func_sig.ret_tys.len() == 0, "expect call inst's value to match function ret types. value: {:?}, ret tys: {:?}", inst.value, func_sig.ret_tys);
            }
        }

        // prepare args (they could be instructions, we need to emit inst and get value)
        let mut arg_values = vec![];
        for arg_index in calldata.args.iter() {
            let ref arg = ops[*arg_index];

            if self.match_ireg(arg) {
                let arg = self.emit_ireg(arg, f_content, f_context, vm);
                arg_values.push(arg);
            } else if self.match_iimm(arg) {
                let arg = self.node_iimm_to_value(arg);
                arg_values.push(arg);
            } else {
                unimplemented!();
            }
        }
        let stack_arg_size = self.emit_precall_convention(&arg_values, vm);
        
        trace!("generating call inst");
        // check direct call or indirect
        let callsite = {
            if self.match_funcref_const(func) {
                let target_id = self.node_funcref_const_to_id(func);
                let funcs = vm.funcs().read().unwrap();
                let target = funcs.get(&target_id).unwrap().read().unwrap();
                                            
                if vm.is_running() {
                    unimplemented!()
                } else {
                    let callsite = self.new_callsite_label(Some(cur_node));
                    self.backend.emit_call_near_rel32(callsite, target.name().unwrap())
                }
            } else if self.match_ireg(func) {
                let target = self.emit_ireg(func, f_content, f_context, vm);
                
                let callsite = self.new_callsite_label(Some(cur_node));
                self.backend.emit_call_near_r64(callsite, &target)
            } else if self.match_mem(func) {
                let target = self.emit_mem(func);
                
                let callsite = self.new_callsite_label(Some(cur_node));
                self.backend.emit_call_near_mem64(callsite, &target)
            } else {
                unimplemented!()
            }
        };
        
        // record exception branch
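        // (the callsite label is remembered per exception-target block; the
        //  per-block callsite lists collected here are presumably consumed later
        //  to map each callsite to its handler block)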
        if resumption.is_some() {
            let ref exn_dest = resumption.as_ref().unwrap().exn_dest;
            let target_block = exn_dest.target;
            
            if self.current_exn_callsites.contains_key(&target_block) {
                let callsites = self.current_exn_callsites.get_mut(&target_block).unwrap();
                callsites.push(callsite);
            } else {
                let mut callsites = vec![];
                callsites.push(callsite);
                self.current_exn_callsites.insert(target_block, callsites);
            } 
        }
        
        // deal with ret vals
        self.emit_postcall_convention(
            &func_sig, &inst.value,
            stack_arg_size, f_context, vm);
    }
    
    #[allow(unused_variables)]
    fn process_dest(&mut self, ops: &Vec<P<TreeNode>>, dest: &Destination, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        for i in 0..dest.args.len() {
            let ref dest_arg = dest.args[i];
            match dest_arg {
                &DestArg::Normal(op_index) => {
                    let ref arg = ops[op_index];
//                    match arg.op {
//                        OpCode::RegI64 
//                        | OpCode::RegFP
//                        | OpCode::IntImmI64
//                        | OpCode::FPImm => {
//                            // do nothing
//                        },
//                        _ => {
//                            trace!("nested: compute arg for branch");
//                            // nested: compute arg
//                            self.instruction_select(arg, cur_func);
//                            
//                            self.emit_get_result(arg);
//                        }
//                    }
//                    
                    let ref target_args = f_content.get_block(dest.target).content.as_ref().unwrap().args;
                    let ref target_arg = target_args[i];
                    
                    self.emit_general_move(&arg, target_arg, f_content, f_context, vm);
                },
                &DestArg::Freshbound(_) => unimplemented!()
            }
        }
    }
    
    fn emit_common_prologue(&mut self, args: &Vec<P<Value>>, vm: &VM) {
        let block_name = "prologue".to_string();
        self.backend.start_block(block_name.clone());
        
        // no livein
        // liveout = entry block's args
        self.backend.set_block_livein(block_name.clone(), &vec![]);
        self.backend.set_block_liveout(block_name.clone(), args);
        
        // push rbp
        self.backend.emit_push_r64(&x86_64::RBP);
        // mov rsp -> rbp
        self.backend.emit_mov_r64_r64(&x86_64::RBP, &x86_64::RSP);
        
        // push all callee-saved registers
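        // each callee-saved register except RBP (on the System V AMD64 ABI the
        // callee-saved GPRs are normally RBX, RBP, R12-R15 - an assumption here;
        // the backend's CALLEE_SAVED_GPRs list is authoritative) is pushed and
        // recorded in the current frame via alloc_slot_for_callee_saved_reg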
        {
            let frame = self.current_frame.as_mut().unwrap();
            for i in 0..x86_64::CALLEE_SAVED_GPRs.len() {
                let ref reg = x86_64::CALLEE_SAVED_GPRs[i];
                // not pushing rbp (as we have done that)
                if reg.extract_ssa_id().unwrap() != x86_64::RBP.extract_ssa_id().unwrap() {
                    self.backend.emit_push_r64(&reg);
                    frame.alloc_slot_for_callee_saved_reg(reg.clone(), vm);
                }
            }
        }
        
        // unload arguments
        let mut gpr_arg_count = 0;
        // TODO: let mut fpr_arg_count = 0;
        // initial stack arg is at RBP+16
        //   arg           <- RBP + 16
        //   return addr
        //   old RBP       <- RBP
        let mut stack_arg_offset : i32 = 16;
        for arg in args {
            if arg.is_int_reg() {
                if gpr_arg_count < x86_64::ARGUMENT_GPRs.len() {
                    self.backend.emit_mov_r64_r64(&arg, &x86_64::ARGUMENT_GPRs[gpr_arg_count]);
                    gpr_arg_count += 1;
                } else {
                    // unload from stack
                    self.emit_load_base_offset(&arg, &x86_64::RBP.clone(), stack_arg_offset, vm);
                    
                    // move stack_arg_offset by the size of 'arg'
                    let arg_size = vm.get_backend_type_info(arg.ty.id()).size;
                    stack_arg_offset += arg_size as i32;
                }
            } else if arg.is_fp_reg() {
                unimplemented!();
            } else {
                // args that are not fp or int (possibly struct/array/etc)
                unimplemented!();
            }
        }
        
        self.backend.end_block(block_name);
    }
    
    fn emit_common_epilogue(&mut self, ret_inst: &Instruction, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        // epilogue is not a block (it's a few instructions inserted before return)
        // FIXME: this may change in the future
        
        // prepare return regs
        let ref ops = ret_inst.ops.read().unwrap();
        let ret_val_indices = match ret_inst.v {
            Instruction_::Return(ref vals) => vals,
            _ => panic!("expected ret inst")
        };
        
        let mut gpr_ret_count = 0;
        // TODO: let mut fpr_ret_count = 0;
        for i in ret_val_indices {
            let ref ret_val = ops[*i];
            if self.match_ireg(ret_val) {
                let reg_ret_val = self.emit_ireg(ret_val, f_content, f_context, vm);
                
                self.backend.emit_mov_r64_r64(&x86_64::RETURN_GPRs[gpr_ret_count], &reg_ret_val);
                gpr_ret_count += 1;
            } else if self.match_iimm(ret_val) {
                let imm_ret_val = self.node_iimm_to_i32(ret_val);