inst_sel.rs

use ast::ir::*;
use ast::ptr::*;
use ast::inst::*;
use ast::op;
use ast::op::OpCode;
use ast::types;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
use runtime::entrypoints::RuntimeEntrypoint;

use compiler::CompilerPass;
use compiler::backend;
use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::x86_64;
use compiler::backend::x86_64::CodeGenerator;
use compiler::backend::x86_64::ASMCodeGen;

use compiler::machine_code::CompiledFunction;
use compiler::frame::Frame;

use std::collections::HashMap;
use std::any::Any;

pub struct InstructionSelection {
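    // the pass name and backend code generator are fixed at construction;
    // the 'current_*' fields hold per-function state during selection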
    name: &'static str,
    backend: Box<CodeGenerator>,

    current_callsite_id: usize,
    current_frame: Option<Frame>,
    current_block: Option<MuName>,
    current_func_start: Option<ValueLocation>,
    // key: block id, val: callsite that names the block as exception block
    current_exn_callsites: HashMap<MuID, Vec<ValueLocation>>,
    // key: block id, val: block location
    current_exn_blocks: HashMap<MuID, ValueLocation>
}

impl <'a> InstructionSelection {
    #[cfg(feature = "aot")]
    pub fn new() -> InstructionSelection {
        InstructionSelection{
            name: "Instruction Selection (x64)",
            backend: Box::new(ASMCodeGen::new()),

            current_callsite_id: 0,
            current_frame: None,
            current_block: None,
            current_func_start: None,
            // key: block id, val: callsite that names the block as exception block
            current_exn_callsites: HashMap::new(),
            current_exn_blocks: HashMap::new()
        }
    }

    #[cfg(feature = "jit")]
    pub fn new() -> InstructionSelection {
        unimplemented!()
    }
    // in this pass, we assume that
    // 1. all temporaries will use 64-bit registers
    // 2. we do not need to backup/restore caller-saved registers
    // 3. we need to backup/restore all the callee-saved registers
    // if any of these assumptions breaks, we will need to re-emit the code
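    // naming convention in this pass (a summary, inferred from the code below):
    // match_*() predicates test whether a node fits an operand pattern
    // (ireg/iimm/mem/fpreg/cmp_res), and the corresponding emit_*() helper
    // materializes that operand before the backend emits instructions on it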
    #[allow(unused_variables)]
    fn instruction_select(&mut self, node: &'a TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        trace!("instsel on node {}", node);

        match node.v {
            TreeNode_::Instruction(ref inst) => {
                match inst.v {
                    Instruction_::Branch2{cond, ref true_dest, ref false_dest, true_prob} => {
                        // 'branch_if_true' == true, we emit cjmp the same as CmpOp  (je  for EQ, jne for NE)
                        // 'branch_if_true' == false, we emit opposite cjmp as CmpOp (jne for EQ, je  for NE)
                        let (fallthrough_dest, branch_dest, branch_if_true) = {
                            if true_prob > 0.5f32 {
                                (true_dest, false_dest, false)
                            } else {
                                (false_dest, true_dest, true)
                            }
                        };

                        let ops = inst.ops.read().unwrap();

                        self.process_dest(&ops, fallthrough_dest, f_content, f_context, vm);
                        self.process_dest(&ops, branch_dest, f_content, f_context, vm);

                        let branch_target = f_content.get_block(branch_dest.target).name().unwrap();

                        let ref cond = ops[cond];

                        if self.match_cmp_res(cond) {
                            trace!("emit cmp_eq-branch2");
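                            // emit_cmp_res emits the compare itself and returns the
                            // CmpOp, so the match below only picks the jcc; the
                            // opposite jcc is used when the likely target is laid
                            // out as the fallthrough (branch_if_true == false)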
                            match self.emit_cmp_res(cond, f_content, f_context, vm) {
                                op::CmpOp::EQ => {
                                    if branch_if_true {
                                        self.backend.emit_je(branch_target);
                                    } else {
                                        self.backend.emit_jne(branch_target);
                                    }
                                },
                                op::CmpOp::NE => {
                                    if branch_if_true {
                                        self.backend.emit_jne(branch_target);
                                    } else {
                                        self.backend.emit_je(branch_target);
                                    }
                                },
                                op::CmpOp::UGE => {
                                    if branch_if_true {
                                        self.backend.emit_jae(branch_target);
                                    } else {
                                        self.backend.emit_jb(branch_target);
                                    }
                                },
                                op::CmpOp::UGT => {
                                    if branch_if_true {
                                        self.backend.emit_ja(branch_target);
                                    } else {
                                        self.backend.emit_jbe(branch_target);
                                    }
                                },
                                op::CmpOp::ULE => {
                                    if branch_if_true {
                                        self.backend.emit_jbe(branch_target);
                                    } else {
                                        self.backend.emit_ja(branch_target);
                                    }
                                },
                                op::CmpOp::ULT => {
                                    if branch_if_true {
                                        self.backend.emit_jb(branch_target);
                                    } else {
                                        self.backend.emit_jae(branch_target);
                                    }
                                },
                                op::CmpOp::SGE => {
                                    if branch_if_true {
                                        self.backend.emit_jge(branch_target);
                                    } else {
                                        self.backend.emit_jl(branch_target);
                                    }
                                },
                                op::CmpOp::SGT => {
                                    if branch_if_true {
                                        self.backend.emit_jg(branch_target);
                                    } else {
                                        self.backend.emit_jle(branch_target);
                                    }
                                },
                                op::CmpOp::SLE => {
                                    if branch_if_true {
                                        self.backend.emit_jle(branch_target);
                                    } else {
                                        self.backend.emit_jg(branch_target);
                                    }
                                },
                                op::CmpOp::SLT => {
                                    if branch_if_true {
                                        self.backend.emit_jl(branch_target);
                                    } else {
                                        self.backend.emit_jge(branch_target);
                                    }
                                },
                                _ => unimplemented!()
                            }
                        } else if self.match_ireg(cond) {
                            trace!("emit ireg-branch2");

                            let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);

                            // emit: cmp cond_reg 1
                            self.backend.emit_cmp_imm32_r64(1, &cond_reg);
                            // emit: je #branch_dest
                            self.backend.emit_je(branch_target);
                        } else {
                            unimplemented!();
                        }
                    },

                    Instruction_::Select{cond, true_val, false_val} => {
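                        // select is lowered to a cmovCC/cmov-opposite pair, so
                        // tmp_res receives either tmp_true or tmp_false without
                        // any branching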
                        let ops = inst.ops.read().unwrap();

                        let ref cond = ops[cond];
                        let ref true_val = ops[true_val];
                        let ref false_val = ops[false_val];

                        if self.match_ireg(true_val) {
                            // moving integers/pointers
                            let tmp_res   = self.get_result_value(node);
                            let tmp_true  = self.emit_ireg(true_val, f_content, f_context, vm);
                            let tmp_false = self.emit_ireg(false_val, f_content, f_context, vm);

                            if self.match_cmp_res(cond) {
                                match self.emit_cmp_res(cond, f_content, f_context, vm) {
                                    op::CmpOp::EQ => {
                                        self.backend.emit_cmove_r64_r64 (&tmp_res, &tmp_true);
                                        self.backend.emit_cmovne_r64_r64(&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::NE => {
                                        self.backend.emit_cmovne_r64_r64(&tmp_res, &tmp_true);
                                        self.backend.emit_cmove_r64_r64 (&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::SGE => {
                                        self.backend.emit_cmovge_r64_r64(&tmp_res, &tmp_true);
                                        self.backend.emit_cmovl_r64_r64 (&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::SGT => {
                                        self.backend.emit_cmovg_r64_r64 (&tmp_res, &tmp_true);
                                        self.backend.emit_cmovle_r64_r64(&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::SLE => {
                                        self.backend.emit_cmovle_r64_r64(&tmp_res, &tmp_true);
                                        self.backend.emit_cmovg_r64_r64 (&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::SLT => {
                                        self.backend.emit_cmovl_r64_r64 (&tmp_res, &tmp_true);
                                        self.backend.emit_cmovge_r64_r64(&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::UGE => {
                                        self.backend.emit_cmovae_r64_r64(&tmp_res, &tmp_true);
                                        self.backend.emit_cmovb_r64_r64 (&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::UGT => {
                                        self.backend.emit_cmova_r64_r64 (&tmp_res, &tmp_true);
                                        self.backend.emit_cmovbe_r64_r64(&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::ULE => {
                                        self.backend.emit_cmovbe_r64_r64(&tmp_res, &tmp_true);
                                        self.backend.emit_cmova_r64_r64 (&tmp_res, &tmp_false);
                                    }
                                    op::CmpOp::ULT => {
                                        self.backend.emit_cmovb_r64_r64 (&tmp_res, &tmp_true);
                                        self.backend.emit_cmovae_r64_r64(&tmp_res, &tmp_false);
                                    }
                                    _ => panic!("expecting CmpOp for integers")
                                }
                            } else if self.match_ireg(cond) {
                                let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);

                                // emit: cmp cond_reg 1
                                self.backend.emit_cmp_imm32_r64(1, &tmp_cond);

                                // emit: cmove tmp_true -> tmp_res
                                self.backend.emit_cmove_r64_r64(&tmp_res, &tmp_true);

                                // emit: cmovne tmp_false -> tmp_res
                                self.backend.emit_cmovne_r64_r64(&tmp_res, &tmp_false);
                            } else {
                                unimplemented!()
                            }
                        } else {
                            // moving vectors, floating points
                            unimplemented!()
                        }
                    },

                    Instruction_::Branch1(ref dest) => {
                        let ops = inst.ops.read().unwrap();

                        self.process_dest(&ops, dest, f_content, f_context, vm);

                        let target = f_content.get_block(dest.target).name().unwrap();

                        trace!("emit branch1");
                        // jmp
                        self.backend.emit_jmp(target);
                    },

                    Instruction_::Switch{cond, ref default, ref branches} => {
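                        // switch is lowered to a linear cmp/je chain per case,
                        // ending with an unconditional jmp to the default block
                        // (no jump table is generated)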
                        let ops = inst.ops.read().unwrap();

                        let ref cond = ops[cond];

                        if self.match_ireg(cond) {
                            let tmp_cond = self.emit_ireg(cond, f_content, f_context, vm);

                            // emit each branch
                            for &(case_op_index, ref case_dest) in branches {
                                let ref case_op = ops[case_op_index];

                                // process dest
                                self.process_dest(&ops, case_dest, f_content, f_context, vm);

                                let target = f_content.get_block(case_dest.target).name().unwrap();

                                if self.match_iimm(case_op) {
                                    let imm = self.node_iimm_to_i32(case_op);

                                    // cmp case cond
                                    self.backend.emit_cmp_imm32_r64(imm, &tmp_cond);
                                    // je dest
                                    self.backend.emit_je(target);
                                } else if self.match_ireg(case_op) {
                                    let tmp_case_op = self.emit_ireg(case_op, f_content, f_context, vm);

                                    // cmp case cond
                                    self.backend.emit_cmp_r64_r64(&tmp_case_op, &tmp_cond);
                                    // je dest
                                    self.backend.emit_je(target);
                                } else {
                                    panic!("expecting ireg cond to be either iimm or ireg: {}", cond);
                                }
                            }

                            // emit default
                            self.process_dest(&ops, default, f_content, f_context, vm);
                            
                            let default_target = f_content.get_block(default.target).name().unwrap();
                            self.backend.emit_jmp(default_target);
                        } else {
                            panic!("expecting cond in switch to be ireg: {}", cond);
                        }
                    }
                    Instruction_::ExprCall{ref data, is_abort} => {
                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_mu_call(
                            inst, // inst: &Instruction,
                            data, // calldata: &CallData,
                            None, // resumption: Option<&ResumptionData>,
                            node, // cur_node: &TreeNode,
                            f_content, f_context, vm);
                    },

                    Instruction_::Call{ref data, ref resume} => {
                        self.emit_mu_call(
                            inst,
                            data,
                            Some(resume),
                            node,
                            f_content, f_context, vm);
                    },

                    Instruction_::ExprCCall{ref data, is_abort} => {
                        if is_abort {
                            unimplemented!()
                        }

                        self.emit_c_call_ir(inst, data, None, node, f_content, f_context, vm);
                    }

                    Instruction_::CCall{ref data, ref resume} => {
                        self.emit_c_call_ir(inst, data, Some(resume), node, f_content, f_context, vm);
                    }
                    
                    Instruction_::Return(_) => {
                        self.emit_common_epilogue(inst, f_content, f_context, vm);

                        self.backend.emit_ret();
                    },
                    
                    Instruction_::BinOp(op, op1, op2) => {
                        let ops = inst.ops.read().unwrap();

                        let res_tmp = self.get_result_value(node);

                        match op {
                            op::BinOp::Add => {
                                if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit add-ireg-imm");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.node_iimm_to_i32(&ops[op2]);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_imm32(&res_tmp, reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-ireg-mem");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_mem(&ops[op2], vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_mem64(&res_tmp, &reg_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit add-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_add_r64_r64(&res_tmp, &reg_op2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Sub => {
                                if self.match_ireg(&ops[op1]) && self.match_iimm(&ops[op2]) {
                                    trace!("emit sub-ireg-imm");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(&ops[op2]);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit sub-ireg-mem");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2], vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_ireg(&ops[op1]) && self.match_ireg(&ops[op2]) {
                                    trace!("emit sub-ireg-ireg");

                                    let reg_op1 = self.emit_ireg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_ireg(&ops[op2], f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &reg_op1);
                                    // sub op2, res
                                    self.backend.emit_sub_r64_r64(&res_tmp, &reg_op2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::And => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) && self.match_iimm(op2) {
                                    trace!("emit and-ireg-iimm");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // mov op1 -> res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_ireg(op1) && self.match_mem(op2) {
                                    trace!("emit and-ireg-mem");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_ireg(op1) && self.match_ireg(op2) {
                                    trace!("emit and-ireg-ireg");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // and op2, res -> res
                                    self.backend.emit_and_r64_r64(&res_tmp, &tmp_op2);
                                } else {
                                    unimplemented!()
                                }
                            },
                            op::BinOp::Xor => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_ireg(op1) && self.match_iimm(op2) {
                                    trace!("emit xor-ireg-iimm");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // mov op1 -> res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_imm32(&res_tmp, imm_op2);
                                } else if self.match_ireg(op1) && self.match_mem(op2) {
                                    trace!("emit xor-ireg-mem");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_ireg(op1) && self.match_ireg(op2) {
                                    trace!("emit xor-ireg-ireg");

                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
                                    let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    // mov op1, res
                                    self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    // xor op2, res -> res
                                    self.backend.emit_xor_r64_r64(&res_tmp, &tmp_op2);
                                } else {
                                    unimplemented!()
                                }
                            }
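                            // x86-64 MUL is single-operand: it implicitly multiplies
                            // RAX by the operand and leaves the low 64 bits of the
                            // product in RAX (the high bits go to RDX, unused here)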
                            op::BinOp::Mul => {
                                // mov op1 -> rax
                                let rax = x86_64::RAX.clone();
                                let op1 = &ops[op1];
                                if self.match_iimm(op1) {
                                    let imm_op1 = self.node_iimm_to_i32(op1);

                                    self.backend.emit_mov_r64_imm32(&rax, imm_op1);
                                } else if self.match_mem(op1) {
                                    let mem_op1 = self.emit_mem(op1, vm);

                                    self.backend.emit_mov_r64_mem64(&rax, &mem_op1);
                                } else if self.match_ireg(op1) {
                                    let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    self.backend.emit_mov_r64_r64(&rax, &reg_op1);
                                } else {
                                    unimplemented!();
                                }

                                // mul op2 -> rax
                                let op2 = &ops[op2];
                                if self.match_iimm(op2) {
                                    let imm_op2 = self.node_iimm_to_i32(op2);

                                    // put imm in a temporary
                                    // here we use result reg as temporary
                                    self.backend.emit_mov_r64_imm32(&res_tmp, imm_op2);

                                    self.backend.emit_mul_r64(&res_tmp);
                                } else if self.match_mem(op2) {
                                    let mem_op2 = self.emit_mem(op2, vm);

                                    self.backend.emit_mul_mem64(&mem_op2);
                                } else if self.match_ireg(op2) {
                                    let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                    self.backend.emit_mul_r64(&reg_op2);
                                } else {
                                    unimplemented!();
                                }

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &rax);
                            },
                            op::BinOp::Udiv => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_udiv(op1, op2, f_content, f_context, vm);

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RAX);
                            },
                            op::BinOp::Sdiv => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_idiv(op1, op2, f_content, f_context, vm);

                                // mov rax -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RAX);
                            },
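                            // x86-64 DIV/IDIV leave the quotient in RAX and the
                            // remainder in RDX: Udiv/Sdiv above copy RAX out,
                            // while Urem/Srem below copy RDX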
                            op::BinOp::Urem => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_udiv(op1, op2, f_content, f_context, vm);

                                // mov rdx -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RDX);
                            },
                            op::BinOp::Srem => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                self.emit_idiv(op1, op2, f_content, f_context, vm);

                                // mov rdx -> result
                                self.backend.emit_mov_r64_r64(&res_tmp, &x86_64::RDX);
                            },
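                            // variable shift counts on x86-64 must live in CL,
                            // which is why the register cases below first move
                            // op2 into RCX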
                            op::BinOp::Shl => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_mem(op1) {
                                    unimplemented!()
                                } else if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // shl op1, op2 -> op1
                                        self.backend.emit_shl_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // shl op1, cl -> op1
                                        self.backend.emit_shl_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else {
                                    panic!("unexpected op1 (not ireg not mem): {}", op1);
                                }
                            },
                            op::BinOp::Lshr => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_mem(op1) {
                                    unimplemented!()
                                } else if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // shr op1, op2 -> op1
                                        self.backend.emit_shr_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // shr op1, cl -> op1
                                        self.backend.emit_shr_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else {
                                    panic!("unexpected op1 (not ireg not mem): {}", op1);
                                }
                            },
                            op::BinOp::Ashr => {
                                let op1 = &ops[op1];
                                let op2 = &ops[op2];

                                if self.match_mem(op1) {
                                    unimplemented!()
                                } else if self.match_ireg(op1) {
                                    let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);

                                    if self.match_iimm(op2) {
                                        let imm_op2 = self.node_iimm_to_i32(op2) as i8;

                                        // sar op1, op2 -> op1
                                        self.backend.emit_sar_r64_imm8(&tmp_op1, imm_op2);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else if self.match_ireg(op2) {
                                        let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);

                                        // mov op2 -> rcx
                                        self.backend.emit_mov_r64_r64(&x86_64::RCX, &tmp_op2);

                                        // sar op1, cl -> op1
                                        self.backend.emit_sar_r64_cl(&tmp_op1);

                                        // mov op1 -> result
                                        self.backend.emit_mov_r64_r64(&res_tmp, &tmp_op1);
                                    } else {
                                        panic!("unexpected op2 (not ireg not iimm): {}", op2);
                                    }
                                } else {
                                    panic!("unexpected op1 (not ireg not mem): {}", op1);
                                }
                            },

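                            // f64 arithmetic below is emitted with SSE scalar
                            // instructions (movsd/addsd), not x87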
                            // floating point
                            op::BinOp::FAdd => {
                                if self.match_fpreg(&ops[op1]) && self.match_mem(&ops[op2]) {
                                    trace!("emit add-fpreg-mem");

                                    let reg_op1 = self.emit_fpreg(&ops[op1], f_content, f_context, vm);
                                    let mem_op2 = self.emit_mem(&ops[op2], vm);

                                    // movsd op1, res
                                    self.backend.emit_movsd_f64_f64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_addsd_f64_mem64(&res_tmp, &mem_op2);
                                } else if self.match_fpreg(&ops[op1]) && self.match_fpreg(&ops[op2]) {
                                    trace!("emit add-fpreg-fpreg");

                                    let reg_op1 = self.emit_fpreg(&ops[op1], f_content, f_context, vm);
                                    let reg_op2 = self.emit_fpreg(&ops[op2], f_content, f_context, vm);

                                    // movsd op1, res
                                    self.backend.emit_movsd_f64_f64(&res_tmp, &reg_op1);
                                    // add op2, res
                                    self.backend.emit_addsd_f64_f64(&res_tmp, &reg_op2);
                                } else {
                                    unimplemented!()
                                }
                            }

                            _ => unimplemented!()
                        }

                        // truncate result
                        if res_tmp.is_int_reg() {
                            self.emit_truncate_result(&UINT64_TYPE, &res_tmp.ty, &res_tmp, f_context, vm);
                        }
                    }

                    Instruction_::ConvOp{operation, ref from_ty, ref to_ty, operand} => {
                        let ops = inst.ops.read().unwrap();

                        let ref op = ops[operand];

                        let extract_int_len = |x: &P<MuType>| {
                            match x.v {
                                MuType_::Int(len) => len,
                                _ => panic!("only expect int types, found: {}", x)
                            }
                        };

                        match operation {
                            op::ConvOp::TRUNC => {
                                // currently we only use 64-bit registers
                                // so only keep what is needed in the register (set others to 0)

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // truncate result
                                    self.emit_truncate_result(from_ty, to_ty, &tmp_res, f_context, vm);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }
                            }
                            op::ConvOp::ZEXT => {
                                // currently we only use 64-bit registers
                                // so set irrelevant bits to 0
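                                // (e.g. ZEXT from i8: only the low 8 bits of the
                                // 64-bit register are kept, the rest are cleared)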
                                let from_ty_len = extract_int_len(from_ty);
                                let to_ty_len   = extract_int_len(to_ty);

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    if from_ty_len < 32 {
                                        let mask = match from_ty_len {
                                            8 => 0xFFi32,
                                            16 => 0xFFFFi32,
                                            _ => unimplemented!()
                                        };

                                        // mov op -> result
                                        self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                        // and mask result -> result
                                        self.backend.emit_and_r64_imm32(&tmp_res, mask);
                                    } else if from_ty_len == 32 {
                                        let tmp_mask = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                        self.backend.emit_mov_r64_imm64(&tmp_mask, 0xFFFFFFFF as i64);

                                        // mov op -> result
                                        self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                        // and mask result -> result
                                        self.backend.emit_and_r64_r64(&tmp_res, &tmp_mask);
                                    } else {
                                        unimplemented!()
                                    }
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op);
                                }
                            },
                            op::ConvOp::SEXT => {
                                // currently we only use 64-bit registers
                                // we left shift the value, then arithmetic right shift back
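                                // (e.g. SEXT i8 -> i64: shift = 56, so SHL moves the
                                // sign bit to bit 63 and SAR replicates it back down)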
                                let from_ty_len = extract_int_len(from_ty);
                                let to_ty_len   = extract_int_len(to_ty);

                                let shift : i8 = (to_ty_len - from_ty_len) as i8;

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    let tmp_res = self.get_result_value(node);

                                    // mov op -> result
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);

                                    // shl result, shift -> result
                                    self.backend.emit_shl_r64_imm8(&tmp_res, shift);

                                    // sar result, shift -> result
                                    self.backend.emit_sar_r64_imm8(&tmp_res, shift);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op)
                                }
                            }
                            op::ConvOp::REFCAST | op::ConvOp::PTRCAST => {
                                // just a mov (and hopefully reg alloc will coalesce it)
                                let tmp_res = self.get_result_value(node);

                                if self.match_ireg(op) {
                                    let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
                                    self.backend.emit_mov_r64_r64(&tmp_res, &tmp_op);
                                } else {
                                    panic!("unexpected op (expect ireg): {}", op)
                                }
                            }

                            _ => unimplemented!()
                        }
                    }

                    // load on x64 generates mov inst (no matter what order is specified)
                    // https://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
                    Instruction_::Load{is_ptr, order, mem_loc} => {
                        let ops = inst.ops.read().unwrap();
                        let ref loc_op = ops[mem_loc];

                        // check order
                        match order {
                            MemoryOrder::Relaxed
                            | MemoryOrder::Consume
                            | MemoryOrder::Acquire
                            | MemoryOrder::SeqCst
                            | MemoryOrder::NotAtomic => {},
                            _ => panic!("didn't expect order {:?} with load inst", order)
                        }

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);
                        let res_temp = self.get_result_value(node);

                        if self.match_ireg(node) {
                            // emit mov(GPR)
                            self.backend.emit_mov_r64_mem64(&res_temp, &resolved_loc);
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }
                    
                    Instruction_::Store{is_ptr, order, mem_loc, value} => {
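                        // store on x64: under the TSO memory model a plain mov
                        // already has release semantics, so only SeqCst stores
                        // would need extra fencing (hence generate_plain_mov below)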
                        let ops = inst.ops.read().unwrap();

                        let ref loc_op = ops[mem_loc];
                        let ref val_op = ops[value];

                        let generate_plain_mov : bool = {
                            match order {
                                MemoryOrder::Relaxed
                                | MemoryOrder::Release
                                | MemoryOrder::NotAtomic => true,
                                MemoryOrder::SeqCst => false,
                                _ => panic!("didn't expect order {:?} with store inst", order)
                            }
                        };

                        let resolved_loc = self.emit_node_addr_to_value(loc_op, f_content, f_context, vm);

                        if self.match_iimm(val_op) {
                            let val = self.node_iimm_to_i32(val_op);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_imm32(&resolved_loc, val);
                            } else {
                                unimplemented!()
                            }
                        } else if self.match_ireg(val_op) {
                            let val = self.emit_ireg(val_op, f_content, f_context, vm);
                            if generate_plain_mov {
                                self.backend.emit_mov_mem64_r64(&resolved_loc, &val);
                            } else {
                                unimplemented!()
                            }
                        } else {
                            // emit mov(FPR)
                            unimplemented!()
                        }
                    }
                    // memory insts: calculate the address, then lea
                    Instruction_::GetIRef(_)
                    | Instruction_::GetFieldIRef{..}
                    | Instruction_::GetVarPartIRef{..}
                    | Instruction_::ShiftIRef{..} => {
                        let mem_addr = self.emit_get_mem_from_inst(node, f_content, f_context, vm);
                        let tmp_res  = self.get_result_value(node);

                        self.backend.emit_lea_r64(&tmp_res, &mem_addr);
                    }

                    Instruction_::ThreadExit => {
                        // emit a call to swap_back_to_native_stack(sp_loc: Address)

                        // get thread local and add offset to get sp_loc
                        let tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);
                        self.backend.emit_add_r64_imm32(&tl, *thread::NATIVE_SP_LOC_OFFSET as i32);

                        self.emit_runtime_entry(&entrypoints::SWAP_BACK_TO_NATIVE_STACK, vec![tl.clone()], None, Some(node), f_content, f_context, vm);
                    }
                    Instruction_::New(ref ty) => {
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => panic!("cannot use NEW for hybrid, use NEWHYBRID instead"),
                                _ => {}
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let size = ty_info.size;
                        let ty_align = ty_info.alignment;

                        let const_size = self.make_value_int_const(size as u64, vm);
                        
                        self.emit_alloc_sequence(const_size, ty_align, node, f_content, f_context, vm);
                    }

                    Instruction_::NewHybrid(ref ty, var_len) => {
                        if cfg!(debug_assertions) {
                            match ty.v {
                                MuType_::Hybrid(_) => {},
                                _ => panic!("NEWHYBRID is only for allocating hybrid types, use NEW for others")
                            }
                        }

                        let ty_info = vm.get_backend_type_info(ty.id());
                        let ty_align = ty_info.alignment;
                        let fix_part_size = ty_info.size;
                        let var_ty_size = match ty.v {
                            MuType_::Hybrid(ref name) => {
                                let map_lock = HYBRID_TAG_MAP.read().unwrap();
                                let hybrid_ty_ = map_lock.get(name).unwrap();
                                let var_ty = hybrid_ty_.get_var_ty();

                                vm.get_backend_type_info(var_ty.id()).size
                            },
                            _ => panic!("only expect HYBRID type here")
                        };

                        // actual size = fix_part_size + var_ty_size * len
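                        // e.g. a hybrid with a 16-byte fixed part and 8-byte var-part
                        // elements, allocated with len = 10, needs 16 + 8 * 10 = 96 bytes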
                        let actual_size = {
                            let ops = inst.ops.read().unwrap();
                            let ref var_len = ops[var_len];

                            if self.match_iimm(var_len) {
                                let var_len = self.node_iimm_to_i32(var_len);
                                let actual_size = fix_part_size + var_ty_size * (var_len as usize);

                                self.make_value_int_const(actual_size as u64, vm)
                            } else {
                                let tmp_actual_size = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
                                let tmp_var_len = self.emit_ireg(var_len, f_content, f_context, vm);

                                let is_power_of_two = |x: usize| {
                                    use std::i8;

                                    let mut power_of_two = 1;
                                    let mut i: i8 = 0;
                                    while power_of_two < x && i < i8::MAX {
                                        power_of_two *= 2;
                                        i += 1;
                                    }

                                    if power_of_two == x {
                                        Some(i)
                                    } else {
                                        None
                                    }
                                };
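                                // the closure returns the exponent, i.e. the shift amount:
                                // is_power_of_two(8) == Some(3), and len << 3 == len * 8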

                                match is_power_of_two(var_ty_size) {
                                    Some(shift) => {
                                        // a shift-left will get the total size of var part
                                        self.backend.emit_shl_r64_imm8(&tmp_var_len, shift);

                                        // add with fix-part size
                                        self.backend.emit_add_r64_imm32(&tmp_var_len, fix_part_size as i32);

                                        // mov result to tmp_actual_size
                                        self.backend.emit_mov_r64_r64(&tmp_actual_size, &tmp_var_len);
                                    }
                                    None => {
                                        // we need to do a multiply

                                        // mov var_ty_size -> rax
                                        self.backend.emit_mov_r64_imm32(&x86_64::RAX, var_ty_size as i32);

                                        // mul tmp_var_len, rax -> rdx:rax
                                        self.backend.emit_mul_r64(&tmp_var_len);

                                        // add with fix-part size
                                        self.backend.emit_add_r64_imm32(&x86_64::RAX, fix_part_size as i32);

                                        // mov result to tmp_actual_size
                                        self.backend.emit_mov_r64_r64(&tmp_actual_size, &x86_64::RAX);
                                    }
                                }

                                tmp_actual_size
                            }
                        };

                        self.emit_alloc_sequence(actual_size, ty_align, node, f_content, f_context, vm);
                    }
                    
                    Instruction_::Throw(op_index) => {
                        let ops = inst.ops.read().unwrap();
                        let ref exception_obj = ops[op_index];
                        
                        self.emit_runtime_entry(
                            &entrypoints::THROW_EXCEPTION, 
                            vec![exception_obj.clone_value()], 
                            None,
                            Some(node), f_content, f_context, vm);
                    }
    
                    _ => unimplemented!()
                } // main switch
            },
            
            TreeNode_::Value(ref p) => {
        
            }
        }
    }
    
    fn make_temporary(&mut self, f_context: &mut FunctionContext, ty: P<MuType>, vm: &VM) -> P<Value> {
        f_context.make_temporary(vm.next_id(), ty).clone_value()
    }
    
    fn make_memory_op_base_offset (&mut self, base: &P<Value>, offset: i32, ty: P<MuType>, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: ty.clone(),
            v: Value_::Memory(MemoryLocation::Address{
                base: base.clone(),
                offset: Some(self.make_value_int_const(offset as u64, vm)),
                index: None,
                scale: None
            })
        })
    }

    fn make_memory_op_base_index(&mut self, base: &P<Value>, index: &P<Value>, scale: u8, ty: P<MuType>, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: ty.clone(),
            v: Value_::Memory(MemoryLocation::Address{
                base: base.clone(),
                offset: None,
                index: Some(index.clone()),
                scale: Some(scale)
            })
        })
    }
    
    fn make_value_int_const (&mut self, val: u64, vm: &VM) -> P<Value> {
        P(Value{
            hdr: MuEntityHeader::unnamed(vm.next_id()),
            ty: UINT64_TYPE.clone(),
            v: Value_::Constant(Constant::Int(val))
        })
    }

    fn emit_alloc_sequence (&mut self, size: P<Value>, align: usize, node: &TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) {
        if size.is_int_const() {
            // size is known at compile time, so we can choose between alloc_small and alloc_large now
            if size.extract_int_const() > mm::LARGE_OBJECT_THRESHOLD as u64 {
                self.emit_alloc_sequence_large(size, align, node, f_content, f_context, vm);
            } else {
                self.emit_alloc_sequence_small(size, align, node, f_content, f_context, vm);
            }
        } else {
            // size is unknown at compile time,
            // so we emit both the small and the large allocation paths
            // and select between them at runtime

            // emit: cmp size, THRESHOLD
            // emit: jg ALLOC_LARGE
            // emit: >> small object alloc
            // emit: jmp ALLOC_LARGE_END
            // emit: ALLOC_LARGE:
            // emit: >> large object alloc
            // emit: ALLOC_LARGE_END:
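            // because we split the current block here, behind the register
            // allocator's back, each sub-block is ended/started manually and the
            // values live across the new boundaries are reported explicitly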
            let blk_alloc_large = format!("{}_alloc_large", node.id());
            let blk_alloc_large_end = format!("{}_alloc_large_end", node.id());

            self.backend.emit_cmp_imm32_r64(mm::LARGE_OBJECT_THRESHOLD as i32, &size);
            self.backend.emit_jg(blk_alloc_large.clone());

            // alloc small here
            let tmp_res = self.emit_alloc_sequence_small(size.clone(), align, node, f_content, f_context, vm);

            self.backend.emit_jmp(blk_alloc_large_end.clone());

            // finishing current block
            let cur_block = self.current_block.as_ref().unwrap().clone();
            self.backend.end_block(cur_block.clone());
            self.backend.set_block_liveout(cur_block.clone(), &vec![tmp_res.clone()]);

            // alloc_large:
            self.current_block = Some(blk_alloc_large.clone());
            self.backend.start_block(blk_alloc_large.clone());
            self.backend.set_block_livein(blk_alloc_large.clone(), &vec![size.clone()]);

            let tmp_res = self.emit_alloc_sequence_large(size, align, node, f_content, f_context, vm);

            self.backend.end_block(blk_alloc_large.clone());
            self.backend.set_block_liveout(blk_alloc_large.clone(), &vec![tmp_res]);

            // alloc_large_end:
            self.backend.start_block(blk_alloc_large_end.clone());
            self.current_block = Some(blk_alloc_large_end.clone());
        }
    }

    fn emit_alloc_sequence_large (&mut self, size: P<Value>, align: usize, node: &TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) -> P<Value> {
        let tmp_res = self.get_result_value(node);

        // ASM: %tl = get_thread_local()
        let tmp_tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);

        // ASM: lea [%tl + allocator_offset] -> %tmp_allocator
        let allocator_offset = *thread::ALLOCATOR_OFFSET;
        let tmp_allocator = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
        self.emit_lea_base_immoffset(&tmp_allocator, &tmp_tl, allocator_offset as i32, vm);

        // ASM: %tmp_res = call muentry_alloc_large(%allocator, size, align)
        let const_align = self.make_value_int_const(align as u64, vm);

        self.emit_runtime_entry(
            &entrypoints::ALLOC_LARGE,
            vec![tmp_allocator, size.clone(), const_align],
            Some(vec![tmp_res.clone()]),
            Some(node), f_content, f_context, vm
        );

        tmp_res
    }

    fn emit_alloc_sequence_small (&mut self, size: P<Value>, align: usize, node: &TreeNode, f_content: &FunctionContent, f_context: &mut FunctionContext, vm: &VM) -> P<Value> {
        // emit immix allocation fast path
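        // bump-pointer allocation: the thread-local allocator keeps a cursor
        // and a limit; we align the cursor up, bump it by the requested size,
        // and fall back to the runtime slow path if the bump crosses the limit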

        // ASM: %tl = get_thread_local()
        let tmp_tl = self.emit_get_threadlocal(Some(node), f_content, f_context, vm);

        // ASM: mov [%tl + allocator_offset + cursor_offset] -> %cursor
        let cursor_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_CURSOR_OFFSET;
        let tmp_cursor = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
        self.emit_load_base_offset(&tmp_cursor, &tmp_tl, cursor_offset as i32, vm);

        // align up the cursor: (cursor + align - 1) & !(align - 1)
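        // e.g. with align = 8: start = (cursor + 7) & !7, which rounds the
        // cursor up to the next 8-byte boundary (a no-op if already aligned)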
        // ASM: lea align-1(%cursor) -> %start
        let align = align as i32;
        let tmp_start = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
        self.emit_lea_base_immoffset(&tmp_start, &tmp_cursor, align - 1, vm);
        // ASM: and %start, !(align-1) -> %start
        self.backend.emit_and_r64_imm32(&tmp_start, !(align - 1) as i32);

        // bump cursor
        // ASM: add %size, %start -> %end
        // or lea size(%start) -> %end
        let tmp_end = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
        if size.is_int_const() {
            let offset = size.extract_int_const() as i32;
            self.emit_lea_base_immoffset(&tmp_end, &tmp_start, offset, vm);
        } else {
            self.backend.emit_mov_r64_r64(&tmp_end, &tmp_start);
            self.backend.emit_add_r64_r64(&tmp_end, &size);
        }

        // check with limit
        // ASM: cmp %end, [%tl + allocator_offset + limit_offset]
        let limit_offset = *thread::ALLOCATOR_OFFSET + *mm::ALLOCATOR_LIMIT_OFFSET;
        let mem_limit = self.make_memory_op_base_offset(&tmp_tl, limit_offset as i32, ADDRESS_TYPE.clone(), vm);
        self.backend.emit_cmp_mem64_r64(&mem_limit, &tmp_end);

        // branch to slow path if end > limit (end - limit > 0)
        // ASM: jg alloc_slow
        let slowpath = format!("{}_allocslow", node.id());
        self.backend.emit_jg(slowpath.clone());

        // update cursor
        // ASM: mov %end -> [%tl + allocator_offset + cursor_offset]
        self.emit_store_base_offset(&tmp_tl, cursor_offset as i32, &tmp_end, vm);

        // put start as result
        // ASM: mov %start -> %result
        let tmp_res = self.get_result_value(node);
        self.backend.emit_mov_r64_r64(&tmp_res, &tmp_start);

        // ASM: jmp alloc_end
        let allocend = format!("{}_alloc_small_end", node.id());
        self.backend.emit_jmp(allocend.clone());

        // finishing current block
        let cur_block = self.current_block.as_ref().unwrap().clone();
        self.backend.end_block(cur_block.clone());
        self.backend.set_block_liveout(cur_block.clone(), &vec![tmp_res.clone()]);

        // alloc_slow:
        // call alloc_slow(size, align) -> %ret
        // new block (no livein)
        self.current_block = Some(slowpath.clone());
        self.backend.start_block(slowpath.clone());
        self.backend.set_block_livein(slowpath.clone(), &vec![size.clone()]);

        // arg1: allocator address
        let allocator_offset = *thread::ALLOCATOR_OFFSET;
        let tmp_allocator = self.make_temporary(f_context, ADDRESS_TYPE.clone(), vm);
        self.emit_lea_base_immoffset(&tmp_allocator, &tmp_tl, allocator_offset as i32, vm);
        // arg2: size
        // arg3: align
        let const_align = self.make_value_int_const(align as u64, vm);

        self.emit_runtime_entry(
            &entrypoints::ALLOC_SLOW,
            vec![tmp_allocator, size.clone(), const_align],
            Some(vec![
            tmp_res.clone()
            ]),
            Some(node), f_content, f_context, vm
        );

        // end block (no liveout other than result)
        self.backend.end_block(slowpath.clone());
        self.backend.set_block_liveout(slowpath.clone(), &vec![tmp_res.clone()]);

        // block: alloc_end
        self.backend.start_block(allocend.clone());
        self.current_block = Some(allocend.clone());

        tmp_res
    }

    fn emit_truncate_result (&mut self, from_ty: &P<MuType>, to_ty: &P<MuType>, op: &P<Value>, f_context: &mut FunctionContext, vm: &VM) {
        // we currently use 64-bit registers for everything,
        // so keep only the bits the target type needs and zero the rest
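        // e.g. truncating i64 -> i16 keeps the low 16 bits: and result, 0xFFFF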
        let from_ty_len = match from_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", from_ty)
        };
        let to_ty_len   = match to_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", to_ty)
        };

        if from_ty_len == to_ty_len {
            return;
        } else {
            debug_assert!(from_ty_len > to_ty_len);

            if to_ty_len < 32 {
                // ignoring from_ty for now (we use 64-bit registers for everything)
                let mask = match to_ty_len {
                    8 => 0xFFi32,
                    16 => 0xFFFFi32,
                    _ => unimplemented!()
                };

                // and mask, result -> result
                self.backend.emit_and_r64_imm32(&op, mask);
            } else if to_ty_len == 32 {
                let tmp_mask = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);

                self.backend.emit_mov_r64_imm64(&tmp_mask, 0xFFFFFFFF as i64);

                self.backend.emit_and_r64_r64(&op, &tmp_mask);
            } else {
                unimplemented!()
            }
        }
    }

    fn emit_sign_extend_operand (&mut self, from_ty: &P<MuType>, to_ty: &P<MuType>, op: &P<Value>) {
        // we currently use 64-bit registers for everything:
        // shift the value left, then arithmetic-shift right to replicate the sign bit
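        // e.g. widening i8 -> i64 uses shift = 56: shl 56 moves bit 7 up to
        // bit 63, and sar 56 shifts back, copying the sign into bits 8..63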
        let from_ty_len = match from_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", from_ty)
        };
        let to_ty_len   = match to_ty.v {
            MuType_::Int(len) => len,
            _ => panic!("only expect int types, found: {}", to_ty)
        };

        if from_ty_len == to_ty_len {
            return;
        } else {
            debug_assert!(to_ty_len > from_ty_len);

            let shift : i8 = (to_ty_len - from_ty_len) as i8;

            // shl result, shift -> result
            self.backend.emit_shl_r64_imm8(&op, shift);
            // sar result, shift -> result
            self.backend.emit_sar_r64_imm8(&op, shift);
        }
    }

    fn emit_load_base_offset (&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, dest.ty.clone(), vm);

        if dest.is_int_reg() {
            self.backend.emit_mov_r64_mem64(dest, &mem);
        } else if dest.is_fp_reg() {
            self.backend.emit_movsd_f64_mem64(dest, &mem);
        } else {
            unimplemented!();
        }
    }
    
    fn emit_store_base_offset (&mut self, base: &P<Value>, offset: i32, src: &P<Value>, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, src.ty.clone(), vm);
        
        self.backend.emit_mov_mem64_r64(&mem, src);
    }
    
    fn emit_lea_base_immoffset(&mut self, dest: &P<Value>, base: &P<Value>, offset: i32, vm: &VM) {
        let mem = self.make_memory_op_base_offset(base, offset, ADDRESS_TYPE.clone(), vm);
        
        self.backend.emit_lea_r64(dest, &mem);
    }

    fn emit_udiv (
        &mut self,
        op1: &TreeNode, op2: &TreeNode,
        f_content: &FunctionContent,
        f_context: &mut FunctionContext,
        vm: &VM)
    {
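        // x86-64 DIV divides the 128-bit value rdx:rax by its operand, leaving
        // the quotient in rax and the remainder in rdx, so the dividend is moved
        // into rax and rdx is zeroed for an unsigned 64-bit division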
        let rax = x86_64::RAX.clone();

        debug_assert!(self.match_ireg(op1));
        let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);
        self.emit_move_value_to_value(&rax, &reg_op1);

        // xorq rdx, rdx -> rdx
        let rdx = x86_64::RDX.clone();
        self.backend.emit_xor_r64_r64(&rdx, &rdx);

        // div op2
        if self.match_mem(op2) {
            let mem_op2 = self.emit_mem(op2, vm);

            self.backend.emit_div_mem64(&mem_op2);
        } else if self.match_iimm(op2) {
            let imm = self.node_iimm_to_i32(op2);
            // move the immediate into a temporary (div takes no immediate operand)
            let temp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
            self.backend.emit_mov_r64_imm32(&temp, imm);

            // div tmp
            self.backend.emit_div_r64(&temp);
        } else if self.match_ireg(op2) {
            let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);

            self.backend.emit_div_r64(&reg_op2);