Commit 05cbb3dc authored by qinsoon

implemented fptoui. revisited codegen for uitofp: a different int length

results in different code (the same as clang emits)
parent 8166d516
Pipeline #269 passed in 32 minutes and 7 seconds
......@@ -2718,6 +2718,77 @@ impl CodeGenerator for ASMCodeGen {
false
)
}
// move aligned packed double-precision fp values
fn emit_movapd_f64_mem128(&mut self, dest: Reg, src: Mem) {
trace!("emit movapd {} -> {}", src, dest);
let (mem, mut uses) = self.prepare_mem(src, 6 + 1);
let (reg, id2, loc2) = self.prepare_fpreg(dest, 6 + 1 + mem.len() + 1);
// the memory operand does not use an fp register, so insert the use of the destination fp register manually
uses.insert(id2, vec![loc2.clone()]);
let asm = format!("movapd {},{}", mem, reg);
self.add_asm_inst(
asm,
linked_hashmap!{
id2 => vec![loc2.clone()]
},
uses,
true
)
}
fn emit_movapd_f64_f64 (&mut self, dest: Reg, src: Reg) {
trace!("emit movapd {} -> {}", src, dest);
let (reg1, id1, loc1) = self.prepare_fpreg(src, 6 + 1);
let (reg2, id2, loc2) = self.prepare_fpreg(dest, 6 + 1 + reg1.len() + 1);
let asm = format!("movapd {},{}", reg1, reg2);
self.add_asm_inst(
asm,
linked_hashmap!{
id2 => vec![loc2.clone()]
},
{
if id1 == id2 {
linked_hashmap!{id1 => vec![loc1, loc2]}
} else {
linked_hashmap!{
id1 => vec![loc1],
id2 => vec![loc2]
}
}
},
false
)
}
fn emit_cvttsd2si_r_f64 (&mut self, dest: Reg, src: Reg) {
let len = check_op_len(dest);
let inst = "cvttsd2si".to_string() + &op_postfix(len);
trace!("emit: {} {} -> {}", inst, src, dest);
let (reg1, id1, loc1) = self.prepare_fpreg(src, inst.len() + 1);
let (reg2, id2, loc2) = self.prepare_reg (dest, inst.len() + 1 + reg1.len() + 1);
let asm = format!("{} {},{}", inst, reg1, reg2);
self.add_asm_inst(
asm,
linked_hashmap!{
id2 => vec![loc2]
},
linked_hashmap!{
id1 => vec![loc1]
},
false
)
}
}
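All of these emitters follow the same bookkeeping convention: the second argument to prepare_mem/prepare_reg/prepare_fpreg is the byte offset at which that operand begins in the final AT&T-syntax string (the `6 + 1` is the length of "movapd" plus the separating space), presumably so operands can later be rewritten in place after register allocation. A standalone sketch of that offset arithmetic, using a hypothetical helper `operand_offsets` that is not part of the backend:

```rust
// Hypothetical helper mirroring the offset bookkeeping: for "mnemonic src,dest",
// return where each operand starts in the emitted string.
fn operand_offsets(mnemonic: &str, src: &str) -> (usize, usize) {
    let src_off = mnemonic.len() + 1;       // e.g. 6 + 1 for "movapd "
    let dest_off = src_off + src.len() + 1; // skip the src operand and the ','
    (src_off, dest_off)
}

fn main() {
    let asm = format!("{} {},{}", "movapd", "%xmm1", "%xmm0");
    let (s, d) = operand_offsets("movapd", "%xmm1");
    assert_eq!(&asm[s..s + 5], "%xmm1");
    assert_eq!(&asm[d..d + 5], "%xmm0");
}
```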
fn create_emit_directory(vm: &VM) {
......@@ -2753,7 +2824,11 @@ pub fn emit_code(fv: &mut MuFunctionVersion, vm: &VM) {
// constants in text section
file.write("\t.text\n".as_bytes()).unwrap();
file.write("\t.align 8\n".as_bytes()).unwrap();
// FIXME: need a more precise way to determine alignment
// (probably use alignment info from the backend type info, which requires introducing int128 to Zebu)
write_const_min_align(&mut file);
for (id, constant) in cf.consts.iter() {
let mem = cf.const_mem.get(id).unwrap();
......@@ -2768,6 +2843,19 @@ pub fn emit_code(fv: &mut MuFunctionVersion, vm: &VM) {
}
}
// minimum alignment is 16 bytes (written as 4, i.e. 2^4, on macOS, where .align takes a power of two)
#[cfg(target_os = "linux")]
fn write_const_min_align(f: &mut File) {
use std::io::Write;
f.write("\t.align 16\n".as_bytes()).unwrap();
}
#[cfg(target_os = "macos")]
fn write_const_min_align(f: &mut File) {
use std::io::Write;
f.write("\t.align 4\n".as_bytes()).unwrap();
}
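The two platform-specific bodies express the same 16-byte alignment: GNU as on x86-64 Linux reads `.align 16` as a byte count, while the Mach-O assembler on macOS reads `.align 4` as an exponent (2^4 = 16). A hypothetical generalisation of the kind the FIXME above hints at, not part of this commit, that would take the byte alignment from backend type info:

```rust
// Hypothetical: build the alignment directive for an arbitrary power-of-two
// byte alignment instead of hard-coding 16.
#[cfg(target_os = "linux")]
fn const_align_directive(bytes: u64) -> String {
    debug_assert!(bytes.is_power_of_two());
    format!("\t.align {}\n", bytes) // GNU as on x86-64 ELF: byte count
}

#[cfg(target_os = "macos")]
fn const_align_directive(bytes: u64) -> String {
    debug_assert!(bytes.is_power_of_two());
    format!("\t.align {}\n", bytes.trailing_zeros()) // Mach-O: log2 of the byte count
}

fn main() {
    // 16-byte alignment -> ".align 16" on Linux, ".align 4" on macOS
    println!("{}", const_align_directive(16));
}
```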
fn write_const(f: &mut File, constant: P<Value>, loc: P<Value>) {
use std::io::Write;
......
......@@ -184,6 +184,7 @@ pub trait CodeGenerator {
// fp conversion
fn emit_cvtsi2sd_f64_r (&mut self, dest: Reg, src: Reg);
fn emit_cvtsd2si_r_f64 (&mut self, dest: Reg, src: Reg);
fn emit_cvttsd2si_r_f64 (&mut self, dest: Reg, src: Reg);
// used for unsigned int to fp conversion
......@@ -193,4 +194,8 @@ pub trait CodeGenerator {
fn emit_subpd_f64_mem128 (&mut self, dest: Reg, src: Mem);
// packed double-fp horizontal add
fn emit_haddpd_f64_f64 (&mut self, dest: Reg, src: Reg);
// move aligned packed double-precision fp values
fn emit_movapd_f64_mem128(&mut self, dest: Reg, src: Mem);
fn emit_movapd_f64_f64 (&mut self, dest: Reg, src: Reg);
}
......@@ -76,6 +76,12 @@ lazy_static! {
})
]))
});
pub static ref FPTOUI_C : P<Value> = P(Value{
hdr: MuEntityHeader::named(new_internal_id(), Mu("FPTOUI_C")),
ty : UINT64_TYPE.clone(),
v : Value_::Constant(Constant::Int(4890909195324358656u64))
});
}
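The integer payload of FPTOUI_C is the IEEE-754 bit pattern of 2^63 as a double (0x43E0000000000000); the FPTOUI code below subtracts it to bring values at or above 2^63 into the signed-convertible range. A quick self-contained check of that value (illustration only):

```rust
fn main() {
    let two_63: f64 = 9223372036854775808.0; // 2^63
    assert_eq!(two_63.to_bits(), 4890909195324358656u64); // 0x43E0_0000_0000_0000
    assert_eq!(two_63.to_bits(), 0x43E0_0000_0000_0000);
}
```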
pub struct InstructionSelection {
......@@ -1239,20 +1245,104 @@ impl <'a> InstructionSelection {
if self.match_ireg(op) {
let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
let op_ty_size = vm.get_backend_type_info(tmp_op.ty.id()).size;
match op_ty_size {
8 => {
// movd/movq op -> res
self.backend.emit_mov_fpr_r64(&tmp_res, &tmp_op);
// punpckldq UITOFP_C0, tmp_res -> tmp_res
// (interleave low doublewords: xmm = {xmm[0], mem[0], xmm[1], mem[1]})
let mem_c0 = self.get_mem_for_const(UITOFP_C0.clone(), vm);
self.backend.emit_punpckldq_f64_mem128(&tmp_res, &mem_c0);
// subpd UITOFP_C1, tmp_res -> tmp_res
let mem_c1 = self.get_mem_for_const(UITOFP_C1.clone(), vm);
self.backend.emit_subpd_f64_mem128(&tmp_res, &mem_c1);
// haddpd tmp_res, tmp_res -> tmp_res
self.backend.emit_haddpd_f64_f64(&tmp_res, &tmp_res);
}
4 => {
let tmp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
// movl op -> tmp(32) (the 32-bit move implicitly zero-extends into the 64-bit tmp)
let tmp32 = unsafe {tmp.as_type(UINT32_TYPE.clone())};
self.backend.emit_mov_r_r(&tmp32, &tmp_op);
// cvtsi2sd %tmp(64) -> %tmp_res
self.backend.emit_cvtsi2sd_f64_r(&tmp_res, &tmp);
}
2 | 1 => {
let tmp_op32 = unsafe {tmp_op.as_type(UINT32_TYPE.clone())};
self.backend.emit_cvtsi2sd_f64_r(&tmp_res, &tmp_op32);
}
_ => panic!("not implemented int length {}", op_ty_size)
}
} else {
panic!("unexpected op (expected ireg): {}", op)
}
}
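For reference, the 64-bit UITOFP path above is the standard SSE2 u64 -> f64 idiom (the same sequence clang emits): punpckldq splices the two 32-bit halves of the operand into the mantissas of 2^52 and 2^84, subpd strips those exponent constants, and haddpd adds the halves back with a single rounding. A scalar Rust sketch of the arithmetic, not backend code; the exact contents of UITOFP_C0/UITOFP_C1 are not shown in this hunk and are assumed here:

```rust
/// Scalar model of the punpckldq/subpd/haddpd sequence used for u64 -> f64.
/// Assumes UITOFP_C0 holds the dword pattern {0x43300000, 0x45300000} and
/// UITOFP_C1 holds the doubles {2^52, 2^84}.
fn uitofp_u64_model(x: u64) -> f64 {
    // punpckldq: pack lo32(x) into the mantissa of 2^52 and hi32(x) into the
    // mantissa of 2^84; both doubles are exact.
    let lo = f64::from_bits(0x4330_0000_0000_0000 | (x & 0xffff_ffff)); // 2^52 + lo32
    let hi = f64::from_bits(0x4530_0000_0000_0000 | (x >> 32));         // 2^84 + hi32*2^32
    // subpd: remove the exponent constants (both subtractions are exact).
    let lo = lo - f64::from_bits(0x4330_0000_0000_0000); // = lo32(x)
    let hi = hi - f64::from_bits(0x4530_0000_0000_0000); // = hi32(x) * 2^32
    // haddpd: one rounded addition gives the correctly rounded result.
    hi + lo
}

fn main() {
    assert_eq!(uitofp_u64_model(0), 0.0);
    assert_eq!(uitofp_u64_model(u64::MAX), 18446744073709551615.0); // rounds to 2^64
}
```

For 32-bit operands the backend instead zero-extends into a 64-bit temporary and uses the signed cvtsi2sd, which is exact because every u32 value fits in the non-negative i64 range.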
op::ConvOp::FPTOUI => {
let tmp_res = self.get_result_value(node);
let res_ty_size = vm.get_backend_type_info(tmp_res.ty.id()).size;
if self.match_fpreg(op) {
let tmp_op = self.emit_fpreg(op, f_content, f_context, vm);
match res_ty_size {
8 => {
let tmp1 = self.make_temporary(f_context, DOUBLE_TYPE.clone(), vm);
let tmp2 = self.make_temporary(f_context, DOUBLE_TYPE.clone(), vm);
// movsd FPTOUI_C -> %tmp1
let mem_c = self.get_mem_for_const(FPTOUI_C.clone(), vm);
self.backend.emit_movsd_f64_mem64(&tmp1, &mem_c);
// movapd %tmp_op -> %tmp2
self.backend.emit_movapd_f64_f64(&tmp2, &tmp_op);
// subsd %tmp1, %tmp2 -> %tmp2
self.backend.emit_subsd_f64_f64(&tmp2, &tmp1);
// cvttsd2si %tmp2 -> %tmp_res
self.backend.emit_cvttsd2si_r_f64(&tmp_res, &tmp2);
let tmp_const = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
// mov 0x8000000000000000 -> %tmp_const
self.backend.emit_mov_r64_imm64(&tmp_const, -9223372036854775808i64);
// xor %tmp_res, %tmp_const -> %tmp_const
self.backend.emit_xor_r_r(&tmp_const, &tmp_res);
// cvttsd2si %tmp_op -> %tmp_res
self.backend.emit_cvttsd2si_r_f64(&tmp_res, &tmp_op);
// ucomisd %tmp_op %tmp1
self.backend.emit_ucomisd_f64_f64(&tmp1, &tmp_op);
// cmovaeq %tmp_const -> %tmp_res
self.backend.emit_cmovae_r_r(&tmp_res, &tmp_const);
}
4 => {
let tmp_res64 = unsafe {tmp_res.as_type(UINT64_TYPE.clone())};
// cvttsd2si %tmp_op -> %tmp_res(64)
self.backend.emit_cvttsd2si_r_f64(&tmp_res64, &tmp_op);
}
2 | 1 => {
let tmp_res32 = unsafe {tmp_res.as_type(UINT32_TYPE.clone())};
// cvttsd2si %tmp_op -> %tmp_res(32)
self.backend.emit_cvttsd2si_r_f64(&tmp_res32, &tmp_op);
// movz %tmp_res -> %tmp_res(32)
self.backend.emit_movz_r_r(&tmp_res32, &tmp_res);
}
_ => panic!("not implemented int length {}", res_ty_size)
}
} else {
panic!("unexpected op (expected ireg): {}", op)
}
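The 64-bit FPTOUI path follows the usual pattern for targets without an unsigned conversion instruction (again matching clang): convert the value directly with cvttsd2si, also convert (op - 2^63) with the sign bit xor'd back in, and let cmovae pick the second result when op >= 2^63 (FPTOUI_C). A scalar sketch of that selection, assuming well-defined inputs in [0, 2^64); out-of-range and NaN behaviour is not modelled:

```rust
/// Scalar model of the emitted f64 -> u64 sequence (illustration only, not backend code).
fn fptoui_u64_model(x: f64) -> u64 {
    const TWO_63: f64 = 9223372036854775808.0; // FPTOUI_C = 2^63 as f64
    if x < TWO_63 {
        // cvttsd2si on the original value (truncation toward zero)
        x as i64 as u64
    } else {
        // cvttsd2si on (x - 2^63), then xor the top bit back in
        // (the tmp_const selected by cmovae in the generated code)
        ((x - TWO_63) as i64 as u64) ^ 0x8000_0000_0000_0000
    }
}

fn main() {
    assert_eq!(fptoui_u64_model(3.9), 3);
    assert_eq!(fptoui_u64_model(9.3e18), 9300000000000000000);
}
```

For 32-bit and smaller results a direct cvttsd2si into a wider view of the result register suffices, since the unsigned range of the narrow type fits inside the signed range of the wider register.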
......
......@@ -67,8 +67,6 @@ def test_double_sitofp():
fnp, _ = fncptr_from_c_script("test_double_sitofp.c", "test_fnc", restype=ctypes.c_double)
assert fnp() == -42.0
-@pytest.mark.xfail(reason='not implemented yet')
@may_spawn_proc
def test_double_uitofp():
fnp, _ = fncptr_from_c_script("test_double_uitofp.c", "test_fnc", restype=ctypes.c_double)
......@@ -79,8 +77,6 @@ def test_double_fptosi():
fnp, _ = fncptr_from_c_script("test_double_fptosi.c", "test_fnc", restype=ctypes.c_int64)
assert fnp() == -3
-@pytest.mark.xfail(reason='not implemented yet')
@may_spawn_proc
def test_double_fptoui():
fnp, _ = fncptr_from_c_script("test_double_fptoui.c", "test_fnc", restype=ctypes.c_uint64)
......