Commit 05cbb3dc authored by qinsoon's avatar qinsoon
Browse files

implemented fptoui. revisited codegen for uitofp, different int length

results in different code (the same as clang did)
parent 8166d516
......@@ -2718,6 +2718,77 @@ impl CodeGenerator for ASMCodeGen {
false
)
}
// move aligned packed double-precision fp values
// emits `movapd <mem>,<reg>`: `src` is the memory operand, `dest` the fp register that is written
fn emit_movapd_f64_mem128(&mut self, dest: Reg, src: Mem) {
    trace!("emit movapd {} -> {}", src, dest);

    // "movapd" is 6 characters; +1 for the following space, then the memory operand and a comma
    let (mem, uses) = self.prepare_mem(src, 6 + 1);
    let (reg, id2, loc2) = self.prepare_fpreg(dest, 6 + 1 + mem.len() + 1);

    let asm = format!("movapd {},{}", mem, reg);

    // a plain move only writes its destination, so `dest` is recorded as a define and
    // NOT as a use (unlike read-modify-write instructions such as subpd/punpckldq);
    // the uses are exactly what prepare_mem reported for the memory operand
    self.add_asm_inst(
        asm,
        linked_hashmap!{
            id2 => vec![loc2]
        },
        uses,
        true
    )
}
// emits `movapd <src>,<dest>`: a move between two fp registers
// (both operands go through prepare_fpreg, so `src` is typed as a register)
fn emit_movapd_f64_f64 (&mut self, dest: Reg, src: Reg) {
    trace!("emit movapd {} -> {}", src, dest);

    // "movapd" is 6 characters; +1 for the following space, then the source register and a comma
    let (reg1, id1, loc1) = self.prepare_fpreg(src, 6 + 1);
    let (reg2, id2, loc2) = self.prepare_fpreg(dest, 6 + 1 + reg1.len() + 1);

    let asm = format!("movapd {},{}", reg1, reg2);

    // a move reads only `src` and writes only `dest`; listing `dest` among the uses
    // would make a freshly created destination temporary look used before it is defined
    self.add_asm_inst(
        asm,
        linked_hashmap!{
            id2 => vec![loc2]
        },
        linked_hashmap!{
            id1 => vec![loc1]
        },
        false
    )
}
// cvttsd2si: convert (with truncation) the scalar double in fp register `src`
// into the integer register `dest`; the mnemonic suffix comes from the length of `dest`
fn emit_cvttsd2si_r_f64 (&mut self, dest: Reg, src: Reg) {
    let inst = format!("cvttsd2si{}", op_postfix(check_op_len(dest)));
    trace!("emit: {} {} -> {}", inst, src, dest);

    // the second argument appears to be where the operand starts in the asm text:
    // mnemonic + space for the source, then source + comma for the destination
    let src_start = inst.len() + 1;
    let (src_reg, src_id, src_loc) = self.prepare_fpreg(src, src_start);
    let dest_start = src_start + src_reg.len() + 1;
    let (dest_reg, dest_id, dest_loc) = self.prepare_reg (dest, dest_start);

    // `dest` is written, `src` is read
    let defines = linked_hashmap!{
        dest_id => vec![dest_loc]
    };
    let uses = linked_hashmap!{
        src_id => vec![src_loc]
    };

    self.add_asm_inst(
        format!("{} {},{}", inst, src_reg, dest_reg),
        defines,
        uses,
        false
    )
}
}
fn create_emit_directory(vm: &VM) {
......@@ -2753,7 +2824,11 @@ pub fn emit_code(fv: &mut MuFunctionVersion, vm: &VM) {
// constants in text section
file.write("\t.text\n".as_bytes()).unwrap();
file.write("\t.align 8\n".as_bytes()).unwrap();
// FIXME: need a more precise way to determine alignment
// (probably use alignment backend info, which requires introducing int128 to zebu)
write_const_min_align(&mut file);
for (id, constant) in cf.consts.iter() {
let mem = cf.const_mem.get(id).unwrap();
......@@ -2768,6 +2843,19 @@ pub fn emit_code(fv: &mut MuFunctionVersion, vm: &VM) {
}
}
// min alignment as 16 byte (written as 4 (2^4) on macos)
// Writes the assembler alignment directive for the constant area to `f`.
// Panics if the directive cannot be written in full.
#[cfg(target_os = "linux")]
fn write_const_min_align(f: &mut File) {
    use std::io::Write;

    // write_all retries until the whole directive is written;
    // a bare `write` may stop after a partial write and only report the byte count
    f.write_all(b"\t.align 16\n").unwrap();
}

// macos variant: the same 16-byte alignment, expressed as a power of two
#[cfg(target_os = "macos")]
fn write_const_min_align(f: &mut File) {
    use std::io::Write;

    // write_all retries until the whole directive is written;
    // a bare `write` may stop after a partial write and only report the byte count
    f.write_all(b"\t.align 4\n").unwrap();
}
fn write_const(f: &mut File, constant: P<Value>, loc: P<Value>) {
use std::io::Write;
......
......@@ -184,6 +184,7 @@ pub trait CodeGenerator {
// fp conversion
fn emit_cvtsi2sd_f64_r (&mut self, dest: Reg, src: Reg);
fn emit_cvtsd2si_r_f64 (&mut self, dest: Reg, src: Reg);
fn emit_cvttsd2si_r_f64 (&mut self, dest: Reg, src: Reg);
// used for unsigned int to fp conversion
......@@ -193,4 +194,8 @@ pub trait CodeGenerator {
fn emit_subpd_f64_mem128 (&mut self, dest: Reg, src: Mem);
// packed double-fp horizontal add
fn emit_haddpd_f64_f64 (&mut self, dest: Reg, src: Reg);
// move aligned packed double-precision fp values
// (mem128 variant: `src` is a memory operand; f64 variant: both operands are fp registers)
fn emit_movapd_f64_mem128(&mut self, dest: Reg, src: Mem);
fn emit_movapd_f64_f64 (&mut self, dest: Reg, src: Reg);
}
......@@ -76,6 +76,12 @@ lazy_static! {
})
]))
});
// 2^63 as an IEEE-754 double, kept as its raw 64-bit pattern
// (exponent field 0x43E = 1023 + 63, mantissa all zero)
pub static ref FPTOUI_C : P<Value> = P(Value{
    hdr: MuEntityHeader::named(new_internal_id(), Mu("FPTOUI_C")),
    ty : UINT64_TYPE.clone(),
    v : Value_::Constant(Constant::Int(0x43E0_0000_0000_0000u64))
});
}
pub struct InstructionSelection {
......@@ -1239,20 +1245,104 @@ impl <'a> InstructionSelection {
if self.match_ireg(op) {
let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
// movd/movq op -> res
self.backend.emit_mov_fpr_r64(&tmp_res, &tmp_op);
let op_ty_size = vm.get_backend_type_info(tmp_op.ty.id()).size;
match op_ty_size {
8 => {
// movd/movq op -> res
self.backend.emit_mov_fpr_r64(&tmp_res, &tmp_op);
// punpckldq UITOFP_C0, tmp_res -> tmp_res
// (interleaving low bytes: xmm = xmm[0] mem[0] xmm[1] mem[1]
let mem_c0 = self.get_mem_for_const(UITOFP_C0.clone(), vm);
self.backend.emit_punpckldq_f64_mem128(&tmp_res, &mem_c0);
// subpd UITOFP_C1, tmp_res -> tmp_res
let mem_c1 = self.get_mem_for_const(UITOFP_C1.clone(), vm);
self.backend.emit_subpd_f64_mem128(&tmp_res, &mem_c1);
// haddpd tmp_res, tmp_res -> tmp_res
self.backend.emit_haddpd_f64_f64(&tmp_res, &tmp_res);
}
4 => {
let tmp = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
// movl op -> tmp(32)
let tmp32 = unsafe {tmp.as_type(UINT32_TYPE.clone())};
self.backend.emit_mov_r_r(&tmp32, &tmp_op);
// cvtsi2sd %tmp(64) -> %tmp_res
self.backend.emit_cvtsi2sd_f64_r(&tmp_res, &tmp);
}
2 | 1 => {
let tmp_op32 = unsafe {tmp_op.as_type(UINT32_TYPE.clone())};
self.backend.emit_cvtsi2sd_f64_r(&tmp_res, &tmp_op32);
}
_ => panic!("not implemented int length {}", op_ty_size)
}
} else {
panic!("unexpected op (expected ireg): {}", op)
}
}
op::ConvOp::FPTOUI => {
let tmp_res = self.get_result_value(node);
let res_ty_size = vm.get_backend_type_info(tmp_res.ty.id()).size;
// punpckldq UITOFP_C0, tmp_res -> tmp_res
// (interleaving low bytes: xmm = xmm[0] mem[0] xmm[1] mem[1]
let mem_c0 = self.get_mem_for_const(UITOFP_C0.clone(), vm);
self.backend.emit_punpckldq_f64_mem128(&tmp_res, &mem_c0);
if self.match_fpreg(op) {
let tmp_op = self.emit_fpreg(op, f_content, f_context, vm);
match res_ty_size {
8 => {
let tmp1 = self.make_temporary(f_context, DOUBLE_TYPE.clone(), vm);
let tmp2 = self.make_temporary(f_context, DOUBLE_TYPE.clone(), vm);
// movsd FPTOUI_C -> %tmp1
let mem_c = self.get_mem_for_const(FPTOUI_C.clone(), vm);
self.backend.emit_movsd_f64_mem64(&tmp1, &mem_c);
// movapd %tmp_op -> %tmp2
self.backend.emit_movapd_f64_f64(&tmp2, &tmp_op);
// subsd %tmp1, %tmp2 -> %tmp2
self.backend.emit_subsd_f64_f64(&tmp2, &tmp1);
// cvttsd2si %tmp2 -> %tmp_res
self.backend.emit_cvttsd2si_r_f64(&tmp_res, &tmp2);
let tmp_const = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
// mov 0x8000000000000000 -> %tmp_const
self.backend.emit_mov_r64_imm64(&tmp_const, -9223372036854775808i64);
// xor %tmp_res, %tmp_const -> %tmp_const
self.backend.emit_xor_r_r(&tmp_const, &tmp_res);
// subpd UITOFP_C1, tmp_res -> tmp_res
let mem_c1 = self.get_mem_for_const(UITOFP_C1.clone(), vm);
self.backend.emit_subpd_f64_mem128(&tmp_res, &mem_c1);
// cvttsd2si %tmp_op -> %tmp_res
self.backend.emit_cvttsd2si_r_f64(&tmp_res, &tmp_op);
// haddpd tmp_res, tmp_res -> tmp_res
self.backend.emit_haddpd_f64_f64(&tmp_res, &tmp_res);
// ucomisd %tmp_op %tmp1
self.backend.emit_ucomisd_f64_f64(&tmp1, &tmp_op);
// cmovaeq %tmp_const -> %tmp_res
self.backend.emit_cmovae_r_r(&tmp_res, &tmp_const);
}
4 => {
let tmp_res64 = unsafe {tmp_res.as_type(UINT64_TYPE.clone())};
// cvttsd2si %tmp_op -> %tmp_res(64)
self.backend.emit_cvttsd2si_r_f64(&tmp_res64, &tmp_op);
}
2 | 1 => {
let tmp_res32 = unsafe {tmp_res.as_type(UINT32_TYPE.clone())};
// cvttsd2si %tmp_op -> %tmp_res(32)
self.backend.emit_cvttsd2si_r_f64(&tmp_res32, &tmp_op);
// movz %tmp_res -> %tmp_res(32)
self.backend.emit_movz_r_r(&tmp_res32, &tmp_res);
}
_ => panic!("not implemented int length {}", res_ty_size)
}
} else {
panic!("unexpected op (expected ireg): {}", op)
}
......
......@@ -252,50 +252,393 @@ fn sitofp() -> VM {
}
#[test]
fn test_uitofp() {
let lib = testutil::compile_fnc("uitofp", &uitofp);
fn test_ui64tofp() {
let lib = testutil::compile_fnc("ui64tofp", &ui64tofp);
unsafe {
let uitofp : libloading::Symbol<unsafe extern fn(u64) -> f64> = lib.get(b"uitofp").unwrap();
let ui64tofp : libloading::Symbol<unsafe extern fn(u64) -> f64> = lib.get(b"ui64tofp").unwrap();
let res = uitofp(0u64);
println!("uitofp(0) = {}", res);
let res = ui64tofp(0u64);
println!("ui64tofp(0) = {}", res);
assert!(res == 0f64);
let res = uitofp(1u64);
println!("uitofp(1) = {}", res);
let res = ui64tofp(1u64);
println!("ui64tofp(1) = {}", res);
assert!(res == 1f64);
}
}
fn uitofp() -> VM {
fn ui64tofp() -> VM {
let vm = VM::new();
typedef! ((vm) int64 = mu_int(64));
typedef! ((vm) double = mu_double);
funcsig! ((vm) sig = (int64) -> (double));
funcdecl! ((vm) <sig> uitofp);
funcdef! ((vm) <sig> uitofp VERSION uitofp_v1);
funcdecl! ((vm) <sig> ui64tofp);
funcdef! ((vm) <sig> ui64tofp VERSION ui64tofp_v1);
// blk entry
block! ((vm, uitofp_v1) blk_entry);
ssa! ((vm, uitofp_v1) <int64> x);
block! ((vm, ui64tofp_v1) blk_entry);
ssa! ((vm, ui64tofp_v1) <int64> x);
ssa! ((vm, uitofp_v1) <double> res);
inst! ((vm, uitofp_v1) blk_entry_conv:
ssa! ((vm, ui64tofp_v1) <double> res);
inst! ((vm, ui64tofp_v1) blk_entry_conv:
res = CONVOP (ConvOp::UITOFP) <int64 double> x
);
inst! ((vm, uitofp_v1) blk_entry_ret:
inst! ((vm, ui64tofp_v1) blk_entry_ret:
RET (res)
);
define_block!((vm, uitofp_v1) blk_entry(x){
define_block!((vm, ui64tofp_v1) blk_entry(x){
blk_entry_conv, blk_entry_ret
});
define_func_ver!((vm) uitofp_v1 (entry: blk_entry) {blk_entry});
define_func_ver!((vm) ui64tofp_v1 (entry: blk_entry) {blk_entry});
vm
}
// UITOFP from a 32-bit integer: checks 0, 1 and the largest u32
#[test]
fn test_ui32tofp() {
    let lib = testutil::compile_fnc("ui32tofp", &ui32tofp);

    // the symbol is called through a raw function pointer; the declared Rust
    // signature has to match the Mu signature (int32) -> (double)
    unsafe {
        let ui32tofp : libloading::Symbol<unsafe extern fn(u32) -> f64> = lib.get(b"ui32tofp").unwrap();

        let res = ui32tofp(0u32);
        println!("ui32tofp(0) = {}", res);
        assert_eq!(res, 0f64);

        let res = ui32tofp(1u32);
        println!("ui32tofp(1) = {}", res);
        assert_eq!(res, 1f64);

        // high bit set: a signed conversion would produce -1 here
        let res = ui32tofp(0xFFFF_FFFFu32);
        println!("ui32tofp(0xFFFFFFFF) = {}", res);
        assert_eq!(res, 4294967295f64);
    }
}
// Builds a VM that defines the Mu function `ui32tofp` with signature (int32) -> (double).
// Its single block converts the argument with CONVOP UITOFP and returns the result.
fn ui32tofp() -> VM {
let vm = VM::new();
// types and signature: (int32) -> (double)
typedef! ((vm) int32 = mu_int(32));
typedef! ((vm) double = mu_double);
funcsig! ((vm) sig = (int32) -> (double));
// declare the function and the version that is defined below
funcdecl! ((vm) <sig> ui32tofp);
funcdef! ((vm) <sig> ui32tofp VERSION ui32tofp_v1);
// blk entry
block! ((vm, ui32tofp_v1) blk_entry);
// x is the block argument (see define_block! below)
ssa! ((vm, ui32tofp_v1) <int32> x);
// res receives the converted value
ssa! ((vm, ui32tofp_v1) <double> res);
inst! ((vm, ui32tofp_v1) blk_entry_conv:
res = CONVOP (ConvOp::UITOFP) <int32 double> x
);
inst! ((vm, ui32tofp_v1) blk_entry_ret:
RET (res)
);
// the block takes x and runs the conversion followed by the return
define_block!((vm, ui32tofp_v1) blk_entry(x){
blk_entry_conv, blk_entry_ret
});
define_func_ver!((vm) ui32tofp_v1 (entry: blk_entry) {blk_entry});
vm
}
// UITOFP from a 16-bit integer: checks 0, 1 and the largest u16
#[test]
fn test_ui16tofp() {
    let lib = testutil::compile_fnc("ui16tofp", &ui16tofp);

    // the symbol is called through a raw function pointer; the declared Rust
    // signature has to match the Mu signature (int16) -> (double)
    unsafe {
        let ui16tofp : libloading::Symbol<unsafe extern fn(u16) -> f64> = lib.get(b"ui16tofp").unwrap();

        let res = ui16tofp(0u16);
        println!("ui16tofp(0) = {}", res);
        assert_eq!(res, 0f64);

        let res = ui16tofp(1u16);
        println!("ui16tofp(1) = {}", res);
        assert_eq!(res, 1f64);

        // high bit set: a signed conversion would produce -1 here
        let res = ui16tofp(0xFFFFu16);
        println!("ui16tofp(0xFFFF) = {}", res);
        assert_eq!(res, 65535f64);
    }
}
// Builds a VM that defines the Mu function `ui16tofp` with signature (int16) -> (double).
// Its single block converts the argument with CONVOP UITOFP and returns the result.
fn ui16tofp() -> VM {
let vm = VM::new();
// types and signature: (int16) -> (double)
typedef! ((vm) int16 = mu_int(16));
typedef! ((vm) double = mu_double);
funcsig! ((vm) sig = (int16) -> (double));
// declare the function and the version that is defined below
funcdecl! ((vm) <sig> ui16tofp);
funcdef! ((vm) <sig> ui16tofp VERSION ui16tofp_v1);
// blk entry
block! ((vm, ui16tofp_v1) blk_entry);
// x is the block argument (see define_block! below)
ssa! ((vm, ui16tofp_v1) <int16> x);
// res receives the converted value
ssa! ((vm, ui16tofp_v1) <double> res);
inst! ((vm, ui16tofp_v1) blk_entry_conv:
res = CONVOP (ConvOp::UITOFP) <int16 double> x
);
inst! ((vm, ui16tofp_v1) blk_entry_ret:
RET (res)
);
// the block takes x and runs the conversion followed by the return
define_block!((vm, ui16tofp_v1) blk_entry(x){
blk_entry_conv, blk_entry_ret
});
define_func_ver!((vm) ui16tofp_v1 (entry: blk_entry) {blk_entry});
vm
}
// UITOFP from an 8-bit integer: checks 0, 1 and the largest u8
#[test]
fn test_ui8tofp() {
    let lib = testutil::compile_fnc("ui8tofp", &ui8tofp);

    // the symbol is called through a raw function pointer; the declared Rust
    // signature has to match the Mu signature (int8) -> (double)
    unsafe {
        let ui8tofp : libloading::Symbol<unsafe extern fn(u8) -> f64> = lib.get(b"ui8tofp").unwrap();

        let res = ui8tofp(0u8);
        println!("ui8tofp(0) = {}", res);
        assert_eq!(res, 0f64);

        let res = ui8tofp(1u8);
        println!("ui8tofp(1) = {}", res);
        assert_eq!(res, 1f64);

        // high bit set: a signed conversion would produce -1 here
        let res = ui8tofp(0xFFu8);
        println!("ui8tofp(0xFF) = {}", res);
        assert_eq!(res, 255f64);
    }
}
// Builds a VM that defines the Mu function `ui8tofp` with signature (int8) -> (double).
// Its single block converts the argument with CONVOP UITOFP and returns the result.
fn ui8tofp() -> VM {
let vm = VM::new();
// types and signature: (int8) -> (double)
typedef! ((vm) int8 = mu_int(8));
typedef! ((vm) double = mu_double);
funcsig! ((vm) sig = (int8) -> (double));
// declare the function and the version that is defined below
funcdecl! ((vm) <sig> ui8tofp);
funcdef! ((vm) <sig> ui8tofp VERSION ui8tofp_v1);
// blk entry
block! ((vm, ui8tofp_v1) blk_entry);
// x is the block argument (see define_block! below)
ssa! ((vm, ui8tofp_v1) <int8> x);
// res receives the converted value
ssa! ((vm, ui8tofp_v1) <double> res);
inst! ((vm, ui8tofp_v1) blk_entry_conv:
res = CONVOP (ConvOp::UITOFP) <int8 double> x
);
inst! ((vm, ui8tofp_v1) blk_entry_ret:
RET (res)
);
// the block takes x and runs the conversion followed by the return
define_block!((vm, ui8tofp_v1) blk_entry(x){
blk_entry_conv, blk_entry_ret
});
define_func_ver!((vm) ui8tofp_v1 (entry: blk_entry) {blk_entry});
vm
}
// FPTOUI to a 64-bit integer: checks 0, 1 and 2^63
#[test]
fn test_fptoui64() {
    let lib = testutil::compile_fnc("fptoui64", &fptoui64);

    // the symbol is called through a raw function pointer; the declared Rust
    // signature has to match the Mu signature (double) -> (int64)
    unsafe {
        let fptoui64 : libloading::Symbol<unsafe extern fn(f64) -> u64> = lib.get(b"fptoui64").unwrap();

        let res = fptoui64(0f64);
        println!("fptoui64(0) = {}", res);
        assert_eq!(res, 0u64);

        let res = fptoui64(1f64);
        println!("fptoui64(1) = {}", res);
        assert_eq!(res, 1u64);

        // 2^63 is exactly representable as a double but does not fit in a signed
        // 64-bit integer, so it needs the unsigned-specific handling
        let res = fptoui64(9223372036854775808f64);
        println!("fptoui64(2^63) = {}", res);
        assert_eq!(res, 1u64 << 63);
    }
}
// Builds a VM that defines the Mu function `fptoui64` with signature (double) -> (int64).
// Its single block converts the argument with CONVOP FPTOUI and returns the result.
fn fptoui64() -> VM {
let vm = VM::new();
// types and signature: (double) -> (int64)
typedef! ((vm) int64 = mu_int(64));
typedef! ((vm) double = mu_double);
funcsig! ((vm) sig = (double) -> (int64));
// declare the function and the version that is defined below
funcdecl! ((vm) <sig> fptoui64);
funcdef! ((vm) <sig> fptoui64 VERSION fptoui64_v1);
// blk entry
block! ((vm, fptoui64_v1) blk_entry);
// x is the block argument (see define_block! below)
ssa! ((vm, fptoui64_v1) <double> x);
// res receives the converted value
ssa! ((vm, fptoui64_v1) <int64> res);
inst! ((vm, fptoui64_v1) blk_entry_conv:
res = CONVOP (ConvOp::FPTOUI) <double int64> x
);
inst! ((vm, fptoui64_v1) blk_entry_ret:
RET (res)
);
// the block takes x and runs the conversion followed by the return
define_block!((vm, fptoui64_v1) blk_entry(x){
blk_entry_conv, blk_entry_ret
});
define_func_ver!((vm) fptoui64_v1 (entry: blk_entry) {blk_entry});
vm
}
// FPTOUI to a 32-bit integer: checks 0, 1 and the largest u32
#[test]
fn test_fptoui32() {
    let lib = testutil::compile_fnc("fptoui32", &fptoui32);

    // the symbol is called through a raw function pointer; the declared Rust
    // signature has to match the Mu signature (double) -> (int32)
    unsafe {
        let fptoui32 : libloading::Symbol<unsafe extern fn(f64) -> u32> = lib.get(b"fptoui32").unwrap();

        let res = fptoui32(0f64);
        println!("fptoui32(0) = {}", res);
        assert_eq!(res, 0u32);

        let res = fptoui32(1f64);
        println!("fptoui32(1) = {}", res);
        assert_eq!(res, 1u32);

        // largest u32: does not fit in a signed 32-bit integer
        let res = fptoui32(4294967295f64);
        println!("fptoui32(4294967295) = {}", res);
        assert_eq!(res, 0xFFFF_FFFFu32);
    }
}
// Builds a VM that defines the Mu function `fptoui32` with signature (double) -> (int32).
// Its single block converts the argument with CONVOP FPTOUI and returns the result.
fn fptoui32() -> VM {
let vm = VM::new();
// types and signature: (double) -> (int32)
typedef! ((vm) int32 = mu_int(32));
typedef! ((vm) double = mu_double);
funcsig! ((vm) sig = (double) -> (int32));
// declare the function and the version that is defined below
funcdecl! ((vm) <sig> fptoui32);
funcdef! ((vm) <sig> fptoui32 VERSION fptoui32_v1);
// blk entry
block! ((vm, fptoui32_v1) blk_entry);
// x is the block argument (see define_block! below)
ssa! ((vm, fptoui32_v1) <double> x);
// res receives the converted value
ssa! ((vm, fptoui32_v1) <int32> res);
inst! ((vm, fptoui32_v1) blk_entry_conv:
res = CONVOP (ConvOp::FPTOUI) <double int32> x
);
inst! ((vm, fptoui32_v1) blk_entry_ret:
RET (res)
);
// the block takes x and runs the conversion followed by the return
define_block!((vm, fptoui32_v1) blk_entry(x){
blk_entry_conv, blk_entry_ret
});
define_func_ver!((vm) fptoui32_v1 (entry: blk_entry) {blk_entry});
vm
}
// FPTOUI to a 16-bit integer: checks 0, 1 and the largest u16
#[test]
fn test_fptoui16() {
    let lib = testutil::compile_fnc("fptoui16", &fptoui16);

    // the symbol is called through a raw function pointer; the declared Rust
    // signature is expected to match the Mu function built by `fptoui16`
    unsafe {
        let fptoui16 : libloading::Symbol<unsafe extern fn(f64) -> u16> = lib.get(b"fptoui16").unwrap();

        let res = fptoui16(0f64);
        println!("fptoui16(0) = {}", res);
        assert_eq!(res, 0u16);

        let res = fptoui16(1f64);
        println!("fptoui16(1) = {}", res);
        assert_eq!(res, 1u16);

        // largest u16: does not fit in a signed 16-bit integer
        let res = fptoui16(65535f64);
        println!("fptoui16(65535) = {}", res);
        assert_eq!(res, 0xFFFFu16);
    }
}
fn fptoui16() -> VM {
let vm = VM::new();