Commit 84bc93fe authored by Yi Lin's avatar Yi Lin

Merge branch 'trace-scheduling' into 'develop'

Trace scheduling

See merge request !16
parents 5a939029 d4551598
......@@ -681,8 +681,8 @@ impl Instruction_ {
&Instruction_::Throw(exn_obj) => format!("THROW {}", ops[exn_obj]),
&Instruction_::TailCall(ref call) => format!("TAILCALL {}", call.debug_str(ops)),
&Instruction_::Branch1(ref dest) => format!("BRANCH {}", dest.debug_str(ops)),
&Instruction_::Branch2{cond, ref true_dest, ref false_dest, ..} => {
format!("BRANCH2 {} {} {}", ops[cond], true_dest.debug_str(ops), false_dest.debug_str(ops))
&Instruction_::Branch2{cond, ref true_dest, ref false_dest, true_prob} => {
format!("BRANCH2 {} {}({}) {}", ops[cond], true_dest.debug_str(ops), true_prob, false_dest.debug_str(ops))
},
&Instruction_::Select{cond, true_val, false_val} => {
format!("SELECT if {} then {} else {}", ops[cond], ops[true_val], ops[false_val])
......
......@@ -481,11 +481,16 @@ impl FunctionContext {
/// Block contains BlockContent, which includes all the instructions for the block
// FIXME: control_flow field should be moved out of ast crate (Issue #18)
// FIXME: control_flow field should be moved out of ast crate (Issue #18)
// FIXME: trace_hint should also be moved
#[derive(Clone)]
pub struct Block {
    /// the entity header (ID, and possibly a name) of this block
    pub hdr: MuEntityHeader,
    /// the actual content of this block
    pub content: Option<BlockContent>,
    /// a trace scheduling hint about where to layout this block
    pub trace_hint: TraceHint,
    /// control flow info about this block (predecessors, successors, etc)
    pub control_flow: ControlFlow
}
......@@ -503,9 +508,15 @@ impl fmt::Debug for Block {
}
}
impl fmt::Display for Block {
    /// a block displays simply as its name
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = self.name();
        write!(f, "{}", name)
    }
}
impl Block {
/// creates a new empty Block with the given entity header.
/// content starts as None, trace_hint defaults to TraceHint::None,
/// and control_flow to its default (empty preds/succs).
// NOTE: the diff left both the old and the new constructor line here;
// only the version that initializes trace_hint is valid, since the
// struct declares that field.
pub fn new(entity: MuEntityHeader) -> Block {
    Block{hdr: entity, content: None, trace_hint: TraceHint::None, control_flow: ControlFlow::default()}
}
/// does this block have an exception argument?
......@@ -523,11 +534,60 @@ impl Block {
content.body.len()
}
}
/// does this block end with a conditional branch (BRANCH2)?
pub fn ends_with_cond_branch(&self) -> bool {
    let block : &BlockContent = self.content.as_ref().unwrap();
    // only the last instruction of the block body matters
    if let Some(node) = block.body.last() {
        if let TreeNode_::Instruction(Instruction {v: Instruction_::Branch2{..}, ..}) = node.v {
            return true;
        }
    }
    false
}
/// does this block end with a RET instruction?
pub fn ends_with_return(&self) -> bool {
    let content : &BlockContent = self.content.as_ref().unwrap();
    // inspect only the final instruction of the block body
    match content.body.last() {
        Some(node) => match node.v {
            TreeNode_::Instruction(Instruction {v: Instruction_::Return(_), ..}) => true,
            _ => false
        },
        None => false
    }
}
}
/// ControlFlow stores compilation info about control flows of a block
/// TraceHint is a hint for the compiler to generate a better trace for this block
// Note: for a sequence of blocks that are supposed to be fast/slow path, only mark the
// first block with a TraceHint, and let the trace scheduler lay out the other
// blocks normally. Otherwise, the scheduler will take every TraceHint into
// consideration, and may not generate the trace as expected.
// FIXME: Issue #18
// Debug derive added for consistency with ControlFlow (which derives Debug);
// Eq is free since every variant is a unit variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TraceHint {
    /// no hint provided. Trace scheduler should use its own heuristics to decide
    None,
    /// this block is a fast path, and should be put in straight-line code where possible
    FastPath,
    /// this block is a slow path, and should be kept out of hot loops
    SlowPath,
    /// this block is a return sink, and should be put at the end of a function
    ReturnSink
}
// FIXME: Issue #18
/// ControlFlow stores compilation info about control flows of a block
// FIXME: Issue #18
#[derive(Debug, Clone)]
pub struct ControlFlow {
pub preds : Vec<MuID>,
......@@ -709,6 +769,13 @@ impl TreeNode {
})
}
/// creates a sharable Value TreeNode
/// (wraps the value node in the shared pointer type P so it can be referenced
/// from multiple places in the IR)
pub fn new_value(v: P<Value>) -> P<TreeNode> {
    P(TreeNode {
        v: TreeNode_::Value(v)
    })
}
/// extracts the MuID of an SSA TreeNode
/// if the node is not an SSA, returns None
pub fn extract_ssa_id(&self) -> Option<MuID> {
......@@ -798,9 +865,13 @@ rodal_struct!(Value{hdr, ty, v});
impl Value {
/// creates an int constant value
pub fn make_int_const(id: MuID, val: u64) -> P<Value> {
Value::make_int_const_ty(id, UINT32_TYPE.clone(), val)
}
/// creates an int constant value with an explicitly given int type
// NOTE: the diff left both the old hard-coded `ty: UINT32_TYPE.clone(),`
// line and the new `ty: ty,` line here; only the latter is valid — the
// whole point of this function is taking the type as a parameter.
pub fn make_int_const_ty(id: MuID, ty: P<MuType>, val: u64) -> P<Value> {
    P(Value{
        hdr: MuEntityHeader::unnamed(id),
        ty: ty,
        v: Value_::Constant(Constant::Int(val))
    })
}
......
......@@ -505,17 +505,6 @@ impl MachineCode for ASMCode {
let split : Vec<&str> = inst.code.split(' ').collect();
Some(demangle_name(String::from(split[1])))
}
Some(inst) if inst.code.starts_with("CBNZ ") || inst.code.starts_with("CBZ ") => {
// Destination is the second argument
let split : Vec<&str> = inst.code.split(',').collect();
Some(demangle_name(String::from(split[1])))
}
Some(inst) if inst.code.starts_with("TBNZ ") || inst.code.starts_with("TBZ ") => {
// Destination is the third argument
let split : Vec<&str> = inst.code.split(',').collect();
Some(demangle_name(String::from(split[2])))
}
_ => None
}
}
......@@ -629,6 +618,24 @@ impl MachineCode for ASMCode {
}
}
/// replaces the destination of an unconditional branch instruction at `inst`
/// with `new_dest`, and records `succ` as its single successor
/// (also registers `inst` as a predecessor of `succ`)
fn replace_branch_dest(&mut self, inst: usize, new_dest: &str, succ: usize) {
    {
        let asm = &mut self.code[inst];
        // keep only the opcode (the first whitespace-separated token);
        // note this `inst` shadows the index parameter within this scope only
        let inst = String::from(asm.code.split_whitespace().next().unwrap());
        // rewrite the instruction to branch to the mangled new destination
        asm.code = format!("{} {}", inst, mangle_name(String::from(new_dest)));
        // the rewritten branch has exactly one successor
        asm.succs.clear();
        asm.succs.push(succ);
    }
    {
        // register this instruction as a predecessor of the new successor
        let asm = &mut self.code[succ];
        if !asm.preds.contains(&inst) {
            asm.preds.push(inst);
        }
    }
}
fn set_inst_nop(&mut self, index: usize) {
self.code[index].code.clear();
// self.code.remove(index);
......
......@@ -32,8 +32,7 @@ use runtime::entrypoints::RuntimeEntrypoint;
use compiler::CompilerPass;
use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::backend::EPILOGUE_BLOCK_NAME;
use compiler::PROLOGUE_BLOCK_NAME;
use compiler::backend::aarch64::*;
use compiler::backend::make_block_name;
......@@ -78,7 +77,7 @@ impl <'a> InstructionSelection {
#[cfg(feature = "aot")]
pub fn new() -> InstructionSelection {
InstructionSelection {
name: "Instruction Selection (x64)",
name: "Instruction Selection (aarch64)",
backend: Box::new(ASMCodeGen::new()),
current_fv_id: 0,
......@@ -120,16 +119,7 @@ impl <'a> InstructionSelection {
// TODO: Optimise if cond is a flag from a binary operation?
Instruction_::Branch2 { cond, ref true_dest, ref false_dest, .. } => {
trace!("instsel on BRANCH2");
let (fallthrough_dest, branch_dest, branch_if_true) = {
let cur_block = f_content.get_block_by_name(self.current_block_in_ir.as_ref().unwrap().clone());
let next_block_in_trace = cur_block.control_flow.get_hottest_succ().unwrap();
if next_block_in_trace == true_dest.target {
(true_dest, false_dest, false)
} else {
(false_dest, true_dest, true)
}
};
let (fallthrough_dest, branch_dest) = (false_dest, true_dest);
let ref ops = inst.ops;
......@@ -151,20 +141,11 @@ impl <'a> InstructionSelection {
if use_cbnz { Some(Box::new(tmp_cond.as_ref().unwrap().clone())) }
else { None };
let mut cmpop = self.emit_cmp_res(cond, cond_box, f_content, f_context, vm);
let cmpop = self.emit_cmp_res(cond, cond_box, f_content, f_context, vm);
if use_cbnz {
if !branch_if_true {
self.backend.emit_cbz(tmp_cond.as_ref().unwrap(), branch_target);
} else {
self.backend.emit_cbnz(tmp_cond.as_ref().unwrap(), branch_target);
}
self.backend.emit_cbnz(tmp_cond.as_ref().unwrap(), branch_target);
} else {
if !branch_if_true {
cmpop = cmpop.invert();
}
let cond = get_condition_codes(cmpop);
if cmpop == op::CmpOp::FFALSE {
......@@ -181,22 +162,8 @@ impl <'a> InstructionSelection {
}
} else {
let cond_reg = self.emit_ireg(cond, f_content, f_context, vm);
if branch_if_true {
self.backend.emit_tbnz(&cond_reg, 0, branch_target.clone());
} else {
self.backend.emit_tbz(&cond_reg, 0, branch_target.clone());
}
self.backend.emit_tbnz(&cond_reg, 0, branch_target.clone());
};
// it is possible that the fallthrough block is scheduled somewhere else
// we need to explicitly jump to it
self.finish_block();
let fallthrough_temp_block = make_block_name(&self.current_fv_name, node.id(), "branch_fallthrough", );
self.start_block(fallthrough_temp_block);
let fallthrough_target = f_content.get_block(fallthrough_dest.target).name();
self.backend.emit_b(fallthrough_target);
},
Instruction_::Select { cond, true_val, false_val } => {
......@@ -466,8 +433,8 @@ impl <'a> InstructionSelection {
}
}
let epilogue_block = format!("{}:{}", self.current_fv_name, EPILOGUE_BLOCK_NAME);
self.backend.emit_b(epilogue_block);
self.emit_epilogue(f_context, vm);
self.backend.emit_ret(&LR);
},
Instruction_::BinOp(op, op1, op2) => {
......@@ -4571,13 +4538,6 @@ impl CompilerPass for InstructionSelection {
}
fn finish_function(&mut self, vm: &VM, func: &mut MuFunctionVersion) {
// Todo: Don't emit this if the function never returns
let epilogue_block = format!("{}:{}", self.current_fv_name, EPILOGUE_BLOCK_NAME);
self.start_block(epilogue_block);
self.emit_epilogue(&mut func.context, vm);
self.backend.emit_ret(&LR); // return to the Link Register
self.finish_block();
self.backend.print_cur_code();
let func_name = {
......
......@@ -12,12 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#![allow(dead_code)]
// TODO: CHECK THAT THE TYPE OF EVERY MEMORY LOCATION HAS THE CORRECT SIZE
// (the size should be size of the area in memory that it is referring to, and will indicate
// how much data any load/store instructions that uses it will operate on
// (so it should be [1], 8, 16, 32, 64, or 128 bits in size (when using emit_mem, it can have other sizes before this))
#![allow(non_upper_case_globals)]
// TODO: Move architecture independent codes in here, inst_sel and asm_backend to somewhere else...
pub mod inst_sel;
......@@ -332,7 +333,6 @@ pub fn primitive_byte_size(ty : &P<MuType>) -> usize
}
}
#[allow(dead_code)]
lazy_static! {
// Note: these are the same as the ARGUMENT_GPRS
pub static ref RETURN_GPRS : [P<Value>; 8] = [
......@@ -396,7 +396,6 @@ lazy_static! {
//X18.clone(), // Platform Register
];
/*#[allow(dead_code)]
static ref ALL_GPRS : [P<Value>; 30] = [
X0.clone(),
X1.clone(),
......@@ -429,7 +428,7 @@ lazy_static! {
X28.clone(),
X29.clone(), // Frame Pointer
X30.clone() // Link Register
];*/
];
}
pub const FPR_ID_START : usize = 100;
......@@ -586,7 +585,6 @@ lazy_static!{
D31.clone()
];
/*#[allow(dead_code)]
static ref ALL_FPRS : [P<Value>; 32] = [
D0.clone(),
D1.clone(),
......@@ -622,7 +620,7 @@ lazy_static!{
D29.clone(),
D30.clone(),
D31.clone()
];*/
];
}
lazy_static! {
......
......@@ -759,6 +759,24 @@ impl MachineCode for ASMCode {
}
}
/// replace destination for a jump instruction
fn replace_branch_dest(&mut self, inst: usize, new_dest: &str, succ: MuID) {
{
let asm = &mut self.code[inst];
asm.code = format!("jmp {}", symbol(mangle_name(String::from(new_dest))));
asm.succs.clear();
asm.succs.push(succ);
}
{
let asm = &mut self.code[succ];
if !asm.preds.contains(&inst) {
asm.preds.push(inst);
}
}
}
/// set an instruction as nop
fn set_inst_nop(&mut self, index: usize) {
self.code[index].code.clear();
......
......@@ -33,11 +33,6 @@ use num::integer::lcm;
#[cfg(feature = "aot")]
pub const AOT_EMIT_CONTEXT_FILE : &'static str = "context.S";
/// name for prologue (this is not full name, but prologue name is generated from this)
pub const PROLOGUE_BLOCK_NAME: &'static str = "prologue";
/// name for epilogue (this is not full name, but epilogue name is generated from this)
pub const EPILOGUE_BLOCK_NAME: &'static str = "epilogue";
// type alias to make backend code more readable
pub type Reg<'a> = &'a P<Value>;
pub type Mem<'a> = &'a P<Value>;
......
......@@ -40,6 +40,20 @@ impl CompilerPass for PeepholeOptimization {
for i in 0..cf.mc().number_of_insts() {
// if two sides of a move instruction are the same, it is redundant, and can be eliminated
self.remove_redundant_move(i, &mut cf);
// if a branch jumps a label that contains another jump, such as
// ..
// jmp L1
// ..
// L1:
// jmp L2
// ..
// we can rewrite first branch to jump to L2 directly
// the order matters: we need to run this first, then remove_unnecessary_jump()
// as this will give us more chances to remove unnecessary jumps
self.remove_jump_to_jump(i, &mut cf);
// if a branch targets a block that immediately follow it, it can be eliminated
self.remove_unnecessary_jump(i, &mut cf);
}
......@@ -90,7 +104,7 @@ impl PeepholeOptimization {
// check if two registers are aliased
if backend::is_aliased(src_machine_reg, dst_machine_reg) {
trace!("move between {} and {} is redundant! removed", src_machine_reg, dst_machine_reg);
info!("move between {} and {} is redundant! removed", src_machine_reg, dst_machine_reg);
// redundant, remove this move
cf.mc_mut().set_inst_nop(inst);
} else {
......@@ -115,7 +129,8 @@ impl PeepholeOptimization {
let opt_label = mc.is_label(inst + 1);
match opt_label {
Some(ref label) if dest == label => {
mc.set_inst_nop(inst);
info!("inst {}'s jmp to {} is unnecessary! removed", inst, label);
mc.set_inst_nop(inst);
}
_ => {
// do nothing
......@@ -127,4 +142,56 @@ impl PeepholeOptimization {
}
}
}
/// if the instruction at `inst` jumps to a block whose first instruction is
/// itself a jump, rewrite `inst` to branch directly to the final destination
/// of the whole jump chain (jump threading)
fn remove_jump_to_jump(&mut self, inst: usize, cf: &mut CompiledFunction) {
    let mut mc = cf.mc_mut();

    // the instruction that we may rewrite
    let orig_inst = inst;
    // the destination we will rewrite the instruction to branch to
    // (None if `inst` is not a jump, or its target block does not start
    // with another jump)
    let final_dest : Option<MuName> = {
        let mut cur_inst = inst;
        let mut last_dest = None;
        // follow the chain of jumps until reaching a block whose first
        // instruction is not another jump
        loop {
            let opt_dest = mc.is_jmp(cur_inst);
            match opt_dest {
                Some(ref dest) => {
                    // get the block for destination
                    let first_inst = mc.get_block_range(dest).unwrap().start;
                    debug_assert!(mc.is_label(first_inst).is_none(), "expect start inst {} of \
                        block {} is a inst instead of label", first_inst, dest);
                    trace!("examining first inst {} of block {}", first_inst, dest);

                    // if first instruction is jump
                    match mc.is_jmp(first_inst) {
                        Some(ref dest2) => {
                            // its a jump-to-jump case: keep following the chain
                            cur_inst = first_inst;
                            last_dest = Some(dest2.clone());
                        }
                        None => break
                    }
                }
                None => break
            }
        }
        last_dest
    };

    if let Some(dest) = final_dest {
        // the successor recorded on the rewritten jump is the first actual
        // instruction after the destination block's start
        let first_inst = {
            let start = mc.get_block_range(&dest).unwrap().start;
            match mc.get_next_inst(start) {
                Some(i) => i,
                None => panic!("we are jumping to a block {}\
                    that does not have instructions?", dest)
            }
        };
        info!("inst {} chain jumps to {}, rewrite as branching to {} (successor: {})",
            orig_inst, dest, dest, first_inst);
        mc.replace_branch_dest(inst, &dest, first_inst);
    }
}
}
......@@ -24,7 +24,7 @@ use ast::ir::*;
use ast::ptr::*;
use compiler::machine_code::CompiledFunction;
use compiler::backend::get_color_for_precolored as alias;
use compiler::backend::PROLOGUE_BLOCK_NAME;
use compiler::PROLOGUE_BLOCK_NAME;
mod alive_entry;
use compiler::backend::reg_alloc::validate::alive_entry::*;
......
......@@ -208,6 +208,8 @@ pub trait MachineCode {
fn replace_define_tmp_for_inst(&mut self, from: MuID, to: MuID, inst: usize);
/// replace a temp that is used in the inst with another temp
fn replace_use_tmp_for_inst(&mut self, from: MuID, to: MuID, inst: usize);
/// replace destination for an unconditional branch instruction
fn replace_branch_dest(&mut self, inst: usize, new_dest: &str, succ: usize);
/// set an instruction as nop
fn set_inst_nop(&mut self, index: usize);
/// remove unnecessary push/pop if the callee saved register is not used
......
......@@ -29,6 +29,11 @@ pub mod machine_code;
pub use compiler::passes::CompilerPass;
/// name for prologue (this is not full name, but prologue name is generated from this)
pub const PROLOGUE_BLOCK_NAME: &'static str = "prologue";
/// name for epilogue (this is not full name, but epilogue name is generated from this)
pub const EPILOGUE_BLOCK_NAME: &'static str = "epilogue";
/// Zebu compiler
pub struct Compiler<'vm> {
/// policy decides what passes to be executed
......@@ -92,8 +97,10 @@ impl Default for CompilerPolicy {
fn default() -> Self {
let mut passes : Vec<Box<CompilerPass>> = vec![];
passes.push(Box::new(passes::DotGen::new(".orig")));
passes.push(Box::new(passes::Inlining::new()));
// ir level passes
passes.push(Box::new(passes::RetSink::new()));
passes.push(Box::new(passes::Inlining::new()));
passes.push(Box::new(passes::DefUse::new()));
passes.push(Box::new(passes::TreeGen::new()));
passes.push(Box::new(passes::GenMovPhi::new()));
......
......@@ -366,6 +366,7 @@ fn copy_inline_blocks(caller: &mut Vec<Block>, ret_block: MuID, callee: &Functio
let mut block = Block {
hdr: MuEntityHeader::named(new_id, format!("{}:inlinedblock.#{}", block.name(), new_id)),
content: block.content.clone(),
trace_hint: TraceHint::None,
control_flow: ControlFlow::default()
};
......
......@@ -22,6 +22,10 @@ use std::any::Any;
mod inlining;
pub use compiler::passes::inlining::Inlining;
/// A pass to check and rewrite RET instructions to ensure a single return sink for every function
mod ret_sink;
pub use compiler::passes::ret_sink::RetSink;
/// A Def-Use pass. Getting use info and count for SSA variables in the IR (we are not collecting
/// define info)
mod def_use;
......
// Copyright 2017 The Australian National University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use ast::ir::*;
use ast::inst::*;
use ast::ptr::*;
use vm::VM;
use compiler::CompilerPass;
use compiler::EPILOGUE_BLOCK_NAME;
use std::any::Any;
/// Mu IR the client gives us may contain several RET instructions. However,
/// internally we want a single exit point for a function. In this pass, we
/// create a return sink (a block), and rewrite all the RET instruction into
/// a BRANCH with return values.
/// the RetSink pass: ensures a single exit point per function by rewriting
/// RET instructions into branches to one shared return-sink block
pub struct RetSink {
    // the pass name reported through CompilerPass::name()
    name: &'static str
}

impl RetSink {
    /// creates a new RetSink pass
    pub fn new() -> RetSink {
        RetSink { name: "Creating Return Sink" }
    }
}
impl CompilerPass for RetSink {
    /// the human-readable name of this pass
    fn name(&self) -> &'static str {
        self.name
    }

    fn as_any(&self) -> &Any {
        self
    }

    /// rewrites every RET in the function into a BRANCH to a newly created
    /// return sink block, then inserts that block into the function
    /// (the sink is only inserted if at least one RET was rewritten)
    fn visit_function(&mut self, vm: &VM, func: &mut MuFunctionVersion) {
        // take ownership of the content; it is put back at the end
        let mut f_content = func.content.take().unwrap();

        // create a return sink
        let return_sink = {
            // named after the function plus the shared epilogue suffix
            let block_name = format!("{}:{}", func.name(), EPILOGUE_BLOCK_NAME);
            trace!("created return sink {}", block_name);

            let mut block = Block::new(MuEntityHeader::named(vm.next_id(), block_name));
            // tell the compiler this is the return sink
            block.trace_hint = TraceHint::ReturnSink;
            vm.set_name(block.as_entity());

            // the sink takes one fresh SSA argument per declared return type
            let sig = func.sig.clone();
            let args : Vec<P<Value>> = sig.ret_tys.iter()
                .map(|ty| func.new_ssa(MuEntityHeader::unnamed(vm.next_id()), ty.clone()).clone_value()).collect();

            // the sink's body is a single RET of all its block arguments
            block.content = Some(BlockContent {
                args: args.clone(),
                exn_arg: None,
                body: vec![
                    func.new_inst(Instruction {
                        hdr: MuEntityHeader::unnamed(vm.next_id()),
                        value: None,
                        ops: args.iter().map(|val| TreeNode::new_value(val.clone())).collect(),
                        v: Instruction_::Return((0..args.len()).collect())
                    })
                ],
                keepalives: None
            });

            block
        };

        // rewrite existing RET instruction to a BRANCH
        // use RET values as BRANCH's goto values
        let mut has_ret : bool = false;
        for (blk_id, mut block) in f_content.blocks.iter_mut() {
            trace!("block: {}", blk_id);

            // old block content
            let block_content = block.content.as_ref().unwrap().clone();

            let mut new_body = vec![];

            for node in block_content.body.iter() {
                trace!("{}", node);
                match node.v {
                    // a RET: replace it with a BRANCH to the sink, passing the
                    // RET operands along as destination arguments
                    TreeNode_::Instruction(Instruction {ref ops, v: Instruction_::Return(ref arg_index), ..}) => {
                        let branch_to_sink = func.new_inst(Instruction {
                            hdr: MuEntityHeader::unnamed(vm.next_id()),
                            value: None,
                            ops: ops.clone(),
                            v: Instruction_::Branch1(Destination {
                                target: return_sink.id(),
                                args: arg_index.iter().map(|i| DestArg::Normal(*i)).collect()
                            })
                        });
                        trace!(">> rewrite ret to {}", branch_to_sink);
                        new_body.push(branch_to_sink);
                        has_ret = true;
                    }
                    // any other instruction is kept unchanged
                    _ => new_body.push(node.clone())
                }
            }

            // install the rewritten body, keeping args/exn_arg/keepalives as-is
            block.content = Some(BlockContent {
                args : block_content.args.to_vec(),
                exn_arg : block_content.exn_arg.clone(),
                body : new_body,
                keepalives: block_content.keepalives.clone()
            });
        }

        // insert return sink
        if has_ret {
            f_content.blocks.insert(return_sink.id(), return_sink);
        }

        // put back the function content
        func.content = Some(f_content);
    }
}
\ No newline at end of file
This diff is collapsed.
......@@ -1521,6 +1521,7 @@ impl<'lb, 'lvm> BundleLoader<'lb, 'lvm> {
Block {
hdr: hdr,
content: Some(ctn),
trace_hint: TraceHint::None,
control_flow: Default::default(),
}
}
......
......@@ -95,10 +95,9 @@ fn ccall_exit() -> VM {
consta! ((vm, ccall_exit_v1) int32_10_local = int32_10);
let blk_entry_ccall = gen_ccall_exit(int32_10_local.clone(), &mut ccall_exit_v1, &vm);
// RET %const_int32_0
consta! ((vm, ccall_exit_v1) int32_0_local = int32_0);
// RET
inst! ((vm, ccall_exit_v1) blk_entry_ret:
RET (int32_0_local)
RET
);
define_block!((vm, ccall_exit_v1) blk_entry() {
......
......@@ -93,7 +93,7 @@ fn truncate_then_call() -> VM {
);
inst!((vm, truncate_then_call_v1) blk_entry_ret:
RET (arg)
RET (res)
);
define_block!((vm, truncate_then_call_v1) blk_entry(arg) {
......
......@@ -421,7 +421,7 @@ fn create_catch_exception_and_add(vm: &VM) {
let blk_exception_exit = gen_ccall_exit(res4.clone(), &mut catch_and_add_v1, &vm);
inst! ((vm, catch_and_add_v1) blk_exception_ret:
RET (res4)
RET
<