Commit 844ff350 authored by Yi Lin

Merge branch 'perf-tweak' into 'develop'

A few performance improvements

See merge request !34
parents 15066724 bdca1777
......@@ -519,6 +519,14 @@ impl ASMCode {
use std::any::Any;
impl MachineCode for ASMCode {
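/// is the instruction at the specified index a nop?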
fn is_nop(&self, index: usize) -> bool {
let inst = &self.code[index];
inst.code == "" || inst.code == "NOP"
}
fn as_any(&self) -> &Any {
self
}
......
......@@ -851,6 +851,16 @@ impl MachineCode for ASMCode {
self.code[index].code.clear();
}
/// is the instruction at the specified index a nop?
fn is_nop(&self, index: usize) -> bool {
let inst = &self.code[index];
inst.code == "" || inst.code == "nop"
}
/// removes unnecessary push/pop pairs if the callee-saved register is not used
/// returns which registers' push/pop instructions have been deleted, and the number of
/// callee-saved registers that weren't deleted
......
......@@ -1643,6 +1643,7 @@ impl<'a> InstructionSelection {
// FIXME: the semantics of Pin/Unpin differ from the spec
// See Issue #33
Instruction_::CommonInst_Pin(op) => {
use runtime::mm::GC_MOVES_OBJECT;
trace!("instsel on PIN");
// call pin() in GC
......@@ -1653,17 +1654,23 @@ impl<'a> InstructionSelection {
let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
let tmp_res = self.get_result_value(node);
self.emit_runtime_entry(
&entrypoints::PIN_OBJECT,
vec![tmp_op.clone()],
Some(vec![tmp_res]),
Some(node),
f_content,
f_context,
vm
);
if GC_MOVES_OBJECT {
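// a moving GC must be notified so that the pinned object stays at a fixed address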
self.emit_runtime_entry(
&entrypoints::PIN_OBJECT,
vec![tmp_op.clone()],
Some(vec![tmp_res]),
Some(node),
f_content,
f_context,
vm
);
} else {
// FIXME: this is problematic, as we are not keeping the object alive
self.backend.emit_mov_r_r(&tmp_res, &tmp_op);
}
}
Instruction_::CommonInst_Unpin(op) => {
use runtime::mm::GC_MOVES_OBJECT;
trace!("instsel on UNPIN");
// call unpin() in GC
......@@ -1673,15 +1680,17 @@ impl<'a> InstructionSelection {
assert!(self.match_ireg(op));
let tmp_op = self.emit_ireg(op, f_content, f_context, vm);
self.emit_runtime_entry(
&entrypoints::UNPIN_OBJECT,
vec![tmp_op.clone()],
None,
Some(node),
f_content,
f_context,
vm
);
if GC_MOVES_OBJECT {
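// only a moving GC needs to be told about unpinning; with a non-moving GC this is a no-op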
self.emit_runtime_entry(
&entrypoints::UNPIN_OBJECT,
vec![tmp_op.clone()],
None,
Some(node),
f_content,
f_context,
vm
);
}
}
Instruction_::CommonInst_GetAddr(op) => {
trace!("instsel on GETADDR");
......@@ -2672,6 +2681,12 @@ impl<'a> InstructionSelection {
1 | 2 | 4 | 8 => {
trace!("emit mul");
// we need to emit both operands first, then move one into RAX
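// (otherwise the code emitted for op2 could clobber RAX once op1 is already in it)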
let tmp_op1 = self.emit_ireg(op1, f_content, f_context, vm);
let tmp_op2 = self.emit_ireg(op2, f_content, f_context, vm);
// move op1 -> RAX
let mreg_op1 = match op_size {
8 => x86_64::RAX.clone(),
4 => x86_64::EAX.clone(),
......@@ -2679,38 +2694,10 @@ impl<'a> InstructionSelection {
1 => x86_64::AL.clone(),
_ => unimplemented!()
};
if self.match_iimm(op1) {
let imm_op1 = self.node_iimm_to_i32(op1);
self.backend.emit_mov_r_imm(&mreg_op1, imm_op1);
} else if self.match_mem(op1) {
let mem_op1 = self.emit_mem(op1, vm);
self.backend.emit_mov_r_mem(&mreg_op1, &mem_op1);
} else if self.match_ireg(op1) {
let reg_op1 = self.emit_ireg(op1, f_content, f_context, vm);
self.backend.emit_mov_r_r(&mreg_op1, &reg_op1);
} else {
panic!("unexpected op1 for node {:?}", node)
}
self.backend.emit_mov_r_r(&mreg_op1, &tmp_op1);
// mul op2
if self.match_iimm(op2) {
let imm_op2 = self.node_iimm_to_i32(op2);
// put imm in a temporary
// here we use result reg as temporary
self.backend.emit_mov_r_imm(&res_tmp, imm_op2);
self.backend.emit_mul_r(&res_tmp);
} else if self.match_mem(op2) {
let mem_op2 = self.emit_mem(op2, vm);
self.backend.emit_mul_mem(&mem_op2);
} else if self.match_ireg(op2) {
let reg_op2 = self.emit_ireg(op2, f_content, f_context, vm);
self.backend.emit_mul_r(&reg_op2);
} else {
panic!("unexpected op2 for node {:?}", node)
}
self.backend.emit_mul_r(&tmp_op2);
// mov rax -> result
let res_size = vm.get_backend_type_size(res_tmp.ty.id());
......@@ -3437,69 +3424,22 @@ impl<'a> InstructionSelection {
)
}
} else {
// size is unknown at compile time
// we need to emit both alloc small and alloc large,
// and it is decided at runtime
// emit: cmp size, THRESHOLD
// emit: jg ALLOC_LARGE
// emit: >> small object alloc
// emit: jmp ALLOC_LARGE_END
// emit: ALLOC_LARGE:
// emit: >> large object alloc
// emit: ALLOC_LARGE_END:
let blk_alloc_large = make_block_name(&node.name(), "alloc_large");
let blk_alloc_large_end = make_block_name(&node.name(), "alloc_large_end");
if OBJECT_HEADER_SIZE != 0 {
// if the header size is not zero, we need to calculate a total size to alloc
let size_with_hdr = self.make_temporary(f_context, UINT64_TYPE.clone(), vm);
self.backend.emit_mov_r_r(&size_with_hdr, &size);
self.backend
.emit_add_r_imm(&size_with_hdr, OBJECT_HEADER_SIZE as i32);
self.backend
.emit_cmp_imm_r(mm::LARGE_OBJECT_THRESHOLD as i32, &size_with_hdr);
} else {
self.backend
.emit_cmp_imm_r(mm::LARGE_OBJECT_THRESHOLD as i32, &size);
}
self.backend.emit_jg(blk_alloc_large.clone());
self.finish_block();
let block_name = make_block_name(&node.name(), "allocsmall");
self.start_block(block_name);
// directly call 'alloc'
let tmp_res = self.get_result_value(node);
// alloc small here
self.emit_alloc_sequence_small(
tmp_allocator.clone(),
size.clone(),
align,
node,
f_content,
f_context,
vm
);
self.backend.emit_jmp(blk_alloc_large_end.clone());
// finishing current block
self.finish_block();
let const_align = self.make_int_const(align as u64, vm);
// alloc_large:
self.start_block(blk_alloc_large.clone());
self.emit_alloc_sequence_large(
tmp_allocator.clone(),
size,
align,
node,
self.emit_runtime_entry(
&entrypoints::ALLOC_ANY,
vec![tmp_allocator.clone(), size.clone(), const_align],
Some(vec![tmp_res.clone()]),
Some(node),
f_content,
f_context,
vm
);
self.finish_block();
// alloc_large_end:
self.start_block(blk_alloc_large_end.clone());
self.get_result_value(node)
tmp_res
}
}
......
......@@ -37,10 +37,19 @@ impl CompilerPass for PeepholeOptimization {
let compiled_funcs = vm.compiled_funcs().read().unwrap();
let mut cf = compiled_funcs.get(&func.id()).unwrap().write().unwrap();
// remove redundant move first
for i in 0..cf.mc().number_of_insts() {
cf.mc().trace_inst(i);
// if two sides of a move instruction are the same,
// it is redundant, and can be eliminated
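// e.g. a move like `mov %rax -> %rax`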
trace!("trying to remove redundant move");
self.remove_redundant_move(i, &mut cf);
}
// then remove jumps (because removing movs will affect this)
for i in 0..cf.mc().number_of_insts() {
cf.mc().trace_inst(i);
// if a branch jumps to a label that contains another jump, such as
// ..
......@@ -53,9 +62,11 @@ impl CompilerPass for PeepholeOptimization {
// the order matters: we need to run this first, then remove_unnecessary_jump()
// as this will give us more chances to remove unnecessary jumps
trace!("trying to remove jump-to-jump");
self.remove_jump_to_jump(i, &mut cf);
// if a branch targets a block that immediately follows it, it can be eliminated
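// (execution simply falls through to that block anyway)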
trace!("trying to remove unnecessary jmp");
self.remove_unnecessary_jump(i, &mut cf);
}
......@@ -74,8 +85,6 @@ impl PeepholeOptimization {
fn remove_redundant_move(&mut self, inst: usize, cf: &mut CompiledFunction) {
// if this instruction is a move, and moves from register to register (no memory operands)
if cf.mc().is_move(inst) && !cf.mc().is_using_mem_op(inst) {
cf.mc().trace_inst(inst);
// get source reg/temp ID
let src: MuID = {
let uses = cf.mc().get_inst_reg_uses(inst);
......@@ -166,34 +175,48 @@ impl PeepholeOptimization {
let opt_dest = mc.is_jmp(cur_inst);
match opt_dest {
Some(ref dest) => {
trace!("current instruction {} jumps to {}", cur_inst, dest);
// if we have already visited this destination label,
// we have hit a cycle of jumps and need to stop
if visited_labels.contains(dest) {
warn!("met an infinite loop in removing jump-to-jump");
warn!("we are not optimizing this case");
return;
} else {
visited_labels.insert(dest.clone());
debug!("visited {}", dest);
}
// get the block for destination
let first_inst = mc.get_block_range(dest).unwrap().start;
debug_assert!(
mc.is_label(first_inst).is_none(),
"expect start inst {} of \
block {} is a inst instead of label",
let first_inst = {
let start = mc.get_block_range(dest).unwrap().start;
let last = mc.number_of_insts();
let mut first = start;
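// skip labels and nops: moves blanked out earlier in this pass show up as nops here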
for i in start..last {
if mc.is_label(i).is_some() || mc.is_nop(i) {
continue;
} else {
first = i;
break;
}
}
first
};
trace!(
"examining first valid inst {} from block {}",
first_inst,
dest
);
trace!("examining first inst {} of block {}", first_inst, dest);
// if the first instruction is a jump
match mc.is_jmp(first_inst) {
Some(ref dest2) => {
// it's a jump-to-jump case
cur_inst = first_inst;
last_dest = Some(dest2.clone());
visited_labels.insert(dest2.clone());
debug!("visited {}", dest2);
}
None => break
}
......
......@@ -20,13 +20,9 @@ use compiler::backend;
use utils::LinkedHashSet;
use utils::LinkedHashMap;
use compiler::backend::reg_alloc::graph_coloring::petgraph;
use compiler::backend::reg_alloc::graph_coloring::petgraph::Graph;
use compiler::backend::reg_alloc::graph_coloring::petgraph::graph::NodeIndex;
/// GraphNode represents a node in the interference graph.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct GraphNode {
pub struct Node {
/// temp ID (could be register)
temp: MuID,
/// assigned color
......@@ -41,8 +37,29 @@ pub struct GraphNode {
/// We need to know the moves so that we can coalesce.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Move {
pub from: NodeIndex,
pub to: NodeIndex
pub from: MuID,
pub to: MuID
}
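/// returns whether a register ID refers to a precolored machine register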
#[inline(always)]
fn is_precolored(reg: MuID) -> bool {
reg < MACHINE_ID_END
}
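/// returns whether a precolored register maps to a color that the allocator can actually use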
#[inline(always)]
fn is_usable(reg: MuID) -> bool {
backend::all_usable_regs()
.iter()
.any(|x| x.id() == backend::get_color_for_precolored(reg))
}
/// InterferenceGraph represents the interference graph, including
......@@ -50,12 +67,11 @@ pub struct Move {
/// * all the nodes (a node is referred to by its temp's MuID)
/// * all the moves
pub struct InterferenceGraph {
/// the internal graph
graph: Graph<GraphNode, (), petgraph::Undirected>,
/// a map of all nodes (from temp ID to node index)
/// node index is how nodes are referred to with pet_graph
nodes: LinkedHashMap<MuID, NodeIndex>,
/// a set of all moves
nodes: LinkedHashMap<MuID, Node>,
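/// all interference edges, stored as pairs in both directions for O(1) lookup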
adj_set: LinkedHashSet<(MuID, MuID)>,
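/// per-node adjacency lists (maintained only for non-precolored nodes)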
adj_list: LinkedHashMap<MuID, LinkedHashSet<MuID>>,
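/// the current degree (number of interference edges) of each node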
degree: LinkedHashMap<MuID, usize>,
moves: LinkedHashSet<Move>
}
......@@ -63,7 +79,9 @@ impl InterferenceGraph {
/// creates a new graph
fn new() -> InterferenceGraph {
InterferenceGraph {
graph: Graph::new_undirected(),
adj_set: LinkedHashSet::new(),
adj_list: LinkedHashMap::new(),
degree: LinkedHashMap::new(),
nodes: LinkedHashMap::new(),
moves: LinkedHashSet::new()
}
......@@ -71,49 +89,34 @@ impl InterferenceGraph {
/// creates a new node for a temp (if we have already created a node for the temp, returns it)
/// This function will increase the spill cost for the node by 1 each time it is called for the temp
fn new_node(&mut self, reg_id: MuID, context: &FunctionContext) -> NodeIndex {
fn new_node(&mut self, reg_id: MuID, context: &FunctionContext) -> MuID {
let entry = context.get_value(reg_id).unwrap();
// if it is the first time, create the node
if !self.nodes.contains_key(&reg_id) {
let node = GraphNode {
let node = Node {
temp: reg_id,
color: None,
group: backend::RegGroup::get_from_ty(entry.ty()),
spill_cost: 0.0f32
};
// add to the graph
let index = self.graph.add_node(node);
// save index
self.nodes.insert(reg_id, index);
self.nodes.insert(reg_id, node);
self.adj_list.insert(reg_id, LinkedHashSet::new());
self.degree.insert(reg_id, 0);
}
// get the node index
let node_index = *self.nodes.get(&reg_id).unwrap();
// get node
let node_mut = self.graph.node_weight_mut(node_index).unwrap();
let node_mut = self.nodes.get_mut(&reg_id).unwrap();
// increase node spill cost
node_mut.spill_cost += 1.0f32;
node_index
}
/// returns the node index for a temp
pub fn get_node(&self, reg: MuID) -> NodeIndex {
match self.nodes.get(&reg) {
Some(index) => *index,
None => panic!("do not have a node for {}", reg)
}
reg_id
}
/// returns all the nodes in the graph
pub fn nodes(&self) -> Vec<NodeIndex> {
let mut ret = vec![];
for index in self.nodes.values() {
ret.push(*index);
}
ret
pub fn nodes(&self) -> Vec<MuID> {
self.nodes.keys().map(|x| *x).collect()
}
/// returns all the moves in the graph
......@@ -122,23 +125,19 @@ impl InterferenceGraph {
}
/// adds a move edge between two nodes
fn add_move(&mut self, src: NodeIndex, dst: NodeIndex) {
fn add_move(&mut self, src: MuID, dst: MuID) {
let src = {
let temp_src = self.get_temp_of(src);
if temp_src < MACHINE_ID_END {
if is_precolored(src) {
// get the color for the machine register, e.g. rax for eax/ax/al/ah
let alias = backend::get_color_for_precolored(temp_src);
self.get_node(alias)
backend::get_color_for_precolored(src)
} else {
src
}
};
let dst = {
let temp_dst = self.get_temp_of(dst);
if temp_dst < MACHINE_ID_END {
let alias = backend::get_color_for_precolored(temp_dst);
self.get_node(alias)
if is_precolored(dst) {
backend::get_color_for_precolored(dst)
} else {
dst
}
......@@ -148,99 +147,105 @@ impl InterferenceGraph {
}
/// adds an interference edge between two nodes
pub fn add_interference_edge(&mut self, from: NodeIndex, to: NodeIndex) {
// adds edge to the internal graph
self.graph.update_edge(from, to, ());
pub fn add_edge(&mut self, u: MuID, v: MuID) {
// if one of the nodes is a machine register, we also add an
// interference edge to its alias
// e.g. if %a and %edi interfere,
// we also add the %a - %rdi interference
let from_tmp = self.graph.node_weight(from).unwrap().temp;
let to_tmp = self.graph.node_weight(to).unwrap().temp;
if from_tmp < MACHINE_ID_END || to_tmp < MACHINE_ID_END {
let from_tmp = if from_tmp < MACHINE_ID_END {
backend::get_color_for_precolored(from_tmp)
let u = if is_precolored(u) {
if is_usable(u) {
backend::get_color_for_precolored(u)
} else {
from_tmp
};
let to_tmp = if to_tmp < MACHINE_ID_END {
backend::get_color_for_precolored(to_tmp)
// if it is not usable, we do not need to add an interference edge
return;
}
} else {
u
};
let v = if is_precolored(v) {
if is_usable(v) {
backend::get_color_for_precolored(v)
} else {
to_tmp
};
return;
}
} else {
v
};
let from_tmp_node = self.get_node(from_tmp);
let to_tmp_node = self.get_node(to_tmp);
self.graph.update_edge(from_tmp_node, to_tmp_node, ());
}
}
if !self.adj_set.contains(&(u, v)) && u != v {
self.adj_set.insert((u, v));
self.adj_set.insert((v, u));
/// do the two nodes interfere?
pub fn is_interfered_with(&self, node1: NodeIndex, node2: NodeIndex) -> bool {
let edge = self.graph.find_edge(node1, node2);
edge.is_some()
if !is_precolored(u) {
self.adj_list.get_mut(&u).unwrap().insert(v);
let degree = self.degree.get_mut(&u).unwrap();
*degree = *degree + 1;
}
if !is_precolored(v) {
self.adj_list.get_mut(&v).unwrap().insert(u);
let degree = self.degree.get_mut(&v).unwrap();
*degree = *degree + 1;
}
}
}
/// set color for a node
pub fn color_node(&mut self, node: NodeIndex, color: MuID) {
self.graph.node_weight_mut(node).unwrap().color = Some(color);
pub fn color_node(&mut self, reg: MuID, color: MuID) {
self.nodes.get_mut(&reg).unwrap().color = Some(color);
}
/// is a node colored yet?
pub fn is_colored(&self, node: NodeIndex) -> bool {
self.graph.node_weight(node).unwrap().color.is_some()
pub fn is_colored(&self, reg: MuID) -> bool {
self.nodes.get(&reg).unwrap().color.is_some()
}
/// gets the color of a node
pub fn get_color_of(&self, node: NodeIndex) -> Option<MuID> {
self.graph.node_weight(node).unwrap().color
pub fn get_color_of(&self, reg: MuID) -> Option<MuID> {
self.nodes.get(&reg).unwrap().color
}
/// gets the reg group of a node
pub fn get_group_of(&self, node: NodeIndex) -> backend::RegGroup {
self.graph.node_weight(node).unwrap().group
pub fn get_group_of(&self, reg: MuID) -> backend::RegGroup {
self.nodes.get(&reg).unwrap().group
}
/// gets the temporary of a node
pub fn get_temp_of(&self, node: NodeIndex) -> MuID {
self.graph.node_weight(node).unwrap().temp
pub fn get_temp_of(&self, reg: MuID) -> MuID {
self.nodes.get(&reg).unwrap().temp
}
/// gets the spill cost of a node
pub fn get_spill_cost(&self, node: NodeIndex) -> f32 {
self.graph.node_weight(node).unwrap().spill_cost
pub fn get_spill_cost(&self, reg: MuID) -> f32 {
self.nodes.get(&reg).unwrap().spill_cost
}
/// are two nodes the same node?
fn is_same_node(&self, node1: NodeIndex, node2: NodeIndex) -> bool {
node1 == node2
fn is_same_node(&self, reg1: MuID, reg2: MuID) -> bool {
reg1 == reg2