Commit 5145efb5 authored by qinsoon

gc crate

parent c2243dd0
......@@ -23,7 +23,7 @@ use ast::op::*;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::mm::objectmodel::OBJECT_HEADER_SIZE;
use runtime::mm::OBJECT_HEADER_SIZE;
use runtime::ValueLocation;
use runtime::thread;
......
......@@ -20,8 +20,8 @@ use ast::op::*;
use ast::types::*;
use vm::VM;
use runtime::mm;
use runtime::mm::objectmodel::OBJECT_HEADER_SIZE;
use runtime::mm::objectmodel::OBJECT_HEADER_OFFSET;
use runtime::mm::OBJECT_HEADER_SIZE;
use runtime::mm::OBJECT_HEADER_OFFSET;
use runtime::ValueLocation;
use runtime::thread;
use runtime::entrypoints;
......@@ -1411,9 +1411,6 @@ impl <'a> InstructionSelection {
Some(node), f_content, f_context, vm);
}
Instruction_::CommonInst_GetAddr(op) => {
use runtime::mm::objectmodel::GC_IREF_HAS_OFFSET;
debug_assert!(!GC_IREF_HAS_OFFSET);
trace!("instsel on GETADDR");
// assume it is pinned
......
......@@ -472,9 +472,6 @@ mod tests {
#[test]
fn test_treadmill_alloc_spanblock() {
use simple_logger;
simple_logger::init().unwrap();
let space = FreeListSpace::new(BLOCK_SIZE * 20);
for i in 0..5 {
......@@ -486,9 +483,6 @@ mod tests {
#[test]
fn test_treadmill_sweep() {
use simple_logger;
simple_logger::init().unwrap();
let space = FreeListSpace::new(BLOCK_SIZE * 20);
for i in 0..5 {
......
......@@ -282,8 +282,6 @@ fn gc() {
trace!("GC finishes");
}
pub const MULTI_THREAD_TRACE_THRESHOLD : usize = 10;
pub const PUSH_BACK_THRESHOLD : usize = 50;
pub static GC_THREADS : atomic::AtomicUsize = atomic::ATOMIC_USIZE_INIT;
......
......@@ -24,13 +24,6 @@ pub use self::immix_mutator::N_MUTATORS;
pub use self::immix_mutator::CURSOR_OFFSET;
pub use self::immix_mutator::LIMIT_OFFSET;
use std::sync::Arc;
use std::sync::RwLock;
lazy_static!{
pub static ref SHARED_SPACE : Option<Arc<RwLock<ImmixSpace>>> = None;
}
pub const LOG_BYTES_IN_LINE : usize = 8;
pub const BYTES_IN_LINE : usize = (1 << LOG_BYTES_IN_LINE);
pub const LOG_BYTES_IN_BLOCK : usize = 16;
......
......@@ -22,8 +22,6 @@ pub mod immix;
pub mod freelist;
pub mod gc;
pub const ALIGNMENT_VALUE : u8 = 1;
pub const IMMIX_SPACE_RATIO : f64 = 1.0 - LO_SPACE_RATIO;
pub const LO_SPACE_RATIO : f64 = 0.2;
pub const DEFAULT_HEAP_SIZE : usize = 500 << 20;
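// Editorial sketch (not part of this commit): how the default heap could be split using the
// ratios above; whether gc_init is actually fed these exact values is an assumption.
#[allow(dead_code)]
fn default_space_sizes() -> (usize, usize) {
    let heap = DEFAULT_HEAP_SIZE;                             // 500 << 20 = 500 MiB
    let lo = (heap as f64 * LO_SPACE_RATIO) as usize;         // 0.2 -> 100 MiB
    let immix = (heap as f64 * IMMIX_SPACE_RATIO) as usize;   // 1.0 - 0.2 = 0.8 -> 400 MiB
    (immix, lo)
}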
......@@ -97,7 +95,11 @@ pub trait Space {
}
}
#[allow(dead_code)]
pub const ALIGNMENT_VALUE : u8 = 1;
#[inline(always)]
#[allow(dead_code)]
pub fn fill_alignment_gap(start : Address, end : Address) -> () {
debug_assert!(end >= start);
unsafe {start.memset(ALIGNMENT_VALUE, end - start);}
......
......@@ -12,9 +12,63 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! # An Immix garbage collector implementation
//!
//! This crate implements a garbage collector for Zebu. We carefully designed
//! the interface so that the garbage collector is a standalone crate, separate from
//! the VM, and it should be easy to reuse outside the Zebu project.
//!
//! The GC implements immix for small object allocation/reclamation, and
//! treadmill for large objects. It uses an object model with a 64-bit object header
//! before the start of the object. Allocation always returns an ObjectReference
//! pointing to the start of the object.
//!
//! The idea of the GC implementation is discussed in the paper: Rust as a language
//! for high performance GC implementation (ISMM'16).
//!
//! A user of this GC (Zebu or any other client) should do the following
//! (a usage sketch follows this doc comment):
//!
//! 1. initialize the GC by calling gc_init()
//! 2. for each running mutator thread, call new_mutator() to create a mutator
//! (and store it somewhere, e.g. in TLS), and call set_low_water_mark() so that
//! when the GC conservatively scans the stack it will not scan beyond the
//! low water mark
//! 3. insert yieldpoint() occasionally in the code so that the GC can synchronise
//! with the thread, or insert yieldpoint_slow() if the user implements an inlined
//! fastpath
//! 4. call alloc_fast() to allocate, or alloc_slow() if the user implements an
//! inlined fastpath
//! 5. the allocation may trigger a GC, and it is guaranteed to return a valid address
//! 6. call init_object() or init_hybrid() to initialize the object
//! 7. when the thread quits, call drop_mutator() to properly destroy the mutator
//!
//! Other utility functions provided by the GC:
//!
//! * explicit control of the root set - add_to_root()/remove_root():
//! the GC treats stacks and registers as the default root set; however, the client may
//! explicitly add references as roots
//! * explicit control of object movement/liveness - pin_object()/unpin_object():
//! the GC will keep a pinned object alive and in place (it does not move it)
//! * capability of persisting the heap as a relocatable boot image - persist_heap():
//! the GC will traverse the heap from the given roots, and dump all reachable objects
//! in a structured way so that the user can access every object through the returned
//! data structure and persist them in their own way
//!
//! Issues (going to be fixed in a major GC rewrite):
//!
//! * collection is currently disabled, due to bugs (and the fact that we are going to
//! change the GC substantially)
//! * we are using a 64-bit header for each object; we will switch to a sidemap object
//! model (Issue #12)
//! * we allocate the whole heap and initialize it all at once during startup. We
//! should allow dynamic growth of the heap (Issue #56)
//! * pin/unpin operations differ from the Mu spec (Issue #33)
//! * we are using some utility C functions (heap/gc/clib_(architecture).c/.S) to help acquire
//! some information for the GC, and those C functions do not return accurate results
//! (Issue #21)
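// A minimal usage sketch (editorial, not part of this commit) of the seven steps listed
// in the doc comment above. The entry points are the ones exported by this crate; the
// sizes, the loop scaffolding and the placeholder type encoding are assumptions.
#[allow(dead_code)]
fn example_mutator_thread() {
    // 1. initialize the GC (normally done once by the runtime, not per thread)
    gc_init(40 << 20, 40 << 20, 1, true);
    // 2. create a mutator for this thread; drop_mutator() expects to reclaim it as a Box,
    //    so hand it over as a raw pointer. Also call set_low_water_mark() here so that
    //    conservative stack scanning stops at this frame (its signature is not shown in
    //    this diff).
    let mutator: *mut Mutator = Box::into_raw(Box::new(new_mutator()));
    for _ in 0..100 {
        // 3. let the GC synchronise with this thread
        yieldpoint(mutator);
        // 4./5. allocate 16 bytes (header size not included); this may trigger a GC,
        //       but it always returns a valid address
        let obj = muentry_alloc_fast(mutator, 16, 8);
        // 6. initialize the object; a real client derives `encode` from add_gc_type()
        //    and get_gc_type_encode(), 0 is only a placeholder here
        muentry_init_object(mutator, obj, 0u64);
    }
    // 7. destroy the mutator before this thread quits
    drop_mutator(mutator);
}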
#[macro_use]
extern crate rodal;
extern crate utils;
#[macro_use]
extern crate lazy_static;
......@@ -26,103 +80,66 @@ extern crate crossbeam;
#[macro_use]
extern crate field_offset;
use std::sync::atomic::Ordering;
pub mod common;
pub mod objectmodel;
pub mod heap;
use common::gctype::GCType;
use utils::ObjectReference;
use common::objectdump;
use heap::immix::BYTES_IN_LINE;
use heap::immix::ImmixSpace;
use heap::immix::ImmixMutatorLocal;
use heap::freelist;
use heap::freelist::FreeListSpace;
use common::objectdump;
use utils::LinkedHashSet;
use utils::Address;
use utils::ObjectReference;
use std::fmt;
use std::sync::Arc;
use std::sync::RwLock;
use std::sync::atomic::Ordering;
/// data structures for the GC and the user
pub mod common;
/// object model (metadata for objects managed by the GC)
/// this allows the user to know some GC semantics, and to implement a
/// fastpath on their side
// FIXME: this mod can be private (we expose it only because tests are using it)
// we should consider moving those tests within the mod
pub mod objectmodel;
/// object header size (in byte)
pub use objectmodel::OBJECT_HEADER_SIZE;
/// offset from an object reference to the header (in byte, can be negative)
pub use objectmodel::OBJECT_HEADER_OFFSET;
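// Editorial sketch (not part of this commit): reading an object's header through the two
// re-exports above; this mirrors the debug check used in the heap-dump test in this commit.
#[allow(dead_code)]
fn read_object_header(obj: ObjectReference) -> u64 {
    // OBJECT_HEADER_OFFSET is the (possibly negative) byte offset from the object
    // reference to its header, which is OBJECT_HEADER_SIZE bytes wide
    unsafe { (obj.to_address() + OBJECT_HEADER_OFFSET).load::<u64>() }
}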
/// the main GC crate, heap structures (including collection, immix space, freelist space)
// FIXME: this mod can be private (we expose it only because tests are using it)
// we should consider moving those tests within the mod
pub mod heap;
/// whether this GC moves objects
/// (if false, an object keeps a fixed address from allocation until it is reclaimed)
pub const GC_MOVES_OBJECT : bool = false;
/// threshold for small objects: objects smaller than this threshold use the small
/// object allocator (immix), otherwise the large object allocator (freelist) is used
pub const LARGE_OBJECT_THRESHOLD : usize = BYTES_IN_LINE;
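// Editorial sketch (not part of this commit): how a client might dispatch on
// LARGE_OBJECT_THRESHOLD. Both allocation entry points are defined later in this file;
// only the helper itself is an assumption.
#[allow(dead_code)]
fn alloc_any(mutator: *mut Mutator, size: usize, align: usize) -> ObjectReference {
    if size < LARGE_OBJECT_THRESHOLD {
        // objects smaller than a line go to the immix (small object) space
        muentry_alloc_fast(mutator, size, align)
    } else {
        // larger objects go to the freelist (large object) space
        muentry_alloc_large(mutator, size, align)
    }
}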
/// the mutator that the user is supposed to attach to every mutator thread.
/// Most interface functions provided by the GC require a pointer to this mutator.
pub use heap::immix::ImmixMutatorLocal as Mutator;
pub use heap::immix::CURSOR_OFFSET as ALLOCATOR_CURSOR_OFFSET;
pub use heap::immix::LIMIT_OFFSET as ALLOCATOR_LIMIT_OFFSET;
#[repr(C)]
pub struct GC {
immix_space: Arc<ImmixSpace>,
lo_space : Arc<FreeListSpace>,
gc_types : Vec<Arc<GCType>>,
roots : LinkedHashSet<ObjectReference>
}
impl fmt::Debug for GC {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "GC\n").unwrap();
write!(f, "{}", self.immix_space).unwrap();
// these two offsets help the user's compiler generate inlined fastpath code
write!(f, "{}", self.lo_space)
}
}
lazy_static! {
pub static ref MY_GC : RwLock<Option<GC>> = RwLock::new(None);
}
#[no_mangle]
pub extern fn gc_stats() {
println!("{:?}", MY_GC.read().unwrap().as_ref().unwrap());
}
#[no_mangle]
pub extern fn get_spaces() -> (Arc<ImmixSpace>, Arc<FreeListSpace>) {
let space_lock = MY_GC.read().unwrap();
let space = space_lock.as_ref().unwrap();
(space.immix_space.clone(), space.lo_space.clone())
}
#[no_mangle]
pub extern fn add_gc_type(mut ty: GCType) -> Arc<GCType> {
let mut gc_guard = MY_GC.write().unwrap();
let mut gc = gc_guard.as_mut().unwrap();
let index = gc.gc_types.len() as u32;
ty.id = index;
let ty = Arc::new(ty);
gc.gc_types.push(ty.clone());
ty
}
#[no_mangle]
pub extern fn get_gc_type_encode(id: u32) -> u64 {
let gc_lock = MY_GC.read().unwrap();
let ref gctype = gc_lock.as_ref().unwrap().gc_types[id as usize];
/// offset of the immix allocator's cursor field from the mutator pointer
pub use heap::immix::CURSOR_OFFSET as ALLOCATOR_CURSOR_OFFSET;
/// offset of the immix allocator's limit field from the mutator pointer
pub use heap::immix::LIMIT_OFFSET as ALLOCATOR_LIMIT_OFFSET;
if gctype.is_hybrid() {
objectmodel::gen_hybrid_gctype_encode(gctype, 0) // fake length
} else {
objectmodel::gen_gctype_encode(gctype)
}
}
// the implementation of this GC will be changed dramatically in the future,
// but the exposed interface is likely to stay the same.
/// initializes the GC
#[no_mangle]
pub extern fn gc_init(immix_size: usize, lo_size: usize, n_gcthreads: usize, enable_gc: bool) {
// uncomment the following line to turn on a certain level of debugging info
// simple_logger::init_with_level(log::LogLevel::Trace).ok();
// init object model - init this first, since spaces may use it
objectmodel::init();
......@@ -160,11 +177,16 @@ pub extern fn gc_init(immix_size: usize, lo_size: usize, n_gcthreads: usize, ena
}
}
/// creates a mutator
#[no_mangle]
pub extern fn new_mutator() -> ImmixMutatorLocal {
ImmixMutatorLocal::new(MY_GC.read().unwrap().as_ref().unwrap().immix_space.clone())
}
/// destroys a mutator
/// Note: the user has to explicitly drop mutators that are no longer in use, otherwise
/// the GC may not be able to stop all the mutators before a collection, and will end up
/// pending forever
#[no_mangle]
#[allow(unused_variables)]
pub extern fn drop_mutator(mutator: *mut ImmixMutatorLocal) {
......@@ -173,10 +195,12 @@ pub extern fn drop_mutator(mutator: *mut ImmixMutatorLocal) {
// rust will reclaim the boxed mutator
}
/// sets the low water mark for the current thread.
/// When the GC conservatively scans the stack for roots, it will not scan beyond the low
/// water mark
pub use heap::gc::set_low_water_mark;
// explicitly control roots
/// adds an object reference to the root set
#[no_mangle]
#[inline(always)]
pub extern fn add_to_root(obj: ObjectReference) {
......@@ -184,6 +208,7 @@ pub extern fn add_to_root(obj: ObjectReference) {
gc.as_mut().unwrap().roots.insert(obj);
}
/// removes an object reference from the root set
#[no_mangle]
#[inline(always)]
pub extern fn remove_root(obj: ObjectReference) {
......@@ -191,44 +216,47 @@ pub extern fn remove_root(obj: ObjectReference) {
gc.as_mut().unwrap().roots.remove(&obj);
}
// pin/unpin
/// pins an object so that it will not be moved or reclaimed
#[no_mangle]
pub extern fn muentry_pin_object(obj: ObjectReference) -> Address {
add_to_root(obj);
obj.to_address()
}
/// unpins an object so that it can be freely moved/reclaimed like a normal object
#[no_mangle]
pub extern fn muentry_unpin_object(obj: Address) {
remove_root(unsafe {obj.to_object_reference()});
}
// yieldpoint
/// a regular check to see if the mutator should stop for synchronisation
#[no_mangle]
#[inline(always)]
pub extern fn yieldpoint(mutator: *mut ImmixMutatorLocal) {
unsafe {mutator.as_mut().unwrap()}.yieldpoint();
}
/// the slowpath for yieldpoint.
/// For performance, we assume the user will implement an inlined fastpath; we provide
/// constants, offsets to fields, and this slowpath function for that purpose
#[no_mangle]
#[inline(never)]
pub extern fn yieldpoint_slow(mutator: *mut ImmixMutatorLocal) {
unsafe {mutator.as_mut().unwrap()}.yieldpoint_slow()
}
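// Editorial sketch: the shape of the inlined yieldpoint fastpath that the doc comment
// above assumes. The name and location of the "GC pending" flag are assumptions; only
// yieldpoint_slow() is taken from this crate.
#[allow(dead_code)]
#[inline(always)]
fn my_yieldpoint(mutator: *mut Mutator, gc_pending: &std::sync::atomic::AtomicBool) {
    // fastpath: a single flag load, cheap enough to emit at every loop back-edge
    if gc_pending.load(std::sync::atomic::Ordering::Relaxed) {
        // slowpath: block this mutator until the collection has finished
        yieldpoint_slow(mutator);
    }
}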
// allocation
/// allocates an object in the immix space
// size doesn't include HEADER_SIZE
#[inline(always)]
pub fn alloc(mutator: *mut ImmixMutatorLocal, size: usize, align: usize) -> ObjectReference {
let addr = unsafe {&mut *mutator}.alloc(size, align);
unsafe {addr.to_object_reference()}
}
/// allocates an object in the immix space
// size doesn't include HEADER_SIZE
#[no_mangle]
#[inline(never)]
/// size doesn't include HEADER_SIZE, return value is offset by HEADER_OFFSET
pub extern fn muentry_alloc_fast(mutator: *mut ImmixMutatorLocal, size: usize, align: usize) -> ObjectReference {
let ret = alloc(mutator, size, align);
trace!("muentry_alloc_fast(mutator: {:?}, size: {}, align: {}) = {}", mutator, size, align, ret);
......@@ -236,46 +264,118 @@ pub extern fn muentry_alloc_fast(mutator: *mut ImmixMutatorLocal, size: usize, a
ret
}
/// allocates an object with slowpath in the immix space
#[no_mangle]
#[inline(never)]
// this function is supposed to be called by an inlined fastpath
// size includes HEADER_SIZE, return value is NOT offset by HEADER_OFFSET
pub extern fn muentry_alloc_slow(mutator: *mut ImmixMutatorLocal, size: usize, align: usize) -> Address {
let ret = unsafe {&mut *mutator}.try_alloc_from_local(size, align);
trace!("muentry_alloc_slow(mutator: {:?}, size: {}, align: {}) = {}", mutator, size, align, ret);
ret
}
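// Editorial sketch: the shape of the inlined allocation fastpath that muentry_alloc_slow
// above is meant to back up. In generated code the cursor and limit would be loaded from
// the mutator at ALLOCATOR_CURSOR_OFFSET / ALLOCATOR_LIMIT_OFFSET; here they are passed in
// as plain usize addresses to keep the sketch self-contained, which is an assumption.
#[allow(dead_code)]
#[inline(always)]
fn bump_alloc_fastpath(cursor: &mut usize, limit: usize, size: usize, align: usize) -> Option<usize> {
    // align the cursor up (align is assumed to be a power of two), then bump it by `size`,
    // which - like muentry_alloc_slow's size argument - already includes OBJECT_HEADER_SIZE
    let start = (*cursor + align - 1) & !(align - 1);
    let end = start + size;
    if end <= limit {
        *cursor = end; // the request fits in the current thread-local buffer
        Some(start)
    } else {
        None // caller falls back to muentry_alloc_slow(mutator, size, align), which may GC
    }
}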
/// allocates an object in the freelist space (large object space)
#[no_mangle]
// size doesn't include HEADER_SIZE
pub extern fn muentry_alloc_large(mutator: *mut ImmixMutatorLocal, size: usize, align: usize) -> ObjectReference {
let ret = freelist::alloc_large(size, align, unsafe {mutator.as_mut().unwrap()}, MY_GC.read().unwrap().as_ref().unwrap().lo_space.clone());
trace!("muentry_alloc_large(mutator: {:?}, size: {}, align: {}) = {}", mutator, size, align, ret);
unsafe {ret.to_object_reference()}
}
/// initializes a fixed-size object
#[no_mangle]
#[inline(never)]
pub extern fn muentry_init_object(mutator: *mut ImmixMutatorLocal, obj: ObjectReference, encode: u64) {
unsafe {&mut *mutator}.init_object(obj.to_address(), encode);
}
/// initializes a hybrid type object
#[no_mangle]
#[inline(never)]
pub extern fn muentry_init_hybrid(mutator: *mut ImmixMutatorLocal, obj: ObjectReference, encode: u64, length: u64) {
unsafe {&mut *mutator}.init_hybrid(obj.to_address(), encode, length);
}
/// forces a GC to happen (this is not a 'hint': the world will be stopped, and heap traversal will start)
#[no_mangle]
#[inline(never)]
/// this function is supposed to be called by an inlined fastpath
/// size _includes_ HEADER_SIZE, return value is _NOT_ offset by HEADER_OFFSET
pub extern fn muentry_alloc_slow(mutator: *mut ImmixMutatorLocal, size: usize, align: usize) -> ObjectReference {
let ret = unsafe {&mut *mutator}.try_alloc_from_local(size, align);
trace!("muentry_alloc_slow(mutator: {:?}, size: {}, align: {}) = {}", mutator, size, align, ret);
pub extern fn force_gc() {
heap::gc::trigger_gc();
}
unsafe {ret.to_object_reference()}
/// traces reachable objects and records them as a data structure
/// so that the user can inspect the reachable heap and persist it in their own way
#[no_mangle]
pub extern fn persist_heap(roots: Vec<Address>) -> objectdump::HeapDump {
objectdump::HeapDump::from_roots(roots)
}
/// GC represents the context for the currently running GC instance
struct GC {
immix_space: Arc<ImmixSpace>,
lo_space : Arc<FreeListSpace>,
gc_types : Vec<Arc<GCType>>,
roots : LinkedHashSet<ObjectReference>
}
lazy_static! {
static ref MY_GC : RwLock<Option<GC>> = RwLock::new(None);
}
impl fmt::Debug for GC {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "GC\n").unwrap();
write!(f, "{}", self.immix_space).unwrap();
write!(f, "{}", self.lo_space)
}
}
// the following API functions may get removed in the future
/// prints current GC context for debugging
#[no_mangle]
/// size doesn't include HEADER_SIZE, return value is offset by HEADER_OFFSET
pub extern fn muentry_alloc_large(mutator: *mut ImmixMutatorLocal, size: usize, align: usize) -> ObjectReference {
let ret = freelist::alloc_large(size, align, unsafe {mutator.as_mut().unwrap()}, MY_GC.read().unwrap().as_ref().unwrap().lo_space.clone());
trace!("muentry_alloc_large(mutator: {:?}, size: {}, align: {}) = {}", mutator, size, align, ret);
pub extern fn print_gc_context() {
println!("{:?}", MY_GC.read().unwrap().as_ref().unwrap());
}
unsafe {ret.to_object_reference()}
/// gets immix space and freelist space
#[no_mangle]
pub extern fn get_spaces() -> (Arc<ImmixSpace>, Arc<FreeListSpace>) {
let space_lock = MY_GC.read().unwrap();
let space = space_lock.as_ref().unwrap();
(space.immix_space.clone(), space.lo_space.clone())
}
// force gc
/// informs GC of a GCType
#[no_mangle]
pub extern fn force_gc() {
heap::gc::trigger_gc();
pub extern fn add_gc_type(mut ty: GCType) -> Arc<GCType> {
let mut gc_guard = MY_GC.write().unwrap();
let mut gc = gc_guard.as_mut().unwrap();
let index = gc.gc_types.len() as u32;
ty.id = index;
let ty = Arc::new(ty);
gc.gc_types.push(ty.clone());
ty
}
// dump heap
/// gets the encoding for a given GC type (by ID)
#[no_mangle]
pub extern fn persist_heap(roots: Vec<Address>) -> objectdump::HeapDump {
objectdump::HeapDump::from_roots(roots)
pub extern fn get_gc_type_encode(id: u32) -> u64 {
let gc_lock = MY_GC.read().unwrap();
let ref gctype = gc_lock.as_ref().unwrap().gc_types[id as usize];
if gctype.is_hybrid() {
objectmodel::gen_hybrid_gctype_encode(gctype, 0) // fake length
} else {
objectmodel::gen_gctype_encode(gctype)
}
}
\ No newline at end of file
......@@ -15,8 +15,6 @@
use std::sync::atomic;
use utils::ByteSize;
pub const GC_IREF_HAS_OFFSET : bool = false;
#[cfg(feature = "use-sidemap")]
mod sidemap;
#[cfg(not(feature = "use-sidemap"))]
......@@ -40,10 +38,6 @@ pub fn load_mark_state() -> u8 {
MARK_STATE.load(atomic::Ordering::SeqCst) as u8
}
pub fn flip(mark: u8) -> u8 {
mark ^ 1
}
#[inline(always)]
pub fn check_alignment(align: ByteSize) -> ByteSize {
if align < MINIMAL_ALIGNMENT {
......
......@@ -164,7 +164,7 @@ fn create_linked_list() {
start_logging();
gc::gc_init(IMMIX_SPACE_SIZE, LO_SPACE_SIZE, 1, true);
gc::gc_stats();
gc::print_gc_context();
let mut mutator = gc::new_mutator();
......@@ -192,7 +192,7 @@ fn linked_list_heap_dump() {
start_logging();
gc::gc_init(IMMIX_SPACE_SIZE, LO_SPACE_SIZE, 1, true);
gc::gc_stats();
gc::print_gc_context();
let mut mutator = gc::new_mutator();
......@@ -229,7 +229,7 @@ fn linked_list_survive_gc() {
start_logging();
gc::gc_init(IMMIX_SPACE_SIZE, LO_SPACE_SIZE, 1, true);
gc::gc_stats();
gc::print_gc_context();
let mut mutator = gc::new_mutator();
......
......@@ -153,7 +153,7 @@ fn alloc(mutator: &mut ImmixMutatorLocal) -> *mut Node {
if cfg!(debug_assertions) {
unsafe {
let hdr = addr.offset(objectmodel::OBJECT_HEADER_OFFSET).load::<u64>();
let hdr = (addr + objectmodel::OBJECT_HEADER_OFFSET).load::<u64>();
assert!(objectmodel::header_is_object_start(hdr));
}
}
......@@ -168,7 +168,7 @@ fn start() {
start_logging();
gc::gc_init(IMMIX_SPACE_SIZE, LO_SPACE_SIZE, 1, true);
gc::gc_stats();
gc::print_gc_context();
let mut mutator = gc::new_mutator();
......
......@@ -12,7 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Utility crate that serves Zebu includes:
//! # Utility crate that serves Zebu
//!
//! It includes:
//! * data structures
//! * doubly linked list
//! * linked hashmap/set
......
......@@ -1088,12 +1088,9 @@ impl <'a> VM {
/// performs GETIREF
pub fn handle_get_iref(&self, handle_ref: APIHandleArg) -> APIHandleResult {
use runtime::mm::objectmodel::GC_IREF_HAS_OFFSET;
let (ty, addr) = handle_ref.v.as_ref();
// assume iref has the same address as ref
debug_assert!(!GC_IREF_HAS_OFFSET);
let ret = self.new_handle(APIHandle {
id: self.next_id(),
v : APIHandleValue::IRef(ty, addr)
......