Commit ed3861b2 authored by qinsoon's avatar qinsoon

type and object encoding

parent 7f1c3933
......@@ -22,7 +22,7 @@ build = "build.rs"
crate-type = ["rlib"]
[features]
default = []
default = ["use-sidemap"]
use-sidemap = []
sel4-rumprun = []
sel4-rumprun-target-side = []
......
......@@ -62,6 +62,12 @@ impl HeapDump {
heap
}
/// Sidemap variant of `persist_object`, compiled only when the `use-sidemap`
/// feature is enabled.
///
/// Not implemented yet: the body is `unimplemented!()`, so any call panics.
/// NOTE(review): presumably meant to dump the object at `obj` into an
/// `ObjectDump` like the non-sidemap variant does - confirm once implemented.
#[cfg(feature = "use-sidemap")]
fn persist_object(&self, obj: Address) -> ObjectDump {
unimplemented!()
}
#[cfg(not(feature = "use-sidemap"))]
fn persist_object(&self, obj: Address) -> ObjectDump {
trace!("dump object: {}", obj);
let hdr_addr = obj + objectmodel::OBJECT_HEADER_OFFSET;
......
......@@ -24,6 +24,7 @@ use MY_GC;
use utils::{Address, ObjectReference};
use utils::POINTER_SIZE;
use utils::bit_utils;
use std::sync::atomic::{AtomicIsize, AtomicBool, Ordering};
use std::sync::{Arc, Mutex, Condvar, RwLock};
......@@ -446,7 +447,7 @@ pub fn steal_trace_object(
let value = objectmodel::get_ref_byte(alloc_map, space_start, obj);
let (ref_bits, short_encode) = (
bit_utils::lower_bits_u8(value, objectmodel::REF_BITS_LEN),
bit_utils::test_nth_bit_u8(value, objectmodel::SHORT_ENCODE_BIT)
bit_utils::test_nth_bit_u8(value, objectmodel::SHORT_ENCODE_BIT, 1)
);
match ref_bits {
0b0000_0000 => {}
......@@ -528,7 +529,7 @@ pub fn steal_trace_object(
if short_encode {
return;
} else {
base = base.plus(objectmodel::REF_BITS_LEN * POINTER_SIZE);
base += objectmodel::REF_BITS_LEN * POINTER_SIZE;
}
}
}
......@@ -737,7 +738,7 @@ pub fn steal_process_edge(
immix_space: &ImmixSpace,
lo_space: &FreeListSpace
) {
let field_addr = base.plus(offset);
let field_addr = base + offset;
let edge = unsafe { field_addr.load::<ObjectReference>() };
if cfg!(debug_assertions) {
......
......@@ -13,7 +13,8 @@
// limitations under the License.
use utils::Address;
use utils::POINTER_SIZE;
use utils::{LOG_POINTER_SIZE, POINTER_SIZE};
use utils::bit_utils;
use std::sync::atomic::AtomicUsize;
use objectmodel;
......@@ -54,12 +55,13 @@ pub trait Space {
return false;
}
let index = (addr.diff(start) >> LOG_POINTER_SIZE) as isize;
let index = ((addr - start) >> LOG_POINTER_SIZE) as isize;
// use side map
if !bit_utils::test_nth_bit_u8(
unsafe { *self.alloc_map().offset(index) },
objectmodel::OBJ_START_BIT
objectmodel::OBJ_START_BIT,
1
) {
return false;
}
......
......@@ -76,6 +76,20 @@ pub use self::sidemap::mark_as_untraced;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::is_traced;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_is_fix_size;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_has_ref_map;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_is_object_start;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_get_gctype_id;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_get_ref_map;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_get_object_size;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::header_get_hybrid_length;
#[cfg(feature = "use-sidemap")]
pub use self::sidemap::get_ref_byte;
// --- header ----
......
......@@ -12,23 +12,137 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! # Sidemap Encoding
//!
//! Terminology
//!
//! * GC byte
//! a byte for GC information (such as trace byte, ref count, etc)
//! * Type bytes
//! several bytes to store type-related information for an object, GC
//! needs the information to properly trace object
//! * Ref encode
//! encode whether a word (8 bytes) is a reference or non-reference;
//! and if it is a reference, which kind of reference it is (weakref,
//! tagged ref, or normal ref)
//! * Size encode
//! encode the size of the object. How the size is encoded depends on
//! object sizes
//! * Type ID
//! a type ID that allows us to indirectly find type information
//!
//! Design Goal
//!
//! We aim for a 1/8 constant space cost for the object encoding.
//! Min object size is 16 bytes. We always reserve 1 byte per 16 bytes as *GC byte*.
//! GC bytes are kept in a separate table from *type bytes*.
//!
//! Ref Encode
//!
//! We need 2 bits per word to encode reference kinds
//! * 00: non ref
//! * 01: normal ref
//! * 10: weak ref
//! * 11: tagged ref
//!
//! Object Size and Categories
//!
//! We categorize objects into 4 kinds, and use a different type-byte
//! encoding for each kind
//!
//! * tiny object - [16, 32) bytes
//! Stored in a tiny object space - by address, we can know it is a tiny object
//!
//! 1 type byte : 6 bits - ref encode (2 bits per word, 3 words at most (for 24 bytes objects))
//! 1 bit - size encode (0: 16 bytes, 1: 24 bytes)
//! 1 bit - unused
//!
//! * small object - [32, 64) bytes
//! Stored in a normal object space, along with medium objects
//!
//! 2 type bytes: 1 bit - small or medium object
//! 2 bits - size encode (32, 40, 48, 56 bytes)
//! 13 bits - type ID
//!
//! * medium object - [64, 2k)
//! Stored in a normal object space, along with small objects
//!
//! 4 type bytes: 1 bit - small or medium object
//! 8 bits - size encode (64, 72, ... 2k-8 bytes)
//! 23 bits - type ID
//!
//! * large object - [2k, *)
//! Stored in a large object space - by address, we can know it is a large object
//! We use header
//!
//! 16 type bytes: 8 bytes - object size (u32::MAX << 3 = ~32G)
//! 4 bytes - type ID
//! 4 bytes - unused
use std::sync::atomic;
use common::gctype::GCType;
use utils::{Address, ObjectReference};
use utils::{LOG_POINTER_SIZE, POINTER_SIZE};
use utils::bit_utils;
use utils::{ByteSize, ByteOffset};
use std::mem::transmute;
pub const MINIMAL_ALIGNMENT: ByteSize = 1;
pub const MINIMAL_ALIGNMENT: ByteSize = 16;
pub const MINIMAL_OBJECT_SIZE: ByteSize = 16;
pub const OBJECT_HEADER_SIZE: ByteSize = 0;
pub const OBJECT_HEADER_OFFSET: ByteOffset = 0;
pub type TypeID = u32;
pub mod object_encode;
pub mod type_encode;
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to tell whether `hdr` marks the start of an
/// object - confirm once implemented.
#[inline(always)]
pub fn header_is_object_start(hdr: u64) -> bool {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to tell whether `hdr` describes a fixed-size
/// object - confirm once implemented.
#[inline(always)]
pub fn header_is_fix_size(hdr: u64) -> bool {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to tell whether `hdr` carries the given
/// `mark_state` - confirm once implemented.
#[inline(always)]
pub fn header_is_traced(hdr: u64, mark_state: u8) -> bool {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to tell whether `hdr` embeds a reference
/// map - confirm once implemented.
#[inline(always)]
pub fn header_has_ref_map(hdr: u64) -> bool {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to extract the reference map from `hdr` -
/// confirm once implemented.
#[inline(always)]
pub fn header_get_ref_map(hdr: u64) -> u32 {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to extract the hybrid (variable part)
/// length from `hdr` - confirm once implemented.
#[inline(always)]
pub fn header_get_hybrid_length(hdr: u64) -> u32 {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to extract the GC type ID from `hdr` -
/// confirm once implemented.
#[inline(always)]
pub fn header_get_gctype_id(hdr: u64) -> u32 {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to extract the object size from `hdr` -
/// confirm once implemented.
#[inline(always)]
pub fn header_get_object_size(hdr: u64) -> u32 {
unimplemented!()
}
/// Sidemap placeholder: not implemented yet, so any call panics via
/// `unimplemented!()`.
///
/// NOTE(review): presumably meant to build the 64-bit encoding for the GC
/// type `ty` - confirm once implemented.
pub fn gen_gctype_encode(ty: &GCType) -> u64 {
unimplemented!()
}
pub fn gen_hybrid_gctype_encode(ty: &GCType) -> u64 {
pub fn gen_hybrid_gctype_encode(ty: &GCType, length: u32) -> u64 {
unimplemented!()
}
......@@ -44,10 +158,9 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
);
let (ref_bits, short_encode) = (
bit_utils::lower_bits_u8(hdr, REF_BITS_LEN),
bit_utils::test_nth_bit_u8(hdr, SHORT_ENCODE_BIT)
bit_utils::test_nth_bit_u8(hdr, SHORT_ENCODE_BIT, 1)
);
trace!(
"0x{:x} | val: 0x{:15x} | {}, hdr: {:b}",
cursor,
......@@ -55,7 +168,7 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
interpret_hdr_for_print_object(hdr, 0),
hdr
);
cursor = cursor.plus(POINTER_SIZE);
cursor += POINTER_SIZE;
trace!(
"0x{:x} | val: 0x{:15x} | {}",
cursor,
......@@ -63,7 +176,7 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
interpret_hdr_for_print_object(hdr, 1)
);
cursor = cursor.plus(POINTER_SIZE);
cursor += POINTER_SIZE;
trace!(
"0x{:x} | val: 0x{:15x} | {}",
cursor,
......@@ -71,7 +184,7 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
interpret_hdr_for_print_object(hdr, 2)
);
cursor = cursor.plus(POINTER_SIZE);
cursor += POINTER_SIZE;
trace!(
"0x{:x} | val: 0x{:15x} | {}",
cursor,
......@@ -79,7 +192,7 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
interpret_hdr_for_print_object(hdr, 3)
);
cursor = cursor.plus(POINTER_SIZE);
cursor += POINTER_SIZE;
trace!(
"0x{:x} | val: 0x{:15x} | {}",
cursor,
......@@ -87,16 +200,16 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
interpret_hdr_for_print_object(hdr, 4)
);
cursor = cursor.plus(POINTER_SIZE);
cursor += POINTER_SIZE;
trace!("0x{:x} | val: 0x{:15x} | {} {}",
cursor, unsafe{cursor.load::<u64>()}, interpret_hdr_for_print_object(hdr, 5),
{
if !short_encode {
"MORE..."
} else {
""
}
});
cursor, unsafe{cursor.load::<u64>()}, interpret_hdr_for_print_object(hdr, 5),
{
if !short_encode {
"MORE..."
} else {
""
}
});
if short_encode {
return;
......@@ -106,7 +219,7 @@ pub fn print_object(obj: Address, space_start: Address, trace_map: *mut u8, allo
// index between 0 and 5
fn interpret_hdr_for_print_object(hdr: u8, index: usize) -> &'static str {
if bit_utils::test_nth_bit_u8(hdr, index) {
if bit_utils::test_nth_bit_u8(hdr, index, 1) {
"REF "
} else {
"NON-REF"
......@@ -122,7 +235,7 @@ pub fn mark_as_traced(
) {
unsafe {
*trace_map.offset(
(obj.to_address().diff(space_start) >> LOG_POINTER_SIZE) as isize
((obj.to_address() - space_start) >> LOG_POINTER_SIZE) as isize
) = mark_state;
}
}
......@@ -130,7 +243,7 @@ pub fn mark_as_traced(
#[inline(always)]
pub fn mark_as_untraced(trace_map: *mut u8, space_start: Address, addr: Address, mark_state: u8) {
unsafe {
*trace_map.offset((addr.diff(space_start) >> LOG_POINTER_SIZE) as isize) = mark_state ^ 1;
*trace_map.offset(((addr - space_start) >> LOG_POINTER_SIZE) as isize) = mark_state ^ 1;
}
}
......@@ -143,7 +256,7 @@ pub fn is_traced(
) -> bool {
unsafe {
(*trace_map.offset(
(obj.to_address().diff(space_start) >> LOG_POINTER_SIZE) as isize
((obj.to_address() - space_start) >> LOG_POINTER_SIZE) as isize
)) == mark_state
}
}
......@@ -156,7 +269,7 @@ pub const SHORT_ENCODE_BIT: usize = 7;
pub fn get_ref_byte(alloc_map: *mut u8, space_start: Address, obj: ObjectReference) -> u8 {
unsafe {
*alloc_map.offset(
(obj.to_address().diff(space_start) >> LOG_POINTER_SIZE) as isize
((obj.to_address() - space_start) >> LOG_POINTER_SIZE) as isize
)
}
}
// Copyright 2017 The Australian National University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use common::gctype::GCType;
use objectmodel::sidemap::TypeID;
use objectmodel::sidemap::type_encode::WordType;
use std::sync::atomic;
use utils::{Address, ObjectReference};
use utils::{LOG_POINTER_SIZE, POINTER_SIZE};
use utils::bit_utils;
use utils::{ByteSize, ByteOffset};
use std::mem::transmute;
/// Tiny object encoding - [16, 32) bytes
/// Stored in a tiny object space - by address, we can know it is a tiny object
/// hi lo
/// |s|u|r2r1r0|
/// s, 1 bit - size encode
/// u, 1 bit - unused
/// ri, 2 bits - ref encode for ith word
#[repr(C, packed)]
#[derive(Copy, Clone)]
pub struct TinyObjectEncode {
b: u8
}
impl TinyObjectEncode {
#[inline(always)]
pub fn size(self) -> usize {
let size = ((self.b >> 7) & 0b1u8) << 3;
(16 + size) as usize
}
#[inline(always)]
pub fn n_fields(self) -> usize {
let n = (self.b >> 7) & 0b1u8;
(2 + n) as usize
}
#[inline(always)]
pub fn field_0(self) -> WordType {
let f = self.b & 0b11u8;
unsafe { transmute(f) }
}
#[inline(always)]
pub fn field_1(self) -> WordType {
let f = (self.b >> 2) & 0b11u8;
unsafe { transmute(f) }
}
#[inline(always)]
pub fn field_2(self) -> WordType {
let f = (self.b >> 4) & 0b11u8;
unsafe { transmute(f) }
}
}
#[cfg(test)]
mod tiny_object_encoding {
    use super::*;
    use objectmodel::sidemap::type_encode::WordType;
    use std::mem::size_of;

    // Constants use SCREAMING_SNAKE_CASE so the module builds without the
    // `non_upper_case_globals` lint firing.

    /// size bit set (24 bytes); words 0..2 are Ref, WeakRef, TaggedRef
    const ENCODE1: TinyObjectEncode = TinyObjectEncode { b: 0b10111001 };
    /// size bit clear (16 bytes); words 0..1 are NonRef, WeakRef
    const ENCODE2: TinyObjectEncode = TinyObjectEncode { b: 0b01001000 };

    /// The encoding must occupy exactly one byte.
    #[test]
    fn struct_size() {
        assert_eq!(size_of::<TinyObjectEncode>(), 1);
    }

    #[test]
    fn size() {
        assert_eq!(ENCODE1.size(), 24);
        assert_eq!(ENCODE2.size(), 16);
    }

    #[test]
    fn fields() {
        assert_eq!(ENCODE1.n_fields(), 3);
        assert_eq!(ENCODE1.field_0(), WordType::Ref);
        assert_eq!(ENCODE1.field_1(), WordType::WeakRef);
        assert_eq!(ENCODE1.field_2(), WordType::TaggedRef);

        assert_eq!(ENCODE2.n_fields(), 2);
        assert_eq!(ENCODE2.field_0(), WordType::NonRef);
        assert_eq!(ENCODE2.field_1(), WordType::WeakRef);
    }
}
/// Small object encoding - [32, 64) bytes
///
/// Stored in a normal object space, along with medium objects.
/// The encoding is one 16-bit word:
///
/// ```text
/// hi                lo
/// |f|sz|type_id.....|
/// ```
///
/// * `f`,       1 bit   - small(1) or medium(0)
/// * `sz`,      2 bits  - size encode (00: 32, 01: 40, 10: 48, 11: 56)
/// * `type_id`, 13 bits - type id
#[repr(C, packed)]
#[derive(Copy, Clone)]
pub struct SmallObjectEncode {
    w: u16
}

impl SmallObjectEncode {
    /// True when the flag bit (bit 15) is set, i.e. this is a small object.
    #[inline(always)]
    pub fn is_small(self) -> bool {
        let word = self.w;
        (word & 0x8000u16) != 0
    }

    /// Object size in bytes: 32 plus 8 bytes per step of the 2-bit size code.
    #[inline(always)]
    pub fn size(self) -> usize {
        debug_assert!(self.is_small());
        let word = self.w;
        let size_code = usize::from((word >> 13) & 0b11u16);
        32 + size_code * 8
    }

    /// Type id held in the low 13 bits.
    #[inline(always)]
    pub fn type_id(self) -> TypeID {
        debug_assert!(self.is_small());
        let word = self.w;
        u32::from(word & 0x1FFFu16)
    }
}
#[cfg(test)]
mod small_object_encoding {
    use super::*;
    use std::mem::size_of;

    // Constants use SCREAMING_SNAKE_CASE so the module builds without the
    // `non_upper_case_globals` lint firing.

    /// small, size code 00 (32 bytes), type id 1
    const ENCODE1: SmallObjectEncode = SmallObjectEncode {
        w: 0b1000000000000001u16
    };
    /// small, size code 01 (40 bytes), type id 4096
    const ENCODE2: SmallObjectEncode = SmallObjectEncode {
        w: 0b1011000000000000u16
    };
    /// small, size code 11 (56 bytes), type id 4097
    const ENCODE3: SmallObjectEncode = SmallObjectEncode {
        w: 0b1111000000000001u16
    };
    /// flag bit clear: not a small object
    const ENCODE4: SmallObjectEncode = SmallObjectEncode {
        w: 0b0101010101110101u16
    };

    /// The encoding must occupy exactly two bytes.
    #[test]
    fn struct_size() {
        assert_eq!(size_of::<SmallObjectEncode>(), 2);
    }

    #[test]
    fn is_small() {
        assert!(ENCODE1.is_small());
        assert!(ENCODE2.is_small());
        assert!(ENCODE3.is_small());
        assert!(!ENCODE4.is_small());
    }

    #[test]
    fn size() {
        assert_eq!(ENCODE1.size(), 32);
        assert_eq!(ENCODE2.size(), 40);
        assert_eq!(ENCODE3.size(), 56);
    }

    #[test]
    fn type_id() {
        assert_eq!(ENCODE1.type_id(), 1);
        assert_eq!(ENCODE2.type_id(), 4096);
        assert_eq!(ENCODE3.type_id(), 4097);
    }
}
/// Medium object encoding - [64, 2k)
///
/// Stored in a normal object space, along with small objects.
/// The encoding is one 32-bit word:
///
/// ```text
/// hi                  lo
/// |f|type_id.....|size|
/// ```
///
/// * `f`,       1 bit   - small(1) or medium(0)
/// * `type_id`, 23 bits - type id
/// * `size`,    8 bits  - size encode (sz -> 64 + sz * 8)
#[repr(C, packed)]
#[derive(Copy, Clone)]
pub struct MediumObjectEncode {
    d: u32
}

impl MediumObjectEncode {
    /// True when the flag bit (bit 31) is clear, i.e. this is a medium object.
    #[inline(always)]
    pub fn is_medium(self) -> bool {
        let word = self.d;
        (word & 0x8000_0000u32) == 0
    }

    /// Object size in bytes: 64 plus 8 bytes per unit of the low 8-bit size code.
    #[inline(always)]
    pub fn size(self) -> usize {
        debug_assert!(self.is_medium());
        let word = self.d;
        let size_code = (word & 0xFFu32) as usize;
        64 + (size_code << 3)
    }

    /// Type id: everything above the 8-bit size code.
    #[inline(always)]
    pub fn type_id(self) -> TypeID {
        debug_assert!(self.is_medium());
        let word = self.d;
        word >> 8
    }
}
#[cfg(test)]
mod medium_object_encoding {
    use super::*;
    use std::mem::size_of;

    // Constants use SCREAMING_SNAKE_CASE so the module builds without the
    // `non_upper_case_globals` lint firing.

    /// medium, size code 0 (64 bytes), type id 0
    const ENCODE1: MediumObjectEncode = MediumObjectEncode {
        d: 0b0000_0000_0000_0000_0000_0000_0000_0000u32
    };
    /// medium, size code 128 (1088 bytes), type id 4194305
    const ENCODE2: MediumObjectEncode = MediumObjectEncode {
        d: 0b0100_0000_0000_0000_0000_0001_1000_0000u32
    };
    /// medium, size code 253 (2088 bytes), type id 8388607 (all 23 bits set)
    const ENCODE3: MediumObjectEncode = MediumObjectEncode {
        d: 0b0111_1111_1111_1111_1111_1111_1111_1101u32
    };
    /// flag bit set: not a medium object
    const ENCODE4: MediumObjectEncode = MediumObjectEncode {
        d: 0b1100_0000_0000_0000_0000_0001_1111_1111u32
    };

    /// The encoding must occupy exactly four bytes.
    #[test]
    fn struct_size() {
        assert_eq!(size_of::<MediumObjectEncode>(), 4);
    }

    #[test]
    fn is_medium() {
        assert!(ENCODE1.is_medium());
        assert!(ENCODE2.is_medium());
        assert!(ENCODE3.is_medium());
        assert!(!ENCODE4.is_medium());
    }

    #[test]
    fn size() {
        assert_eq!(ENCODE1.size(), 64);
        assert_eq!(ENCODE2.size(), 1088);
        assert_eq!(ENCODE3.size(), 2088);
    }

    #[test]
    fn type_id() {
        assert_eq!(ENCODE1.type_id(), 0);
        assert_eq!(ENCODE2.type_id(), 4194305);
        assert_eq!(ENCODE3.type_id(), 8388607);
    }
}
/// Large object encoding - [2k, *)
///
/// Stored in a large object space - by address, we can know it is a large object.
/// A 16-byte header is used for it.
#[repr(C, packed)]
#[derive(Copy, Clone)]
pub struct LargeObjectEncode {
    /// encoded object size, decoded by `size()`
    size: u64,
    /// type id
    tyid: u32,
    /// unused, fills the header up to 16 bytes
    unused: u32
}

impl LargeObjectEncode {
    /// Returns the stored size field shifted left by 8 bits (bits shifted
    /// past the top of the `u64` are dropped).
    ///
    /// NOTE(review): the small and medium encodings scale their size code by
    /// `<< 3`; a shift of 8 here looks inconsistent with them - confirm the
    /// unit the `size` field is stored in.
    #[inline(always)]
    pub fn size(self) -> usize {
        let stored = self.size;
        (stored << 8) as usize
    }

    /// Type id, returned as stored.
    #[inline(always)]
    pub fn type_id(self) -> TypeID {
        let id = self.tyid;
        id
    }
}
#[cfg(test)]
mod large_object_encoding {
    use super::*;

    /// The large object header must occupy exactly 16 bytes.
    #[test]
    fn struct_size() {
        let header_bytes = ::std::mem::size_of::<LargeObjectEncode>();
        assert_eq!(header_bytes, 16);
    }
}
// Copyright 2017 The Australian National University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::mem::transmute;
/// Ref Encode
///
/// The kind of a single word, stored as a `u8` discriminant in the range 0..=3
/// so that it fits in two bits.
#[repr(u8)]
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum WordType {
/// the word is not a reference (0b00)
NonRef = 0,
/// the word is a normal reference (0b01)
Ref = 1,
/// the word is a weak reference (0b10)
WeakRef = 2,
/// the word is a tagged reference (0b11)
TaggedRef = 3
}
/// TypeEncode
#[repr(C, packed)]
pub struct TypeEncode {
/// how many words in fixed part of the type (max 255 = ~2k bytes)
fix_len: u8,
/// types for each word (63 * 4 = 252 words)
fix_ty: [u8; 63],
/// how many words in var part of the type
var_len: u8,
/// types for each word
var_ty: [u8; 63]
}
impl TypeEncode {
#[inline(always)]
pub fn fix_len(&self) -> u8 {
self.fix_len
}
#[inline(always)]
pub fn var_len(&self) -> u8 {
self.var_len
}
#[inline(always)]
fn extract_ty(vec: &[u8; 63], i: u8) -> WordType {
let res = vec[(i >> 2) as usize] >> ((i & 0b11) << 1);
unsafe { transmute(res & 0b11) }
}
#[inline(always)]
pub fn fix_ty(&self, i: u8) -> WordType {
debug_assert!(i < self.fix_len);
TypeEncode::extract_ty(&self.fix_ty, i)
}
#[inline(always)]
pub fn var_ty(&self, i: u8) -> WordType {
debug_assert!(i < self.var_len);
TypeEncode::extract_ty(&self.var_ty, i)
}
}
#[cfg(test)]
mod type_encoding {
use super::*;
use std::mem::size_of;
#[test]
fn struct_size() {
assert_eq!(size_of::<TypeEncode>(), 128);
}
fn build_encode() -> TypeEncode {
let mut ret = TypeEncode {
fix_len: 12,
fix_ty: [0; 63],
var_len: 8,
var_ty: [0; 63]
};
ret.fix_ty[0] = 0b11100100u8;
ret.fix_ty[1] = 0b00011011u8;
ret.fix_ty[2] = 0b11100100u8;
ret