Commit b255aaa7 authored by Kunshan Wang

WIP: parser and codegen

parent 5464c7d5
import re
from typing import List, Union, Tuple, Any, Callable, TypeVar
from typing.re import Pattern
# A line predicate accepted by ``find_line`` and the functions built on it.
# It is one of:
#   * a ``str``: matches every line that contains it as a substring;
#   * a tuple ``(pattern, v1, v2, ...)``: matches a line when the compiled
#     ``pattern`` is found in it and each capture group equals the
#     corresponding ``v`` (a ``None`` value accepts any group content);
#   * a callable that takes the line and returns a bool.
# The tuple form carries plain values after the pattern, so its element type
# must admit ``str`` and ``None`` in addition to ``Pattern``.
Predicate = Union[str,
                  Tuple[Union[Pattern, str, None], ...],
                  Callable[[Any], bool]]
def _string_contains(line, string):
    """Tell whether ``string`` occurs inside ``line``."""
    found = string in line
    return found
def _pattern_value_match(line, tup):
    """
    Match ``line`` against a ``(pattern, v1, v2, ...)`` tuple.

    The compiled ``pattern`` is searched in ``line``.  The line matches when
    the search succeeds and every capture group equals the value paired with
    it; a value of ``None`` accepts whatever the group captured.  Groups and
    values are paired positionally and the shorter sequence decides how many
    pairs are checked.
    """
    pattern, expected = tup[0], tup[1:]
    match = pattern.search(line)
    if match is None:
        return False
    for group, value in zip(match.groups(), expected):
        if value is not None and group != value:
            return False
    return True
def _apply_func(line, func):
    """Evaluate the caller-supplied predicate ``func`` on ``line``."""
    verdict = func(line)
    return verdict
def find_line(lines: List[str], substr: Predicate, start: int = 0) -> int:
    """
    Return the index of the first line, from ``start`` onwards, that
    satisfies ``substr``.

    ``substr`` may be a string (substring test), a tuple of a compiled
    pattern followed by expected group values, or any other object, which is
    then called with the line.  Raises ``KeyError`` when no line qualifies.
    """
    # Choose the matcher once, according to the kind of predicate given.
    if isinstance(substr, str):
        matcher = _string_contains
    elif isinstance(substr, tuple):
        matcher = _pattern_value_match
    else:
        matcher = _apply_func

    hits = (i for i in range(start, len(lines)) if matcher(lines[i], substr))
    found = next(hits, None)
    if found is None:
        raise KeyError("Not found: " + str(substr) + "\n text:" + str(lines) )
    return found
def extract_lines(parent: str, begin: Predicate, end: Predicate) -> str:
    """
    Return the text strictly between two marker lines of ``parent``.

    The first line satisfying ``begin`` opens the region and the first later
    line satisfying ``end`` closes it; neither marker line is included.  The
    selected lines are joined with ``"\\n"``.  ``find_line`` raises
    ``KeyError`` if either marker is missing.
    """
    all_lines = parent.splitlines()
    first = find_line(all_lines, begin) + 1
    stop = find_line(all_lines, end, first)
    return "\n".join(all_lines[first:stop])
def inject_lines(parent: str, begin: Predicate, end: Predicate, generated: str) -> str:
    """
    Replace the lines between the line matching ``begin`` and the line
    matching ``end`` (excluding both lines) in ``parent`` with ``generated``.

    The marker lines themselves are kept.  Lines are re-joined with ``"\\n"``.
    If ``parent`` ends with a newline, the result does too, so rewriting a
    file with the result does not strip its final newline.  ``find_line``
    raises ``KeyError`` if either marker cannot be found.
    """
    lines = parent.splitlines()
    begin_line = find_line(lines, begin)
    end_line = find_line(lines, end, begin_line + 1)
    new_lines = lines[:begin_line + 1] + generated.splitlines() + lines[end_line:]
    result = "\n".join(new_lines)
    # splitlines() discards the terminator of the last line and join() does
    # not put one back; restore it when the input had one.
    if parent.endswith("\n"):
        result += "\n"
    return result
......@@ -6,13 +6,15 @@ The result will be a simple JSON object (dict of dicts).
import re
import injecttools
# A "///" comment carrying "MUAPIPARSER:"; group 1 is the rest of the line.
r_commpragma = re.compile(r'///\s*MUAPIPARSER:(.*)$')
# Any "//" comment up to the end of its line.
r_comment = re.compile(r'//.*$', re.MULTILINE)
# A function-pointer member "ret (*name)(params);", optionally followed on the
# same line by a "/// MUAPIPARSER <pragma>" comment.
r_decl = re.compile(r'(?P<ret>\w+\s*\*?)\s*\(\s*\*\s*(?P<name>\w+)\s*\)\s*\((?P<params>[^)]*)\)\s*;\s*(?:///\s*MUAPIPARSER\s+(?P<pragma>.*)$)?', re.MULTILINE)
# One parameter: a type (optionally ending in "*") followed by a name.
r_param = re.compile(r'\s*(?P<type>\w+\s*\*?)\s*(?P<name>\w+)')
# Type names of the form Mu...Value or Mu...Node.
r_value_ty = re.compile(r'Mu\w*(Value|Node)')
# "#define NAME VALUE".  NOTE: immediately rebound by the next assignment.
r_define = re.compile(r'#define\s*(?P<name>\w+)\s*(?P<value>\w+)')
# "#define NAME ((TYPE)VALUE)" occupying a whole line; this is the binding
# that remains in effect.
r_define = re.compile(r'^\s*#define\s*(?P<name>\w+)\s*\(\((?P<type>\w+)\)(?P<value>\w+)\)\s*$', re.MULTILINE)
# "struct NAME {" at the start of a line; group 1 is NAME.
r_struct_start = re.compile(r'^struct\s+(\w+)\s*\{')
# "};" at the start of a line.
r_struct_end = re.compile(r'^\};')
......@@ -29,7 +31,6 @@ def extract_params(text):
return params
def extract_pragmas(text):
text = text.strip()
if len(text) == 0:
......@@ -49,26 +50,14 @@ def extract_methods(body):
return methods
def extract_struct(lines, name):
    """
    Return the body of ``struct name { ... };`` from a list of lines.

    The body is every line strictly between the line matching
    ``r_struct_start`` with the given ``name`` and the next line matching
    ``r_struct_end``, joined with ``"\\n"``.  Raises ``Exception`` when the
    start or the end of the struct cannot be found.
    """
    opening = None
    for idx, line in enumerate(lines):
        m = r_struct_start.search(line)
        if m is not None and m.group(1) == name:
            opening = idx
            break
    if opening is None:
        raise Exception("Cannot find the start of struct {}".format(name))

    for closing in range(opening + 1, len(lines)):
        if r_struct_end.search(lines[closing]) is not None:
            return "\n".join(lines[opening + 1:closing])
    raise Exception("Cannot find the end of struct {}".format(name))
def extract_struct(text, name):
    """
    Return the body of ``struct name { ... };`` found in ``text``.

    Delegates to ``injecttools.extract_lines``: the region opens at the line
    where ``r_struct_start`` captures ``name`` and closes at the next line
    matching ``r_struct_end``; both marker lines are excluded.
    """
    opening = (r_struct_start, name)
    closing = (r_struct_end,)
    return injecttools.extract_lines(text, opening, closing)
def extract_enums(lines, typename, pattern):
def extract_enums(text, typename, pattern):
defs = []
for line in lines:
m = r_define.search(line)
for m in r_define.finditer(text):
if m is not None:
name, value = m.groups()
name, ty, value = m.groups()
if pattern.search(name) is not None:
defs.append({"name": name, "value": value})
return {
......@@ -85,23 +74,22 @@ _enums = [(typename, re.compile(regex)) for typename, regex in [
("MuConvOptr", r'^MU_CONV_'),
("MuMemOrd", r'^MU_ORD_'),
("MuAtomicRMWOp", r'^MU_ARMW_'),
("MuFlag", r'^MU_CC_'),
("MuCallConv", r'^MU_CC_'),
("MuCommInst", r'^MU_CI_'),
]]
def parse_muapi(text):
structs = []
lines = text.splitlines()
for sn in _top_level_structs:
b = extract_struct(lines, sn)
b = extract_struct(text, sn)
methods = extract_methods(b)
structs.append({"name": sn, "methods": methods})
enums = []
for tn,pat in _enums:
enums.append(extract_enums(lines, tn, pat))
enums.append(extract_enums(text, tn, pat))
return {
"structs": structs,
......
......@@ -15,88 +15,78 @@ import re
import tempfile
import muapiparser
import injecttools
# Marker lines that delimit the generated region in the destination file;
# inject_generated_code replaces everything between them.
target_begin = '/// SCRIPT: GENERATED CODE BEGIN'
target_end = '/// SCRIPT: GENERATED CODE END'
def find_line(lines, substr, start=0):
    """
    Return the index of the first line, from ``start`` onwards, that
    contains ``substr``.  Raises ``KeyError`` when there is none.
    """
    candidates = range(start, len(lines))
    hit = next((i for i in candidates if substr in lines[i]), None)
    if hit is None:
        raise KeyError("Not found: " + substr)
    return hit
def general_inject_generated_code(parent: str, begin: str, end: str, generated: str):
    """
    Replace the lines between the line containing ``begin`` and the line
    containing ``end`` (both marker lines are kept) with ``generated``.

    Lines are re-joined with ``"\\n"``.  If ``parent`` ends with a newline,
    the result does too, so rewriting a file with the result does not strip
    its final newline.  ``find_line`` raises ``KeyError`` if either marker is
    missing.
    """
    lines = parent.splitlines()
    begin_line = find_line(lines, begin)
    end_line = find_line(lines, end, begin_line + 1)
    new_lines = lines[:begin_line + 1] + generated.splitlines() + lines[end_line:]
    result = "\n".join(new_lines)
    # splitlines() discards the terminator of the last line and join() does
    # not put one back; restore it when the input had one.
    if parent.endswith("\n"):
        result += "\n"
    return result
def inject_generated_code(parent, generated):
return general_inject_generated_code(parent, target_begin, target_end, generated)
_simple_map = {
"void": "Unit",
"int": "Int",
"long": "Long",
"uint64_t": "Long",
"uint64_t*": "LongPtr",
"float": "Float",
"double": "Double",
return injecttools.inject_lines(parent, target_begin, target_end, generated)
# C types to Scala types, JFFI types and JFFI Buffer getters and setters.
# Each value is a 4-element list:
#   [Scala type, JFFI type name, Buffer getter name, Buffer return-setter name]
# "void" has neither a getter nor a setter, hence the two None entries.
_primitive_types = {
    "void": ["Unit", "VOID", None, None ],
    "int": ["Int", "SINT", "getInt", "setIntReturn"],
    "long": ["Long", "SLONG", "getLong", "setLongReturn"],
    "int8_t": ["Byte", "SINT8", "getByte", "setByteReturn"],
    "uint8_t": ["Byte", "UINT8", "getByte", "setByteReturn"],
    "int16_t": ["Short", "SINT16", "getShort", "setShortReturn"],
    "uint16_t": ["Short", "UINT16", "getShort", "setShortReturn"],
    "int32_t": ["Int", "SINT32", "getInt", "setIntReturn"],
    "uint32_t": ["Int", "UINT32", "getInt", "setIntReturn"],
    "int64_t": ["Long", "SINT64", "getLong", "setLongReturn"],
    "uint64_t": ["Long", "UINT64", "getLong", "setLongReturn"],
    "float": ["Float", "FLOAT", "getFloat", "setFloatReturn"],
    "double": ["Double", "DOUBLE", "getDouble", "setDoubleReturn"],
}
# Maps the pointer types "MuVM*" and "MuCtx*" to a getter name.
# NOTE(review): not referenced in the visible code; presumably the name of
# the helper that turns the raw pointer into the receiver object -- confirm.
_self_getters = {
    "MuVM*": "getMuVM",
    "MuCtx*": "getMuCtx",
}
def conv_ret_ty(ty):
    """
    Convert a C return type name into a ``(flag, type_name)`` pair.

    ``ty`` is first translated through ``_simple_map`` when it has an entry
    there.  If the resulting name matches ``r_value_ty`` the pair is
    ``(True, "MuValueFak")``; otherwise it is ``(False, name)``.
    """
    mapped = _simple_map.get(ty, ty)
    if r_value_ty.match(mapped) is None:
        return (False, mapped)
    return (True, "MuValueFak")
_special_case = {
"id": "ID",
"uptr": "UPtr",
"ufuncptr": "UFuncPtr",
"iref": "IRef",
"weakref": "WeakRef",
"funcref": "FuncRef",
"tagref64": "TagRef64",
"threadref": "ThreadRef",
"stackref": "StackRef",
def type_is_ptr(ty):
    """Return True when the type name ``ty`` ends with ``*``."""
    last_char = ty[-1:]
    return last_char == "*"
_special_cases = {
"id": "ID",
"uptr": "UPtr",
"ufuncptr": "UFuncPtr",
"iref": "IRef",
"weakref": "WeakRef",
"funcref": "FuncRef",
"tagref64": "TagRef64",
"threadref": "ThreadRef",
"stackref": "StackRef",
"framecursorref": "FrameCursorRef",
"irnoderef": "IRNodeRef",
"funcsig": "FuncSig",
"bb": "BB",
"keepalives": "KeepAlives",
"binop": "BinOp",
"tailcall": "TailCall",
"extractvalue": "ExtractValue",
"insertvalue" : "InsertValue",
"irnoderef": "IRNodeRef",
"funcsig": "FuncSig",
"bb": "BB",
"keepalives": "KeepAlives",
"binop": "BinOp",
"tailcall": "TailCall",
"extractvalue": "ExtractValue",
"insertvalue": "InsertValue",
"extractelement": "ExtractElement",
"insertelement" : "InsertElement",
"shufflevector" : "ShuffleVector",
"newhybrid" : "NewHybrid",
"allocahybrid" : "AllocaHybrid",
"getiref" : "GetIRef",
"getfieldiref" : "GetFieldIRef",
"getelemiref" : "GetElemIRef",
"shiftiref" : "ShiftIRef",
"insertelement": "InsertElement",
"shufflevector": "ShuffleVector",
"newhybrid": "NewHybrid",
"allocahybrid": "AllocaHybrid",
"getiref": "GetIRef",
"getfieldiref": "GetFieldIRef",
"getelemiref": "GetElemIRef",
"shiftiref": "ShiftIRef",
"getvarpartiref": "GetVarPartIRef",
"cmpxchg" : "CmpXchg",
"atomicrmw" : "AtomicRMW",
"watchpoint" : "WatchPoint",
"wpbranch" : "WPBranch",
"ccall" : "CCall",
"newthread" : "NewThread",
"newstack" : "NewStack",
"swapstack" : "SwapStack",
"comminst" : "CommInst",
"cmpxchg": "CmpXchg",
"atomicrmw": "AtomicRMW",
"watchpoint": "WatchPoint",
"wpbranch": "WPBranch",
"ccall": "CCall",
"newthread": "NewThread",
"newstack": "NewStack",
"swapstack": "SwapStack",
"comminst": "CommInst",
}
def toCamelCase(name):
......@@ -110,17 +100,52 @@ def toCamelCase(name):
return "".join(outs)
src_path = os.path.join(*"src/main/scala/uvm/refimpl/nat/cStubs.scala".split("/"))
with open(src_path) as f:
src_text = f.read()
# Type names that begin with "Mu", any word characters, then "Value" or "Node".
r_handle_ty = re.compile(r'Mu\w*(Value|Node)')


def is_handle(ty):
    """Return True when ``ty`` starts with a name matching ``r_handle_ty``."""
    return r_handle_ty.match(ty) is not None


def is_handle_array(ty):
    """
    Return True when ``ty`` is a pointer (ends with ``*``) whose pointee
    name matches ``r_handle_ty``.
    """
    # The pointer test is written inline: the previously called ``is_ptr``
    # is not defined in the visible source and would raise NameError.
    return ty.endswith("*") and r_handle_ty.match(ty[:-1]) is not None
def generate_stubs(ast):
    """
    Produce one ``// <method name>`` comment line per method of every struct
    named ``MuCtx`` in ``ast["structs"]``, joined with newlines.
    """
    names = [
        meth["name"]
        for st in ast["structs"]
        if st["name"] == "MuCtx"
        for meth in st["methods"]
    ]
    return "\n".join("// "+ fn for fn in names)
def generate_things(ast):
    """
    Build the complete generated text for ``ast``.

    Currently this is only the output of ``generate_stubs``; enum generation
    is still a TODO.
    """
    enums = "" # TODO: generate_enums(ast)
    sections = [generate_stubs(ast)]
    return "\n".join(sections)
# C header that main() reads and hands to muapiparser.parse_muapi.
src_path = "cbinding/muapi.h"
# Scala source whose generated region main() rewrites.
dst_path = "src/main/scala/uvm/refimpl/nat/cStubs.scala"
def main():
    """
    Regenerate the generated region of the Scala stub file.

    Reads the C header at ``src_path``, parses it with
    ``muapiparser.parse_muapi``, generates the stub text, and injects it into
    the file at ``dst_path`` between the generated-code markers.  The previous
    contents of ``dst_path`` are saved to a temporary file first.

    Only ``dst_path`` is written; the header at ``src_path`` is read-only
    input.  Earlier statements replaced the generated text with a placeholder
    and wrote the result to the header as well; they are removed.
    """
    with open(src_path) as f:
        src_text = f.read()

    ast = muapiparser.parse_muapi(src_text)
    generated = generate_things(ast)

    with open(dst_path) as f:
        dst_text = f.read()

    result_text = inject_generated_code(dst_text, generated)

    # delete=False keeps the backup on disk after the ``with`` block; with the
    # default, the temporary file is removed as soon as it is closed.
    with tempfile.NamedTemporaryFile("w", delete=False) as f:
        print("Backup to temporary file:", f.name)
        f.write(dst_text)

    with open(dst_path, "w") as f:
        f.write(result_text)
main()
......@@ -20,8 +20,187 @@ object CDefs {
/** Builds an `ExposedMethod` from a return type, the parameter types and the
  * function that is invoked with the call's `Buffer`.
  */
def exposedMethod(jRetTy: JType, jParamTys: Array[JType])(invokeFunc: Buffer => Unit) =
  new ExposedMethod(jRetTy, jParamTys, invokeFunc)
// Sample exposed method: reads argument 0 as an Int and sets it as the Int return value.
// NOTE(review): the declared return type is JType.VOID although the body calls
// setIntReturn, and the second (UINT32) parameter is never read -- confirm this
// is only a placeholder.
exposedMethod(JType.VOID, Array(JType.SINT, JType.UINT32)) { jffiBuffer =>
  val i = jffiBuffer.getInt(0)
  jffiBuffer.setIntReturn(i)
}
/// SCRIPT: GENERATED CODE BEGIN
// goodbye world
// id_of
// name_of
// close_context
// load_bundle
// load_hail
// handle_from_sint8
// handle_from_uint8
// handle_from_sint16
// handle_from_uint16
// handle_from_sint32
// handle_from_uint32
// handle_from_sint64
// handle_from_uint64
// handle_from_uint64s
// handle_from_float
// handle_from_double
// handle_from_ptr
// handle_from_fp
// handle_to_sint8
// handle_to_uint8
// handle_to_sint16
// handle_to_uint16
// handle_to_sint32
// handle_to_uint32
// handle_to_sint64
// handle_to_uint64
// handle_to_float
// handle_to_double
// handle_to_ptr
// handle_to_fp
// handle_from_const
// handle_from_global
// handle_from_func
// handle_from_expose
// delete_value
// ref_eq
// ref_ult
// extract_value
// insert_value
// extract_element
// insert_element
// new_fixed
// new_hybrid
// refcast
// get_iref
// get_field_iref
// get_elem_iref
// shift_iref
// get_var_part_iref
// load
// store
// cmpxchg
// atomicrmw
// fence
// new_stack
// new_thread_nor
// new_thread_exc
// kill_stack
// set_threadlocal
// get_threadlocal
// new_cursor
// next_frame
// copy_cursor
// close_cursor
// cur_func
// cur_func_ver
// cur_inst
// dump_keepalives
// pop_frames_to
// push_frame
// tr64_is_fp
// tr64_is_int
// tr64_is_ref
// tr64_to_fp
// tr64_to_int
// tr64_to_ref
// tr64_to_tag
// tr64_from_fp
// tr64_from_int
// tr64_from_ref
// enable_watchpoint
// disable_watchpoint
// pin
// unpin
// expose
// unexpose
// new_bundle
// load_bundle_from_node
// abort_bundle_node
// get_node
// get_id
// set_name
// new_type_int
// new_type_float
// new_type_double
// new_type_uptr
// set_type_uptr
// new_type_ufuncptr
// set_type_ufuncptr
// new_type_struct
// new_type_hybrid
// new_type_array
// new_type_vector
// new_type_void
// new_type_ref
// set_type_ref
// new_type_iref
// set_type_iref
// new_type_weakref
// set_type_weakref
// new_type_funcref
// set_type_funcref
// new_type_tagref64
// new_type_threadref
// new_type_stackref
// new_type_framecursorref
// new_type_irnoderef
// new_funcsig
// new_const_int
// new_const_int_ex
// new_const_float
// new_const_double
// new_const_null
// new_const_seq
// new_global_cell
// new_func
// new_func_ver
// new_exp_func
// new_bb
// new_nor_param
// new_exc_param
// new_inst_res
// add_dest
// add_keepalives
// new_binop
// new_cmp
// new_conv
// new_select
// new_branch
// new_branch2
// new_switch
// add_switch_dest
// new_call
// new_tailcall
// new_ret
// new_throw
// new_extractvalue
// new_insertvalue
// new_extractelement
// new_insertelement
// new_shufflevector
// new_new
// new_newhybrid
// new_alloca
// new_allocahybrid
// new_getiref
// new_getfieldiref
// new_getelemiref
// new_shiftiref
// new_getvarpartiref
// new_load
// new_store
// new_cmpxchg
// new_atomicrmw
// new_fence
// new_trap
// new_watchpoint
// new_wpbranch
// new_ccall
// new_newthread
// new_swapstack_ret
// new_swapstack_kill
// set_newstack_pass_values
// set_newstack_throw_exc
// new_comminst
/// SCRIPT: GENERATED CODE END
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment