structure CodeParser :> CodeParser = struct open HolKernel TypeBase Parse Term UIRLexer open boolLib open finite_mapSyntax open listSyntax open numSyntax open pairSyntax open stringSyntax open sumSyntax open wordsSyntax open uvmValuesTheory open uvmMemoryTheory open uvmIRTheory open ParserHelpers open TypeParser val bin_ops = [ "ADD", "SUB", "MUL", "SDIV", "SREM", "UDIV", "UREM", "SHL", "LSHR", "ASHR", "AND", "OR", "XOR", "FADD", "FSUB", "FMUL", "FDIV", "FREM" ] val cmp_ops = [ "EQ", "NE", "SGE", "SGT", "SLE", "SLT", "UGE", "UGT", "ULE", "ULT", "FFALSE", "FTRUE", "FOEQ", "FOGT", "FOGE", "FOLT", "FOLE", "FONE", "FORD", "FUEQ", "FUGT", "FUGE", "FULT", "FULE", "FUNE", "FUNO" ] val conv_ops = [ "TRUNC", "ZEXT", "SEXT", "FPTRUNC", "FPEXT", "FPTOUI", "FPTOSI", "UITOFP", "SITOFP", "BITCAST", "REFCAST", "PTRCAST" ] val insts = bin_ops @ cmp_ops @ conv_ops @ [ "ID", "SELECT", "EXTRACTVALUE", "INSERTVALUE", "EXTRACTELEMENT", "INSERTELEMENT", "SHUFFLEVECTOR", "NEW", "ALLOCA", "NEWHYBRID", "ALLOCAHYBRID", "GETIREF", "GETFIELDIREF", "GETELEMENTIREF", "SHIFTIREF", "GETVARPARTIREF", "LOAD", "STORE", "CMPXCHG", "ATOMICRMW", "FENCE", "NEWTHREAD" ] val terminsts = [ "RET", "THROW", "CALL", "TAILCALL", "BRANCH", "BRANCH2", "SWITCH", "TRAP", "WATCHPOINT", "WPBRANCH", "SWAPSTACK", "COMMINST" ] val mem_orders = [ "NOT_ATOMIC", "RELAXED", "CONSUME", "ACQUIRE", "RELEASE", "ACQ_REL", "SEQ_CST" ] val comminsts = [ "uvm.new_stack", "uvm.current_stack" ] val ty_ssa = mk_type("ssavar", []) val ty_blabel = mk_type("block_label", []) val ty_ci = mk_type("comminst", [ty_ssa]) fun or_const ty = ``:^ty or_const`` val t_Var = mk_const("INL", ty_ssa --> or_const ty_ssa) val t_Const = mk_const("INR", ty_val --> or_const ty_ssa) val t_SSA = mk_const("SSA", ty_str --> ty_ssa) fun mk_ssa s = mk_comb(t_SSA, fromMLstring s) fun mk_some x = let val ty = type_of x in mk_comb(mk_const("SOME", ty --> mk_type("option", [ty])), x) end fun ir_const (name : string) (ty : hol_type) : term = mk_thy_const { Thy="uvmIR", Name=name, Ty=ty } fun ir_ap(name, xs, ty_out) = let val ty = foldr (op -->) ty_out (map (#1 o dest_sum o type_of) xs) in (* print ("\nname: " ^ name ^ ", ty: " ^ type_to_string ty); *) list_mk_lift(ir_const name ty, xs) end (* Reads an SSA variable, replacing global variables with constants using the given context. Returns a term of type ``:ssavar or_const or_error``. *) fun ssa_var _ (Local, v) = mk_return(mk_comb(t_Var, mk_ssa v)) | ssa_var (env : env_context) (Global, v) = mk_lift(t_Const, #get_global env v) fun parse_mem_order (ord : string) : term = if exists (fn x => x = ord) mem_orders then mk_const(ord, mk_type("memory_order", [])) else raise ParseError (ord ^ " is not a memory order") fun parse_new_stack_clause (env : env_context) (tokens : uir_token list) : (term * uir_token list) = let val ty = mk_type("new_stack_clause", [or_const ty_ssa]) val ty_pair = mk_prod(or_const ty_ssa, ty_type) in case tokens of Word "PASS_VALUES" :: Type tys :: Parens vals :: rest => (mk_lift(mk_const("PassValues", ty_list ty_pair --> ty), mk_sequence( map (fn (ty, v) => case v of SSA v => list_mk_lift( mk_const(",", or_const ty_ssa --> ty_type --> ty_pair), [ssa_var env v, ty]) | _ => raise ParseError "PASS_VALUES expected SSA arguments") (ListPair.zipEq(parse_type_list env tys, vals)), ty_pair)), rest) | Word "THROW_EXC" :: SSA exc :: rest => (mk_lift(mk_const("ThrowExc", or_const ty_ssa --> ty), ssa_var env exc), rest) | _ => raise ParseError "expected newStackClause (PASS_VALUES or THROW_EXC)" end fun lhs0 (lhs : uir_token) (inst : string) = case lhs of Parens [] => () | _ => raise ParseError(inst ^ " does not return a value") fun lhs1 (lhs : uir_token) (inst : string) = case lhs of SSA (Local, v) => mk_return(mk_ssa v) | Parens [SSA (Local, v)] => mk_return(mk_ssa v) | _ => raise ParseError(inst ^ " returns exactly 1 value") fun lhs_n (lhs : uir_token) (inst : string) = case lhs of (SSA (Local, v)) => mk_list([mk_ssa v], ty_ssa) | (Parens vs) => let fun var (SSA (Local, v)) = mk_ssa v | var _ = raise ParseError(inst ^ " lhs contains non-SSA-var") in mk_list(map var vs, ty_ssa) end | _ => raise ParseError(inst ^ " lhs contains non-SSA-var") (* Parses a list of tokens into a Mu IR instruction. Returns a term of type ``:ssavar instruction or_error``. *) fun parse_inst (env : env_context) (tokens : uir_token list) : term = let val ty_inst = mk_type("instruction", [ty_ssa]) val ty_expr = mk_type("expression", [or_const ty_ssa]) val t_ref = mk_return(mk_const("REF", ty_reft)) val t_ptr = mk_return(mk_const("PTR", ty_reft)) val t_na = mk_return(mk_const("NOT_ATOMIC", mk_type("memory_order", []))) val t_void = mk_return(mk_const("Void", ty_type)) val t_null = mk_return(infer_comb("INR", [infer_comb("RefV", [ mk_const("Void", ty_type), mk_const("NONE", ``:addr option``) ], ty_val)], ``:ssavar or_const``)) fun two_tys tys = let val (t1, rest) = take_one_type tys val (t2, rest') = take_one_type rest val _ = case rest' of [] => () | _ => raise ParseError "expected exactly 2 types in <...>" in (parse_type env t1, parse_type env t2) end fun ssa_noconst (Local, v) = mk_return(mk_ssa v) | ssa_noconst (Global, v) = raise ParseError( "global name @" ^ v ^ " used in illegal location") val (lhs, rhs) = case tokens of l::Eq::r => (l, r) | _ => (Parens [], tokens) fun mk_assign(inst : string, expr : term) = ir_ap("Assign", [mk_return (lhs_n lhs inst), expr], ty_inst) in case rhs of (* Identity instruction *) [Word "ID", Type ty, SSA i] => let val ty' = parse_type env ty in mk_assign("ID", ir_ap("Id", [ty', ssa_var env i], ty_expr)) end (* TODO: Compound value operations *) (* IRef instructions *) | [Word "GETIREF", Type ty, SSA opnd] => mk_assign("GETIREF", ir_ap("GetIRef", [parse_type env ty, ssa_var env opnd], ty_expr)) (* TODO: PTR variants of iref instructions *) | [Word "GETFIELDIREF", Type tys, SSA opnd] => let val (t1, rest) = take_one_type tys val index = case rest of [Int n] => Arbint.toNat n | _ => raise ParseError "GETFIELDIREF expected field index after type" in mk_assign("GETFIELDIREF", ir_ap("GetFieldIRef", [ t_ref, parse_type env t1, mk_return(lift_num ty_num index), ssa_var env opnd ], ty_expr)) end | [Word "GETELEMIREF", Type tys, SSA opnd, SSA index] => let val (t1, t2) = two_tys tys in mk_assign("GETELEMIREF", ir_ap("GetElementIRef", [ t_ref, t1, t2, ssa_var env opnd, ssa_var env index ], ty_expr)) end | [Word "SHIFTIREF", Type tys, SSA opnd, SSA index] => let val (t1, t2) = two_tys tys in mk_assign("SHIFTIREF", ir_ap("ShiftIRef", [ t_ref, t1, t2, ssa_var env opnd, ssa_var env index ], ty_expr)) end | [Word "GETVARPARTIREF", Type ty, SSA opnd] => mk_assign("GETVARPARTIREF", ir_ap("GetVarPartIRef", [t_ref, parse_type env ty, ssa_var env opnd], ty_expr)) (* Load *) | [Word "LOAD", Type ty, SSA src] => ir_ap("Load", [lhs1 lhs "LOAD", t_ref, parse_type env ty, ssa_noconst src, t_na], ty_inst) | [Word "LOAD", Word "PTR", Type ty, SSA src] => ir_ap("Load", [lhs1 lhs "LOAD", t_ptr, parse_type env ty, ssa_noconst src, t_na], ty_inst) | [Word "LOAD", Word ord, Type ty, SSA src] => ir_ap("Load", [ lhs1 lhs "LOAD", t_ref, parse_type env ty, ssa_noconst src, mk_return(parse_mem_order ord) ], ty_inst) | [Word "LOAD", Word "PTR", Word ord, Type ty, SSA src] => ir_ap("Load", [ lhs1 lhs "LOAD", t_ptr, parse_type env ty, ssa_noconst src, mk_return(parse_mem_order ord) ], ty_inst) (* Store *) | [Word "STORE", Type ty, SSA dst, SSA src] => let val _ = lhs0 lhs "STORE" val ty' = parse_type env ty in ir_ap("Store", [ ssa_var env src, t_ref, ty', ssa_noconst dst, t_na ], ty_inst) end | [Word "STORE", Word "PTR", Type ty, SSA dst, SSA src] => let val _ = lhs0 lhs "STORE" val ty' = parse_type env ty in ir_ap("Store", [ ssa_var env src, t_ptr, ty', ssa_noconst dst, t_na ], ty_inst) end | [Word "STORE", Word ord, Type ty, SSA dst, SSA src] => let val _ = lhs0 lhs "STORE" val ty' = parse_type env ty in ir_ap("Store", [ ssa_var env src, t_ref, ty', ssa_noconst dst, mk_return(parse_mem_order ord) ], ty_inst) end | [Word "STORE", Word "PTR", Word ord, Type ty, SSA dst, SSA src] => let val _ = lhs0 lhs "STORE" val ty' = parse_type env ty in ir_ap("Store", [ ssa_var env src, t_ptr, ty', ssa_noconst dst, mk_return(parse_mem_order ord) ], ty_inst) end (* Fence *) | [Word "FENCE", Word ord] => ir_ap("Fence", [mk_return(parse_mem_order ord)], ty_inst) (* New, Alloca *) | [Word "NEW", Type ty] => ir_ap("New", [lhs1 lhs "NEW", parse_type env ty], ty_inst) | [Word "ALLOCA", Type ty] => ir_ap("Alloca", [lhs1 lhs "ALLOCA", parse_type env ty], ty_inst) | [Word "NEWHYBRID", Type tys, SSA len] => let val (t1, t2) = two_tys tys in ir_ap("NewHybrid", [lhs1 lhs "NEWHYBRID", t1, t2, ssa_var env len], ty_inst) end | [Word "ALLOCAHYBRID", Type tys, SSA len] => let val (t1, t2) = two_tys tys in ir_ap("AllocaHybrid", [ lhs1 lhs "ALLOCAHYBRID", t1, t2, ssa_var env len ], ty_inst) end (* New Thread *) | Word "NEWTHREAD" :: SSA stack :: rest => let val (nsc, nil) = parse_new_stack_clause env rest in case nil of [] => ir_ap("NewThread", [ lhs1 lhs "NEWTHREAD", ssa_var env stack, t_null, nsc ], ty_inst) | _ => raise ParseError "invalid arguments for NEWTHREAD" end (* Binary operations *) | [Word opn, Type ty, SSA l, SSA r] => let val ty' = parse_type env ty in mk_assign(opn, if exists (fn x => x = opn) bin_ops then ir_ap("BinOp", [ mk_return(ir_const opn (mk_type("bin_op", []))), ty', ssa_var env l, ssa_var env r ], ty_expr) else if exists (fn x => x = opn) cmp_ops then ir_ap("CmpOp", [ mk_return(ir_const opn (mk_type("cmp_op", []))), ty', ssa_var env l, ssa_var env r ], ty_expr) else raise ParseError (opn ^ " is not a binary operation")) end (* Conversion operations *) | [Word opn, Type tys, SSA opnd] => if exists (fn x => x = opn) conv_ops then let val (t1', t2') = two_tys tys in mk_assign(opn, ir_ap("ConvOp", [ mk_return(mk_thy_const{Thy="uvmTypes", Name=opn, Ty=``:convtype``}), t1', t2', ssa_var env opnd ], ty_expr)) end else raise ParseError (opn ^ " is not a conversion operation") | Word opn::_ => if exists (fn x => x = opn) insts then raise ParseError ("invalid arguments for " ^ opn) else if exists (fn x => x = opn) terminsts then raise ParseError (opn ^ " can only be used in terminating position") else raise ParseError (opn ^ " is not an instruction") | _ => raise ParseError "expected instruction" end fun parse_comminst (env : env_context) (name : string) (tokens : uir_token list) : (term * uir_token list) = let fun fsig (SSA (Global, v)) = #get_funcsig env v | fsig _ = raise ParseError "expected global SSA variable in <[]>" fun arg (SSA v) = ssa_var env v | arg _ = raise ParseError "expected SSA variable in ()" val (flags, t1) = case tokens of Brackets xs::t1 => (xs, t1) | _ => ([], tokens) val (types, t2) = case t1 of Type [Brackets _]::_ => ([], t1) | Type xs::t2 => (parse_type_list env xs, t2) | _ => ([], t1) val (fsigs, t3) = case t2 of Type [Brackets xs]::t3 => (map fsig xs, t3) | _ => ([], t2) val (args, t4) = case t3 of Parens xs::t4 => (map arg xs, t4) | _ => ([], t3) in (( case (name, flags, types, fsigs, args) of ("uvm.new_stack", [], [], [s], [f]) => ir_ap("NewStack", [s, f], ty_ci) | ("uvm.current_stack", [], [], [], []) => mk_return(mk_const("CurrentStack", ty_ci)) | _ => if exists (fn x => x = name) comminsts then raise ParseError ("invalid arguments for " ^ name) else raise ParseError ("unsupported common instruction: " ^ name)), t4) end (* Parses a list of tokens into a Mu IR terminating instruction. Returns a term of type ``:ssavar terminst or_error``. *) fun parse_terminst (env : env_context) (tokens : uir_token list) : term = let fun param (SSA v) = ssa_var env v | param _ = raise ParseError "expected SSA variable" fun params vs = mk_sequence(map param vs, or_const ty_ssa) val ty_dest = ``:ssavar destination`` val ty_destopt = mk_type("option", [ty_dest]) val x = mk_var("x", ty_list (or_const ty_ssa)) fun dest label vs = mk_lift( mk_abs(x, mk_pair(infer_comb("BlockLabel", [fromMLstring label], ty_blabel), x)), params vs) fun calldata(fsig, callee, args) = let val fsig' = parse_funcsig env fsig in lift_record(mk_type("calldata", [ty_ssa]), [ ("name", mk_bind( ssa_var env callee, mk_const("get_callee", or_const ty_ssa --> or_error (mk_sum(ty_ssa, ty_funcn))))), ("signature", fsig'), ("args", params args), ("convention", mk_return(mk_const("Mu", mk_type("callconvention", [])))) ]) end fun exc_clause [SSA (Local, l1), Parens a1, SSA (Local, l2), Parens a2] = lift_record(mk_type("resumption_data", [ty_ssa]), [ ("normal_dest", dest l1 a1), ("exceptional_dest", mk_lift( mk_const("SOME", ty_dest --> ty_destopt), dest l2 a2)) ]) | exc_clause [SSA (Local, l1), Parens a1] = lift_record(mk_type("resumption_data", [ty_ssa]), [ ("normal_dest", dest l1 a1), ("exceptional_dest", mk_return(mk_const("NONE", ty_destopt))) ]) | exc_clause _ = raise ParseError "invalid syntax for EXC clause" val ty_inst = mk_type("terminst", [ty_ssa]) in case tokens of [Word "RET", Parens vs] => ir_ap("Ret", [params vs], ty_inst) | [Word "THROW", v] => ir_ap("Throw", [param v], ty_inst) | [Word "CALL", Type _, SSA _, Parens _] => raise ParseError "terminating CALL must have an EXC clause" | [_, Eq, Word "CALL", Type _, SSA _, Parens _] => raise ParseError "terminating CALL must have an EXC clause" | [Word "CALL", Type fsig, SSA callee, Parens args, Word "EXC", Parens exc] => ir_ap("Call", [ mk_return(mk_list([], ty_ssa)), calldata(fsig, callee, args), exc_clause exc], ty_inst) | [lhs, Eq, Word "CALL", Type fsig, SSA callee, Parens args, Word "EXC", Parens exc] => ir_ap("Call", [ mk_return(lhs_n lhs "CALL"), calldata(fsig, callee, args), exc_clause exc], ty_inst) | [Word "TAILCALL", Type fsig, SSA callee, Parens args] => ir_ap("TailCall", [calldata(fsig, callee, args)], ty_inst) | [Word "BRANCH", SSA (Local, label), Parens vs] => ir_ap("Branch1", [dest label vs], ty_inst) | [Word "BRANCH2", SSA cond, SSA (Local, l1), Parens a1, SSA (Local, l2), Parens a2] => let val ty_int1 = mk_return(mk_comb( mk_const("Int", ty_num --> ty_type), lift_num ty_num Arbnum.one)) in ir_ap("Branch2", [ssa_var env cond, dest l1 a1, dest l2 a2], ty_inst) end | Word "SWITCH"::_ => (* TODO: SWITCH *) raise ParseError "SWITCH is not yet implemented" | Word "TRAP"::_ => (* TODO: TRAP *) raise ParseError "TRAP is not yet implemented" | Word "WATCHPOINT"::_ => (* TODO: WATCHPOINT *) raise ParseError "WATCHPOINT is not yet implemented" | Word "WPBRANCH"::_ => (* TODO: WPBRANCH *) raise ParseError "WPBRANCH is not yet implemented" | Word "SWAPSTACK"::_ => (* TODO: SWAPSTACK *) raise ParseError "SWAPSTACK is not yet implemented" | Word "COMMINST"::SSA (Global, name)::args => let val (ci, rest) = parse_comminst env name args in case rest of [Word "EXC", Parens exc] => ir_ap("CommInst", [mk_return(mk_list([], ty_ssa)), ci, exc_clause exc], ty_inst) | _ => raise ParseError "terminating COMMINST must have an EXC clause" end | lhs::Eq::Word "COMMINST"::SSA (Global, name)::args => let val (ci, rest) = parse_comminst env name args in case rest of [Word "EXC", Parens exc] => ir_ap("CommInst", [mk_return(lhs_n lhs "COMMINST"), ci, exc_clause exc], ty_inst) | _ => raise ParseError "terminating COMMINST must have an EXC clause" end | _::Eq::_ => raise ParseError "assignment is not valid in terminating position" | Word opn::_ => if exists (fn x => x = opn) terminsts then raise ParseError ("invalid arguments for " ^ opn) else if exists (fn x => x = opn) insts then raise ParseError (opn ^ " is not a terminating instruction") else raise ParseError (opn ^ " is not an instruction") | _ => raise ParseError "expected terminating instruction" end fun parse_block (env : env_context) (lines : uir_token list list) : string * term = let val (header::all_insts) = filter (fn l => l <> []) lines val inst_lines = List.take(all_insts, length all_insts - 1) val terminst_line = List.last all_insts val x = mk_var("x", ty_type) fun get_args (Type t::SSA (Local, v)::rest) accum = get_args rest ( mk_lift(mk_abs(x, mk_pair(mk_ssa v, x)), parse_type env t)::accum) | get_args [] accum = List.rev accum | get_args _ _ = raise ParseError "expected %arg in block args" val inst_ty = mk_type("instruction", [ty_ssa]) in case header of [SSA (Local, label), Parens arg_tokens, Colon] => (label, lift_record(mk_type("bblock", [ty_ssa]), [ ("args", mk_sequence(get_args arg_tokens [], mk_prod(ty_ssa, ty_type))), ("body", mk_sequence(map (parse_inst env) inst_lines, inst_ty)), ("exit", parse_terminst env terminst_line), ("keepalives", mk_return(mk_list([], ty_ssa))) ])) | _ => raise ParseError "expected block header" end fun parse_func (env : env_context) (fsig : term) (lines : uir_token list list) : term = let val block_tokens = split_by_header (fn l => List.last l = Colon) (filter (fn l => l <> []) lines) val ty_bb = mk_type("bblock", [ty_ssa]) val ty_lb = mk_prod(ty_blabel, ty_bb) val ty_bbmap = mk_fmap_ty(ty_blabel, ty_bb) val t_b = mk_var("b", ty_bb) val blocks = mk_sequence( map (fn block => let val (l, b) = parse_block env block in mk_lift( mk_abs(t_b, mk_pair(infer_comb("BlockLabel", [fromMLstring l], ty_blabel), t_b)), b) end) block_tokens, ty_lb) in lift_record(mk_type("function", []), [ ("signature", fsig), ("entry_block", mk_lift(mk_const("FST", ty_lb --> ty_blabel), mk_lift(mk_const("HD", ty_list ty_lb --> ty_lb), blocks))), ("blocks", mk_lift( infer_comb("FOLDL", [ mk_const("FUPDATE", ty_bbmap --> ty_lb --> ty_bbmap), mk_const("FEMPTY", ty_bbmap)], ty_list ty_lb --> ty_bbmap), blocks)) ]) end fun uir_inst (env : env_context) (quotation : 'a frag list) : term = parse_inst env (tokenize (unquote_all quotation)) fun uir_terminst (env : env_context) (quotation : 'a frag list) : term = parse_terminst env (tokenize (unquote_all quotation)) fun uir_block (env : env_context) (quotation : 'a frag list) : string * term = let val str = unquote_all quotation val lines = map tokenize (String.tokens (fn x => x = #"\n") str) in parse_block env lines end fun uir_func (env : env_context) (fsig : term) (quotation : 'a frag list) : term = let val str = unquote_all quotation val lines = map tokenize (String.tokens (fn x => x = #"\n") str) in parse_func env fsig lines end end