WARNING! Access to this system is limited to authorised users only.
Unauthorised users may be subject to prosecution.
Unauthorised access to this system is a criminal offence under Australian law (Federal Crimes Act 1914 Part VIA)
It is a criminal offence to:
(1) Obtain access to data without authority. -Penalty 2 years imprisonment.
(2) Damage, delete, alter or insert data without authority. -Penalty 10 years imprisonment.
User activity is monitored and recorded. Anyone using this system expressly consents to such monitoring and recording.

To protect your data, the CISO officer has suggested users to enable 2FA as soon as possible.
Currently 2.7% of users enabled 2FA.

Commit 2b46255c authored by Stefan Marr's avatar Stefan Marr
Browse files

Added Parser class


Signed-off-by: default avatarStefan Marr <git@stefan-marr.de>
parent b4330998
from som.compiler.symbol import Symbol
from som.compiler.lexer import Lexer
from som.compiler.bytecode_generator import BytecodeGenerator
from som.compiler.method_generation_context import MethodGenerationContext
class Parser(object):
_single_op_syms = (Symbol.Not, Symbol.And, Symbol.Or, Symbol.Star,
Symbol.Div, Symbol.Mod, Symbol.Plus, Symbol.Equal,
Symbol.More, Symbol.Less, Symbol.Comma, Symbol.At,
Symbol.Per, Symbol.NONE)
_binary_op_syms = (Symbol.Or, Symbol.Comma, Symbol.Minus, Symbol.Equal,
Symbol.Not, Symbol.And, Symbol.Or, Symbol.Star,
Symbol.Div, Symbol.Mod, Symbol.Plus, Symbol.Equal,
Symbol.More, Symbol.Less, Symbol.Comma, Symbol.At,
Symbol.Per, Symbol.NONE)
_keyword_selector_syms = (Symbol.Keyword, Symbol.KeywordSequence)
def __init__(self, reader, universe):
self._universe = universe
self._lexer = Lexer(reader)
self._bc_gen = BytecodeGenerator()
self._sym = Symbol.NONE
self._text = None
self._next_sym = Symbol.NONE
self._get_symbol_from_lexer()
def classdef(self, cgenc):
cgenc.set_name(self._universe.symbol_for(self._text))
self._expect(Symbol.Identifier)
self._expect(Symbol.Equal)
if self._sym == Symbol.Identifier:
super_name = self._universe.symbol_for(self._text)
self._accept(Symbol.Identifier)
else:
super_name = self._universe.symbol_for("Object")
cgenc.set_super_name(super_name)
# Load the super class
if super_name.get_string() == "nil": # Break the dependency cycle by hard coding the values for Object
cgenc.set_number_of_instance_fields_of_super(0) # Object's super class is nil, has no fields
cgenc.set_number_of_class_fields_of_super(4) # Object's class has the fields of Class
else:
super_class = self._universe.load_class(super_name)
cgenc.set_number_of_instance_fields_of_super(super_class.get_number_of_instance_fields())
cgenc.set_number_of_class_fields_of_super(super_class.get_class().get_number_of_instance_fields())
self._expect(Symbol.NewTerm)
self._instance_fields(cgenc)
while (self._sym == Symbol.Identifier or self._sym == Symbol.Keyword or
self._sym == Symbol.OperatorSequence or
self._sym_in(self._binary_op_syms)):
mgenc = MethodGenerationContext()
mgenc.set_holder(cgenc)
mgenc.add_argument("self")
self._method(mgenc)
if mgenc.is_primitive():
cgenc.add_instance_method(mgenc.assemble_primitive(self._universe))
else:
cgenc.add_instance_method(mgenc.assemble(self._universe))
if self._accept(Symbol.Separator):
cgenc.set_class_side(True)
self._class_fields(cgenc)
while (self._sym == Symbol.Identifier or
self._sym == Symbol.Keyword or
self._sym == Symbol.OperatorSequence or
self._sym_in(self._binary_op_syms)):
mgenc = MethodGenerationContext()
mgenc.set_holder(cgenc)
mgenc.add_argument("self")
self._method(mgenc)
if mgenc.is_primitive():
cgenc.add_class_method(mgenc.assemble_primitive(self._universe))
else:
cgenc.add_class_method(mgenc.assemble(self._universe))
self._expect(Symbol.EndTerm)
def _sym_in(self, symbol_list):
return self._sym in symbol_list
def _accept(self, s):
if self._sym == s:
self._get_symbol_from_lexer()
return True
return False
def _accept_one_of(self, symbol_list):
if self._sym_in(symbol_list):
self._get_symbol_from_lexer()
return True
return False
def _expect(self, s):
if self._accept(s):
return True
err = ("Error: unexpected symbol in line %d. Expected %s, but found %s" %
(self._lexer.get_current_line_number(), Symbol.as_str(s), Symbol.as_str(self._sym)))
if self._printable_symbol():
err += " (" + self._text + ")"
err += ": " + self._lexer.get_raw_buffer()
raise ValueError(err)
def _expect_one_of(self, symbol_list):
if self._accept_one_of(symbol_list):
return True
expected = ", ".join([Symbol.as_str(x) for x in symbol_list])
err = ("Error: unexpected symbol in line %d. Expected one of %s, but found %s" %
(self._lexer.get_current_line_number(), expected, Symbol.as_str(self._sym)))
if self._printable_symbol():
err += " (" + self._text + ")"
err += ": " + self._lexer.get_raw_buffer()
raise ValueError(err)
def _instance_fields(self, cgenc):
if self._accept(Symbol.Or):
while self._sym == Symbol.Identifier:
var = self._variable()
cgenc.add_instance_field(self._universe.symbol_for(var))
self._expect(Symbol.Or)
def _class_fields(self, cgenc):
if self._accept(Symbol.Or):
while self._sym == Symbol.Identifier:
var = self._variable()
cgenc.add_class_field(self._universe.symbol_for(var))
self._expect(Symbol.Or)
def _method(self, mgenc):
self._pattern(mgenc)
self._expect(Symbol.Equal)
if self._sym == Symbol.Primitive:
mgenc.set_primitive(True)
self._primitive_block()
else:
self._method_block(mgenc)
def _primitive_block(self):
self._expect(Symbol.Primitive)
def _pattern(self, mgenc):
if self._sym == Symbol.Identifier:
self._unary_pattern(mgenc)
elif self._sym == Symbol.Keyword:
self._keyword_pattern(mgenc)
else:
self._binary_pattern(mgenc)
def _unary_pattern(self, mgenc):
mgenc.set_signature(self._unary_selector())
def _binary_pattern(self, mgenc):
mgenc.set_signature(self._binary_selector())
mgenc.add_argument_if_absent(self._argument())
def _keyword_pattern(self, mgenc):
kw = self._keyword()
mgenc.add_argument_if_absent(self._argument())
while self._sym == Symbol.Keyword:
kw += self._keyword()
mgenc.add_argument_if_absent(self._argument())
mgenc.set_signature(self._universe.symbol_for(kw))
def _method_block(self, mgenc):
self._expect(Symbol.NewTerm)
self._block_contents(mgenc)
# if no return has been generated so far, we can be sure there was no .
# terminating the last expression, so the last expression's value must
# be popped off the stack and a ^self be generated
if not mgenc.is_finished():
self._bc_gen.emitPOP(mgenc)
self._bc_gen.emitPUSHARGUMENT(mgenc, 0, 0)
self._bc_gen.emitRETURNLOCAL(mgenc)
mgenc.set_finished()
self._expect(Symbol.EndTerm)
def _unary_selector(self):
return self._universe.symbol_for(self._identifier())
def _binary_selector(self):
s = self._text
if self._accept(Symbol.Or): pass
elif self._accept(Symbol.Comma): pass
elif self._accept(Symbol.Minus): pass
elif self._accept(Symbol.Equal): pass
elif self._accept_one_of(self._single_op_syms): pass
elif self._accept(Symbol.OperatorSequence): pass
else: self._expect(Symbol.NONE)
return self._universe.symbol_for(s)
def _identifier(self):
s = self._text
is_primitive = self._accept(Symbol.Primitive)
if not is_primitive:
self._expect(Symbol.Identifier)
return s
def _keyword(self):
s = self._text
self._expect(Symbol.Keyword)
return s
def _argument(self):
return self._variable()
def _block_contents(self, mgenc):
if self._accept(Symbol.Or):
self._locals(mgenc)
self._expect(Symbol.Or)
self._block_body(mgenc, False)
def locals(self, mgenc):
while (self._sym == Symbol.Identifier):
mgenc.add_local_if_absent(self._variable())
def _block_body(self, mgenc, seenPeriod):
if self._accept(Symbol.Exit):
self._result(mgenc)
elif self._sym == Symbol.EndBlock:
if seenPeriod:
# a POP has been generated which must be elided (blocks always
# return the value of the last expression, regardless of
# whether it was terminated with a . or not)
mgenc.remove_last_bytecode()
self._bc_gen.emitRETURNLOCAL(mgenc)
mgenc.setfinished()
elif self._sym == Symbol.EndTerm:
# it does not matter whether a period has been seen, as the end of
# the method has been found (EndTerm) - so it is safe to emit a
# "return self"
self._bc_gen.emitPUSHARGUMENT(mgenc, 0, 0)
self._bc_gen.emitRETURNLOCAL(mgenc)
mgenc.set_finished()
else:
self._expression(mgenc)
if self._accept(Symbol.Period):
self._bc_gen.emitPOP(mgenc)
self._block_body(mgenc, True)
def _result(self, mgenc):
self._expression(mgenc)
if mgenc.is_block_method():
self._bc_gen.emitRETURNNONLOCAL(mgenc)
else:
self._bc_gen.emitRETURNLOCAL(mgenc)
mgenc.set_finished(True)
self._accept(Symbol.Period)
def _expression(self, mgenc):
self._peek_for_next_symbol_from_lexer()
if self._next_sym == Symbol.Assign:
self._assignation(mgenc)
else:
self._evaluation(mgenc)
def assignation(self, mgenc):
l = []
self._assignments(mgenc, l)
self._evaluation(mgenc)
for assignment in l:
self._bc_gen.emitDUP(mgenc)
for assignment in l:
self._gen_pop_variable(mgenc, assignment)
def _assignments(self, mgenc, l):
if self._sym == Symbol.Identifier:
l.append(self._assignment(mgenc))
self._peek_for_next_symbol_from_lexer()
if self._next_sym == Symbol.Assign:
self._assignments(mgenc, l)
def _assignment(self, mgenc):
v = self._variable()
var = self._universe.symbol_for(v)
mgenc.add_literal_if_absent(var)
self._expect(Symbol.Assign)
return v
def _evaluation(self, mgenc):
# single: superSend
is_super_send = [False]
self._primary(mgenc, is_super_send)
if (self._sym == Symbol.Identifier or
self._sym == Symbol.Keyword or
self._sym == Symbol.OperatorSequence or
self._sym_in(self._binary_op_syms)):
self._messages(mgenc, is_super_send)
def _primary(self, mgenc, is_super_send):
is_super_send[0] = False
if self._sym == Symbol.Identifier:
v = self._variable()
if v == "super":
is_super_send[0] = True
# sends to super push self as the receiver
v = "self"
self._gen_push_variable(mgenc, v)
elif self._sym == Symbol.NewTerm:
self._nested_term(mgenc)
elif self._sym == Symbol.NewBlock:
bgenc = MethodGenerationContext()
bgenc.set_is_block_method(True)
bgenc.set_holder(mgenc.get_holder())
bgenc.set_outer(mgenc)
self._nested_block(bgenc)
block_method = bgenc.assemble(self._universe)
mgenc.add_literal(block_method)
self._bc_gen.emitPUSHBLOCK(mgenc, block_method)
else:
self._literal(mgenc)
def _variable(self):
return self._identifier()
def _messages(self, mgenc, is_super_send):
if self._sym == Symbol.Identifier:
while self._sym == Symbol.Identifier:
# only the first message in a sequence can be a super send
self._unary_message(mgenc, is_super_send)
is_super_send[0] = False
while (self._sym == Symbol.OperatorSequence or
self._sym_in(self._binary_op_syms)):
self._binary_message(mgenc, [False])
if self._sym == Symbol.Keyword:
self._keyword_message(mgenc, [False])
elif (self._sym == Symbol.OperatorSequence or
self._sym_in(self._binary_op_syms)):
while (self._sym == Symbol.OperatorSequence or
self._sym_in(self._binary_op_syms)):
# only the first message in a sequence can be a super send
self._binary_message(mgenc, is_super_send)
is_super_send[0] = False
if self._sym == Symbol.Keyword:
self._keyword_message(mgenc, [False])
else:
self._keyword_message(mgenc, is_super_send)
def _unary_message(self, mgenc, is_super_send):
msg = self._unary_selector()
mgenc.add_literal_if_absent(msg)
if is_super_send[0]:
self._bc_gen.emitSUPERSEND(mgenc, msg)
else:
self._bc_gen.emitSEND(mgenc, msg)
def _binary_message(self, mgenc, is_super_send):
msg = self._binary_selector()
mgenc.add_literal_if_absent(msg)
self._binary_operand(mgenc, False)
if is_super_send[0]:
self._bc_gen.emitSUPERSEND(mgenc, msg)
else:
self._bc_gen.emitSEND(mgenc, msg)
def _binary_operand(self, mgenc, is_super_send):
self._primary(mgenc, is_super_send)
while self._sym == Symbol.Identifier:
self._unary_message(mgenc, is_super_send)
def _keyword_message(self, mgenc, is_super_send):
kw = self._keyword()
self._formula(mgenc)
while self._sym == Symbol.Keyword:
kw += self._keyword()
self._formula(mgenc)
msg = self._universe.symbol_for(kw)
mgenc.add_literal_if_absent(msg)
if is_super_send[0]:
self._bc_gen.emitSUPERSEND(mgenc, msg)
else:
self._bc_gen.emitSEND(mgenc, msg)
def _formula(self, mgenc):
is_super_send = [False]
self._binary_operand(mgenc, is_super_send)
# only the first message in a sequence can be a super send
if self._sym == Symbol.OperatorSequence or self._sym_in(self._binary_op_syms):
self._binary_message(mgenc, is_super_send)
while self._sym == Symbol.OperatorSequence or self.sym_in(self._binary_op_syms):
self._binary_message(mgenc, [False])
def _nested_term(self, mgenc):
self._expect(Symbol.NewTerm)
self._expression(mgenc)
self._expect(Symbol.EndTerm)
def _literal(self, mgenc):
if self._sym == Symbol.Pound:
self._literal_symbol(mgenc)
elif self._sym == Symbol.STString:
self._literal_string(mgenc)
else:
self._literal_number(mgenc)
def _literal_number(self, mgenc):
if self._sym == Symbol.Minus:
val = self._negative_decimal()
else:
val = self._literal_decimal()
if isinstance(val, long):
lit = self._universe.new_big_integer(val)
else:
lit = self._universe.new_integer(val)
mgenc.add_literal_if_absent(lit)
self._bc_gen.emitPUSHCONSTANT(mgenc, lit)
def _literal_decimal(self):
return self._literal_integer()
def _negative_decimal(self):
self._expect(Symbol.Minus)
return -self._literal_integer()
def _literal_integer(self):
i = long(self._text)
self._expect(Symbol.Integer)
return i
def _literal_symbol(self, mgenc):
self._expect(Symbol.Pound)
if self._sym == Symbol.STString:
s = self._string()
symb = self._universe.symbol_for(s)
else:
symb = self._selector()
mgenc.add_literal_if_absent(symb)
self._bc_gen.emitPUSHCONSTANT(mgenc, symb)
def _literal_string(self, mgenc):
s = self._string()
string = self._universe.new_string(s)
mgenc.add_literal_if_absent(string)
self._bc_gen.emitPUSHCONSTANT(mgenc, string)
def _selector(self):
if self._sym == Symbol.OperatorSequence or self._sym_in(self._single_op_syms):
return self._binary_selector()
elif self._sym == Symbol.Keyword or self._sym == Symbol.KeywordSequence:
return self._keyword_selector()
else:
return self._unary_selector()
def _keyword_selector(self):
s = self._text
self._expect_one_of(self._keyword_selector_syms)
symb = self._universe.symbol_for(s)
return symb
def _string(self):
s = self._text
self._expect(Symbol.STString)
return s
def _nested_block(self, mgenc):
mgenc.add_argument_if_absent("$block self")
self._expect(Symbol.NewBlock)
if self._sym == Symbol.Colon:
self._block_pattern(mgenc)
# generate Block signature
block_sig = "$block method"
arg_size = mgenc.get_number_of_arguments()
block_sig += ":" * (arg_size - 1)
mgenc.set_signature(self._universe.symbol_for(block_sig))
self._block_contents(mgenc)
# if no return has been generated, we can be sure that the last
# expression in the block was not terminated by ., and can generate
# a return
if not mgenc.is_finished():
self._bc_gen.emitRETURNLOCAL(mgenc)
mgenc.set_finished(True)
self._expect(Symbol.EndBlock)
def _block_pattern(self, mgenc):
self._block_arguments(mgenc)
self._expect(Symbol.Or)
def _block_arguments(self, mgenc):
self._expect(Symbol.Colon)
mgenc.add_argument_if_absent(self._argument())
while self._sym == Symbol.Colon:
self._expect(Symbol.Colon)
mgenc.add_argument_if_absent(self._argument())
def _gen_push_variable(self, mgenc, var):
# The purpose of this function is to find out whether the variable to be
# pushed on the stack is a local variable, argument, or object field.
# This is done by examining all available lexical contexts, starting with
# the innermost (i.e., the one represented by mgenc).
# triplet: index, context, isArgument
triplet = [0, 0, False]
if mgenc.find_var(var, triplet):
if triplet[2]:
self._bc_gen.emitPUSHARGUMENT(mgenc, triplet[0], triplet[1])
else:
self._bc_gen.emitPUSHLOCAL(mgenc, triplet[0], triplet[1])
else:
identifier = self._universe.symbol_for(var)
if mgenc.has_field(identifier):
field_name = identifier
mgenc.add_literal_if_absent(field_name)
self._bc_gen.emitPUSHFIELD(mgenc, field_name)
else:
globe = identifier
mgenc.add_literal_if_absent(globe)
self._bc_gen.emitPUSHGLOBAL(mgenc, globe)
def _gen_pop_variable(self, mgenc, var):
# The purpose of this function is to find out whether the variable to be
# popped off the stack is a local variable, argument, or object field.
# This is done by examining all available lexical contexts, starting with
# the innermost (i.e., the one represented by mgenc).
# triplet: index, context, isArgument
triplet = [0, 0, False]
if mgenc.find_var(var, triplet):
if triplet[2]:
self._bc_gen.emitPOPARGUMENT(mgenc, triplet[0], triplet[1])
else:
self._bc_gen.emitPOPLOCAL(mgenc, triplet[0], triplet[1])
else:
self._bc_gen.emitPOPFIELD(mgenc, self._universe.symbol_for(var))
def _get_symbol_from_lexer(self):
self._sym = self._lexer.get_sym()
self._text = self._lexer.get_text()
def _peek_for_next_symbol_from_lexer(self):
self._next_sym = self