__file__ = 'ky_parser_gen2.py'

__usage__ = ''' = RecursiveDescentExpressionParser_GEN2( ... )
outstream    - output stream, must be a file-like object
lexer        - the tokeniser
[precedence] - operator precedence table
[debug]      - print debug information'''

__description__ = '''Implementation of a generalised recursive descent expression parser
which allows addition of new operators on the fly with definable precedence levels.
Also allows for unary prefix operators such as unary minus and logical not
'''

__author__ = 'Timothy Wakeham (timmeh)'
__contact__ = 'timmeh@hiddenworlds.org'
__updated__ = '02-04-2008 @ 01:29:59'
__todo__ = None

#start
from ky_operator import Operator, UnaryOperator
from ky_token import Token
from ky_data_structures import Node
from ky_lexical_scanner import LexicalScanner

# Binary operators: source symbol -> stack-machine opcode emitted by the parser.
ass = Operator('=', 'POP')
add = Operator('+', 'ADD')
sub = Operator('-', 'SUB')
mul = Operator('*', 'MUL')
div = Operator('/', 'DIV')
exp = Operator('^', 'POW')
lt = Operator('<', 'LT')
gt = Operator('>', 'GT')
equ = Operator('==', 'EQU')
neq = Operator('<>', 'NEQ')
_or = Operator('|', 'OR')
_and = Operator('&', 'AND')
_xor = Operator('~', 'XOR')

# Unary prefix operators (emitted after their operand is parsed).
_not = UnaryOperator('!', 'NOT')
inc = UnaryOperator('++', 'INC')
dec = UnaryOperator('--', 'DEC')
uminus = UnaryOperator('-', 'NEG')

# Brackets carry no opcode; they only control grouping in expression().
lbr = Operator('(', None)
rbr = Operator(')', None)

# Precedence table: index 0 is the loosest-binding level (assignment and
# comparisons), higher indices bind tighter.  The final [None] entry marks
# the primary/terminal level where expression() parses operands instead of
# binary operators.
operator_precedence = [
    [ass, equ, lt, gt, neq],
    [add, sub, _or, _xor],
    [mul, div, _and],
    [exp, ],
    [None],
]


class RecursiveDescentExpressionParser_GEN2:
    """Recursive descent expression parser emitting stack-machine opcodes.

    Walks the token stream produced by a LexicalScanner using the classic
    precedence-climbing scheme: expression(lvl) handles the operators in
    prec[lvl] and recurses to lvl+1 for tighter-binding subexpressions.
    Generated code is written, one numbered opcode per line, to the supplied
    output stream; a parallel AST of Node objects is threaded together as a
    side effect.
    """

    def __init__(self, output_stream, lexical_scanner, prec=operator_precedence, debug=False):
        """
        output_stream   - file-like object receiving the generated opcodes
        lexical_scanner - tokeniser providing get_symbol()
        prec            - operator precedence table (list of lists, loosest
                          level first; last entry is the primary level)
        debug           - when True, echo the source expression before parsing
        """
        self.outstrm = output_stream
        self.lexical_scanner = lexical_scanner
        self.debug = debug
        self.outidx = 1                      # running line number for emitted opcodes
        self.lvls = len(prec) - 1            # index of the primary/terminal level
        self.prec = prec
        self.ast = Node(None, 'ROOT', None)  # current attachment point in the AST

    def output(self, out):
        """Write one opcode (str) or several (list of str) to the output
        stream, each prefixed with a zero-padded, auto-incrementing line
        number.  Falsy input (None, '' or []) is silently ignored."""
        if not out:
            return
        if isinstance(out, str):
            out = [out]
        for line in out:
            self.outstrm.write('%04d\t\t%s\n' % (self.outidx, line))
            self.outidx += 1

    def __call__(self):
        """Parse the scanner's whole expression, emitting opcodes as we go."""
        if self.debug:
            print('Expression: %s\n' % self.lexical_scanner.code)
        self.look = self.lexical_scanner.get_symbol()  # one-token lookahead
        self.expression()

    def match(self, ttype, tval):
        """Consume the lookahead if it has exactly this type AND value;
        otherwise raise SyntaxError."""
        if self.look.token_type == ttype and self.look.token_value == tval:
            self.look = self.lexical_scanner.get_symbol()
            return
        raise SyntaxError('Expected %s, got %s' % (tval, self.look.token_value))

    def match_type(self, ttype):
        """Consume the lookahead if it has this token type; otherwise raise
        SyntaxError naming both types via Token.mapping."""
        if self.look.token_type == ttype:
            self.look = self.lexical_scanner.get_symbol()
            return
        raise SyntaxError('Expected token of type %s, got %s' %
                          (Token.mapping[ttype], Token.mapping[self.look.token_type]))

    def expression(self, lvl=0):
        """Parse one expression at precedence level lvl (precedence climbing).

        Below the terminal level we simply recurse to the tighter level;
        at the terminal level we parse a primary: a parenthesised
        subexpression, a number, a unary prefix operator, or an identifier.
        Afterwards, the while loop consumes any binary operators belonging
        to this level, recursing to lvl+1 for each right-hand operand and
        emitting the operator's opcode postfix-style.
        """
        if lvl != self.lvls:
            self.expression(lvl + 1)
        else:
            if self.look.token_type == Token.OPERATOR and self.look.token_value.operator == '(':
                # Parenthesised subexpression: restart at the loosest level.
                self.look = self.lexical_scanner.get_symbol()
                self.expression()
                self.match(Token.OPERATOR, Operator.operators[')'])
            elif self.look.token_type == Token.NUMBER:
                self.output('PUSH #%s' % self.look.token_value)
                self.ast.child = Node(self.ast, self.look.token_value, self.look, None)
                self.match_type(Token.NUMBER)
            elif self.look.token_type == Token.UNOP:
                # Unary prefix: parse the (terminal-level) operand first,
                # then emit the operator's opcode.
                out = self.look.token_value.opcode
                self.ast = Node(self.ast, self.look.token_value.operator, self.look, None)
                self.ast.parent.child = self.ast
                self.look = self.lexical_scanner.get_symbol()
                self.expression(self.lvls)
                self.output(out)
            elif self.look.token_type == Token.IDENT:
                if self.look.next.token_type == Token.OPERATOR and self.look.next.token_value.operator == '=':
                    # Assignment target: no PUSH here; the '=' operator in
                    # the loop below emits the POP.
                    self.look = self.lexical_scanner.get_symbol()
                    return
                self.output('PUSH %s' % self.look.token_value)
                self.ast.child = Node(self.ast, self.look.token_value, self.look, None)
                self.match_type(Token.IDENT)
            else:
                raise SyntaxError('Unexpected token: %s' % self.look)

        # Consume all binary operators that bind at this level.
        while (self.look.token_type != Token.EOF and
               self.look.token_value in self.prec[lvl]):
            opcode = self.look.token_value.opcode
            self.ast = Node(self.ast, self.look.token_value.operator, self.look, None)
            self.ast.parent.child = self.ast
            # Assignment special case: POP into the identifier two tokens
            # back; if the assignment is itself a subexpression (something
            # precedes the target), DUP first so the value stays available.
            if self.look.token_value == Operator.operators['=']:
                opcode = 'POP %s' % self.look.prev.prev.token_value
                if self.look.prev.prev.prev:
                    opcode = ['DUP', opcode]
            self.look = self.lexical_scanner.get_symbol()
            self.expression(lvl + 1)
            self.output(opcode)


if __name__ == '__main__':
    import sys

    #lexer = LexicalScanner('abs = x*(1+2*(x<0))')
    #lexer = LexicalScanner('c=-a*-(b* ++d)')
    lexer = LexicalScanner('1*2*3*(4+8)')
    parser = RecursiveDescentExpressionParser_GEN2(sys.stdout, lexer, debug=True)
    parser()

##    tok = lexer.get_symbol()
##    while tok.token_type != Token.EOF:
##        print(tok)
##        tok = lexer.get_symbol()