__file__ = 'ky_parser.py' __usage__ = ''' = Parser( ... ) Ky parser implementation/s''' __description__ = '''Ky Parser class/es ''' __author__ = 'Timothy Wakeham (timmeh)' __contact__ = 'timmeh@hiddenworlds.org' __updated__ = '28-03-2008 @ 22:19:05' __todo__ = '''Implement bottom-up precedence parser. ***DONE*** Implement top-down recursive parser. Unit test both parsers and examine results. Research benefits of both approaches.''' #start from ky_operator import Operator from ky_token import Token from ky_lexical_scanner import LexicalScanner # initialise operators ay = Operator.PREC_YIELD_LEFT by = Operator.PREC_YIELD_RIGHT ep = Operator.PREC_EQUAL add_prec = {'-':by, '*':ay, '/':by, '^':by, '(':ay, ')':by} sub_prec = {'+':by, '*':ay, '/':by, '^':by, '(':ay, ')':by} mul_prec = {'+':by, '-':ay, '/':by, '^':by, '(':ay, ')':by} div_prec = {'+':by, '-':ay, '*':by, '^':by, '(':ay, ')':by} exp_prec = {'+':by, '-':ay, '*':by, '/':by, '(':ay, ')':by} rbr_prec = {'+':ay, '-':ay, '*':ay, '/':ay, '^':ay, ')':ep, '(':ay} lbr_prec = {'+':by, '-':by, '*':by, '/':by, '^':by, '(':ep, ')':by} ass_prec = {'+':by, '-':by, '*':by, '/':by, '^':by, '(':by, ')':by} ass = Operator('=', 'POP', ass_prec) add = Operator('+', 'ADD', add_prec, Operator.ASSOC_LEFT) sub = Operator('-', 'SUB', sub_prec, Operator.ASSOC_LEFT) mul = Operator('*', 'MUL', mul_prec, Operator.ASSOC_LEFT) div = Operator('/', 'DIV', div_prec, Operator.ASSOC_LEFT) exp = Operator('^', 'POW', exp_prec, Operator.ASSOC_LEFT) lbr = Operator('(', None , lbr_prec, Operator.ASSOC_RIGHT) rbr = Operator(')', None , rbr_prec, Operator.ASSOC_LEFT) class OperatorPrecedenceExpressionParser: ID = 'KY_PRECEDENCE_PARSER' def __init__(self, output_stream, lexical_scanner, debug=False): self.outstrm = output_stream self.lexical_scanner = lexical_scanner self.debug = debug self.outidx = 1 def output(self, out): if not out: return self.outstrm.write(out + '\n') def expression(self): if self.debug: print 'Expression: %s\n' % self.lexical_scanner.code stack = [] while(1): symbol = self.lexical_scanner.get_symbol() if symbol.token_type == Token.EOF: break elif symbol.token_type == Token.IDENT: if symbol.next.token_type == Token.OPERATOR and symbol.next.token_value <> Operator.operators['=']: self.output('PUSH %s' % symbol.token_value) else: symbol.next.token_value.assign = symbol.token_value elif symbol.token_type == Token.NUMBER: self.output('PUSH #%s' % symbol.token_value) elif symbol.token_type == Token.OPERATOR: if not len(stack): stack.append(symbol) else: # ay - a yields to b if stack[-1].token_value >= symbol.token_value: stack.append(symbol) else: # by - b yields to a while(len(stack) and stack[-1].token_value < symbol.token_value): self.output(stack.pop().token_value.opcode) stack.append(symbol) else: #print symbol break while(len(stack)): self.output(stack.pop().token_value.opcode) class RecursiveDescentExpressionParser: def __init__(self, output_stream, lexical_scanner, debug=False): self.outstrm = output_stream self.lexical_scanner = lexical_scanner self.debug = debug self.outidx = 1 def __call__(self): self.assignment() def output(self, out): if not out: return self.outstrm.write('%04d\t\t%s\n' % (self.outidx, out)) self.outidx += 1 def match(self, ttype, tval): if self.look.token_type == ttype and self.look.token_value == tval: self.look = self.lexical_scanner.get_symbol() return raise SyntaxError, 'Expected %s, got %s' % (tval, self.look.token_value) def match_type(self, ttype): if self.look.token_type == ttype: self.look = self.lexical_scanner.get_symbol() return raise SyntaxError, 'Expected token of type %s, got %s' % (Token.mapping[ttype], Token.mapping[self.look.token_type]) def assignment(self): self.look = self.lexical_scanner.get_symbol() self.expression() while(self.look.token_type <> Token.EOF and self.look.token_value.operator == '='): var = self.look.prev.prev self.look = self.lexical_scanner.get_symbol() self.expression() if var.prev: self.output('DUP') self.output('POP %s' % var.token_value) def expression(self): self.term() while(self.look.token_type <> Token.EOF and (self.look.token_value.operator == '+' or self.look.token_value.operator == '-')): opcode = 'ADD' if self.look.token_value.operator == '+' else 'SUB' self.look = self.lexical_scanner.get_symbol() self.term() self.output(opcode) def term(self): self.factor() while(self.look.token_type <> Token.EOF and (self.look.token_value.operator == '*' or self.look.token_value.operator == '/')): opcode = 'MUL' if self.look.token_value.operator == '*' else 'DIV' self.look = self.lexical_scanner.get_symbol() self.factor() self.output(opcode) def factor(self): self.exponent() #print self.look while(self.look.token_type <> Token.EOF and self.look.token_value.operator == '^'): opcode = 'POW' self.look = self.lexical_scanner.get_symbol() self.exponent() self.output(opcode) def exponent(self): if self.look.token_type == Token.OPERATOR and self.look.token_value.operator == '(': self.assignment() self.match(Token.OPERATOR, Operator.operators[')']) elif self.look.token_type == Token.NUMBER: self.output('PUSH #%s' % self.look.token_value) self.match_type(Token.NUMBER) elif self.look.token_type == Token.IDENT: if self.look.next.token_type == Token.OPERATOR and self.look.next.token_value.operator == '=': self.look = self.lexical_scanner.get_symbol() return self.output('PUSH %s' % self.look.token_value) self.match_type(Token.IDENT) else: raise SyntaxError, 'Unexpected token: %s' % self.look if __name__ == '__main__': import sys lexer = LexicalScanner('a = 5 * (b =4 * (c = 7 * d) )') parser = RecursiveDescentExpressionParser(sys.stdout, lexer, debug=True) parser()