'''Toy compiler front end: a hand-written lexer plus a recursive-descent
parser that emits stack-machine opcodes (PUSH/POP/ADD/.../Jxx).

The source is written to run unchanged on both Python 2 and Python 3.
'''
import logging
import sys

#logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - [%(levelname)s] %(name)s.%(funcName)s: %(message)s',filename='server.log')


class lex(object):
    '''Character-level lexer.

    The constructor reads the first symbol immediately, so ``sym`` is valid
    as soon as the object exists.

    Attributes:
        code:    the full source text.
        count:   length of ``code``.
        pos:     index of the lookahead character ``char``.
        char:    lookahead character, or '' once the text is exhausted.
        sym:     most recently read symbol, or None when there is none left.
        line:    statement counter; incremented at every ``newline`` (';').
        col:     character counter since the last ``newline``.
        linebuf: characters read since the last ``newline`` (used in errors).
        end:     True once no further symbol is available. The parser also
                 sets it when it meets the END keyword.
    '''
    digits = '0123456789'
    alpha = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    ident = alpha + digits + '_'
    operators = '+-*/'
    newline = ';'
    addsub = ['+', '-']
    muldiv = ['*', '/']
    boolops = ['<', '>', '==', '!=']

    def __init__(self, code):
        '''Start lexing *code* (which may be empty) and read the first symbol.'''
        self.pos = 0
        self.code = code
        self.count = len(code)
        # '' is the end-of-text marker; it keeps empty input from crashing.
        self.char = code[0] if code else ''
        self.col = 1
        self.sym = None
        # Seed with the first character so error context is not missing it.
        self.linebuf = self.char
        self.line = 1
        self.end = False
        self.logger = logging.getLogger('Lexer')
        self.logger.debug('Lexer initialised')
        self.logger.debug('Code length: %s characters', self.count)
        self.next_symbol()

    # -- classification of the lookahead character -------------------------
    # bool(self.char) guards the end-of-text marker: '' is "in" every string.

    def isdigit(self):
        '''Return True if the lookahead character is a decimal digit.'''
        return bool(self.char) and self.char in lex.digits

    def isalpha(self):
        '''Return True if the lookahead character is an ASCII letter.'''
        return bool(self.char) and self.char in lex.alpha

    def isident(self):
        '''Return True if the lookahead character may continue an identifier.'''
        return bool(self.char) and self.char in lex.ident

    def isop(self):
        '''Return True if the lookahead character is an arithmetic operator.'''
        return bool(self.char) and self.char in lex.operators

    # -- classification of the current symbol -------------------------------

    def isnumber(self):
        '''Return True if the current symbol is a non-empty run of digits.'''
        return bool(self.sym) and all(ch in lex.digits for ch in self.sym)

    def isidentifier(self):
        '''Return True if the current symbol is a letter followed by
        letters, digits or underscores.'''
        if not self.sym or self.sym[0] not in lex.alpha:
            return False
        return all(ch in lex.ident for ch in self.sym[1:])

    # -- scanning -------------------------------------------------------------

    def next_char(self):
        '''Advance the lookahead by one character.

        Past the last character ``char`` becomes '' and the position stops
        moving, so calling this at the end of the text is harmless.
        '''
        if self.pos >= self.count:
            self.char = ''
            return
        self.pos += 1
        self.col += 1
        if self.pos < self.count:
            self.char = self.code[self.pos]
            self.linebuf += self.char
        else:
            self.char = ''

    def _peek(self):
        '''Return the character after the lookahead, or '' if there is none.'''
        following = self.pos + 1
        return self.code[following] if following < self.count else ''

    def _scan(self, accept):
        '''Consume the lookahead and every following character for which
        *accept()* is true; return the consumed text.'''
        chars = []
        while True:
            chars.append(self.char)
            self.next_char()
            if not accept():
                return ''.join(chars)

    def next_symbol(self):
        '''Read the next symbol into ``sym``.

        Whitespace and the statement terminator ';' are skipped (';' also
        bumps ``line``). When the text is exhausted ``sym`` becomes None and
        ``end`` becomes True. If ``end`` is already True nothing changes.
        '''
        while True:
            if self.end:
                return
            if not self.char:
                # Nothing left: clear the symbol so callers cannot keep
                # re-matching a stale one.
                self.sym = None
                self.end = True
                return
            if self.char in ' \t\n':
                self.next_char()
            elif self.char == lex.newline:
                self.line += 1
                self.col = 1
                self.linebuf = ''
                self.next_char()
            elif self.isdigit():
                self.sym = self._scan(self.isdigit)
                return
            elif self.isalpha():
                self.sym = self._scan(self.isident)
                return
            elif self.isop():
                # Adjacent operator characters form a single symbol.
                self.sym = self._scan(self.isop)
                return
            elif self.char in '=!' and self._peek() == '=':
                # Two-character comparison operators listed in ``boolops``.
                self.sym = self.char + '='
                self.next_char()
                self.next_char()
                return
            else:
                self.sym = self.char
                self.next_char()
                return

    def expect_symbol(self, sym):
        '''If the current symbol equals *sym*, advance and return True;
        otherwise log an error, leave the symbol in place, return False.'''
        if self.sym == sym:
            self.next_symbol()
            return True
        self.logger.error('Expect symbol: %s, got %s', sym, self.sym)
        return False

    def accept_symbol(self, sym):
        '''Return True if the current symbol equals *sym* (does not advance).'''
        return self.sym == sym

    def accept_symbols(self, sym):
        '''Return True if the current symbol is one of *sym* (does not advance).'''
        return self.sym in sym


class output(object):
    '''Top-level opcode sink: records every opcode and writes it, one per
    line, to ``out_file`` (``sys.stdout`` at construction time).'''

    def __init__(self):
        self.out_file = sys.stdout
        self.opcodes = []
        self.count = 0

    def __call__(self, data):
        '''Emit the opcode string *data*.'''
        self.out_file.write(data + '\n')
        self.opcodes.append(data)
        self.count += 1


class block(object):
    '''Buffered opcode sink used for an ``if`` body.

    Opcodes are held back so the parser can emit the conditional jump, whose
    offset depends on the body length, before the body itself.
    '''

    def __init__(self, parent):
        '''*parent* is the sink (an ``output`` or another ``block``) that
        receives the buffered opcodes on ``insert_block``.'''
        self.opcodes = []
        self.count = 0
        self.parent = parent

    def __call__(self, data):
        '''Buffer the opcode string *data*.'''
        self.opcodes.append(data)
        self.count += 1

    def insert_block(self):
        '''Flush the buffered opcodes into the parent sink and return it.

        Going through the parent's own call keeps its count/opcodes in step
        and means text is written only by an ``output`` sink, to its
        ``out_file``, exactly once per opcode even for nested blocks.
        '''
        for op in self.opcodes:
            self.parent(op)
        return self.parent


class parser(object):
    '''Recursive-descent parser that emits stack-machine opcodes.

    Grammar, as implemented:
        statement  := 'END' | ident '=' expression
                    | 'if' expression boolop expression 'then'
                      statement* 'endif'
        expression := term (('+' | '-') term)*
        term       := factor (('*' | '/') factor)*
        factor     := exponent ('^' exponent)*
        exponent   := number | ident | '(' expression ')'
    '''

    # Conditional jumps skip the body, so each test is emitted inverted.
    # NOTE(review): the negation of '>' is '<=', yet '>' falls back to JLT
    # exactly as before; confirm against the target machine's opcode set.
    _inverse_jump = {'==': 'JNEQ', '!=': 'JEQU', '<': 'JGE'}

    def __init__(self, lexer, out=None):
        '''
        lexer: a ``lex`` instance positioned on its first symbol.
        out:   opcode sink; a fresh ``output`` is created when omitted so
               that parsers never share one by accident.
        '''
        self.lexer = lexer
        self.output = output() if out is None else out
        self.logger = logging.getLogger('Parser')
        self.logger.debug('Parser initialised')
        self.symbol_table = []

    def error(self, exception, message):
        '''Log *message* and raise *exception* with the lexer position
        (line, column, text read so far on the line) appended.'''
        self.logger.critical(message)
        raise exception(message + '\n' + 'Line: %s, Column: %s\n %s' % (self.lexer.line, self.lexer.col, self.lexer.linebuf))

    def expression(self):
        '''Parse additive terms, emitting ADD/SUB after each right operand.'''
        self.term()
        while self.lexer.accept_symbols(lex.addsub):
            opcode = 'ADD' if self.lexer.sym == '+' else 'SUB'
            self.lexer.next_symbol()
            self.term()
            self.output(opcode)

    def term(self):
        '''Parse multiplicative factors, emitting MUL/DIV after each right
        operand.'''
        self.factor()
        while self.lexer.accept_symbols(lex.muldiv):
            opcode = 'MUL' if self.lexer.sym == '*' else 'DIV'
            self.lexer.next_symbol()
            self.factor()
            self.output(opcode)

    def factor(self):
        '''Parse '^' chains, emitting POW after each right operand.'''
        self.exponent()
        while self.lexer.accept_symbol('^'):
            self.lexer.next_symbol()
            self.exponent()
            self.output('POW')

    def exponent(self):
        '''Parse a number, a previously assigned variable, or a
        parenthesised expression.

        Raises NameError for an unassigned variable and SyntaxError for a
        missing ')'. Any other symbol is left untouched and nothing is
        emitted (the original author disabled the error for that case).
        '''
        if self.lexer.isnumber():
            self.output('PUSH #%s' % self.lexer.sym)
            self.lexer.next_symbol()
        elif self.lexer.isidentifier():
            ##check if the symbol is defined
            if self.lexer.sym not in self.symbol_table:
                self.error(NameError, 'Variable referenced before assignement')
            self.output('PUSH %s' % self.lexer.sym)
            self.lexer.next_symbol()
        elif self.lexer.accept_symbol('('):
            self.lexer.next_symbol()
            self.expression()
            if not self.lexer.expect_symbol(')'):
                self.error(SyntaxError, 'Close parentheses expected ")", got %s' % self.lexer.sym)

    def statement(self):
        '''Parse one statement.

        'END' stops the lexer. An assignment that is directly followed by
        an ``if`` consumes that ``if`` in the same call, as it always has.
        Raises SyntaxError when the current symbol cannot start a statement.
        '''
        if self.lexer.accept_symbol('END'):
            self.lexer.end = True
            return
        handled = False
        # 'if' is a keyword: it must not be mistaken for an assignment target.
        if not self.lexer.accept_symbol('if') and self.lexer.isidentifier():
            self._assignment()
            handled = True
        if self.lexer.accept_symbol('if'):
            self._if_statement()
            handled = True
        if not handled:
            # Without this the caller's loop would spin on the same symbol.
            self.error(SyntaxError, 'Unexpected symbol: %s' % self.lexer.sym)

    def _assignment(self):
        '''Parse ``ident = expression`` and emit ``POP ident``.'''
        ident = self.lexer.sym
        self.lexer.next_symbol()
        if not self.lexer.expect_symbol('='):
            self.error(SyntaxError, 'Expected "=", got %s' % self.lexer.sym)
        self.expression()
        self.output('POP %s' % ident)
        self.symbol_table.append(ident)

    def _if_statement(self):
        '''Parse ``if a <op> b then ... endif``.

        Both operands are emitted first, then an inverted conditional jump
        of ``body length + 1``, then the buffered body.
        '''
        self.lexer.next_symbol()
        self.expression()
        if self.lexer.accept_symbols(lex.boolops):
            boolop = self.lexer.sym
            self.lexer.next_symbol()
        else:
            self.error(SyntaxError, 'Expected boolean operator, got %s' % self.lexer.sym)
        self.expression()
        if not self.lexer.expect_symbol('then'):
            self.error(SyntaxError, 'Expected "then", got %s' % self.lexer.sym)
        if_block = block(self.output)
        self.output = if_block
        while not self.lexer.accept_symbol('endif'):
            if self.lexer.end:
                # END or end of input inside the body: stop instead of
                # looping forever on a symbol that will never change.
                self.error(SyntaxError, 'Expected "endif", got %s' % self.lexer.sym)
            self.statement()
        self.lexer.next_symbol()
        jmp = if_block.count + 1
        opcode = self._inverse_jump.get(boolop, 'JLT')
        if_block.parent(opcode + ' .+%s' % jmp)
        self.output = if_block.insert_block()

    def run(self):
        '''Parse statements until the lexer reports the end.'''
        while not self.lexer.end:
            self.statement()


# Demo program; it runs at import time, as it always has.
#lexer = lex('234 + a * (3+(89-8))^4;')
lexer = lex('a = 1; b=2; if a > b then x = 100; c = 4; '
            'if (x+1)/3 < c then y = 4; endif; endif; d=4; END;')
parse = parser(lexer)
parse.run()