classLexer: deftokenize(self, expression) -> list[Token]: tokens = [] while expression: match = None # Try to match each token type for token_type, pattern in TOKEN_REGEX.items(): match = re.match(pattern, expression)
if match: value = match.group(0) expression = expression[len(value):]
def__repr__(self, indent) -> str: res = "\t" * indent + "{\n" res += "\t" * (indent+1) + f"{self.type.name}\n" # NOTE: no sub tree res += "\t" * indent + "}\n" return res
def__repr__(self, indent) -> str: res = "\t" * indent + "{\n" res += "\t" * (indent+1) + f"{self.type.name}\n" # NOTE: indent sub tree res += f"{self.left.__repr__(indent+1)}" res += "\t" * (indent+1) + f"{self.operator}\n" res += f"{self.right.__repr__(indent+1)}" res += "\t" * indent + "}\n" return res
def__repr__(self, indent) -> str: res = "\t" * indent + "{\n" res += "\t" * (indent+1) + f"{self.type.name}\n" res += "\t" * (indent+1) + f"{self.value}\n" # NOTE: no sub tree res += "\t" * indent + "}\n" return res
from lexer import Token, TokenType, Lexer from ast import Number, Program, Stmt, Expr, BinExpr
''' parser program -> stmt* # a program consists of multiple statements stmt -> expr # each statement is an expression expr -> bin_expr # each expression is a binary expression bin_expr -> add_expr # binary expression is an addition expression add_expr -> mul_expr((PLUS|MIN mul_expr)*) # addition expression can be: mul_expr followed by multiple +/mul_expr, because addition and subtraction have the same priority, so they are closer to the leaf mul_expr -> NUM((MUL|DIV)NUM*) # multiplication expression can be: NUM followed by multiple *NUM, because multiplication and division have the same priority, so they are closer to the leaf NUM -> INT # NUM is a factor which can be a number, variable, or parenthesized expression '''
defparse_program(self) -> Program: ''' program -> stmt* ''' program = Program() while self.tk().type != TokenType.EOF: program.body.append(self.parse_stmt()) return program
while self.tk().value in ('+', '-'): op = self.eat() right = self.parse_mul_expr() left = BinExpr(left, op, right) return left
defparse_mul_expr(self) -> Expr: ''' mul_expr -> NUM((MUL|DIV)NUM*) ''' left = self.parse_num() while self.tk().value in ('*', '/'): op = self.eat() right = self.parse_num() left = BinExpr(left, op, right) return left
defparse_num(self) -> Expr: ''' NUM -> INT ''' if self.tk().type == TokenType.NUMBER: return Number(self.eat().value) elif self.tk().type == TokenType.OPEN_PAREN: self.eat() expr = self.parse_expr() self.expect(TokenType.CLOSE_PAREN) return expr
raise Exception(__file__, f"Expected NUM, but got {self.tk().type}")
defmain(): whileTrue: expr = input(">>>") if expr == ".q": break parser = Parser() program = parser.parse(expr) print(program)