// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "tools/gn/parser.h" #include "base/logging.h" #include "tools/gn/functions.h" #include "tools/gn/operators.h" #include "tools/gn/token.h" // grammar: // // file := (statement)* // statement := block | if | assignment // block := '{' statement* '}' // if := 'if' '(' expr ')' statement [ else ] // else := 'else' (if | statement)* // assignment := ident {'=' | '+=' | '-='} expr enum Precedence { PRECEDENCE_ASSIGNMENT = 1, PRECEDENCE_OR = 2, PRECEDENCE_AND = 3, PRECEDENCE_EQUALITY = 4, PRECEDENCE_RELATION = 5, PRECEDENCE_SUM = 6, PRECEDENCE_PREFIX = 7, PRECEDENCE_CALL = 8, }; // The top-level for blocks/ifs is still recursive descent, the expression // parser is a Pratt parser. The basic idea there is to have the precedences // (and associativities) encoded relative to each other and only parse up // until you hit something of that precedence. There's a dispatch table in // expressions_ at the top of parser.cc that describes how each token // dispatches if it's seen as either a prefix or infix operator, and if it's // infix, what its precedence is. // // Refs: // - http://javascript.crockford.com/tdop/tdop.html // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ // Indexed by Token::Type. ParserHelper Parser::expressions_[] = { {NULL, NULL, -1}, // INVALID {&Parser::Literal, NULL, -1}, // INTEGER {&Parser::Literal, NULL, -1}, // STRING {&Parser::Literal, NULL, -1}, // TRUE_TOKEN {&Parser::Literal, NULL, -1}, // FALSE_TOKEN {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN {NULL, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND {NULL, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR {&Parser::Not, NULL, -1}, // BANG {&Parser::Group, NULL, -1}, // LEFT_PAREN {NULL, NULL, -1}, // RIGHT_PAREN {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET {NULL, NULL, -1}, // RIGHT_BRACKET {NULL, NULL, -1}, // LEFT_BRACE {NULL, NULL, -1}, // RIGHT_BRACE {NULL, NULL, -1}, // IF {NULL, NULL, -1}, // ELSE {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER {NULL, NULL, -1}, // COMMA {NULL, NULL, -1}, // COMMENT }; Parser::Parser(const std::vector& tokens, Err* err) : tokens_(tokens), err_(err), cur_(0) { } Parser::~Parser() { } // static scoped_ptr Parser::Parse(const std::vector& tokens, Err* err) { Parser p(tokens, err); return p.ParseFile().PassAs(); } // static scoped_ptr Parser::ParseExpression(const std::vector& tokens, Err* err) { Parser p(tokens, err); return p.ParseExpression().Pass(); } bool Parser::IsAssignment(const ParseNode* node) const { return node && node->AsBinaryOp() && (node->AsBinaryOp()->op().type() == Token::EQUAL || node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS || node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS); } bool Parser::IsStatementBreak(Token::Type token_type) const { switch (token_type) { case Token::IDENTIFIER: case Token::LEFT_BRACE: case Token::RIGHT_BRACE: case Token::IF: case Token::ELSE: return true; default: return false; } } bool Parser::LookAhead(Token::Type type) { if (at_end()) return false; return cur_token().type() == type; } bool Parser::Match(Token::Type type) { if (!LookAhead(type)) return false; Consume(); return true; } Token Parser::Consume(Token::Type type, const char* error_message) { Token::Type types[1] = { type }; return Consume(types, 1, error_message); } Token Parser::Consume(Token::Type* types, size_t num_types, const char* error_message) { if (has_error()) { // Don't overwrite current error, but make progress through tokens so that // a loop that's expecting a particular token will still terminate. cur_++; return Token(Location(), Token::INVALID, base::StringPiece()); } if (at_end()) { const char kEOFMsg[] = "I hit EOF instead."; if (tokens_.empty()) *err_ = Err(Location(), error_message, kEOFMsg); else *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg); return Token(Location(), Token::INVALID, base::StringPiece()); } for (size_t i = 0; i < num_types; ++i) { if (cur_token().type() == types[i]) return tokens_[cur_++]; } *err_ = Err(cur_token(), error_message); return Token(Location(), Token::INVALID, base::StringPiece()); } Token Parser::Consume() { return tokens_[cur_++]; } scoped_ptr Parser::ParseExpression() { return ParseExpression(0); } scoped_ptr Parser::ParseExpression(int precedence) { if (at_end()) return scoped_ptr(); Token token = Consume(); PrefixFunc prefix = expressions_[token.type()].prefix; if (prefix == NULL) { *err_ = Err(token, std::string("Unexpected token '") + token.value().as_string() + std::string("'")); return scoped_ptr(); } scoped_ptr left = (this->*prefix)(token); if (has_error()) return left.Pass(); while (!at_end() && !IsStatementBreak(cur_token().type()) && precedence <= expressions_[cur_token().type()].precedence) { token = Consume(); InfixFunc infix = expressions_[token.type()].infix; if (infix == NULL) { *err_ = Err(token, std::string("Unexpected token '") + token.value().as_string() + std::string("'")); return scoped_ptr(); } left = (this->*infix)(left.Pass(), token); if (has_error()) return scoped_ptr(); } return left.Pass(); } scoped_ptr Parser::Literal(Token token) { return scoped_ptr(new LiteralNode(token)).Pass(); } scoped_ptr Parser::Name(Token token) { return IdentifierOrCall(scoped_ptr(), token).Pass(); } scoped_ptr Parser::Group(Token token) { scoped_ptr expr = ParseExpression(); if (has_error()) return scoped_ptr(); Consume(Token::RIGHT_PAREN, "Expected ')'"); return expr.Pass(); } scoped_ptr Parser::Not(Token token) { scoped_ptr expr = ParseExpression(PRECEDENCE_PREFIX + 1); if (has_error()) return scoped_ptr(); scoped_ptr unary_op(new UnaryOpNode); unary_op->set_op(token); unary_op->set_operand(expr.Pass()); return unary_op.PassAs(); } scoped_ptr Parser::List(Token node) { scoped_ptr list(ParseList(Token::RIGHT_BRACKET, true)); if (!has_error() && !at_end()) Consume(Token::RIGHT_BRACKET, "Expected ']'"); return list.Pass(); } scoped_ptr Parser::BinaryOperator(scoped_ptr left, Token token) { scoped_ptr right = ParseExpression(expressions_[token.type()].precedence + 1); if (!right) { *err_ = Err(token, "Expected right hand side for '" + token.value().as_string() + "'"); return scoped_ptr(); } scoped_ptr binary_op(new BinaryOpNode); binary_op->set_op(token); binary_op->set_left(left.Pass()); binary_op->set_right(right.Pass()); return binary_op.PassAs(); } scoped_ptr Parser::IdentifierOrCall(scoped_ptr left, Token token) { scoped_ptr list(new ListNode); list->set_begin_token(token); list->set_end_token(token); scoped_ptr block; bool has_arg = false; if (Match(Token::LEFT_PAREN)) { // Parsing a function call. has_arg = true; if (Match(Token::RIGHT_PAREN)) { // Nothing, just an empty call. } else { list = ParseList(Token::RIGHT_PAREN, false); if (has_error()) return scoped_ptr(); Consume(Token::RIGHT_PAREN, "Expected ')' after call"); } // Optionally with a scope. if (LookAhead(Token::LEFT_BRACE)) { block = ParseBlock(); if (has_error()) return scoped_ptr(); } } if (!left && !has_arg) { // Not a function call, just a standalone identifier. return scoped_ptr(new IdentifierNode(token)).Pass(); } scoped_ptr func_call(new FunctionCallNode); func_call->set_function(token); func_call->set_args(list.Pass()); if (block) func_call->set_block(block.Pass()); return func_call.PassAs(); } scoped_ptr Parser::Assignment(scoped_ptr left, Token token) { if (left->AsIdentifier() == NULL) { *err_ = Err(left.get(), "Left-hand side of assignment must be identifier."); return scoped_ptr(); } scoped_ptr value = ParseExpression(PRECEDENCE_ASSIGNMENT); scoped_ptr assign(new BinaryOpNode); assign->set_op(token); assign->set_left(left.Pass()); assign->set_right(value.Pass()); return assign.PassAs(); } scoped_ptr Parser::Subscript(scoped_ptr left, Token token) { // TODO: Maybe support more complex expressions like a[0][0]. This would // require work on the evaluator too. if (left->AsIdentifier() == NULL) { *err_ = Err(left.get(), "May only subscript simple identifiers"); return scoped_ptr(); } scoped_ptr value = ParseExpression(); Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript."); scoped_ptr accessor(new AccessorNode); accessor->set_base(left->AsIdentifier()->value()); accessor->set_index(value.Pass()); return accessor.PassAs(); } // Does not Consume the start or end token. scoped_ptr Parser::ParseList(Token::Type stop_before, bool allow_trailing_comma) { scoped_ptr list(new ListNode); list->set_begin_token(cur_token()); bool just_got_comma = false; while (!LookAhead(stop_before)) { just_got_comma = false; // Why _OR? We're parsing things that are higher precedence than the , // that separates the items of the list. , should appear lower than // boolean expressions (the lowest of which is OR), but above assignments. list->append_item(ParseExpression(PRECEDENCE_OR)); if (has_error()) return scoped_ptr(); if (at_end()) { *err_ = Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list."); return scoped_ptr(); } just_got_comma = Match(Token::COMMA); } if (just_got_comma && !allow_trailing_comma) { *err_ = Err(cur_token(), "Trailing comma"); return scoped_ptr(); } list->set_end_token(cur_token()); return list.Pass(); } scoped_ptr Parser::ParseFile() { scoped_ptr file(new BlockNode(false)); for (;;) { if (at_end()) break; scoped_ptr statement = ParseStatement(); if (!statement) break; file->append_statement(statement.Pass()); } if (!at_end() && !has_error()) *err_ = Err(cur_token(), "Unexpected here, should be newline."); if (has_error()) return scoped_ptr(); return file.PassAs(); } scoped_ptr Parser::ParseStatement() { if (LookAhead(Token::LEFT_BRACE)) { return ParseBlock().PassAs(); } else if (LookAhead(Token::IF)) { return ParseCondition(); } else { // TODO(scottmg): Is this too strict? Just drop all the testing if we want // to allow "pointless" expressions and return ParseExpression() directly. scoped_ptr stmt = ParseExpression(); if (stmt) { if (stmt->AsFunctionCall() || IsAssignment(stmt.get())) return stmt.Pass(); } if (!has_error()) { Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token(); *err_ = Err(token, "Expecting assignment or function call."); } return scoped_ptr(); } } scoped_ptr Parser::ParseBlock() { Token begin_token = Consume(Token::LEFT_BRACE, "Expected '{' to start a block."); if (has_error()) return scoped_ptr(); scoped_ptr block(new BlockNode(true)); block->set_begin_token(begin_token); for (;;) { if (LookAhead(Token::RIGHT_BRACE)) { block->set_end_token(Consume()); break; } scoped_ptr statement = ParseStatement(); if (!statement) return scoped_ptr(); block->append_statement(statement.Pass()); } return block.Pass(); } scoped_ptr Parser::ParseCondition() { scoped_ptr condition(new ConditionNode); Consume(Token::IF, "Expected 'if'"); Consume(Token::LEFT_PAREN, "Expected '(' after 'if'."); condition->set_condition(ParseExpression()); if (IsAssignment(condition->condition())) *err_ = Err(condition->condition(), "Assignment not allowed in 'if'."); Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'."); condition->set_if_true(ParseBlock().Pass()); if (Match(Token::ELSE)) condition->set_if_false(ParseStatement().Pass()); if (has_error()) return scoped_ptr(); return condition.PassAs(); }