// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "tools/gn/parser.h" #include "base/logging.h" #include "tools/gn/functions.h" #include "tools/gn/operators.h" #include "tools/gn/token.h" // grammar: // // file := (statement)* // statement := block | if | assignment // block := '{' statement* '}' // if := 'if' '(' expr ')' statement [ else ] // else := 'else' (if | statement)* // assignment := ident {'=' | '+=' | '-='} expr enum Precedence { PRECEDENCE_ASSIGNMENT = 1, // Lowest precedence. PRECEDENCE_OR = 2, PRECEDENCE_AND = 3, PRECEDENCE_EQUALITY = 4, PRECEDENCE_RELATION = 5, PRECEDENCE_SUM = 6, PRECEDENCE_PREFIX = 7, PRECEDENCE_CALL = 8, PRECEDENCE_DOT = 9, // Highest precedence. }; // The top-level for blocks/ifs is recursive descent, the expression parser is // a Pratt parser. The basic idea there is to have the precedences (and // associativities) encoded relative to each other and only parse up until you // hit something of that precedence. There's a dispatch table in expressions_ // at the top of parser.cc that describes how each token dispatches if it's // seen as either a prefix or infix operator, and if it's infix, what its // precedence is. // // Refs: // - http://javascript.crockford.com/tdop/tdop.html // - http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/ // Indexed by Token::Type. ParserHelper Parser::expressions_[] = { {NULL, NULL, -1}, // INVALID {&Parser::Literal, NULL, -1}, // INTEGER {&Parser::Literal, NULL, -1}, // STRING {&Parser::Literal, NULL, -1}, // TRUE_TOKEN {&Parser::Literal, NULL, -1}, // FALSE_TOKEN {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // PLUS {NULL, &Parser::BinaryOperator, PRECEDENCE_SUM}, // MINUS {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // PLUS_EQUALS {NULL, &Parser::Assignment, PRECEDENCE_ASSIGNMENT}, // MINUS_EQUALS {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // EQUAL_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_EQUALITY}, // NOT_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_EQUAL {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // LESS_THAN {NULL, &Parser::BinaryOperator, PRECEDENCE_RELATION}, // GREATER_THAN {NULL, &Parser::BinaryOperator, PRECEDENCE_AND}, // BOOLEAN_AND {NULL, &Parser::BinaryOperator, PRECEDENCE_OR}, // BOOLEAN_OR {&Parser::Not, NULL, -1}, // BANG {NULL, &Parser::DotOperator, PRECEDENCE_DOT}, // DOT {&Parser::Group, NULL, -1}, // LEFT_PAREN {NULL, NULL, -1}, // RIGHT_PAREN {&Parser::List, &Parser::Subscript, PRECEDENCE_CALL}, // LEFT_BRACKET {NULL, NULL, -1}, // RIGHT_BRACKET {NULL, NULL, -1}, // LEFT_BRACE {NULL, NULL, -1}, // RIGHT_BRACE {NULL, NULL, -1}, // IF {NULL, NULL, -1}, // ELSE {&Parser::Name, &Parser::IdentifierOrCall, PRECEDENCE_CALL}, // IDENTIFIER {NULL, NULL, -1}, // COMMA {NULL, NULL, -1}, // COMMENT }; Parser::Parser(const std::vector& tokens, Err* err) : tokens_(tokens), err_(err), cur_(0) { } Parser::~Parser() { } // static scoped_ptr Parser::Parse(const std::vector& tokens, Err* err) { Parser p(tokens, err); return p.ParseFile().PassAs(); } // static scoped_ptr Parser::ParseExpression(const std::vector& tokens, Err* err) { Parser p(tokens, err); return p.ParseExpression().Pass(); } bool Parser::IsAssignment(const ParseNode* node) const { return node && node->AsBinaryOp() && (node->AsBinaryOp()->op().type() == Token::EQUAL || node->AsBinaryOp()->op().type() == Token::PLUS_EQUALS || node->AsBinaryOp()->op().type() == Token::MINUS_EQUALS); } bool Parser::IsStatementBreak(Token::Type token_type) const { switch (token_type) { case Token::IDENTIFIER: case Token::LEFT_BRACE: case Token::RIGHT_BRACE: case Token::IF: case Token::ELSE: return true; default: return false; } } bool Parser::LookAhead(Token::Type type) { if (at_end()) return false; return cur_token().type() == type; } bool Parser::Match(Token::Type type) { if (!LookAhead(type)) return false; Consume(); return true; } Token Parser::Consume(Token::Type type, const char* error_message) { Token::Type types[1] = { type }; return Consume(types, 1, error_message); } Token Parser::Consume(Token::Type* types, size_t num_types, const char* error_message) { if (has_error()) { // Don't overwrite current error, but make progress through tokens so that // a loop that's expecting a particular token will still terminate. cur_++; return Token(Location(), Token::INVALID, base::StringPiece()); } if (at_end()) { const char kEOFMsg[] = "I hit EOF instead."; if (tokens_.empty()) *err_ = Err(Location(), error_message, kEOFMsg); else *err_ = Err(tokens_[tokens_.size() - 1], error_message, kEOFMsg); return Token(Location(), Token::INVALID, base::StringPiece()); } for (size_t i = 0; i < num_types; ++i) { if (cur_token().type() == types[i]) return tokens_[cur_++]; } *err_ = Err(cur_token(), error_message); return Token(Location(), Token::INVALID, base::StringPiece()); } Token Parser::Consume() { return tokens_[cur_++]; } scoped_ptr Parser::ParseExpression() { return ParseExpression(0); } scoped_ptr Parser::ParseExpression(int precedence) { if (at_end()) return scoped_ptr(); Token token = Consume(); PrefixFunc prefix = expressions_[token.type()].prefix; if (prefix == NULL) { *err_ = Err(token, std::string("Unexpected token '") + token.value().as_string() + std::string("'")); return scoped_ptr(); } scoped_ptr left = (this->*prefix)(token); if (has_error()) return left.Pass(); while (!at_end() && !IsStatementBreak(cur_token().type()) && precedence <= expressions_[cur_token().type()].precedence) { token = Consume(); InfixFunc infix = expressions_[token.type()].infix; if (infix == NULL) { *err_ = Err(token, std::string("Unexpected token '") + token.value().as_string() + std::string("'")); return scoped_ptr(); } left = (this->*infix)(left.Pass(), token); if (has_error()) return scoped_ptr(); } return left.Pass(); } scoped_ptr Parser::Literal(Token token) { return scoped_ptr(new LiteralNode(token)).Pass(); } scoped_ptr Parser::Name(Token token) { return IdentifierOrCall(scoped_ptr(), token).Pass(); } scoped_ptr Parser::Group(Token token) { scoped_ptr expr = ParseExpression(); if (has_error()) return scoped_ptr(); Consume(Token::RIGHT_PAREN, "Expected ')'"); return expr.Pass(); } scoped_ptr Parser::Not(Token token) { scoped_ptr expr = ParseExpression(PRECEDENCE_PREFIX + 1); if (has_error()) return scoped_ptr(); scoped_ptr unary_op(new UnaryOpNode); unary_op->set_op(token); unary_op->set_operand(expr.Pass()); return unary_op.PassAs(); } scoped_ptr Parser::List(Token node) { scoped_ptr list(ParseList(Token::RIGHT_BRACKET, true)); if (!has_error() && !at_end()) Consume(Token::RIGHT_BRACKET, "Expected ']'"); return list.Pass(); } scoped_ptr Parser::BinaryOperator(scoped_ptr left, Token token) { scoped_ptr right = ParseExpression(expressions_[token.type()].precedence + 1); if (!right) { *err_ = Err(token, "Expected right hand side for '" + token.value().as_string() + "'"); return scoped_ptr(); } scoped_ptr binary_op(new BinaryOpNode); binary_op->set_op(token); binary_op->set_left(left.Pass()); binary_op->set_right(right.Pass()); return binary_op.PassAs(); } scoped_ptr Parser::IdentifierOrCall(scoped_ptr left, Token token) { scoped_ptr list(new ListNode); list->set_begin_token(token); list->set_end_token(token); scoped_ptr block; bool has_arg = false; if (Match(Token::LEFT_PAREN)) { // Parsing a function call. has_arg = true; if (Match(Token::RIGHT_PAREN)) { // Nothing, just an empty call. } else { list = ParseList(Token::RIGHT_PAREN, false); if (has_error()) return scoped_ptr(); Consume(Token::RIGHT_PAREN, "Expected ')' after call"); } // Optionally with a scope. if (LookAhead(Token::LEFT_BRACE)) { block = ParseBlock(); if (has_error()) return scoped_ptr(); } } if (!left && !has_arg) { // Not a function call, just a standalone identifier. return scoped_ptr(new IdentifierNode(token)).Pass(); } scoped_ptr func_call(new FunctionCallNode); func_call->set_function(token); func_call->set_args(list.Pass()); if (block) func_call->set_block(block.Pass()); return func_call.PassAs(); } scoped_ptr Parser::Assignment(scoped_ptr left, Token token) { if (left->AsIdentifier() == NULL) { *err_ = Err(left.get(), "Left-hand side of assignment must be identifier."); return scoped_ptr(); } scoped_ptr value = ParseExpression(PRECEDENCE_ASSIGNMENT); scoped_ptr assign(new BinaryOpNode); assign->set_op(token); assign->set_left(left.Pass()); assign->set_right(value.Pass()); return assign.PassAs(); } scoped_ptr Parser::Subscript(scoped_ptr left, Token token) { // TODO: Maybe support more complex expressions like a[0][0]. This would // require work on the evaluator too. if (left->AsIdentifier() == NULL) { *err_ = Err(left.get(), "May only subscript identifiers.", "The thing on the left hand side of the [] must be an identifier\n" "and not an expression. If you need this, you'll have to assign the\n" "value to a temporary before subscripting. Sorry."); return scoped_ptr(); } scoped_ptr value = ParseExpression(); Consume(Token::RIGHT_BRACKET, "Expecting ']' after subscript."); scoped_ptr accessor(new AccessorNode); accessor->set_base(left->AsIdentifier()->value()); accessor->set_index(value.Pass()); return accessor.PassAs(); } scoped_ptr Parser::DotOperator(scoped_ptr left, Token token) { if (left->AsIdentifier() == NULL) { *err_ = Err(left.get(), "May only use \".\" for identifiers.", "The thing on the left hand side of the dot must be an identifier\n" "and not an expression. If you need this, you'll have to assign the\n" "value to a temporary first. Sorry."); return scoped_ptr(); } scoped_ptr right = ParseExpression(PRECEDENCE_DOT); if (!right || !right->AsIdentifier()) { *err_ = Err(token, "Expected identifier for right-hand-side of \".\"", "Good: a.cookies\nBad: a.42\nLooks good but still bad: a.cookies()"); return scoped_ptr(); } scoped_ptr accessor(new AccessorNode); accessor->set_base(left->AsIdentifier()->value()); accessor->set_member(scoped_ptr( static_cast(right.release()))); return accessor.PassAs(); } // Does not Consume the start or end token. scoped_ptr Parser::ParseList(Token::Type stop_before, bool allow_trailing_comma) { scoped_ptr list(new ListNode); list->set_begin_token(cur_token()); bool just_got_comma = false; bool first_time = true; while (!LookAhead(stop_before)) { if (!first_time) { if (!just_got_comma) { // Require commas separate things in lists. *err_ = Err(cur_token(), "Expected comma between items."); return scoped_ptr(); } } first_time = false; // Why _OR? We're parsing things that are higher precedence than the , // that separates the items of the list. , should appear lower than // boolean expressions (the lowest of which is OR), but above assignments. list->append_item(ParseExpression(PRECEDENCE_OR)); if (has_error()) return scoped_ptr(); if (at_end()) { *err_ = Err(tokens_[tokens_.size() - 1], "Unexpected end of file in list."); return scoped_ptr(); } just_got_comma = Match(Token::COMMA); } if (just_got_comma && !allow_trailing_comma) { *err_ = Err(cur_token(), "Trailing comma"); return scoped_ptr(); } list->set_end_token(cur_token()); return list.Pass(); } scoped_ptr Parser::ParseFile() { scoped_ptr file(new BlockNode(false)); for (;;) { if (at_end()) break; scoped_ptr statement = ParseStatement(); if (!statement) break; file->append_statement(statement.Pass()); } if (!at_end() && !has_error()) *err_ = Err(cur_token(), "Unexpected here, should be newline."); if (has_error()) return scoped_ptr(); return file.PassAs(); } scoped_ptr Parser::ParseStatement() { if (LookAhead(Token::LEFT_BRACE)) { return ParseBlock().PassAs(); } else if (LookAhead(Token::IF)) { return ParseCondition(); } else { // TODO(scottmg): Is this too strict? Just drop all the testing if we want // to allow "pointless" expressions and return ParseExpression() directly. scoped_ptr stmt = ParseExpression(); if (stmt) { if (stmt->AsFunctionCall() || IsAssignment(stmt.get())) return stmt.Pass(); } if (!has_error()) { Token token = at_end() ? tokens_[tokens_.size() - 1] : cur_token(); *err_ = Err(token, "Expecting assignment or function call."); } return scoped_ptr(); } } scoped_ptr Parser::ParseBlock() { Token begin_token = Consume(Token::LEFT_BRACE, "Expected '{' to start a block."); if (has_error()) return scoped_ptr(); scoped_ptr block(new BlockNode(true)); block->set_begin_token(begin_token); for (;;) { if (LookAhead(Token::RIGHT_BRACE)) { block->set_end_token(Consume()); break; } scoped_ptr statement = ParseStatement(); if (!statement) return scoped_ptr(); block->append_statement(statement.Pass()); } return block.Pass(); } scoped_ptr Parser::ParseCondition() { scoped_ptr condition(new ConditionNode); Consume(Token::IF, "Expected 'if'"); Consume(Token::LEFT_PAREN, "Expected '(' after 'if'."); condition->set_condition(ParseExpression()); if (IsAssignment(condition->condition())) *err_ = Err(condition->condition(), "Assignment not allowed in 'if'."); Consume(Token::RIGHT_PAREN, "Expected ')' after condition of 'if'."); condition->set_if_true(ParseBlock().Pass()); if (Match(Token::ELSE)) condition->set_if_false(ParseStatement().Pass()); if (has_error()) return scoped_ptr(); return condition.PassAs(); }