// Copyright (c) 2013 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #ifndef TOOLS_GN_TOKENIZER_H_ #define TOOLS_GN_TOKENIZER_H_ #include #include "base/basictypes.h" #include "base/strings/string_piece.h" #include "tools/gn/err.h" #include "tools/gn/token.h" class InputFile; class Tokenizer { public: static std::vector Tokenize(const InputFile* input_file, Err* err); // Counts lines in the given buffer (the first line is "1") and returns // the byte offset of the beginning of that line, or (size_t)-1 if there // aren't that many lines in the file. Note that this will return the byte // one past the end of the input if the last character is a newline. // // This is a helper function for error output so that the tokenizer's // notion of lines can be used elsewhere. static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n); // Returns true if the given offset of the string piece counts as a newline. // The offset must be in the buffer. static bool IsNewline(const base::StringPiece& buffer, size_t offset); static bool IsIdentifierFirstChar(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; } static bool IsIdentifierContinuingChar(char c) { // Also allow digits after the first char. return IsIdentifierFirstChar(c) || (c >= '0' && c <= '9'); } private: // InputFile must outlive the tokenizer and all generated tokens. explicit Tokenizer(const InputFile* input_file, Err* err); ~Tokenizer(); std::vector Run(); void AdvanceToNextToken(); Token::Type ClassifyCurrent() const; void AdvanceToEndOfToken(const Location& location, Token::Type type); bool IsCurrentWhitespace() const; bool IsCurrentNewline() const; bool IsCurrentStringTerminator(char quote_char) const; bool CanIncrement() const { return cur_ < input_.size(); } // Increments the current location by one. void Advance(); // Returns the current character in the file as a location. Location GetCurrentLocation() const; Err GetErrorForInvalidToken(const Location& location) const; bool done() const { return at_end() || has_error(); } bool at_end() const { return cur_ == input_.size(); } char cur_char() const { return input_[cur_]; } bool has_error() const { return err_->has_error(); } std::vector tokens_; const InputFile* input_file_; const base::StringPiece input_; Err* err_; size_t cur_; // Byte offset into input buffer. int line_number_; int char_in_line_; DISALLOW_COPY_AND_ASSIGN(Tokenizer); }; #endif // TOOLS_GN_TOKENIZER_H_