diff options
Diffstat (limited to 'tools/gn/pattern.cc')
-rw-r--r-- | tools/gn/pattern.cc | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/tools/gn/pattern.cc b/tools/gn/pattern.cc new file mode 100644 index 0000000..cc08b2c --- /dev/null +++ b/tools/gn/pattern.cc @@ -0,0 +1,185 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "tools/gn/pattern.h" + +#include "tools/gn/value.h" + +namespace { + +void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) { + // Set when the last subrange is a literal so we can just append when we + // find another literal. + Pattern::Subrange* last_literal = NULL; + + for (size_t i = 0; i < s.size(); i++) { + if (s[i] == '*') { + // Don't allow two **. + if (out->size() == 0 || + (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING) + out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING)); + last_literal = NULL; + } else if (s[i] == '\\') { + if (i < s.size() - 1 && s[i + 1] == 'b') { + // "\b" means path boundary. + i++; + out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY)); + last_literal = NULL; + } else { + // Backslash + anything else means that literal char. + if (!last_literal) { + out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL)); + last_literal = &(*out)[out->size() - 1]; + } + if (i < s.size() - 1) { + i++; + last_literal->literal.push_back(s[i]); + } else { + // Single backslash at end, use literal backslash. + last_literal->literal.push_back('\\'); + } + } + } else { + if (!last_literal) { + out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL)); + last_literal = &(*out)[out->size() - 1]; + } + last_literal->literal.push_back(s[i]); + } + } +} + +} // namespace + +Pattern::Pattern(const std::string& s) { + ParsePattern(s, &subranges_); + is_suffix_ = + (subranges_.size() == 2 && + subranges_[0].type == Subrange::ANYTHING && + subranges_[1].type == Subrange::LITERAL); +} + +Pattern::~Pattern() { +} + +bool Pattern::MatchesString(const std::string& s) const { + // Empty pattern matches only empty string. + if (subranges_.empty()) + return s.empty(); + + if (is_suffix_) { + const std::string& suffix = subranges_[1].literal; + if (suffix.size() > s.size()) + return false; // Too short. + return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; + } + + return RecursiveMatch(s, 0, 0, true); +} + +// We assume the number of ranges is small so recursive is always reasonable. +// Could be optimized to only be recursive for *. +bool Pattern::RecursiveMatch(const std::string& s, + size_t begin_char, + size_t subrange_index, + bool allow_implicit_path_boundary) const { + if (subrange_index >= subranges_.size()) { + // Hit the end of our subranges, the text should also be at the end for a + // match. + return begin_char == s.size(); + } + + const Subrange& sr = subranges_[subrange_index]; + switch (sr.type) { + case Subrange::LITERAL: { + if (s.size() - begin_char < sr.literal.size()) + return false; // Not enough room. + if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0) + return false; // Literal doesn't match. + + // Recursively check the next one. + return RecursiveMatch(s, begin_char + sr.literal.size(), + subrange_index + 1, true); + } + + case Subrange::PATH_BOUNDARY: { + // When we can accept an implicit path boundary, we have to check both + // a match of the literal and the implicit one. + if (allow_implicit_path_boundary && + (begin_char == 0 || begin_char == s.size())) { + // At implicit path boundary, see if the rest of the pattern matches. + if (RecursiveMatch(s, begin_char, subrange_index + 1, false)) + return true; + } + + // Check for a literal "/". + if (begin_char < s.size() && s[begin_char] == '/') { + // At explicit boundary, see if the rest of the pattern matches. + if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true)) + return true; + } + return false; + } + + case Subrange::ANYTHING: { + if (subrange_index == subranges_.size() - 1) + return true; // * at the end, consider it matching. + + size_t min_next_size = sr.MinSize(); + + // We don't care about exactly what matched as long as there was a match, + // so we can do this front-to-back. If we needed the match, we would + // normally want "*" to be greedy so would work backwards. + for (size_t i = begin_char; i < s.size() - min_next_size; i++) { + // Note: this could probably be faster by detecting the type of the + // next match in advance and checking for a match in this loop rather + // than doing a full recursive call for each character. + if (RecursiveMatch(s, i, subrange_index + 1, true)) + return true; + } + return false; + } + + default: + NOTREACHED(); + } + + return false; +} + +PatternList::PatternList() { +} + +PatternList::~PatternList() { +} + +void PatternList::SetFromValue(const Value& v, Err* err) { + patterns_.clear(); + + if (v.type() != Value::LIST) { + *err = Err(v.origin(), "This value must be a list."); + return; + } + + const std::vector<Value>& list = v.list_value(); + for (size_t i = 0; i < list.size(); i++) { + if (!list[i].VerifyTypeIs(Value::STRING, err)) + return; + patterns_.push_back(Pattern(list[i].string_value())); + } +} + +bool PatternList::MatchesString(const std::string& s) const { + for (size_t i = 0; i < patterns_.size(); i++) { + if (patterns_[i].MatchesString(s)) + return true; + } + return false; +} + +bool PatternList::MatchesValue(const Value& v) const { + if (v.type() == Value::STRING) + return MatchesString(v.string_value()); + return false; +} |