summaryrefslogtreecommitdiffstats
path: root/tools/gn/pattern.cc
blob: 4b14d2aae5251fdf3cd3a6be0eaf252aab7be9d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "tools/gn/pattern.h"

#include "tools/gn/value.h"

namespace {

void ParsePattern(const std::string& s, std::vector<Pattern::Subrange>* out) {
  // Set when the last subrange is a literal so we can just append when we
  // find another literal.
  Pattern::Subrange* last_literal = nullptr;

  for (size_t i = 0; i < s.size(); i++) {
    if (s[i] == '*') {
      // Don't allow two **.
      if (out->size() == 0 ||
          (*out)[out->size() - 1].type != Pattern::Subrange::ANYTHING)
        out->push_back(Pattern::Subrange(Pattern::Subrange::ANYTHING));
      last_literal = nullptr;
    } else if (s[i] == '\\') {
      if (i < s.size() - 1 && s[i + 1] == 'b') {
        // "\b" means path boundary.
        i++;
        out->push_back(Pattern::Subrange(Pattern::Subrange::PATH_BOUNDARY));
        last_literal = nullptr;
      } else {
        // Backslash + anything else means that literal char.
        if (!last_literal) {
          out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
          last_literal = &(*out)[out->size() - 1];
        }
        if (i < s.size() - 1) {
          i++;
          last_literal->literal.push_back(s[i]);
        } else {
          // Single backslash at end, use literal backslash.
          last_literal->literal.push_back('\\');
        }
      }
    } else {
      if (!last_literal) {
        out->push_back(Pattern::Subrange(Pattern::Subrange::LITERAL));
        last_literal = &(*out)[out->size() - 1];
      }
      last_literal->literal.push_back(s[i]);
    }
  }
}

}  // namespace

Pattern::Pattern(const std::string& s) {
  ParsePattern(s, &subranges_);
  is_suffix_ =
      (subranges_.size() == 2 &&
       subranges_[0].type == Subrange::ANYTHING &&
       subranges_[1].type == Subrange::LITERAL);
}

Pattern::~Pattern() {
}

bool Pattern::MatchesString(const std::string& s) const {
  // Empty pattern matches only empty string.
  if (subranges_.empty())
    return s.empty();

  if (is_suffix_) {
    const std::string& suffix = subranges_[1].literal;
    if (suffix.size() > s.size())
      return false;  // Too short.
    return s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
  }

  return RecursiveMatch(s, 0, 0, true);
}

// We assume the number of ranges is small so recursive is always reasonable.
// Could be optimized to only be recursive for *.
bool Pattern::RecursiveMatch(const std::string& s,
                             size_t begin_char,
                             size_t subrange_index,
                             bool allow_implicit_path_boundary) const {
  if (subrange_index >= subranges_.size()) {
    // Hit the end of our subranges, the text should also be at the end for a
    // match.
    return begin_char == s.size();
  }

  const Subrange& sr = subranges_[subrange_index];
  switch (sr.type) {
    case Subrange::LITERAL: {
      if (s.size() - begin_char < sr.literal.size())
        return false;  // Not enough room.
      if (s.compare(begin_char, sr.literal.size(), sr.literal) != 0)
        return false;  // Literal doesn't match.

      // Recursively check the next one.
      return RecursiveMatch(s, begin_char + sr.literal.size(),
                            subrange_index + 1, true);
    }

    case Subrange::PATH_BOUNDARY: {
      // When we can accept an implicit path boundary, we have to check both
      // a match of the literal and the implicit one.
      if (allow_implicit_path_boundary &&
          (begin_char == 0 || begin_char == s.size())) {
        // At implicit path boundary, see if the rest of the pattern matches.
        if (RecursiveMatch(s, begin_char, subrange_index + 1, false))
          return true;
      }

      // Check for a literal "/".
      if (begin_char < s.size() && s[begin_char] == '/') {
        // At explicit boundary, see if the rest of the pattern matches.
        if (RecursiveMatch(s, begin_char + 1, subrange_index + 1, true))
          return true;
      }
      return false;
    }

    case Subrange::ANYTHING: {
      if (subrange_index == subranges_.size() - 1)
        return true;  // * at the end, consider it matching.

      size_t min_next_size = sr.MinSize();

      // We don't care about exactly what matched as long as there was a match,
      // so we can do this front-to-back. If we needed the match, we would
      // normally want "*" to be greedy so would work backwards.
      for (size_t i = begin_char; i < s.size() - min_next_size; i++) {
        // Note: this could probably be faster by detecting the type of the
        // next match in advance and checking for a match in this loop rather
        // than doing a full recursive call for each character.
        if (RecursiveMatch(s, i, subrange_index + 1, true))
          return true;
      }
      return false;
    }

    default:
      NOTREACHED();
  }

  return false;
}

PatternList::PatternList() {
}

PatternList::~PatternList() {
}

void PatternList::Append(const Pattern& pattern) {
  patterns_.push_back(pattern);
}

void PatternList::SetFromValue(const Value& v, Err* err) {
  patterns_.clear();

  if (v.type() != Value::LIST) {
    *err = Err(v.origin(), "This value must be a list.");
    return;
  }

  const std::vector<Value>& list = v.list_value();
  for (const auto& elem : list) {
    if (!elem.VerifyTypeIs(Value::STRING, err))
      return;
    patterns_.push_back(Pattern(elem.string_value()));
  }
}

bool PatternList::MatchesString(const std::string& s) const {
  for (const auto& pattern : patterns_) {
    if (pattern.MatchesString(s))
      return true;
  }
  return false;
}

bool PatternList::MatchesValue(const Value& v) const {
  if (v.type() == Value::STRING)
    return MatchesString(v.string_value());
  return false;
}