diff options
Diffstat (limited to 'cmdline/token_range.h')
-rw-r--r-- | cmdline/token_range.h | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/cmdline/token_range.h b/cmdline/token_range.h new file mode 100644 index 0000000..50c54fe --- /dev/null +++ b/cmdline/token_range.h @@ -0,0 +1,425 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_CMDLINE_TOKEN_RANGE_H_ +#define ART_CMDLINE_TOKEN_RANGE_H_ + +#include <assert.h> +#include <vector> +#include <string> +#include <algorithm> +#include <memory> + +namespace art { +// A range of tokens to make token matching algorithms easier. +// +// We try really hard to avoid copying and store only a pointer and iterators to the +// interiors of the vector, so a typical copy constructor never ends up doing a deep copy. +// It is up to the user to play nice and not to mutate the strings in-place. +// +// Tokens are only copied if a mutating operation is performed (and even then only +// if it *actually* mutates the token). +struct TokenRange { + // Short-hand for a vector of strings. A single string and a token is synonymous. + using TokenList = std::vector<std::string>; + + // Copying-from-vector constructor. + explicit TokenRange(const TokenList& token_list) + : token_list_(new TokenList(token_list)), + begin_(token_list_->begin()), + end_(token_list_->end()) + {} + + // Copying-from-iterator constructor + template <typename ForwardIterator> + explicit TokenRange(ForwardIterator it_begin, ForwardIterator it_end) + : token_list_(new TokenList(it_begin, it_end)), + begin_(token_list_->begin()), + end_(token_list_->end()) + {} + +#if 0 + // Copying-from-vector constructor. + TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED, + TokenList::const_iterator it_begin, + TokenList::const_iterator it_end) + : token_list_(new TokenList(it_begin, it_end)), + begin_(token_list_->begin()), + end_(token_list_->end()) { + assert(it_begin >= token_list.begin()); + assert(it_end <= token_list.end()); + } +#endif + + // Copying from char array constructor, convertings into tokens (strings) along the way. + TokenRange(const char* token_list[], size_t length) + : token_list_(new TokenList(&token_list[0], &token_list[length])), + begin_(token_list_->begin()), + end_(token_list_->end()) + {} + + // Non-copying move-from-vector constructor. Takes over the token vector. + explicit TokenRange(TokenList&& token_list) + : token_list_(new TokenList(std::forward<TokenList>(token_list))), + begin_(token_list_->begin()), + end_(token_list_->end()) + {} + + // Non-copying constructor. Retain reference to existing list of tokens. + TokenRange(std::shared_ptr<TokenList> token_list, + TokenList::const_iterator it_begin, + TokenList::const_iterator it_end) + : token_list_(token_list), + begin_(it_begin), + end_(it_end) { + assert(it_begin >= token_list->begin()); + assert(it_end <= token_list->end()); + } + + // Non-copying copy constructor. + TokenRange(const TokenRange& other) = default; + + // Non-copying move constructor. + TokenRange(TokenRange&& other) = default; + + // Non-copying constructor. Retains reference to an existing list of tokens, with offset. + explicit TokenRange(std::shared_ptr<TokenList> token_list) + : token_list_(token_list), + begin_(token_list_->begin()), + end_(token_list_->end()) + {} + + // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator. + using iterator = TokenList::const_iterator; + + // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator. + using const_iterator = iterator; + + // Create a token range by splitting a string. Each separator gets their own token. + // Since the separator are retained as tokens, it might be useful to call + // RemoveToken afterwards. + static TokenRange Split(const std::string& string, std::initializer_list<char> separators) { + TokenList new_token_list; + + std::string tok; + for (auto&& c : string) { + for (char sep : separators) { + if (c == sep) { + // We spotted a separator character. + // Push back everything before the last separator as a new token. + // Push back the separator as a token. + if (!tok.empty()) { + new_token_list.push_back(tok); + tok = ""; + } + new_token_list.push_back(std::string() + sep); + } else { + // Build up the token with another character. + tok += c; + } + } + } + + if (!tok.empty()) { + new_token_list.push_back(tok); + } + + return TokenRange(std::move(new_token_list)); + } + + // A RandomAccessIterator to the first element in this range. + iterator begin() const { + return begin_; + } + + // A RandomAccessIterator to one past the last element in this range. + iterator end() const { + return end_; + } + + // The size of the range, i.e. how many tokens are in it. + size_t Size() const { + return std::distance(begin_, end_); + } + + // Are there 0 tokens in this range? + bool IsEmpty() const { + return Size() > 0; + } + + // Look up a token by it's offset. + const std::string& GetToken(size_t offset) const { + assert(offset < Size()); + return *(begin_ + offset); + } + + // Does this token range equal the other range? + // Equality is defined as having both the same size, and + // each corresponding token being equal. + bool operator==(const TokenRange& other) const { + if (this == &other) { + return true; + } + + if (Size() != other.Size()) { + return false; + } + + return std::equal(begin(), end(), other.begin()); + } + + // Look up the token at the requested index. + const std::string& operator[](int index) const { + assert(index >= 0 && static_cast<size_t>(index) < Size()); + return *(begin() + index); + } + + // Does this current range start with the other range? + bool StartsWith(const TokenRange& other) const { + if (this == &other) { + return true; + } + + if (Size() < other.Size()) { + return false; + } + + auto& smaller = Size() < other.Size() ? *this : other; + auto& greater = Size() < other.Size() ? other : *this; + + return std::equal(smaller.begin(), smaller.end(), greater.begin()); + } + + // Remove all characters 'c' from each token, potentially copying the underlying tokens. + TokenRange RemoveCharacter(char c) const { + TokenList new_token_list(begin(), end()); + + bool changed = false; + for (auto&& token : new_token_list) { + auto it = std::remove_if(token.begin(), token.end(), [&](char ch) { + if (ch == c) { + changed = true; + return true; + } + return false; + }); + token.erase(it, token.end()); + } + + if (!changed) { + return *this; + } + + return TokenRange(std::move(new_token_list)); + } + + // Remove all tokens matching this one, potentially copying the underlying tokens. + TokenRange RemoveToken(const std::string& token) { + return RemoveIf([&](const std::string& tok) { return tok == token; }); + } + + // Discard all empty tokens, potentially copying the underlying tokens. + TokenRange DiscardEmpty() const { + return RemoveIf([](const std::string& token) { return token.empty(); }); + } + + // Create a non-copying subset of this range. + // Length is trimmed so that the Slice does not go out of range. + TokenRange Slice(size_t offset, size_t length = std::string::npos) const { + assert(offset < Size()); + + if (length != std::string::npos && offset + length > Size()) { + length = Size() - offset; + } + + iterator it_end; + if (length == std::string::npos) { + it_end = end(); + } else { + it_end = begin() + offset + length; + } + + return TokenRange(token_list_, begin() + offset, it_end); + } + + // Try to match the string with tokens from this range. + // Each token is used to match exactly once (after which the next token is used, and so on). + // The matching happens from left-to-right in a non-greedy fashion. + // If the currently-matched token is the wildcard, then the new outputted token will + // contain as much as possible until the next token is matched. + // + // For example, if this == ["a:", "_", "b:] and "_" is the match string, then + // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"] + // + // Since the string matching can fail (e.g. ["foo"] against "bar"), then this + // function can fail, in which cause it will return null. + std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string, + const std::string& wildcard) const { + TokenList new_token_list; + + size_t wildcard_idx = std::string::npos; + size_t string_idx = 0; + + // Function to push all the characters matched as a wildcard so far + // as a brand new token. It resets the wildcard matching. + // Empty wildcards are possible and ok, but only if wildcard matching was on. + auto maybe_push_wildcard_token = [&]() { + if (wildcard_idx != std::string::npos) { + size_t wildcard_length = string_idx - wildcard_idx; + std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length); + new_token_list.push_back(std::move(wildcard_substr)); + + wildcard_idx = std::string::npos; + } + }; + + for (iterator it = begin(); it != end(); ++it) { + const std::string& tok = *it; + + if (tok == wildcard) { + maybe_push_wildcard_token(); + wildcard_idx = string_idx; + continue; + } + + size_t next_token_idx = string.find(tok); + if (next_token_idx == std::string::npos) { + // Could not find token at all + return nullptr; + } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) { + // Found the token at a non-starting location, and we weren't + // trying to parse the wildcard. + return nullptr; + } + + new_token_list.push_back(string.substr(next_token_idx, tok.size())); + maybe_push_wildcard_token(); + string_idx += tok.size(); + } + + size_t remaining = string.size() - string_idx; + if (remaining > 0) { + if (wildcard_idx == std::string::npos) { + // Some characters were still remaining in the string, + // but it wasn't trying to match a wildcard. + return nullptr; + } + } + + // If some characters are remaining, the rest must be a wildcard. + string_idx += remaining; + maybe_push_wildcard_token(); + + return std::unique_ptr<TokenRange>(new TokenRange(std::move(new_token_list))); + } + + // Do a quick match token-by-token, and see if they match. + // Any tokens with a wildcard in them are only matched up until the wildcard. + // If this is true, then the wildcard matching later on can still fail, so this is not + // a guarantee that the argument is correct, it's more of a strong hint that the + // user-provided input *probably* was trying to match this argument. + // + // Returns how many tokens were either matched (or ignored because there was a + // wildcard present). 0 means no match. If the size() tokens are returned. + size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const { + auto token_it = token_list.begin(); + auto token_end = token_list.end(); + auto name_it = begin(); + auto name_end = end(); + + size_t matched_tokens = 0; + + while (token_it != token_end && name_it != name_end) { + // Skip token matching when the corresponding name has a wildcard in it. + const std::string& name = *name_it; + + size_t wildcard_idx = name.find(wildcard); + if (wildcard_idx == std::string::npos) { // No wildcard present + // Did the definition token match the user token? + if (name != *token_it) { + return matched_tokens; + } + } else { + std::string name_prefix = name.substr(0, wildcard_idx); + + // Did the user token start with the up-to-the-wildcard prefix? + if (!StartsWith(*token_it, name_prefix)) { + return matched_tokens; + } + } + + ++token_it; + ++name_it; + ++matched_tokens; + } + + // If we got this far, it's either a full match or the token list was too short. + return matched_tokens; + } + + // Flatten the token range by joining every adjacent token with the separator character. + // e.g. ["hello", "world"].join('$') == "hello$world" + std::string Join(char separator) const { + TokenList tmp(begin(), end()); + return art::Join(tmp, separator); + // TODO: Join should probably take an offset or iterators + } + + private: + static bool StartsWith(const std::string& larger, const std::string& smaller) { + if (larger.size() >= smaller.size()) { + return std::equal(smaller.begin(), smaller.end(), larger.begin()); + } + + return false; + } + + template <typename TPredicate> + TokenRange RemoveIf(const TPredicate& predicate) const { + // If any of the tokens in the token lists are empty, then + // we need to remove them and compress the token list into a smaller one. + bool remove = false; + for (auto it = begin_; it != end_; ++it) { + auto&& token = *it; + + if (predicate(token)) { + remove = true; + break; + } + } + + // Actually copy the token list and remove the tokens that don't match our predicate. + if (remove) { + auto token_list = std::make_shared<TokenList>(begin(), end()); + TokenList::iterator new_end = + std::remove_if(token_list->begin(), token_list->end(), predicate); + token_list->erase(new_end, token_list->end()); + + assert(token_list_->size() > token_list->size() && "Nothing was actually removed!"); + + return TokenRange(token_list); + } + + return *this; + } + + const std::shared_ptr<std::vector<std::string>> token_list_; + const iterator begin_; + const iterator end_; +}; +} // namespace art + +#endif // ART_CMDLINE_TOKEN_RANGE_H_ |