diff options
Diffstat (limited to 'cmdline/detail/cmdline_parse_argument_detail.h')
-rw-r--r-- | cmdline/detail/cmdline_parse_argument_detail.h | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h new file mode 100644 index 0000000..81ef36b --- /dev/null +++ b/cmdline/detail/cmdline_parse_argument_detail.h @@ -0,0 +1,503 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ +#define ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ + +#include <type_traits> +#include <assert.h> +#include <functional> +#include <vector> +#include <algorithm> +#include <numeric> +#include <memory> + +#include "cmdline/cmdline_parse_result.h" +#include "cmdline/token_range.h" +#include "cmdline/unit.h" +#include "cmdline/cmdline_types.h" + +namespace art { + // Implementation details for the parser. Do not look inside if you hate templates. + namespace detail { + // A non-templated base class for argument parsers. Used by the general parser + // to parse arguments, without needing to know the argument type at compile time. + // + // This is an application of the type erasure idiom. + struct CmdlineParseArgumentAny { + virtual ~CmdlineParseArgumentAny() {} + + // Attempt to parse this argument starting at arguments[position]. + // If the parsing succeeds, the parsed value will be saved as a side-effect. + // + // In most situations, the parsing will not match by returning kUnknown. In this case, + // no tokens were consumed and the position variable will not be updated. + // + // At other times, parsing may fail due to validation but the initial token was still matched + // (for example an out of range value, or passing in a string where an int was expected). + // In this case the tokens are still consumed, and the position variable will get incremented + // by all the consumed tokens. + // + // The # of tokens consumed by the parse attempt will be set as an out-parameter into + // consumed_tokens. The parser should skip this many tokens before parsing the next + // argument. + virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) = 0; + // How many tokens should be taken off argv for parsing this argument. + // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). + // + // A [min,max] range is returned to represent argument definitions with multiple + // value tokens. (e.g. {"-h", "-h " } would return [1,2]). + virtual std::pair<size_t, size_t> GetNumTokens() const = 0; + // Get the run-time typename of the argument type. + virtual const char* GetTypeName() const = 0; + // Try to do a close match, returning how many tokens were matched against this argument + // definition. More tokens is better. + // + // Do a quick match token-by-token, and see if they match. + // Any tokens with a wildcard in them are only matched up until the wildcard. + // If this is true, then the wildcard matching later on can still fail, so this is not + // a guarantee that the argument is correct, it's more of a strong hint that the + // user-provided input *probably* was trying to match this argument. + // + // Returns how many tokens were either matched (or ignored because there was a + // wildcard present). 0 means no match. If the Size() tokens are returned. + virtual size_t MaybeMatches(const TokenRange& tokens) = 0; + }; + + template <typename T> + using EnableIfNumeric = std::enable_if<std::is_arithmetic<T>::value>; + + template <typename T> + using DisableIfNumeric = std::enable_if<!std::is_arithmetic<T>::value>; + + // Argument definition information, created by an ArgumentBuilder and an UntypedArgumentBuilder. + template <typename TArg> + struct CmdlineParserArgumentInfo { + // This version will only be used if TArg is arithmetic and thus has the <= operators. + template <typename T = TArg> // Necessary to get SFINAE to kick in. + bool CheckRange(const TArg& value, typename EnableIfNumeric<T>::type* = 0) { + if (has_range_) { + return min_ <= value && value <= max_; + } + return true; + } + + // This version will be used at other times when TArg is not arithmetic. + template <typename T = TArg> + bool CheckRange(const TArg&, typename DisableIfNumeric<T>::type* = 0) { + assert(!has_range_); + return true; + } + + // Do a quick match token-by-token, and see if they match. + // Any tokens with a wildcard in them only match the prefix up until the wildcard. + // + // If this is true, then the wildcard matching later on can still fail, so this is not + // a guarantee that the argument is correct, it's more of a strong hint that the + // user-provided input *probably* was trying to match this argument. + size_t MaybeMatches(TokenRange token_list) const { + auto best_match = FindClosestMatch(token_list); + + return best_match.second; + } + + // Attempt to find the closest match (see MaybeMatches). + // + // Returns the token range that was the closest match and the # of tokens that + // this range was matched up until. + std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const { + const TokenRange* best_match_ptr = nullptr; + + size_t best_match = 0; + for (auto&& token_range : tokenized_names_) { + size_t this_match = token_range.MaybeMatches(token_list, std::string("_")); + + if (this_match > best_match) { + best_match_ptr = &token_range; + best_match = this_match; + } + } + + return std::make_pair(best_match_ptr, best_match); + } + + // Mark the argument definition as completed, do not mutate the object anymore after this + // call is done. + // + // Performs several sanity checks and token calculations. + void CompleteArgument() { + assert(names_.size() >= 1); + assert(!is_completed_); + + is_completed_ = true; + + size_t blank_count = 0; + size_t token_count = 0; + + size_t global_blank_count = 0; + size_t global_token_count = 0; + for (auto&& name : names_) { + std::string s(name); + + size_t local_blank_count = std::count(s.begin(), s.end(), '_'); + size_t local_token_count = std::count(s.begin(), s.end(), ' '); + + if (global_blank_count != 0) { + assert(local_blank_count == global_blank_count + && "Every argument descriptor string must have same amount of blanks (_)"); + } + + if (local_blank_count != 0) { + global_blank_count = local_blank_count; + blank_count++; + + assert(local_blank_count == 1 && "More than one blank is not supported"); + assert(s.back() == '_' && "The blank character must only be at the end of the string"); + } + + if (global_token_count != 0) { + assert(local_token_count == global_token_count + && "Every argument descriptor string must have same amount of tokens (spaces)"); + } + + if (local_token_count != 0) { + global_token_count = local_token_count; + token_count++; + } + + // Tokenize every name, turning it from a string to a token list. + tokenized_names_.clear(); + for (auto&& name1 : names_) { + // Split along ' ' only, removing any duplicated spaces. + tokenized_names_.push_back( + TokenRange::Split(name1, {' '}).RemoveToken(" ")); + } + + // remove the _ character from each of the token ranges + // we will often end up with an empty token (i.e. ["-XX", "_"] -> ["-XX", ""] + // and this is OK because we still need an empty token to simplify + // range comparisons + simple_names_.clear(); + + for (auto&& tokenized_name : tokenized_names_) { + simple_names_.push_back(tokenized_name.RemoveCharacter('_')); + } + } + + if (token_count != 0) { + assert(("Every argument descriptor string must have equal amount of tokens (spaces)" && + token_count == names_.size())); + } + + if (blank_count != 0) { + assert(("Every argument descriptor string must have an equal amount of blanks (_)" && + blank_count == names_.size())); + } + + using_blanks_ = blank_count > 0; + { + size_t smallest_name_token_range_size = + std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), ~(0u), + [](size_t min, const TokenRange& cur) { + return std::min(min, cur.Size()); + }); + size_t largest_name_token_range_size = + std::accumulate(tokenized_names_.begin(), tokenized_names_.end(), 0u, + [](size_t max, const TokenRange& cur) { + return std::max(max, cur.Size()); + }); + + token_range_size_ = std::make_pair(smallest_name_token_range_size, + largest_name_token_range_size); + } + + if (has_value_list_) { + assert(names_.size() == value_list_.size() + && "Number of arg descriptors must match number of values"); + assert(!has_value_map_); + } + if (has_value_map_) { + if (!using_blanks_) { + assert(names_.size() == value_map_.size() && + "Since no blanks were specified, each arg is mapped directly into a mapped " + "value without parsing; sizes must match"); + } + + assert(!has_value_list_); + } + + if (!using_blanks_ && !CmdlineType<TArg>::kCanParseBlankless) { + assert((has_value_map_ || has_value_list_) && + "Arguments without a blank (_) must provide either a value map or a value list"); + } + + TypedCheck(); + } + + // List of aliases for a single argument definition, e.g. {"-Xdex2oat", "-Xnodex2oat"}. + std::vector<const char*> names_; + // Is there at least 1 wildcard '_' in the argument definition? + bool using_blanks_ = false; + // [min, max] token counts in each arg def + std::pair<size_t, size_t> token_range_size_; + + // contains all the names in a tokenized form, i.e. as a space-delimited list + std::vector<TokenRange> tokenized_names_; + + // contains the tokenized names, but with the _ character stripped + std::vector<TokenRange> simple_names_; + + // For argument definitions created with '.AppendValues()' + // Meaning that parsing should mutate the existing value in-place if possible. + bool appending_values_ = false; + + // For argument definitions created with '.WithRange(min, max)' + bool has_range_ = false; + TArg min_; + TArg max_; + + // For argument definitions created with '.WithValueMap' + bool has_value_map_ = false; + std::vector<std::pair<const char*, TArg>> value_map_; + + // For argument definitions created with '.WithValues' + bool has_value_list_ = false; + std::vector<TArg> value_list_; + + // Make sure there's a default constructor. + CmdlineParserArgumentInfo() = default; + + // Ensure there's a default move constructor. + CmdlineParserArgumentInfo(CmdlineParserArgumentInfo&&) = default; + + private: + // Perform type-specific checks at runtime. + template <typename T = TArg> + void TypedCheck(typename std::enable_if<std::is_same<Unit, T>::value>::type* = 0) { + assert(!using_blanks_ && + "Blanks are not supported in Unit arguments; since a Unit has no parse-able value"); + } + + void TypedCheck() {} + + bool is_completed_ = false; + }; + + // A virtual-implementation of the necessary argument information in order to + // be able to parse arguments. + template <typename TArg> + struct CmdlineParseArgument : CmdlineParseArgumentAny { + explicit CmdlineParseArgument(CmdlineParserArgumentInfo<TArg>&& argument_info, + std::function<void(TArg&)>&& save_argument, + std::function<TArg&(void)>&& load_argument) + : argument_info_(std::forward<decltype(argument_info)>(argument_info)), + save_argument_(std::forward<decltype(save_argument)>(save_argument)), + load_argument_(std::forward<decltype(load_argument)>(load_argument)) { + } + + using UserTypeInfo = CmdlineType<TArg>; + + virtual CmdlineResult ParseArgument(const TokenRange& arguments, size_t* consumed_tokens) { + assert(arguments.Size() > 0); + assert(consumed_tokens != nullptr); + + auto closest_match_res = argument_info_.FindClosestMatch(arguments); + size_t best_match_size = closest_match_res.second; + const TokenRange* best_match_arg_def = closest_match_res.first; + + if (best_match_size > arguments.Size()) { + // The best match has more tokens than were provided. + // Shouldn't happen in practice since the outer parser does this check. + return CmdlineResult(CmdlineResult::kUnknown, "Size mismatch"); + } + + assert(best_match_arg_def != nullptr); + *consumed_tokens = best_match_arg_def->Size(); + + if (!argument_info_.using_blanks_) { + return ParseArgumentSingle(arguments.Join(' ')); + } + + // Extract out the blank value from arguments + // e.g. for a def of "foo:_" and input "foo:bar", blank_value == "bar" + std::string blank_value = ""; + size_t idx = 0; + for (auto&& def_token : *best_match_arg_def) { + auto&& arg_token = arguments[idx]; + + // Does this definition-token have a wildcard in it? + if (def_token.find('_') == std::string::npos) { + // No, regular token. Match 1:1 against the argument token. + bool token_match = def_token == arg_token; + + if (!token_match) { + return CmdlineResult(CmdlineResult::kFailure, + std::string("Failed to parse ") + best_match_arg_def->GetToken(0) + + " at token " + std::to_string(idx)); + } + } else { + // This is a wild-carded token. + TokenRange def_split_wildcards = TokenRange::Split(def_token, {'_'}); + + // Extract the wildcard contents out of the user-provided arg_token. + std::unique_ptr<TokenRange> arg_matches = + def_split_wildcards.MatchSubstrings(arg_token, "_"); + if (arg_matches == nullptr) { + return CmdlineResult(CmdlineResult::kFailure, + std::string("Failed to parse ") + best_match_arg_def->GetToken(0) + + ", with a wildcard pattern " + def_token + + " at token " + std::to_string(idx)); + } + + // Get the corresponding wildcard tokens from arg_matches, + // and concatenate it to blank_value. + for (size_t sub_idx = 0; + sub_idx < def_split_wildcards.Size() && sub_idx < arg_matches->Size(); ++sub_idx) { + if (def_split_wildcards[sub_idx] == "_") { + blank_value += arg_matches->GetToken(sub_idx); + } + } + } + + ++idx; + } + + return ParseArgumentSingle(blank_value); + } + + private: + virtual CmdlineResult ParseArgumentSingle(const std::string& argument) { + // TODO: refactor to use LookupValue for the value lists/maps + + // Handle the 'WithValueMap(...)' argument definition + if (argument_info_.has_value_map_) { + for (auto&& value_pair : argument_info_.value_map_) { + const char* name = value_pair.first; + + if (argument == name) { + return SaveArgument(value_pair.second); + } + } + + // Error case: Fail, telling the user what the allowed values were. + std::vector<std::string> allowed_values; + for (auto&& value_pair : argument_info_.value_map_) { + const char* name = value_pair.first; + allowed_values.push_back(name); + } + + std::string allowed_values_flat = Join(allowed_values, ','); + return CmdlineResult(CmdlineResult::kFailure, + "Argument value '" + argument + "' does not match any of known valid" + "values: {" + allowed_values_flat + "}"); + } + + // Handle the 'WithValues(...)' argument definition + if (argument_info_.has_value_list_) { + size_t arg_def_idx = 0; + for (auto&& value : argument_info_.value_list_) { + auto&& arg_def_token = argument_info_.names_[arg_def_idx]; + + if (arg_def_token == argument) { + return SaveArgument(value); + } + ++arg_def_idx; + } + + assert(arg_def_idx + 1 == argument_info_.value_list_.size() && + "Number of named argument definitions must match number of values defined"); + + // Error case: Fail, telling the user what the allowed values were. + std::vector<std::string> allowed_values; + for (auto&& arg_name : argument_info_.names_) { + allowed_values.push_back(arg_name); + } + + std::string allowed_values_flat = Join(allowed_values, ','); + return CmdlineResult(CmdlineResult::kFailure, + "Argument value '" + argument + "' does not match any of known valid" + "values: {" + allowed_values_flat + "}"); + } + + // Handle the regular case where we parsed an unknown value from a blank. + UserTypeInfo type_parser; + + if (argument_info_.appending_values_) { + TArg& existing = load_argument_(); + CmdlineParseResult<TArg> result = type_parser.ParseAndAppend(argument, existing); + + assert(!argument_info_.has_range_); + + return result; + } + + CmdlineParseResult<TArg> result = type_parser.Parse(argument); + + if (result.IsSuccess()) { + TArg& value = result.GetValue(); + + // Do a range check for 'WithRange(min,max)' argument definition. + if (!argument_info_.CheckRange(value)) { + return CmdlineParseResult<TArg>::OutOfRange( + value, argument_info_.min_, argument_info_.max_); + } + + return SaveArgument(value); + } + + // Some kind of type-specific parse error. Pass the result as-is. + CmdlineResult raw_result = std::move(result); + return raw_result; + } + + public: + virtual const char* GetTypeName() const { + // TODO: Obviate the need for each type specialization to hardcode the type name + return UserTypeInfo::Name(); + } + + // How many tokens should be taken off argv for parsing this argument. + // For example "--help" is just 1, "-compiler-option _" would be 2 (since there's a space). + // + // A [min,max] range is returned to represent argument definitions with multiple + // value tokens. (e.g. {"-h", "-h " } would return [1,2]). + virtual std::pair<size_t, size_t> GetNumTokens() const { + return argument_info_.token_range_size_; + } + + // See if this token range might begin the same as the argument definition. + virtual size_t MaybeMatches(const TokenRange& tokens) { + return argument_info_.MaybeMatches(tokens); + } + + private: + CmdlineResult SaveArgument(const TArg& value) { + assert(!argument_info_.appending_values_ + && "If the values are being appended, then the updated parse value is " + "updated by-ref as a side effect and shouldn't be stored directly"); + TArg val = value; + save_argument_(val); + return CmdlineResult(CmdlineResult::kSuccess); + } + + CmdlineParserArgumentInfo<TArg> argument_info_; + std::function<void(TArg&)> save_argument_; + std::function<TArg&(void)> load_argument_; + }; + } // namespace detail // NOLINT [readability/namespace] [5] [whitespace/comments] [2] +} // namespace art + +#endif // ART_CMDLINE_DETAIL_CMDLINE_PARSE_ARGUMENT_DETAIL_H_ |