diff options
author | David Wagner <david.wagner@intel.com> | 2015-03-19 16:35:47 +0100 |
---|---|---|
committer | Eric Laurent <elaurent@google.com> | 2015-04-24 13:39:11 -0700 |
commit | 99b3e852e553103ff75ed38117baa5bce2befbdb (patch) | |
tree | 26ee3110bc4b147ed96821540c28ae8286f564b9 /utility/Tokenizer.h | |
parent | b2742cf39a7ca6c8a1e25698e2065258da1d1a2f (diff) | |
download | external_parameter-framework-99b3e852e553103ff75ed38117baa5bce2befbdb.zip external_parameter-framework-99b3e852e553103ff75ed38117baa5bce2befbdb.tar.gz external_parameter-framework-99b3e852e553103ff75ed38117baa5bce2befbdb.tar.bz2 |
Re-implement the Tokenizer class from scratch
It didn't have a license header. Even though we received explicit authorization
to use it, it wasn't a comfortable situation.
This is an original implementation that only keeps the APIs we use in the
parameter-framework and keeps them identical (except for a small exception). The
behaviour is also exactly the same.
Change-Id: I85a69c76027ee026a693d79cd19edd3b86796f9d
Signed-off-by: David Wagner <david.wagner@intel.com>
Diffstat (limited to 'utility/Tokenizer.h')
-rw-r--r-- | utility/Tokenizer.h | 129 |
1 files changed, 74 insertions, 55 deletions
diff --git a/utility/Tokenizer.h b/utility/Tokenizer.h index de3f86c..c48747a 100644 --- a/utility/Tokenizer.h +++ b/utility/Tokenizer.h @@ -1,56 +1,75 @@ -///////////////////////////////////////////////////////////////////////////////
-// Tokenizer.h
-// ===========
-// General purpose string tokenizer (C++ string version)
-//
-// The default delimiters are space(" "), tab(\t, \v), newline(\n),
-// carriage return(\r), and form feed(\f).
-// If you want to use different delimiters, then use setDelimiter() to override
-// the delimiters. Note that the delimiter string can hold multiple characters.
-//
-// AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
-// CREATED: 2005-05-25
-// UPDATED: 2011-03-08
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef TOKENIZER_H
-#define TOKENIZER_H
-
-#include <string>
+/* + * Copyright (c) 2015, Intel Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation and/or + * other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#pragma once + +#include <string> #include <vector> -
-// default delimiter string (space, tab, newline, carriage return, form feed)
-const std::string DEFAULT_DELIMITER = " \t\v\n\r\f";
-
-class Tokenizer
-{
-public:
- // ctor/dtor
- Tokenizer();
- Tokenizer(const std::string& str, const std::string& delimiter=DEFAULT_DELIMITER);
- ~Tokenizer();
-
- // set string and delimiter
- void set(const std::string& str, const std::string& delimiter=DEFAULT_DELIMITER);
- void setString(const std::string& str); // set source string only
- void setDelimiter(const std::string& delimiter); // set delimiter string only
-
- std::string next(); // return the next token, return "" if it ends
-
- std::vector<std::string> split(); // return array of tokens from current cursor
-
-protected:
-
-
-private:
- void skipDelimiter(); // ignore leading delimiters
- bool isDelimiter(char c); // check if the current char is delimiter
-
- std::string buffer; // input string
- std::string token; // output string
- std::string delimiter; // delimiter string
- std::string::const_iterator currPos; // string iterator pointing the current position
-
-};
-
-#endif // TOKENIZER_H
/** Tokenizer class
 *
 * Splits an input string into tokens separated by delimiter characters.
 *
 * Must be initialized with a string to be tokenized and, optionally, a string
 * of delimiters (@see Tokenizer::defaultDelimiters).
 *
 * Multiple consecutive delimiters (even if different) are considered as a
 * single one. As a result, there can't be empty tokens.
 */
class Tokenizer
{
public:
    /** Constructs a Tokenizer
     *
     * @param[in] input The string to be tokenized
     * @param[in] delimiters A string containing all the token delimiters
     *                       (hence, each delimiter can only be a single
     *                       character). Defaults to
     *                       Tokenizer::defaultDelimiters.
     */
    Tokenizer(const std::string &input, const std::string &delimiters = defaultDelimiters);
    ~Tokenizer() {}

    /** Return the next token
     *
     * Multiple consecutive delimiters are considered as a single one - i.e.
     * "a  bc d " (note the two spaces after "a") will be tokenized as
     * ("a", "bc", "d") if the delimiter is ' '.
     *
     * @return the next token, or an empty string if there are no more tokens
     */
    std::string next();

    /** Return a vector of all tokens
     *
     * @return the tokens, in the order they appear in the input
     */
    std::vector<std::string> split();

    /** Default list of delimiters (" \n\r\t\v\f") */
    static const std::string defaultDelimiters;

private:
    const std::string _input;      ///< string to be tokenized
    const std::string _delimiters; ///< token delimiters

    std::string::size_type _position; ///< end of the last returned token
};