summary | refs | log | tree | commit | diff | stats
path: root/utility
diff options
context:
space:
mode:
Diffstat (limited to 'utility')
-rw-r--r-- utility/Tokenizer.cpp 200
-rw-r--r-- utility/Tokenizer.h 129
2 files changed, 149 insertions, 180 deletions
diff --git a/utility/Tokenizer.cpp b/utility/Tokenizer.cpp
index 9ea4ea4..a4cfcf0 100644
--- a/utility/Tokenizer.cpp
+++ b/utility/Tokenizer.cpp
@@ -1,125 +1,75 @@
-///////////////////////////////////////////////////////////////////////////////
-// Tokenizer.cpp
-// =============
-// General purpose string tokenizer (C++ string version)
-//
-// The default delimiters are space(" "), tab(\t, \v), newline(\n),
-// carriage return(\r), and form feed(\f).
-// If you want to use different delimiters, then use setDelimiter() to override
-// the delimiters. Note that the delimiter string can hold multiple characters.
-//
-// AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
-// CREATED: 2005-05-25
-// UPDATED: 2011-03-08
-///////////////////////////////////////////////////////////////////////////////
-
-#include "Tokenizer.h"
-
-
-///////////////////////////////////////////////////////////////////////////////
-// constructor
-///////////////////////////////////////////////////////////////////////////////
-Tokenizer::Tokenizer() : buffer(""), token(""), delimiter(DEFAULT_DELIMITER)
-{
- currPos = buffer.begin();
-}
-
-Tokenizer::Tokenizer(const std::string& str, const std::string& delimiter) : buffer(str), token(""), delimiter(delimiter)
-{
- currPos = buffer.begin();
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////
-// destructor
-///////////////////////////////////////////////////////////////////////////////
-Tokenizer::~Tokenizer()
-{
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////
-// reset string buffer, delimiter and the currsor position
-///////////////////////////////////////////////////////////////////////////////
-void Tokenizer::set(const std::string& str, const std::string& delimiter)
-{
- this->buffer = str;
- this->delimiter = delimiter;
- this->currPos = buffer.begin();
-}
-
-void Tokenizer::setString(const std::string& str)
-{
- this->buffer = str;
- this->currPos = buffer.begin();
-}
-
-void Tokenizer::setDelimiter(const std::string& delimiter)
-{
- this->delimiter = delimiter;
- this->currPos = buffer.begin();
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////
-// return the next token
-// If cannot find a token anymore, return "".
-///////////////////////////////////////////////////////////////////////////////
-std::string Tokenizer::next()
-{
- if(buffer.size() <= 0) return ""; // skip if buffer is empty
-
- token.clear(); // reset token string
-
- this->skipDelimiter(); // skip leading delimiters
-
- // append each char to token string until it meets delimiter
- while(currPos != buffer.end() && !isDelimiter(*currPos))
- {
- token += *currPos;
- ++currPos;
- }
- return token;
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////
-// skip ang leading delimiters
-///////////////////////////////////////////////////////////////////////////////
-void Tokenizer::skipDelimiter()
-{
- while(currPos != buffer.end() && isDelimiter(*currPos))
- ++currPos;
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////
-// return true if the current character is delimiter
-///////////////////////////////////////////////////////////////////////////////
-bool Tokenizer::isDelimiter(char c)
-{
- return (delimiter.find(c) != std::string::npos);
-}
-
-
-
-///////////////////////////////////////////////////////////////////////////////
-// split the input string into multiple tokens
-// This function scans tokens from the current cursor position.
-///////////////////////////////////////////////////////////////////////////////
-std::vector<std::string> Tokenizer::split()
-{
- std::vector<std::string> tokens;
- std::string token;
- while((token = this->next()) != "")
- {
- tokens.push_back(token);
- }
-
- return tokens;
-}
+/*
+ * Copyright (c) 2015, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "Tokenizer.h"
+
+using std::string;
+using std::vector;
+
+const string Tokenizer::defaultDelimiters = " \n\r\t\v\f"; // space, line feed, carriage return, tab, vertical tab, form feed
+
+Tokenizer::Tokenizer(const string &input, const string &delimiters) // both arguments are copied into the object
+ : _input(input), _delimiters(delimiters), _position(0) // scanning starts at index 0
+{
+}
+
+string Tokenizer::next() // returns the next token, or "" once no token is left
+{
+ string token; // NOTE(review): never used below; candidate for removal
+
+ // Skip all leading delimiters, searching from where the previous token ended
+ string::size_type tokenStart = _input.find_first_not_of(_delimiters, _position);
+
+ // Special case: no token is left. Return early, because string::substr
+ // throws std::out_of_range when pos > size(), which is the case for npos.
+ if (tokenStart == string::npos) {
+ return "";
+ }
+
+ // Starting from the token's start, find the first delimiter (npos if the token runs to the end of the input)
+ string::size_type tokenEnd = _input.find_first_of(_delimiters, tokenStart);
+
+ _position = tokenEnd; // the next call resumes here; once this is npos every later call returns ""
+
+ return _input.substr(tokenStart, tokenEnd - tokenStart); // substr clamps the count, so tokenEnd == npos yields the rest of the input
+}
+
+vector<string> Tokenizer::split() // gathers every token from the current position to the end of the input
+{
+ vector<string> result;
+ string token;
+
+ while (true) {
+ token = next(); // advances _position past the returned token
+ if (token.empty()) { // next() reports exhaustion with an empty string
+ return result;
+ }
+ result.push_back(token);
+ }
+}
diff --git a/utility/Tokenizer.h b/utility/Tokenizer.h
index de3f86c..c48747a 100644
--- a/utility/Tokenizer.h
+++ b/utility/Tokenizer.h
@@ -1,56 +1,75 @@
-///////////////////////////////////////////////////////////////////////////////
-// Tokenizer.h
-// ===========
-// General purpose string tokenizer (C++ string version)
-//
-// The default delimiters are space(" "), tab(\t, \v), newline(\n),
-// carriage return(\r), and form feed(\f).
-// If you want to use different delimiters, then use setDelimiter() to override
-// the delimiters. Note that the delimiter string can hold multiple characters.
-//
-// AUTHOR: Song Ho Ahn (song.ahn@gmail.com)
-// CREATED: 2005-05-25
-// UPDATED: 2011-03-08
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef TOKENIZER_H
-#define TOKENIZER_H
-
-#include <string>
+/*
+ * Copyright (c) 2015, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation and/or
+ * other materials provided with the distribution.
+ *
+ * 3. Neither the name of the copyright holder nor the names of its contributors
+ * may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+#include <string>
#include <vector>
-
-// default delimiter string (space, tab, newline, carriage return, form feed)
-const std::string DEFAULT_DELIMITER = " \t\v\n\r\f";
-
-class Tokenizer
-{
-public:
- // ctor/dtor
- Tokenizer();
- Tokenizer(const std::string& str, const std::string& delimiter=DEFAULT_DELIMITER);
- ~Tokenizer();
-
- // set string and delimiter
- void set(const std::string& str, const std::string& delimiter=DEFAULT_DELIMITER);
- void setString(const std::string& str); // set source string only
- void setDelimiter(const std::string& delimiter); // set delimiter string only
-
- std::string next(); // return the next token, return "" if it ends
-
- std::vector<std::string> split(); // return array of tokens from current cursor
-
-protected:
-
-
-private:
- void skipDelimiter(); // ignore leading delimiters
- bool isDelimiter(char c); // check if the current char is delimiter
-
- std::string buffer; // input string
- std::string token; // output string
- std::string delimiter; // delimiter string
- std::string::const_iterator currPos; // string iterator pointing the current position
-
-};
-
-#endif // TOKENIZER_H
+
+/** Splits a string into tokens separated by single-character delimiters
+ *
+ * Must be initialized with a string to be tokenized and, optionally, a string
+ * of delimiters (@see Tokenizer::defaultDelimiters).
+ *
+ * Multiple consecutive delimiters (even if different) are considered as a
+ * single one. As a result, there can't be empty tokens.
+ */
+class Tokenizer
+{
+public:
+ /** Constructs a Tokenizer
+ *
+ * @param[in] input The string to be tokenized
+ * @param[in] delimiters A string containing all the token delimiters
+ * (hence, each delimiter can only be a single character)
+ */
+ Tokenizer(const std::string &input, const std::string &delimiters=defaultDelimiters);
+ ~Tokenizer() {}; // NOTE(review): the ';' after the body is redundant
+
+ /** Return the next token, or an empty string if no token is left
+ *
+ * Multiple consecutive delimiters are considered as a single one - i.e.
+ * "a   bc d  " will be tokenized as ("a", "bc", "d") if the delimiter
+ * is ' '.
+ */
+ std::string next();
+
+ /** Return a vector of all tokens not yet consumed by earlier next() calls
+ */
+ std::vector<std::string> split();
+
+ /** Default list of delimiters (" \n\r\t\v\f") */
+ static const std::string defaultDelimiters;
+
+private:
+ const std::string _input; ///< string to be tokenized
+ const std::string _delimiters; ///< token delimiters
+
+ std::string::size_type _position; ///< index where the next search starts: 0 initially, then the end of the last returned token
+};