Kea  1.5.0
strutil.cc
Go to the documentation of this file.
1 // Copyright (C) 2011-2018 Internet Systems Consortium, Inc. ("ISC")
2 //
3 // This Source Code Form is subject to the terms of the Mozilla Public
4 // License, v. 2.0. If a copy of the MPL was not distributed with this
5 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 
7 #include <config.h>
8 
9 #include <util/encode/hex.h>
10 #include <util/strutil.h>
11 
12 #include <boost/algorithm/string/classification.hpp>
13 #include <boost/algorithm/string/constants.hpp>
14 #include <boost/algorithm/string/split.hpp>
15 
16 #include <numeric>
17 #include <iostream>
18 #include <sstream>
19 
20 // Early versions of C++11 regex were buggy, use it if we
21 // can otherwise, we fall back to regcomp/regexec. For more info see:
22 // https://stackoverflow.com/questions/12530406/is-gcc-4-8-or-earlier-buggy-about-regular-expressions
23 #ifdef USE_REGEX
24 #include <regex>
25 #else
26 #include <sys/types.h>
27 #include <regex.h>
28 #endif
29 
30 #include <string.h>
31 
32 using namespace std;
33 
34 namespace isc {
35 namespace util {
36 namespace str {
37 
38 // Normalize slashes
39 
// Replaces every backslash in 'name' with a forward slash, in place.
// An empty string, or one without backslashes, is left untouched.
void
normalizeSlash(std::string& name) {
    // Hop from one backslash to the next; each search resumes just after
    // the character that was rewritten.
    for (std::string::size_type at = name.find('\\');
         at != std::string::npos;
         at = name.find('\\', at + 1)) {
        name[at] = '/';
    }
}
49 
50 // Trim String
51 
// Returns a copy of 'instring' with leading and trailing blanks removed.
// Only space, horizontal tab and newline are treated as blanks; interior
// blanks are preserved. A string made up solely of blanks (or an empty
// string) yields an empty result.
std::string
trim(const std::string& instring) {
    static const char* const whitespace = " \t\n";

    // Locate the first character that has to be kept.
    const std::string::size_type head = instring.find_first_not_of(whitespace);
    if (head == std::string::npos) {
        // Nothing but blanks (or nothing at all).
        return (std::string());
    }

    // A non-blank character exists, so this search cannot fail.
    const std::string::size_type tail = instring.find_last_not_of(whitespace);

    return (instring.substr(head, tail - head + 1));
}
72 
73 // Tokenize string. As noted in the header, this is locally written to avoid
74 // another dependency on a Boost library.
75 
// Splits 'text' into tokens separated by any of the characters in 'delim'.
//
// Runs of delimiters are collapsed, so no empty token is ever returned.
// When 'escape' is true a backslash acts as an escape character:
//  - backslash + delimiter  keeps the delimiter inside the token,
//  - backslash + backslash  keeps a single backslash,
//  - backslash + other      keeps both the backslash and the character,
//  - a trailing backslash   is kept as is.
// When 'escape' is false a backslash is an ordinary character. A character
// listed in 'delim' is always tested as a delimiter first, even if it is a
// backslash.
std::vector<std::string>
tokens(const std::string& text, const std::string& delim, bool escape) {
    std::vector<std::string> pieces;
    std::string current;
    bool inside = false;          // Currently collecting a token
    bool pending_escape = false;  // Previous character was an unconsumed escape

    for (const char ch : text) {
        const bool is_delim = (delim.find(ch) != std::string::npos);

        if (is_delim && !inside) {
            // Leading or repeated delimiter: swallow it.
            continue;
        }

        if (is_delim && !pending_escape) {
            // Unescaped delimiter closes the token being collected.
            if (!current.empty()) {
                pieces.push_back(current);
            }
            current.clear();
            inside = false;
            continue;
        }

        if (is_delim) {
            // Escaped delimiter: it belongs to the token.
            pending_escape = false;
            current.push_back(ch);
            continue;
        }

        // Any non-delimiter character starts (or continues) a token.
        inside = true;

        if (escape && (ch == '\\')) {
            // First backslash arms the escape, a second one is kept literally.
            if (pending_escape) {
                current.push_back(ch);
            }
            pending_escape = !pending_escape;
            continue;
        }

        if (pending_escape) {
            // The escape did not precede anything special: keep the backslash.
            current.push_back('\\');
            pending_escape = false;
        }
        current.push_back(ch);
    }

    // End of input: a dangling escape is kept, then the last token is flushed.
    if (pending_escape) {
        current.push_back('\\');
    }
    if (!current.empty()) {
        pieces.push_back(current);
    }

    return (pieces);
}
142 
// Local function to pass to accumulate() for summing up string lengths.

namespace {

// Adds the size of 'cur_string' to the running total 'curlen'.
size_t
lengthSum(std::string::size_type curlen, const std::string& cur_string) {
    return (curlen + cur_string.size());
}

}

// Provide printf-style formatting.
//
// Replaces successive "%s" placeholders in 'format' with the corresponding
// elements of 'args', left to right. Placeholders without a matching
// argument are left in place; surplus arguments are ignored. The text of an
// argument is copied verbatim: a "%s" contained in an argument is never
// treated as a placeholder.
std::string
format(const std::string& format, const std::vector<std::string>& args) {

    static const std::string flag = "%s";

    // Estimate the final size: the format string plus all arguments, less
    // the placeholder being replaced by each argument. The format may hold
    // fewer placeholders than there are arguments, so the subtraction is
    // guarded against wrapping around (which would make reserve() throw).
    const std::string::size_type total =
        std::accumulate(args.begin(), args.end(), format.size(), lengthSum);
    const std::string::size_type placeholders = args.size() * flag.size();

    std::string result;
    result.reserve(total > placeholders ? (total - placeholders) : total);

    // Iterate through replacing all tokens
    result = format;
    std::string::size_type tokenpos = 0;  // Where the next search starts

    for (std::vector<std::string>::size_type i = 0; i < args.size(); ++i) {
        tokenpos = result.find(flag, tokenpos);
        if (tokenpos == std::string::npos) {
            // No placeholders left, remaining arguments are unused.
            break;
        }
        result.replace(tokenpos, flag.size(), args[i]);

        // Resume after the inserted text so that the argument's own content
        // is not scanned for placeholders.
        tokenpos += args[i].size();
    }

    return (result);
}
184 
185 std::string
186 getToken(std::istringstream& iss) {
187  string token;
188  iss >> token;
189  if (iss.bad() || iss.fail()) {
190  isc_throw(StringTokenError, "could not read token from string");
191  }
192  return (token);
193 }
194 
195 std::vector<uint8_t>
196 quotedStringToBinary(const std::string& quoted_string) {
197  std::vector<uint8_t> binary;
198  // Remove whitespace before and after the quotes.
199  std::string trimmed_string = trim(quoted_string);
200 
201  // We require two quote characters, so the length of the string must be
202  // equal to 2 at minimum, and it must start and end with quotes.
203  if ((trimmed_string.length() > 1) && ((trimmed_string[0] == '\'') &&
204  (trimmed_string[trimmed_string.length()-1] == '\''))) {
205  // Remove quotes and trim the text inside the quotes.
206  trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2));
207  // Copy string contents into the vector.
208  binary.assign(trimmed_string.begin(), trimmed_string.end());
209  }
210  // Return resulting vector or empty vector.
211  return (binary);
212 }
213 
214 void
215 decodeColonSeparatedHexString(const std::string& hex_string,
216  std::vector<uint8_t>& binary) {
217  std::vector<std::string> split_text;
218  boost::split(split_text, hex_string, boost::is_any_of(":"),
219  boost::algorithm::token_compress_off);
220 
221  std::vector<uint8_t> binary_vec;
222  for (size_t i = 0; i < split_text.size(); ++i) {
223 
224  // If there are multiple tokens and the current one is empty, it
225  // means that two consecutive colons were specified. This is not
226  // allowed.
227  if ((split_text.size() > 1) && split_text[i].empty()) {
228  isc_throw(isc::BadValue, "two consecutive colons specified in"
229  " a decoded string '" << hex_string << "'");
230 
231  // Between a colon we expect at most two characters.
232  } else if (split_text[i].size() > 2) {
233  isc_throw(isc::BadValue, "invalid format of the decoded string"
234  << " '" << hex_string << "'");
235 
236  } else if (!split_text[i].empty()) {
237  std::stringstream s;
238  s << "0x";
239 
240  for (unsigned int j = 0; j < split_text[i].length(); ++j) {
241  // Check if we're dealing with hexadecimal digit.
242  if (!isxdigit(split_text[i][j])) {
243  isc_throw(isc::BadValue, "'" << split_text[i][j]
244  << "' is not a valid hexadecimal digit in"
245  << " decoded string '" << hex_string << "'");
246  }
247  s << split_text[i][j];
248  }
249 
250  // The stream should now have one or two hexadecimal digits.
251  // Let's convert it to a number and store in a temporary
252  // vector.
253  unsigned int binary_value;
254  s >> std::hex >> binary_value;
255 
256  binary_vec.push_back(static_cast<uint8_t>(binary_value));
257  }
258 
259  }
260 
261  // All ok, replace the data in the output vector with a result.
262  binary.swap(binary_vec);
263 }
264 
265 void
266 decodeFormattedHexString(const std::string& hex_string,
267  std::vector<uint8_t>& binary) {
268  // If there is at least one colon we assume that the string
269  // comprises octets separated by colons (e.g. MAC address notation).
270  if (hex_string.find(':') != std::string::npos) {
271  decodeColonSeparatedHexString(hex_string, binary);
272 
273  } else {
274  std::ostringstream s;
275 
276  // If we have odd number of digits we'll have to prepend '0'.
277  if (hex_string.length() % 2 != 0) {
278  s << "0";
279  }
280 
281  // It is ok to use '0x' prefix in a string.
282  if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) {
283  // Exclude '0x' from the decoded string.
284  s << hex_string.substr(2);
285 
286  } else {
287  // No '0x', so decode the whole string.
288  s << hex_string;
289  }
290 
291  try {
292  // Decode the hex string.
293  encode::decodeHex(s.str(), binary);
294 
295  } catch (...) {
296  isc_throw(isc::BadValue, "'" << hex_string << "' is not a valid"
297  " string of hexadecimal digits");
298  }
299  }
300 }
301 
303 public:
304  StringSanitizerImpl(const std::string& char_set, const std::string& char_replacement)
305  : char_set_(char_set), char_replacement_(char_replacement) {
306 #ifdef USE_REGEX
307  try {
308  scrub_exp_ = std::regex(char_set, std::regex::extended);
309  } catch (const std::exception& ex) {
310  isc_throw(isc::BadValue, "invalid regex: '"
311  << char_set_ << "', " << ex.what());
312  }
313 #else
314  int ec = regcomp(&scrub_exp_, char_set_.c_str(), REG_EXTENDED);
315  if (ec) {
316  char errbuf[512] = "";
317  static_cast<void>(regerror(ec, &scrub_exp_, errbuf, sizeof(errbuf)));
318  regfree(&scrub_exp_);
319  isc_throw(isc::BadValue, "invalid regex: '" << char_set_ << "', " << errbuf);
320  }
321 #endif
322  }
323 
326 #ifndef USE_REGEX
327  regfree(&scrub_exp_);
328 #endif
329  }
330 
331  std::string scrub(const std::string& original) {
332 #ifdef USE_REGEX
333  std::stringstream result;
334  try {
335  std::regex_replace(std::ostream_iterator<char>(result),
336  original.begin(), original.end(),
337  scrub_exp_, char_replacement_);
338  } catch (const std::exception& ex) {
339  isc_throw(isc::BadValue, "replacing '" << char_set_ << "' with '"
340  << char_replacement_ << "' in '" << original << "' failed: ,"
341  << ex.what());
342  }
343 
344  return (result.str());
345 #else
346  // Iterate over original string, match by match.
347  const char* origStr = original.c_str();
348  const char* startFrom = origStr;
349  const char* endAt = origStr + strlen(origStr);
350  regmatch_t matches[2]; // n matches + 1
351  stringstream result;
352 
353  while (startFrom < endAt) {
354  // Look for the next match
355  if (regexec(&scrub_exp_, startFrom, 1, matches, 0) == REG_NOMATCH) {
356  // No matches, so add in the remainder
357  result << startFrom;
358  break;
359  }
360 
361  // Shouldn't happen, but one never knows eh?
362  if (matches[0].rm_so == -1) {
363  isc_throw(isc::Unexpected, "matched but so is -1?");
364  }
365 
366  // Add everything from starting point up to the current match
367  const char* matchAt = startFrom + matches[0].rm_so;
368  while (startFrom < matchAt) {
369  result << *startFrom;
370  ++startFrom;
371  }
372 
373  // Add in the replacement
374  result << char_replacement_;
375 
376  // Move past the match.
377  ++startFrom;
378  }
379 
380  return (result.str());
381 #endif
382  }
383 
384 private:
385  std::string char_set_;
386  std::string char_replacement_;
387 
388 #ifdef USE_REGEX
389  regex scrub_exp_;
390 #else
391  regex_t scrub_exp_;
392 #endif
393 };
394 
395 StringSanitizer::StringSanitizer(const std::string& char_set,
396  const std::string& char_replacement)
397  : impl_(new StringSanitizerImpl(char_set, char_replacement)) {
398 }
399 
401  delete impl_;
402 }
403 
404 std::string
405 StringSanitizer::scrub(const std::string& original) {
406  return (impl_->scrub(original));
407 }
408 
409 } // namespace str
410 } // namespace util
411 } // namespace isc
virtual const char * what() const
Returns a C-style character string of the cause of the exception.
void decodeFormattedHexString(const std::string &hex_string, std::vector< uint8_t > &binary)
Converts a formatted string of hexadecimal digits into a vector.
Definition: strutil.cc:266
#define isc_throw(type, stream)
A shortcut macro to insert known values into exception arguments.
A generic exception that is thrown if a parameter given to a method is considered invalid in that context.
std::string scrub(const std::string &original)
Definition: strutil.cc:331
void decodeHex(const string &input, vector< uint8_t > &result)
Decode a text encoded in the base16 ('hex') format into the original data.
Definition: base_n.cc:466
A generic exception that is thrown when an unexpected error condition occurs.
std::string getToken(std::istringstream &iss)
Returns one token from the given stringstream.
Definition: strutil.cc:186
void normalizeSlash(std::string &name)
Normalize Backslash.
Definition: strutil.cc:41
vector< string > tokens(const std::string &text, const std::string &delim, bool escape)
Split String into Tokens.
Definition: strutil.cc:77
A Set of C++ Utilities for Manipulating Strings.
Definition: strutil.h:30
std::vector< uint8_t > quotedStringToBinary(const std::string &quoted_string)
Converts a string in quotes into vector.
Definition: strutil.cc:196
Defines the logger used by the top-level component of kea-dhcp-ddns.
void decodeColonSeparatedHexString(const std::string &hex_string, std::vector< uint8_t > &binary)
Converts a string of hexadecimal digits with colons into a vector.
Definition: strutil.cc:215
std::string scrub(const std::string &original)
Returns a scrubbed copy of a given string.
Definition: strutil.cc:405
string trim(const string &instring)
Trim Leading and Trailing Spaces.
Definition: strutil.cc:53
StringSanitizerImpl(const std::string &char_set, const std::string &char_replacement)
Definition: strutil.cc:304
std::string format(const std::string &format, const std::vector< std::string > &args)
Apply Formatting.
Definition: strutil.cc:157