// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker
5*635a8641SAndroid Build Coastguard Worker #include "base/strings/string_split.h"
6*635a8641SAndroid Build Coastguard Worker
7*635a8641SAndroid Build Coastguard Worker #include <stddef.h>
8*635a8641SAndroid Build Coastguard Worker
9*635a8641SAndroid Build Coastguard Worker #include "base/logging.h"
10*635a8641SAndroid Build Coastguard Worker #include "base/strings/string_util.h"
11*635a8641SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h"
12*635a8641SAndroid Build Coastguard Worker
13*635a8641SAndroid Build Coastguard Worker namespace base {
14*635a8641SAndroid Build Coastguard Worker
15*635a8641SAndroid Build Coastguard Worker namespace {
16*635a8641SAndroid Build Coastguard Worker
17*635a8641SAndroid Build Coastguard Worker // PieceToOutputType converts a StringPiece as needed to a given output type,
18*635a8641SAndroid Build Coastguard Worker // which is either the same type of StringPiece (a NOP) or the corresponding
19*635a8641SAndroid Build Coastguard Worker // non-piece string type.
20*635a8641SAndroid Build Coastguard Worker //
21*635a8641SAndroid Build Coastguard Worker // The default converter is a NOP, it works when the OutputType is the
22*635a8641SAndroid Build Coastguard Worker // correct StringPiece.
23*635a8641SAndroid Build Coastguard Worker template<typename Str, typename OutputType>
PieceToOutputType(BasicStringPiece<Str> piece)24*635a8641SAndroid Build Coastguard Worker OutputType PieceToOutputType(BasicStringPiece<Str> piece) {
25*635a8641SAndroid Build Coastguard Worker return piece;
26*635a8641SAndroid Build Coastguard Worker }
27*635a8641SAndroid Build Coastguard Worker template<> // Convert StringPiece to std::string
PieceToOutputType(StringPiece piece)28*635a8641SAndroid Build Coastguard Worker std::string PieceToOutputType<std::string, std::string>(StringPiece piece) {
29*635a8641SAndroid Build Coastguard Worker return piece.as_string();
30*635a8641SAndroid Build Coastguard Worker }
31*635a8641SAndroid Build Coastguard Worker template<> // Convert StringPiece16 to string16.
PieceToOutputType(StringPiece16 piece)32*635a8641SAndroid Build Coastguard Worker string16 PieceToOutputType<string16, string16>(StringPiece16 piece) {
33*635a8641SAndroid Build Coastguard Worker return piece.as_string();
34*635a8641SAndroid Build Coastguard Worker }
35*635a8641SAndroid Build Coastguard Worker
36*635a8641SAndroid Build Coastguard Worker // Returns either the ASCII or UTF-16 whitespace.
37*635a8641SAndroid Build Coastguard Worker template<typename Str> BasicStringPiece<Str> WhitespaceForType();
WhitespaceForType()38*635a8641SAndroid Build Coastguard Worker template<> StringPiece16 WhitespaceForType<string16>() {
39*635a8641SAndroid Build Coastguard Worker return kWhitespaceUTF16;
40*635a8641SAndroid Build Coastguard Worker }
WhitespaceForType()41*635a8641SAndroid Build Coastguard Worker template<> StringPiece WhitespaceForType<std::string>() {
42*635a8641SAndroid Build Coastguard Worker return kWhitespaceASCII;
43*635a8641SAndroid Build Coastguard Worker }
44*635a8641SAndroid Build Coastguard Worker
45*635a8641SAndroid Build Coastguard Worker // Optimize the single-character case to call find() on the string instead,
46*635a8641SAndroid Build Coastguard Worker // since this is the common case and can be made faster. This could have been
47*635a8641SAndroid Build Coastguard Worker // done with template specialization too, but would have been less clear.
48*635a8641SAndroid Build Coastguard Worker //
49*635a8641SAndroid Build Coastguard Worker // There is no corresponding FindFirstNotOf because StringPiece already
50*635a8641SAndroid Build Coastguard Worker // implements these different versions that do the optimized searching.
FindFirstOf(StringPiece piece,char c,size_t pos)51*635a8641SAndroid Build Coastguard Worker size_t FindFirstOf(StringPiece piece, char c, size_t pos) {
52*635a8641SAndroid Build Coastguard Worker return piece.find(c, pos);
53*635a8641SAndroid Build Coastguard Worker }
FindFirstOf(StringPiece16 piece,char16 c,size_t pos)54*635a8641SAndroid Build Coastguard Worker size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) {
55*635a8641SAndroid Build Coastguard Worker return piece.find(c, pos);
56*635a8641SAndroid Build Coastguard Worker }
FindFirstOf(StringPiece piece,StringPiece one_of,size_t pos)57*635a8641SAndroid Build Coastguard Worker size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) {
58*635a8641SAndroid Build Coastguard Worker return piece.find_first_of(one_of, pos);
59*635a8641SAndroid Build Coastguard Worker }
FindFirstOf(StringPiece16 piece,StringPiece16 one_of,size_t pos)60*635a8641SAndroid Build Coastguard Worker size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) {
61*635a8641SAndroid Build Coastguard Worker return piece.find_first_of(one_of, pos);
62*635a8641SAndroid Build Coastguard Worker }
63*635a8641SAndroid Build Coastguard Worker
64*635a8641SAndroid Build Coastguard Worker // General string splitter template. Can take 8- or 16-bit input, can produce
65*635a8641SAndroid Build Coastguard Worker // the corresponding string or StringPiece output, and can take single- or
66*635a8641SAndroid Build Coastguard Worker // multiple-character delimiters.
67*635a8641SAndroid Build Coastguard Worker //
68*635a8641SAndroid Build Coastguard Worker // DelimiterType is either a character (Str::value_type) or a string piece of
69*635a8641SAndroid Build Coastguard Worker // multiple characters (BasicStringPiece<Str>). StringPiece has a version of
70*635a8641SAndroid Build Coastguard Worker // find for both of these cases, and the single-character version is the most
71*635a8641SAndroid Build Coastguard Worker // common and can be implemented faster, which is why this is a template.
72*635a8641SAndroid Build Coastguard Worker template<typename Str, typename OutputStringType, typename DelimiterType>
SplitStringT(BasicStringPiece<Str> str,DelimiterType delimiter,WhitespaceHandling whitespace,SplitResult result_type)73*635a8641SAndroid Build Coastguard Worker static std::vector<OutputStringType> SplitStringT(
74*635a8641SAndroid Build Coastguard Worker BasicStringPiece<Str> str,
75*635a8641SAndroid Build Coastguard Worker DelimiterType delimiter,
76*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
77*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
78*635a8641SAndroid Build Coastguard Worker std::vector<OutputStringType> result;
79*635a8641SAndroid Build Coastguard Worker if (str.empty())
80*635a8641SAndroid Build Coastguard Worker return result;
81*635a8641SAndroid Build Coastguard Worker
82*635a8641SAndroid Build Coastguard Worker size_t start = 0;
83*635a8641SAndroid Build Coastguard Worker while (start != Str::npos) {
84*635a8641SAndroid Build Coastguard Worker size_t end = FindFirstOf(str, delimiter, start);
85*635a8641SAndroid Build Coastguard Worker
86*635a8641SAndroid Build Coastguard Worker BasicStringPiece<Str> piece;
87*635a8641SAndroid Build Coastguard Worker if (end == Str::npos) {
88*635a8641SAndroid Build Coastguard Worker piece = str.substr(start);
89*635a8641SAndroid Build Coastguard Worker start = Str::npos;
90*635a8641SAndroid Build Coastguard Worker } else {
91*635a8641SAndroid Build Coastguard Worker piece = str.substr(start, end - start);
92*635a8641SAndroid Build Coastguard Worker start = end + 1;
93*635a8641SAndroid Build Coastguard Worker }
94*635a8641SAndroid Build Coastguard Worker
95*635a8641SAndroid Build Coastguard Worker if (whitespace == TRIM_WHITESPACE)
96*635a8641SAndroid Build Coastguard Worker piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL);
97*635a8641SAndroid Build Coastguard Worker
98*635a8641SAndroid Build Coastguard Worker if (result_type == SPLIT_WANT_ALL || !piece.empty())
99*635a8641SAndroid Build Coastguard Worker result.push_back(PieceToOutputType<Str, OutputStringType>(piece));
100*635a8641SAndroid Build Coastguard Worker }
101*635a8641SAndroid Build Coastguard Worker return result;
102*635a8641SAndroid Build Coastguard Worker }
103*635a8641SAndroid Build Coastguard Worker
AppendStringKeyValue(StringPiece input,char delimiter,StringPairs * result)104*635a8641SAndroid Build Coastguard Worker bool AppendStringKeyValue(StringPiece input,
105*635a8641SAndroid Build Coastguard Worker char delimiter,
106*635a8641SAndroid Build Coastguard Worker StringPairs* result) {
107*635a8641SAndroid Build Coastguard Worker // Always append a new item regardless of success (it might be empty). The
108*635a8641SAndroid Build Coastguard Worker // below code will copy the strings directly into the result pair.
109*635a8641SAndroid Build Coastguard Worker result->resize(result->size() + 1);
110*635a8641SAndroid Build Coastguard Worker auto& result_pair = result->back();
111*635a8641SAndroid Build Coastguard Worker
112*635a8641SAndroid Build Coastguard Worker // Find the delimiter.
113*635a8641SAndroid Build Coastguard Worker size_t end_key_pos = input.find_first_of(delimiter);
114*635a8641SAndroid Build Coastguard Worker if (end_key_pos == std::string::npos) {
115*635a8641SAndroid Build Coastguard Worker DVLOG(1) << "cannot find delimiter in: " << input;
116*635a8641SAndroid Build Coastguard Worker return false; // No delimiter.
117*635a8641SAndroid Build Coastguard Worker }
118*635a8641SAndroid Build Coastguard Worker input.substr(0, end_key_pos).CopyToString(&result_pair.first);
119*635a8641SAndroid Build Coastguard Worker
120*635a8641SAndroid Build Coastguard Worker // Find the value string.
121*635a8641SAndroid Build Coastguard Worker StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos);
122*635a8641SAndroid Build Coastguard Worker size_t begin_value_pos = remains.find_first_not_of(delimiter);
123*635a8641SAndroid Build Coastguard Worker if (begin_value_pos == StringPiece::npos) {
124*635a8641SAndroid Build Coastguard Worker DVLOG(1) << "cannot parse value from input: " << input;
125*635a8641SAndroid Build Coastguard Worker return false; // No value.
126*635a8641SAndroid Build Coastguard Worker }
127*635a8641SAndroid Build Coastguard Worker remains.substr(begin_value_pos, remains.size() - begin_value_pos)
128*635a8641SAndroid Build Coastguard Worker .CopyToString(&result_pair.second);
129*635a8641SAndroid Build Coastguard Worker
130*635a8641SAndroid Build Coastguard Worker return true;
131*635a8641SAndroid Build Coastguard Worker }
132*635a8641SAndroid Build Coastguard Worker
133*635a8641SAndroid Build Coastguard Worker template <typename Str, typename OutputStringType>
SplitStringUsingSubstrT(BasicStringPiece<Str> input,BasicStringPiece<Str> delimiter,WhitespaceHandling whitespace,SplitResult result_type,std::vector<OutputStringType> * result)134*635a8641SAndroid Build Coastguard Worker void SplitStringUsingSubstrT(BasicStringPiece<Str> input,
135*635a8641SAndroid Build Coastguard Worker BasicStringPiece<Str> delimiter,
136*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
137*635a8641SAndroid Build Coastguard Worker SplitResult result_type,
138*635a8641SAndroid Build Coastguard Worker std::vector<OutputStringType>* result) {
139*635a8641SAndroid Build Coastguard Worker using Piece = BasicStringPiece<Str>;
140*635a8641SAndroid Build Coastguard Worker using size_type = typename Piece::size_type;
141*635a8641SAndroid Build Coastguard Worker
142*635a8641SAndroid Build Coastguard Worker result->clear();
143*635a8641SAndroid Build Coastguard Worker for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos;
144*635a8641SAndroid Build Coastguard Worker begin_index = end_index + delimiter.size()) {
145*635a8641SAndroid Build Coastguard Worker end_index = input.find(delimiter, begin_index);
146*635a8641SAndroid Build Coastguard Worker Piece term = end_index == Piece::npos
147*635a8641SAndroid Build Coastguard Worker ? input.substr(begin_index)
148*635a8641SAndroid Build Coastguard Worker : input.substr(begin_index, end_index - begin_index);
149*635a8641SAndroid Build Coastguard Worker
150*635a8641SAndroid Build Coastguard Worker if (whitespace == TRIM_WHITESPACE)
151*635a8641SAndroid Build Coastguard Worker term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL);
152*635a8641SAndroid Build Coastguard Worker
153*635a8641SAndroid Build Coastguard Worker if (result_type == SPLIT_WANT_ALL || !term.empty())
154*635a8641SAndroid Build Coastguard Worker result->push_back(PieceToOutputType<Str, OutputStringType>(term));
155*635a8641SAndroid Build Coastguard Worker }
156*635a8641SAndroid Build Coastguard Worker }
157*635a8641SAndroid Build Coastguard Worker
158*635a8641SAndroid Build Coastguard Worker } // namespace
159*635a8641SAndroid Build Coastguard Worker
SplitString(StringPiece input,StringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)160*635a8641SAndroid Build Coastguard Worker std::vector<std::string> SplitString(StringPiece input,
161*635a8641SAndroid Build Coastguard Worker StringPiece separators,
162*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
163*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
164*635a8641SAndroid Build Coastguard Worker if (separators.size() == 1) {
165*635a8641SAndroid Build Coastguard Worker return SplitStringT<std::string, std::string, char>(
166*635a8641SAndroid Build Coastguard Worker input, separators[0], whitespace, result_type);
167*635a8641SAndroid Build Coastguard Worker }
168*635a8641SAndroid Build Coastguard Worker return SplitStringT<std::string, std::string, StringPiece>(
169*635a8641SAndroid Build Coastguard Worker input, separators, whitespace, result_type);
170*635a8641SAndroid Build Coastguard Worker }
171*635a8641SAndroid Build Coastguard Worker
SplitString(StringPiece16 input,StringPiece16 separators,WhitespaceHandling whitespace,SplitResult result_type)172*635a8641SAndroid Build Coastguard Worker std::vector<string16> SplitString(StringPiece16 input,
173*635a8641SAndroid Build Coastguard Worker StringPiece16 separators,
174*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
175*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
176*635a8641SAndroid Build Coastguard Worker if (separators.size() == 1) {
177*635a8641SAndroid Build Coastguard Worker return SplitStringT<string16, string16, char16>(
178*635a8641SAndroid Build Coastguard Worker input, separators[0], whitespace, result_type);
179*635a8641SAndroid Build Coastguard Worker }
180*635a8641SAndroid Build Coastguard Worker return SplitStringT<string16, string16, StringPiece16>(
181*635a8641SAndroid Build Coastguard Worker input, separators, whitespace, result_type);
182*635a8641SAndroid Build Coastguard Worker }
183*635a8641SAndroid Build Coastguard Worker
SplitStringPiece(StringPiece input,StringPiece separators,WhitespaceHandling whitespace,SplitResult result_type)184*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece> SplitStringPiece(StringPiece input,
185*635a8641SAndroid Build Coastguard Worker StringPiece separators,
186*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
187*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
188*635a8641SAndroid Build Coastguard Worker if (separators.size() == 1) {
189*635a8641SAndroid Build Coastguard Worker return SplitStringT<std::string, StringPiece, char>(
190*635a8641SAndroid Build Coastguard Worker input, separators[0], whitespace, result_type);
191*635a8641SAndroid Build Coastguard Worker }
192*635a8641SAndroid Build Coastguard Worker return SplitStringT<std::string, StringPiece, StringPiece>(
193*635a8641SAndroid Build Coastguard Worker input, separators, whitespace, result_type);
194*635a8641SAndroid Build Coastguard Worker }
195*635a8641SAndroid Build Coastguard Worker
SplitStringPiece(StringPiece16 input,StringPiece16 separators,WhitespaceHandling whitespace,SplitResult result_type)196*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece16> SplitStringPiece(StringPiece16 input,
197*635a8641SAndroid Build Coastguard Worker StringPiece16 separators,
198*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
199*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
200*635a8641SAndroid Build Coastguard Worker if (separators.size() == 1) {
201*635a8641SAndroid Build Coastguard Worker return SplitStringT<string16, StringPiece16, char16>(
202*635a8641SAndroid Build Coastguard Worker input, separators[0], whitespace, result_type);
203*635a8641SAndroid Build Coastguard Worker }
204*635a8641SAndroid Build Coastguard Worker return SplitStringT<string16, StringPiece16, StringPiece16>(
205*635a8641SAndroid Build Coastguard Worker input, separators, whitespace, result_type);
206*635a8641SAndroid Build Coastguard Worker }
207*635a8641SAndroid Build Coastguard Worker
SplitStringIntoKeyValuePairs(StringPiece input,char key_value_delimiter,char key_value_pair_delimiter,StringPairs * key_value_pairs)208*635a8641SAndroid Build Coastguard Worker bool SplitStringIntoKeyValuePairs(StringPiece input,
209*635a8641SAndroid Build Coastguard Worker char key_value_delimiter,
210*635a8641SAndroid Build Coastguard Worker char key_value_pair_delimiter,
211*635a8641SAndroid Build Coastguard Worker StringPairs* key_value_pairs) {
212*635a8641SAndroid Build Coastguard Worker key_value_pairs->clear();
213*635a8641SAndroid Build Coastguard Worker
214*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece> pairs = SplitStringPiece(
215*635a8641SAndroid Build Coastguard Worker input, std::string(1, key_value_pair_delimiter),
216*635a8641SAndroid Build Coastguard Worker TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
217*635a8641SAndroid Build Coastguard Worker key_value_pairs->reserve(pairs.size());
218*635a8641SAndroid Build Coastguard Worker
219*635a8641SAndroid Build Coastguard Worker bool success = true;
220*635a8641SAndroid Build Coastguard Worker for (const StringPiece& pair : pairs) {
221*635a8641SAndroid Build Coastguard Worker if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) {
222*635a8641SAndroid Build Coastguard Worker // Don't return here, to allow for pairs without associated
223*635a8641SAndroid Build Coastguard Worker // value or key; just record that the split failed.
224*635a8641SAndroid Build Coastguard Worker success = false;
225*635a8641SAndroid Build Coastguard Worker }
226*635a8641SAndroid Build Coastguard Worker }
227*635a8641SAndroid Build Coastguard Worker return success;
228*635a8641SAndroid Build Coastguard Worker }
229*635a8641SAndroid Build Coastguard Worker
SplitStringUsingSubstr(StringPiece16 input,StringPiece16 delimiter,WhitespaceHandling whitespace,SplitResult result_type)230*635a8641SAndroid Build Coastguard Worker std::vector<string16> SplitStringUsingSubstr(StringPiece16 input,
231*635a8641SAndroid Build Coastguard Worker StringPiece16 delimiter,
232*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
233*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
234*635a8641SAndroid Build Coastguard Worker std::vector<string16> result;
235*635a8641SAndroid Build Coastguard Worker SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
236*635a8641SAndroid Build Coastguard Worker return result;
237*635a8641SAndroid Build Coastguard Worker }
238*635a8641SAndroid Build Coastguard Worker
SplitStringUsingSubstr(StringPiece input,StringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)239*635a8641SAndroid Build Coastguard Worker std::vector<std::string> SplitStringUsingSubstr(StringPiece input,
240*635a8641SAndroid Build Coastguard Worker StringPiece delimiter,
241*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
242*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
243*635a8641SAndroid Build Coastguard Worker std::vector<std::string> result;
244*635a8641SAndroid Build Coastguard Worker SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
245*635a8641SAndroid Build Coastguard Worker return result;
246*635a8641SAndroid Build Coastguard Worker }
247*635a8641SAndroid Build Coastguard Worker
SplitStringPieceUsingSubstr(StringPiece16 input,StringPiece16 delimiter,WhitespaceHandling whitespace,SplitResult result_type)248*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece16> SplitStringPieceUsingSubstr(
249*635a8641SAndroid Build Coastguard Worker StringPiece16 input,
250*635a8641SAndroid Build Coastguard Worker StringPiece16 delimiter,
251*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
252*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
253*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece16> result;
254*635a8641SAndroid Build Coastguard Worker SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
255*635a8641SAndroid Build Coastguard Worker return result;
256*635a8641SAndroid Build Coastguard Worker }
257*635a8641SAndroid Build Coastguard Worker
SplitStringPieceUsingSubstr(StringPiece input,StringPiece delimiter,WhitespaceHandling whitespace,SplitResult result_type)258*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece> SplitStringPieceUsingSubstr(
259*635a8641SAndroid Build Coastguard Worker StringPiece input,
260*635a8641SAndroid Build Coastguard Worker StringPiece delimiter,
261*635a8641SAndroid Build Coastguard Worker WhitespaceHandling whitespace,
262*635a8641SAndroid Build Coastguard Worker SplitResult result_type) {
263*635a8641SAndroid Build Coastguard Worker std::vector<StringPiece> result;
264*635a8641SAndroid Build Coastguard Worker SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result);
265*635a8641SAndroid Build Coastguard Worker return result;
266*635a8641SAndroid Build Coastguard Worker }
267*635a8641SAndroid Build Coastguard Worker
268*635a8641SAndroid Build Coastguard Worker } // namespace base
269