Avi Drissman | e4622aa | 2022-09-08 20:36:06 | [diff] [blame] | 1 | // Copyright 2012 The Chromium Authors |
[email protected] | 5ae0b763e | 2013-02-07 23:01:39 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #ifndef BASE_STRINGS_STRING_SPLIT_H_ |
| 6 | #define BASE_STRINGS_STRING_SPLIT_H_ |
| 7 | |
| 8 | #include <string> |
| 9 | #include <utility> |
| 10 | #include <vector> |
| 11 | |
| 12 | #include "base/base_export.h" |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 13 | #include "base/strings/string_piece.h" |
Jan Wilken Dörrie | 7179fc9 | 2019-10-08 07:44:05 | [diff] [blame] | 14 | #include "build/build_config.h" |
[email protected] | 5ae0b763e | 2013-02-07 23:01:39 | [diff] [blame] | 15 | |
| 16 | namespace base { |
| 17 | |
// Controls how whitespace in each split piece is treated: KEEP_WHITESPACE
// keeps all whitespace, while with TRIM_WHITESPACE a whitespace-only piece
// counts as empty (see SPLIT_WANT_NONEMPTY below).
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 18 | enum WhitespaceHandling { |
| 19 | KEEP_WHITESPACE, |
| 20 | TRIM_WHITESPACE, |
| 21 | }; |
| 22 | |
// Selects whether empty pieces are kept in or dropped from the split output.
| 23 | enum SplitResult { |
| 24 | // Strictly return all results, including empty ones. |
| 25 | // |
| 26 | // If the input is ",," and the separator is ',', this will return a |
| 27 | // vector of three empty strings. |
| 28 | SPLIT_WANT_ALL, |
| 29 | 
| 30 | // Only nonempty pieces will be added to the results. Multiple separators |
| 31 | // will be coalesced. Separators at the beginning and end of the input will |
| 32 | // be ignored. With TRIM_WHITESPACE, whitespace-only results will be dropped. |
| 33 | // |
| 34 | // If the input is ",," and the separator is ',', this will return an empty |
| 35 | // vector. |
| 36 | SPLIT_WANT_NONEMPTY, |
| 37 | }; |
| 38 | |
| 39 | // Split the given string on ANY of the given separators, returning copies of |
| 40 | // the resulting pieces. |
| 41 | // |
Robert Liao | 793242a | 2019-12-04 22:17:33 | [diff] [blame] | 42 | // Note this is the inverse of JoinString() defined in string_util.h. |
| 43 | // |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 44 | // To split on either commas or semicolons, keeping all whitespace: |
| 45 | // |
| 46 | // std::vector<std::string> tokens = base::SplitString( |
Hajime Hoshi | 3bd6f0ef | 2020-09-10 17:14:50 | [diff] [blame] | 47 | // input, ",;", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 48 | [[nodiscard]] BASE_EXPORT std::vector<std::string> SplitString( |
| 49 | StringPiece input, |
| 50 | StringPiece separators, |
| 51 | WhitespaceHandling whitespace, |
| 52 | SplitResult result_type); |
| 53 | [[nodiscard]] BASE_EXPORT std::vector<std::u16string> SplitString( |
Jan Wilken Dörrie | 085b2aa | 2021-03-12 16:26:57 | [diff] [blame] | 54 | StringPiece16 input, |
| 55 | StringPiece16 separators, |
| 56 | WhitespaceHandling whitespace, |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 57 | SplitResult result_type); |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 58 | |
| 59 | // Like SplitString above except it returns a vector of StringPieces which |
| 60 | // reference the original buffer without copying. Although you have to be |
| 61 | // careful to keep the original string alive and unmodified, this provides an |
| 62 | // efficient way to iterate through tokens in a string. |
| 63 | // |
Robert Liao | 793242a | 2019-12-04 22:17:33 | [diff] [blame] | 64 | // Note this is the inverse of JoinString() defined in string_util.h. |
| 65 | // |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 66 | // To iterate through all whitespace-separated tokens in an input string: |
| 67 | // |
| 68 | // for (const auto& cur : |
| 69 | // base::SplitStringPiece(input, base::kWhitespaceASCII, |
| 70 | // base::KEEP_WHITESPACE, |
| 71 | // base::SPLIT_WANT_NONEMPTY)) { |
| 72 | // ... |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 73 | [[nodiscard]] BASE_EXPORT std::vector<StringPiece> SplitStringPiece( |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 74 | StringPiece input, |
| 75 | StringPiece separators, |
| 76 | WhitespaceHandling whitespace, |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 77 | SplitResult result_type); |
| 78 | [[nodiscard]] BASE_EXPORT std::vector<StringPiece16> SplitStringPiece( |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 79 | StringPiece16 input, |
| 80 | StringPiece16 separators, |
| 81 | WhitespaceHandling whitespace, |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 82 | SplitResult result_type); |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 83 | |
// List of (key, value) string pairs; the output type filled in by the
// SplitStringIntoKeyValuePairs* functions via |key_value_pairs|.
| 84 | using StringPairs = std::vector<std::pair<std::string, std::string>>; |
| 85 | |
| 86 | // Splits |input| into key value pairs according to the given delimiters and |
| 87 | // removes whitespace leading each key and trailing each value. Returns true |
| 88 | // only if each pair has a non-empty key and value. |key_value_pairs| will |
| 89 | // include ("","") pairs for entries without |key_value_delimiter|. |
brettw | ce0fbef | 2015-08-13 22:10:03 | [diff] [blame] | 90 | BASE_EXPORT bool SplitStringIntoKeyValuePairs(StringPiece input, |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 91 | char key_value_delimiter, |
| 92 | char key_value_pair_delimiter, |
| 93 | StringPairs* key_value_pairs); |
| 94 | |
Luum Habtemariam | 9268d9a | 2018-10-29 22:43:12 | [diff] [blame] | 95 | // Similar to SplitStringIntoKeyValuePairs, but uses a substring |
| 96 | // |key_value_pair_delimiter| instead of a single char. |
| 97 | BASE_EXPORT bool SplitStringIntoKeyValuePairsUsingSubstr( |
| 98 | StringPiece input, |
| 99 | char key_value_delimiter, |
| 100 | StringPiece key_value_pair_delimiter, |
| 101 | StringPairs* key_value_pairs); |
| 102 | |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 103 | // Similar to SplitString, but uses a substring delimiter instead of a list of |
| 104 | // characters that are all possible delimiters. |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 105 | [[nodiscard]] BASE_EXPORT std::vector<std::u16string> SplitStringUsingSubstr( |
brettw | 8d858df9 | 2016-09-23 03:13:29 | [diff] [blame] | 106 | StringPiece16 input, |
| 107 | StringPiece16 delimiter, |
| 108 | WhitespaceHandling whitespace, |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 109 | SplitResult result_type); |
| 110 | [[nodiscard]] BASE_EXPORT std::vector<std::string> SplitStringUsingSubstr( |
brettw | 8d858df9 | 2016-09-23 03:13:29 | [diff] [blame] | 111 | StringPiece input, |
| 112 | StringPiece delimiter, |
| 113 | WhitespaceHandling whitespace, |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 114 | SplitResult result_type); |
brettw | 977caaa | 2015-06-12 19:57:50 | [diff] [blame] | 115 | |
arjanl | 0e8b35a | 2016-01-05 10:24:54 | [diff] [blame] | 116 | // Like SplitStringUsingSubstr above except it returns a vector of StringPieces |
| 117 | // which reference the original buffer without copying. Although you have to be |
| 118 | // careful to keep the original string alive and unmodified, this provides an |
| 119 | // efficient way to iterate through tokens in a string. |
| 120 | // |
| 121 | // To iterate through all newline-separated tokens in an input string: |
| 122 | // |
| 123 | // for (const auto& cur : |
| 124 | // base::SplitStringPieceUsingSubstr(input, "\r\n", |
| 125 | // base::KEEP_WHITESPACE, |
| 126 | // base::SPLIT_WANT_NONEMPTY)) { |
| 127 | // ... |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 128 | [[nodiscard]] BASE_EXPORT std::vector<StringPiece16> |
| 129 | SplitStringPieceUsingSubstr(StringPiece16 input, |
| 130 | StringPiece16 delimiter, |
| 131 | WhitespaceHandling whitespace, |
| 132 | SplitResult result_type); |
| 133 | [[nodiscard]] BASE_EXPORT std::vector<StringPiece> SplitStringPieceUsingSubstr( |
arjanl | 0e8b35a | 2016-01-05 10:24:54 | [diff] [blame] | 134 | StringPiece input, |
| 135 | StringPiece delimiter, |
| 136 | WhitespaceHandling whitespace, |
Daniel Cheng | 4455c984 | 2022-01-13 23:26:37 | [diff] [blame] | 137 | SplitResult result_type); |
arjanl | 0e8b35a | 2016-01-05 10:24:54 | [diff] [blame] | 138 | |
[email protected] | 5ae0b763e | 2013-02-07 23:01:39 | [diff] [blame] | 139 | } // namespace base |
| 140 | |
Xiaohan Wang | 6700dcf1 | 2022-01-15 14:47:00 | [diff] [blame] | 141 | #if BUILDFLAG(IS_WIN) |
Jan Wilken Dörrie | 665969c | 2020-06-04 11:46:25 | [diff] [blame] | 142 | #include "base/strings/string_split_win.h" |
| 143 | #endif |
| 144 | |
[email protected] | 5ae0b763e | 2013-02-07 23:01:39 | [diff] [blame] | 145 | #endif // BASE_STRINGS_STRING_SPLIT_H_ |