xref: /aosp_15_r20/external/abseil-cpp/absl/strings/ascii.cc (revision 9356374a3709195abf420251b3e825997ff56c0f)
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*9356374aSAndroid Build Coastguard Worker 
15*9356374aSAndroid Build Coastguard Worker #include "absl/strings/ascii.h"
16*9356374aSAndroid Build Coastguard Worker 
17*9356374aSAndroid Build Coastguard Worker #include <climits>
18*9356374aSAndroid Build Coastguard Worker #include <cstddef>
19*9356374aSAndroid Build Coastguard Worker #include <cstring>
20*9356374aSAndroid Build Coastguard Worker #include <string>
21*9356374aSAndroid Build Coastguard Worker 
22*9356374aSAndroid Build Coastguard Worker #include "absl/base/attributes.h"
23*9356374aSAndroid Build Coastguard Worker #include "absl/base/config.h"
24*9356374aSAndroid Build Coastguard Worker #include "absl/base/nullability.h"
25*9356374aSAndroid Build Coastguard Worker #include "absl/base/optimization.h"
26*9356374aSAndroid Build Coastguard Worker 
27*9356374aSAndroid Build Coastguard Worker namespace absl {
28*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
29*9356374aSAndroid Build Coastguard Worker namespace ascii_internal {
30*9356374aSAndroid Build Coastguard Worker 
// # Table generated by this Python code (bit 0x02 is currently unused):
// TODO(mbar) Move Python code for generation of table to BUILD and link here.

// NOTE: The kPropertyBits table used within this code was generated by
// Python code of the following form (Python 2 syntax: `print` statement and
// integer `/`). Bit 0x02 is currently unused and available.
//
// def Hex2(n):
//   return '0x' + hex(n/16)[2:] + hex(n%16)[2:]
// def IsPunct(ch):
//   return (ord(ch) >= 32 and ord(ch) < 127 and
//           not ch.isspace() and not ch.isalnum())
// def IsBlank(ch):
//   return ch in ' \t'
// def IsCntrl(ch):
//   return ord(ch) < 32 or ord(ch) == 127
// def IsXDigit(ch):
//   return ch.isdigit() or ch.lower() in 'abcdef'
// for i in range(128):
//   ch = chr(i)
//   mask = ((ch.isalpha() and 0x01 or 0) |
//           (ch.isalnum() and 0x04 or 0) |
//           (ch.isspace() and 0x08 or 0) |
//           (IsPunct(ch) and 0x10 or 0) |
//           (IsBlank(ch) and 0x20 or 0) |
//           (IsCntrl(ch) and 0x40 or 0) |
//           (IsXDigit(ch) and 0x80 or 0))
//   print Hex2(mask) + ',',
//   if i % 16 == 7:
//     print ' //', Hex2(i & 0x78)
//   elif i % 16 == 15:
//     print
63*9356374aSAndroid Build Coastguard Worker 
64*9356374aSAndroid Build Coastguard Worker // clang-format off
65*9356374aSAndroid Build Coastguard Worker // Array of bitfields holding character information. Each bit value corresponds
66*9356374aSAndroid Build Coastguard Worker // to a particular character feature. For readability, and because the value
67*9356374aSAndroid Build Coastguard Worker // of these bits is tightly coupled to this implementation, the individual bits
68*9356374aSAndroid Build Coastguard Worker // are not named. Note that bitfields for all characters above ASCII 127 are
69*9356374aSAndroid Build Coastguard Worker // zero-initialized.
70*9356374aSAndroid Build Coastguard Worker ABSL_DLL const unsigned char kPropertyBits[256] = {
71*9356374aSAndroid Build Coastguard Worker     0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x00
72*9356374aSAndroid Build Coastguard Worker     0x40, 0x68, 0x48, 0x48, 0x48, 0x48, 0x40, 0x40,
73*9356374aSAndroid Build Coastguard Worker     0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,  // 0x10
74*9356374aSAndroid Build Coastguard Worker     0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40,
75*9356374aSAndroid Build Coastguard Worker     0x28, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,  // 0x20
76*9356374aSAndroid Build Coastguard Worker     0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
77*9356374aSAndroid Build Coastguard Worker     0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84, 0x84,  // 0x30
78*9356374aSAndroid Build Coastguard Worker     0x84, 0x84, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
79*9356374aSAndroid Build Coastguard Worker     0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x40
80*9356374aSAndroid Build Coastguard Worker     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
81*9356374aSAndroid Build Coastguard Worker     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x50
82*9356374aSAndroid Build Coastguard Worker     0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x10,
83*9356374aSAndroid Build Coastguard Worker     0x10, 0x85, 0x85, 0x85, 0x85, 0x85, 0x85, 0x05,  // 0x60
84*9356374aSAndroid Build Coastguard Worker     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
85*9356374aSAndroid Build Coastguard Worker     0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,  // 0x70
86*9356374aSAndroid Build Coastguard Worker     0x05, 0x05, 0x05, 0x10, 0x10, 0x10, 0x10, 0x40,
87*9356374aSAndroid Build Coastguard Worker };
88*9356374aSAndroid Build Coastguard Worker 
89*9356374aSAndroid Build Coastguard Worker // Array of characters for the ascii_tolower() function. For values 'A'
90*9356374aSAndroid Build Coastguard Worker // through 'Z', return the lower-case character; otherwise, return the
91*9356374aSAndroid Build Coastguard Worker // identity of the passed character.
92*9356374aSAndroid Build Coastguard Worker ABSL_DLL const char kToLower[256] = {
93*9356374aSAndroid Build Coastguard Worker   '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
94*9356374aSAndroid Build Coastguard Worker   '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
95*9356374aSAndroid Build Coastguard Worker   '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
96*9356374aSAndroid Build Coastguard Worker   '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
97*9356374aSAndroid Build Coastguard Worker   '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
98*9356374aSAndroid Build Coastguard Worker   '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
99*9356374aSAndroid Build Coastguard Worker   '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
100*9356374aSAndroid Build Coastguard Worker   '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
101*9356374aSAndroid Build Coastguard Worker   '\x40',    'a',    'b',    'c',    'd',    'e',    'f',    'g',
102*9356374aSAndroid Build Coastguard Worker      'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',
103*9356374aSAndroid Build Coastguard Worker      'p',    'q',    'r',    's',    't',    'u',    'v',    'w',
104*9356374aSAndroid Build Coastguard Worker      'x',    'y',    'z', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
105*9356374aSAndroid Build Coastguard Worker   '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
106*9356374aSAndroid Build Coastguard Worker   '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
107*9356374aSAndroid Build Coastguard Worker   '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
108*9356374aSAndroid Build Coastguard Worker   '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
109*9356374aSAndroid Build Coastguard Worker   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
110*9356374aSAndroid Build Coastguard Worker   '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
111*9356374aSAndroid Build Coastguard Worker   '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
112*9356374aSAndroid Build Coastguard Worker   '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
113*9356374aSAndroid Build Coastguard Worker   '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
114*9356374aSAndroid Build Coastguard Worker   '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
115*9356374aSAndroid Build Coastguard Worker   '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
116*9356374aSAndroid Build Coastguard Worker   '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
117*9356374aSAndroid Build Coastguard Worker   '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
118*9356374aSAndroid Build Coastguard Worker   '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
119*9356374aSAndroid Build Coastguard Worker   '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
120*9356374aSAndroid Build Coastguard Worker   '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
121*9356374aSAndroid Build Coastguard Worker   '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
122*9356374aSAndroid Build Coastguard Worker   '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
123*9356374aSAndroid Build Coastguard Worker   '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
124*9356374aSAndroid Build Coastguard Worker   '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
125*9356374aSAndroid Build Coastguard Worker };
126*9356374aSAndroid Build Coastguard Worker 
127*9356374aSAndroid Build Coastguard Worker // Array of characters for the ascii_toupper() function. For values 'a'
128*9356374aSAndroid Build Coastguard Worker // through 'z', return the upper-case character; otherwise, return the
129*9356374aSAndroid Build Coastguard Worker // identity of the passed character.
130*9356374aSAndroid Build Coastguard Worker ABSL_DLL const char kToUpper[256] = {
131*9356374aSAndroid Build Coastguard Worker   '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
132*9356374aSAndroid Build Coastguard Worker   '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
133*9356374aSAndroid Build Coastguard Worker   '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
134*9356374aSAndroid Build Coastguard Worker   '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
135*9356374aSAndroid Build Coastguard Worker   '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
136*9356374aSAndroid Build Coastguard Worker   '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
137*9356374aSAndroid Build Coastguard Worker   '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
138*9356374aSAndroid Build Coastguard Worker   '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
139*9356374aSAndroid Build Coastguard Worker   '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
140*9356374aSAndroid Build Coastguard Worker   '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
141*9356374aSAndroid Build Coastguard Worker   '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
142*9356374aSAndroid Build Coastguard Worker   '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
143*9356374aSAndroid Build Coastguard Worker   '\x60',    'A',    'B',    'C',    'D',    'E',    'F',    'G',
144*9356374aSAndroid Build Coastguard Worker      'H',    'I',    'J',    'K',    'L',    'M',    'N',    'O',
145*9356374aSAndroid Build Coastguard Worker      'P',    'Q',    'R',    'S',    'T',    'U',    'V',    'W',
146*9356374aSAndroid Build Coastguard Worker      'X',    'Y',    'Z', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f',
147*9356374aSAndroid Build Coastguard Worker   '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
148*9356374aSAndroid Build Coastguard Worker   '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
149*9356374aSAndroid Build Coastguard Worker   '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
150*9356374aSAndroid Build Coastguard Worker   '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
151*9356374aSAndroid Build Coastguard Worker   '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
152*9356374aSAndroid Build Coastguard Worker   '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
153*9356374aSAndroid Build Coastguard Worker   '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
154*9356374aSAndroid Build Coastguard Worker   '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
155*9356374aSAndroid Build Coastguard Worker   '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
156*9356374aSAndroid Build Coastguard Worker   '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
157*9356374aSAndroid Build Coastguard Worker   '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
158*9356374aSAndroid Build Coastguard Worker   '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
159*9356374aSAndroid Build Coastguard Worker   '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
160*9356374aSAndroid Build Coastguard Worker   '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
161*9356374aSAndroid Build Coastguard Worker   '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
162*9356374aSAndroid Build Coastguard Worker   '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
163*9356374aSAndroid Build Coastguard Worker };
164*9356374aSAndroid Build Coastguard Worker // clang-format on
165*9356374aSAndroid Build Coastguard Worker 
// Returns whether `c` is in the a-z/A-Z range (w.r.t. `ToUpper`): when
// `ToUpper` is true the test is for 'a'..'z' (the characters an upper-casing
// pass must change); when false it is for 'A'..'Z'.
//
// Implemented by:
//  1. Pushing the a-z/A-Z range to [SCHAR_MIN, SCHAR_MIN + 26).
//  2. Comparing to SCHAR_MIN + 26.
template <bool ToUpper>
constexpr bool AsciiInAZRange(unsigned char c) {
  // Subtracting `sub` maps the first letter of the range onto the bit pattern
  // of SCHAR_MIN once the result is reinterpreted as a signed char.
  constexpr unsigned char sub = (ToUpper ? 'a' : 'A') - SCHAR_MIN;
  constexpr signed char threshold = SCHAR_MIN + 26;  // 26 = alphabet size.
  // Using unsigned arithmetic as overflows/underflows are well defined.
  unsigned char u = c - sub;
  // Using signed cmp, as SIMD unsigned cmp isn't available in many platforms.
  return static_cast<signed char>(u) < threshold;
}
179*9356374aSAndroid Build Coastguard Worker 
180*9356374aSAndroid Build Coastguard Worker // Force-inline so the compiler won't merge the short and long implementations.
181*9356374aSAndroid Build Coastguard Worker template <bool ToUpper>
AsciiStrCaseFoldImpl(absl::Nonnull<char * > p,size_t size)182*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_ALWAYS_INLINE inline constexpr void AsciiStrCaseFoldImpl(
183*9356374aSAndroid Build Coastguard Worker     absl::Nonnull<char*> p, size_t size) {
184*9356374aSAndroid Build Coastguard Worker   // The upper- and lowercase versions of ASCII characters differ by only 1 bit.
185*9356374aSAndroid Build Coastguard Worker   // When we need to flip the case, we can xor with this bit to achieve the
186*9356374aSAndroid Build Coastguard Worker   // desired result. Note that the choice of 'a' and 'A' here is arbitrary. We
187*9356374aSAndroid Build Coastguard Worker   // could have chosen 'z' and 'Z', or any other pair of characters as they all
188*9356374aSAndroid Build Coastguard Worker   // have the same single bit difference.
189*9356374aSAndroid Build Coastguard Worker   constexpr unsigned char kAsciiCaseBitFlip = 'a' ^ 'A';
190*9356374aSAndroid Build Coastguard Worker 
191*9356374aSAndroid Build Coastguard Worker   for (size_t i = 0; i < size; ++i) {
192*9356374aSAndroid Build Coastguard Worker     unsigned char v = static_cast<unsigned char>(p[i]);
193*9356374aSAndroid Build Coastguard Worker     v ^= AsciiInAZRange<ToUpper>(v) ? kAsciiCaseBitFlip : 0;
194*9356374aSAndroid Build Coastguard Worker     p[i] = static_cast<char>(v);
195*9356374aSAndroid Build Coastguard Worker   }
196*9356374aSAndroid Build Coastguard Worker }
197*9356374aSAndroid Build Coastguard Worker 
// The string size threshold for starting using the long string version:
// inputs shorter than this many bytes take the short (inlined) path.
constexpr size_t kCaseFoldThreshold = 16;
200*9356374aSAndroid Build Coastguard Worker 
201*9356374aSAndroid Build Coastguard Worker // No-inline so the compiler won't merge the short and long implementations.
202*9356374aSAndroid Build Coastguard Worker template <bool ToUpper>
AsciiStrCaseFoldLong(absl::Nonnull<char * > p,size_t size)203*9356374aSAndroid Build Coastguard Worker ABSL_ATTRIBUTE_NOINLINE constexpr void AsciiStrCaseFoldLong(
204*9356374aSAndroid Build Coastguard Worker     absl::Nonnull<char*> p, size_t size) {
205*9356374aSAndroid Build Coastguard Worker   ABSL_ASSUME(size >= kCaseFoldThreshold);
206*9356374aSAndroid Build Coastguard Worker   AsciiStrCaseFoldImpl<ToUpper>(p, size);
207*9356374aSAndroid Build Coastguard Worker }
208*9356374aSAndroid Build Coastguard Worker 
209*9356374aSAndroid Build Coastguard Worker // Splitting to short and long strings to allow vectorization decisions
210*9356374aSAndroid Build Coastguard Worker // to be made separately in the long and short cases.
211*9356374aSAndroid Build Coastguard Worker template <bool ToUpper>
AsciiStrCaseFold(absl::Nonnull<char * > p,size_t size)212*9356374aSAndroid Build Coastguard Worker constexpr void AsciiStrCaseFold(absl::Nonnull<char*> p, size_t size) {
213*9356374aSAndroid Build Coastguard Worker   size < kCaseFoldThreshold ? AsciiStrCaseFoldImpl<ToUpper>(p, size)
214*9356374aSAndroid Build Coastguard Worker                             : AsciiStrCaseFoldLong<ToUpper>(p, size);
215*9356374aSAndroid Build Coastguard Worker }
216*9356374aSAndroid Build Coastguard Worker 
ValidateAsciiCasefold()217*9356374aSAndroid Build Coastguard Worker static constexpr size_t ValidateAsciiCasefold() {
218*9356374aSAndroid Build Coastguard Worker   constexpr size_t num_chars = 1 + CHAR_MAX - CHAR_MIN;
219*9356374aSAndroid Build Coastguard Worker   size_t incorrect_index = 0;
220*9356374aSAndroid Build Coastguard Worker   char lowered[num_chars] = {};
221*9356374aSAndroid Build Coastguard Worker   char uppered[num_chars] = {};
222*9356374aSAndroid Build Coastguard Worker   for (unsigned int i = 0; i < num_chars; ++i) {
223*9356374aSAndroid Build Coastguard Worker     uppered[i] = lowered[i] = static_cast<char>(i);
224*9356374aSAndroid Build Coastguard Worker   }
225*9356374aSAndroid Build Coastguard Worker   AsciiStrCaseFold<false>(&lowered[0], num_chars);
226*9356374aSAndroid Build Coastguard Worker   AsciiStrCaseFold<true>(&uppered[0], num_chars);
227*9356374aSAndroid Build Coastguard Worker   for (size_t i = 0; i < num_chars; ++i) {
228*9356374aSAndroid Build Coastguard Worker     const char ch = static_cast<char>(i),
229*9356374aSAndroid Build Coastguard Worker                ch_upper = ('a' <= ch && ch <= 'z' ? 'A' + (ch - 'a') : ch),
230*9356374aSAndroid Build Coastguard Worker                ch_lower = ('A' <= ch && ch <= 'Z' ? 'a' + (ch - 'A') : ch);
231*9356374aSAndroid Build Coastguard Worker     if (uppered[i] != ch_upper || lowered[i] != ch_lower) {
232*9356374aSAndroid Build Coastguard Worker       incorrect_index = i > 0 ? i : num_chars;
233*9356374aSAndroid Build Coastguard Worker       break;
234*9356374aSAndroid Build Coastguard Worker     }
235*9356374aSAndroid Build Coastguard Worker   }
236*9356374aSAndroid Build Coastguard Worker   return incorrect_index;
237*9356374aSAndroid Build Coastguard Worker }
238*9356374aSAndroid Build Coastguard Worker 
239*9356374aSAndroid Build Coastguard Worker static_assert(ValidateAsciiCasefold() == 0, "error in case conversion");
240*9356374aSAndroid Build Coastguard Worker 
241*9356374aSAndroid Build Coastguard Worker }  // namespace ascii_internal
242*9356374aSAndroid Build Coastguard Worker 
AsciiStrToLower(absl::Nonnull<std::string * > s)243*9356374aSAndroid Build Coastguard Worker void AsciiStrToLower(absl::Nonnull<std::string*> s) {
244*9356374aSAndroid Build Coastguard Worker   return ascii_internal::AsciiStrCaseFold<false>(&(*s)[0], s->size());
245*9356374aSAndroid Build Coastguard Worker }
246*9356374aSAndroid Build Coastguard Worker 
AsciiStrToUpper(absl::Nonnull<std::string * > s)247*9356374aSAndroid Build Coastguard Worker void AsciiStrToUpper(absl::Nonnull<std::string*> s) {
248*9356374aSAndroid Build Coastguard Worker   return ascii_internal::AsciiStrCaseFold<true>(&(*s)[0], s->size());
249*9356374aSAndroid Build Coastguard Worker }
250*9356374aSAndroid Build Coastguard Worker 
RemoveExtraAsciiWhitespace(absl::Nonnull<std::string * > str)251*9356374aSAndroid Build Coastguard Worker void RemoveExtraAsciiWhitespace(absl::Nonnull<std::string*> str) {
252*9356374aSAndroid Build Coastguard Worker   auto stripped = StripAsciiWhitespace(*str);
253*9356374aSAndroid Build Coastguard Worker 
254*9356374aSAndroid Build Coastguard Worker   if (stripped.empty()) {
255*9356374aSAndroid Build Coastguard Worker     str->clear();
256*9356374aSAndroid Build Coastguard Worker     return;
257*9356374aSAndroid Build Coastguard Worker   }
258*9356374aSAndroid Build Coastguard Worker 
259*9356374aSAndroid Build Coastguard Worker   auto input_it = stripped.begin();
260*9356374aSAndroid Build Coastguard Worker   auto input_end = stripped.end();
261*9356374aSAndroid Build Coastguard Worker   auto output_it = &(*str)[0];
262*9356374aSAndroid Build Coastguard Worker   bool is_ws = false;
263*9356374aSAndroid Build Coastguard Worker 
264*9356374aSAndroid Build Coastguard Worker   for (; input_it < input_end; ++input_it) {
265*9356374aSAndroid Build Coastguard Worker     if (is_ws) {
266*9356374aSAndroid Build Coastguard Worker       // Consecutive whitespace?  Keep only the last.
267*9356374aSAndroid Build Coastguard Worker       is_ws = absl::ascii_isspace(static_cast<unsigned char>(*input_it));
268*9356374aSAndroid Build Coastguard Worker       if (is_ws) --output_it;
269*9356374aSAndroid Build Coastguard Worker     } else {
270*9356374aSAndroid Build Coastguard Worker       is_ws = absl::ascii_isspace(static_cast<unsigned char>(*input_it));
271*9356374aSAndroid Build Coastguard Worker     }
272*9356374aSAndroid Build Coastguard Worker 
273*9356374aSAndroid Build Coastguard Worker     *output_it = *input_it;
274*9356374aSAndroid Build Coastguard Worker     ++output_it;
275*9356374aSAndroid Build Coastguard Worker   }
276*9356374aSAndroid Build Coastguard Worker 
277*9356374aSAndroid Build Coastguard Worker   str->erase(static_cast<size_t>(output_it - &(*str)[0]));
278*9356374aSAndroid Build Coastguard Worker }
279*9356374aSAndroid Build Coastguard Worker 
280*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
281*9356374aSAndroid Build Coastguard Worker }  // namespace absl
282