// Scintilla source code edit control
/** @file CharacterSet.h
 ** Encapsulates a set of characters. Used to test if a character is within a set.
 **/
// Copyright 2007 by Neil Hodgson <[email protected]>
// The License.txt file describes the conditions under which this software may be distributed.

#ifndef CHARACTERSET_H
#define CHARACTERSET_H

namespace Scintilla {

/**
 * A set of character values backed by a heap-allocated array of @c size flags.
 * Values in [0, size) are looked up in the array; values >= size report
 * @c valueAfter; negative values are never members.
 */
class CharacterSet {
	int size;
	bool valueAfter;
	bool *bset;
public:
	/// Bit flags selecting predefined ASCII ranges to add at construction.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * Build a set holding the characters of @a initialSet plus the ranges selected by @a base.
	 * @param base        Combination of setBase flags.
	 * @param initialSet  NUL-terminated characters to add; a null pointer adds nothing.
	 * @param size_       Number of values tracked individually.
	 * @param valueAfter_ Result of Contains() for values >= @a size_.
	 */
	CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) :
		size(size_),
		valueAfter(valueAfter_),
		bset(new bool[size_]()) {	// trailing () value-initialises every flag to false
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	/// Deep copy: allocates a new flag array and copies every flag from @a other.
	CharacterSet(const CharacterSet &other) :
		size(other.size),
		valueAfter(other.valueAfter),
		bset(new bool[other.size]) {
		for (int i=0; i < size; i++) {
			bset[i] = other.bset[i];
		}
	}
	/// Take over the flag array of @a other, leaving it with size 0 and no array.
	CharacterSet(CharacterSet &&other) noexcept :
		size(other.size),
		valueAfter(other.valueAfter),
		bset(other.bset) {
		other.size = 0;
		other.bset = nullptr;
	}
	/// Copy assignment. The new array is fully built before the old one is
	/// released, so a failed allocation leaves this object unchanged.
	CharacterSet &operator=(const CharacterSet &other) {
		if (this != &other) {
			bool *bsetNew = new bool[other.size];
			for (int i = 0; i < other.size; i++) {
				bsetNew[i] = other.bset[i];
			}
			delete []bset;
			size = other.size;
			valueAfter = other.valueAfter;
			bset = bsetNew;
		}
		return *this;
	}
	/// Move assignment: release the current array and take over the one owned by @a other.
	CharacterSet &operator=(CharacterSet &&other) noexcept {
		if (this != &other) {
			delete []bset;
			size = other.size;
			valueAfter = other.valueAfter;
			bset = other.bset;
			other.size = 0;
			other.bset = nullptr;
		}
		return *this;
	}
	~CharacterSet() {
		delete []bset;
		bset = nullptr;
		size = 0;
	}
	/**
	 * Add a single value to the set.
	 * @param val Value in [0, size). Other values fail the assertion; when
	 *            assertions are compiled out they are ignored instead of
	 *            writing outside the array.
	 */
	void Add(int val) {
		assert(val >= 0);
		assert(val < size);
		if ((val >= 0) && (val < size))
			bset[val] = true;
	}
	/**
	 * Add every character of a NUL-terminated string to the set.
	 * @param setToAdd Characters to add; a null pointer is treated as an empty string.
	 * Characters are read as unsigned char. Values >= size fail the assertion;
	 * when assertions are compiled out they are skipped.
	 */
	void AddString(const char *setToAdd) {
		if (!setToAdd)
			return;
		for (const char *cp=setToAdd; *cp; cp++) {
			const unsigned char uch = *cp;
			assert(uch < size);
			if (uch < size)
				bset[uch] = true;
		}
	}
	/**
	 * Test whether @a val is in the set.
	 * @return false for negative values, the stored flag for values in
	 *         [0, size), and valueAfter for values >= size.
	 */
	bool Contains(int val) const noexcept {
		assert(val >= 0);
		if (val < 0) return false;
		return (val < size) ? bset[val] : valueAfter;
	}
	/// Test whether @a ch, read as an unsigned char, is in the set.
	bool Contains(char ch) const noexcept {
		// Overload char as char may be signed.
		// The unsigned char argument promotes to int, selecting Contains(int).
		const unsigned char uch = ch;
		return Contains(uch);
	}
};

// Functions for classifying characters

/// True for space and the control characters 0x09 to 0x0d.
constexpr bool IsASpace(int ch) noexcept {
	return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
}

/// True for space and horizontal tab only.
constexpr bool IsASpaceOrTab(int ch) noexcept {
	return (ch == ' ') || (ch == '\t');
}

/// True for the decimal digits '0' to '9'.
constexpr bool IsADigit(int ch) noexcept {
	return (ch >= '0') && (ch <= '9');
}

/**
 * True when @a ch is a digit in the given @a base.
 * For bases up to 10 only '0' up to '0' + base - 1 qualify; for larger bases
 * the first (base - 10) letters after 'A' or 'a' are accepted as well.
 */
constexpr bool IsADigit(int ch, int base) noexcept {
	if (base <= 10) {
		return (ch >= '0') && (ch < '0' + base);
	} else {
		return ((ch >= '0') && (ch <= '9')) ||
		       ((ch >= 'A') && (ch < 'A' + base - 10)) ||
		       ((ch >= 'a') && (ch < 'a' + base - 10));
	}
}

/// True for values in the 7-bit range 0 to 0x7f.
constexpr bool IsASCII(int ch) noexcept {
	return (ch >= 0) && (ch < 0x80);
}

/// True for 'a' to 'z'.
constexpr bool IsLowerCase(int ch) noexcept {
	return (ch >= 'a') && (ch <= 'z');
}

/// True for 'A' to 'Z'.
constexpr bool IsUpperCase(int ch) noexcept {
	return (ch >= 'A') && (ch <= 'Z');
}

/// True for 'A' to 'Z' and 'a' to 'z'.
constexpr bool IsUpperOrLowerCase(int ch) noexcept {
	return IsUpperCase(ch) || IsLowerCase(ch);
}

/// True for '0' to '9', 'a' to 'z' and 'A' to 'Z'.
constexpr bool IsAlphaNumeric(int ch) noexcept {
	return IsADigit(ch) || IsLowerCase(ch) || IsUpperCase(ch);
}

/**
 * Check if a character is a space.
 * This is ASCII specific but is safe with chars >= 0x80.
 */
constexpr bool isspacechar(int ch) noexcept {
	return IsASpace(ch);
}

/// True for alphanumerics, '.' and '_'.
constexpr bool iswordchar(int ch) noexcept {
	return IsAlphaNumeric(ch) || ch == '.' || ch == '_';
}

/// True for alphanumerics and '_'.
constexpr bool iswordstart(int ch) noexcept {
	return IsAlphaNumeric(ch) || ch == '_';
}

/// True when @a ch is one of the punctuation characters listed below.
constexpr bool isoperator(int ch) noexcept {
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}

// Simple case functions for ASCII supersets.

/// Map 'a'..'z' to 'A'..'Z'; every other value is returned unchanged.
template <typename T>
constexpr T MakeUpperCase(T ch) noexcept {
	if (ch < 'a' || ch > 'z')
		return ch;
	else
		// The arithmetic is performed in int; convert back to T explicitly.
		return static_cast<T>(ch - 'a' + 'A');
}

/// Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
template <typename T>
constexpr T MakeLowerCase(T ch) noexcept {
	if (ch < 'A' || ch > 'Z')
		return ch;
	else
		// The arithmetic is performed in int; convert back to T explicitly.
		return static_cast<T>(ch - 'A' + 'a');
}

int CompareCaseInsensitive(const char *a, const char *b) noexcept;
int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;

}

#endif