1 /* 2 * Copyright (c) 2006-2018, RT-Thread Development Team 3 * 4 * SPDX-License-Identifier: Apache-2.0 5 * 6 * Change Logs: 7 * Date Author Notes 8 * 2010-03-22 Bernard first version 9 * 2013-04-03 Bernard strip more characters. 10 */ 11 #include <finsh.h> 12 #include <stdlib.h> 13 14 #include "finsh_token.h" 15 #include "finsh_error.h" 16 17 #define is_alpha(ch) ((ch | 0x20) - 'a') < 26u 18 #define is_digit(ch) ((ch) >= '0' && (ch) <= '9') 19 #define is_xdigit(ch) (((ch) >= '0' && (ch) <= '9') || (((ch | 0x20) - 'a') < 6u)) 20 #define is_separator(ch) !(((ch) >= 'a' && (ch) <= 'z') \ 21 || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')) 22 #define is_eof(self) (self)->eof 23 24 struct name_table 25 { 26 char* name; 27 enum finsh_token_type type; 28 }; 29 30 /* keyword */ 31 static const struct name_table finsh_name_table[] = 32 { 33 {"void", finsh_token_type_void}, 34 {"char", finsh_token_type_char}, 35 {"short", finsh_token_type_short}, 36 {"int", finsh_token_type_int}, 37 {"long", finsh_token_type_long}, 38 {"unsigned", finsh_token_type_unsigned}, 39 40 {"NULL", finsh_token_type_value_null}, 41 {"null", finsh_token_type_value_null} 42 }; 43 44 static char token_next_char(struct finsh_token* self); 45 static void token_prev_char(struct finsh_token* self); 46 static long token_spec_number(char* string, int length, int b); 47 static void token_run(struct finsh_token* self); 48 static int token_match_name(struct finsh_token* self, const char* str); 49 static void token_proc_number(struct finsh_token* self); 50 static uint8_t* token_proc_string(struct finsh_token* self); 51 static void token_trim_space(struct finsh_token* self); 52 static char token_proc_char(struct finsh_token* self); 53 static int token_proc_escape(struct finsh_token* self); 54 55 void finsh_token_init(struct finsh_token* self, uint8_t* line) 56 { 57 memset(self, 0, sizeof(struct finsh_token)); 58 59 self->line = line; 60 } 61 62 enum finsh_token_type finsh_token_token(struct finsh_token* self) 63 { 64 if ( self->replay ) self->replay = 0; 65 else token_run(self); 66 67 return (enum finsh_token_type)self->current_token; 68 } 69 70 void finsh_token_get_token(struct finsh_token* self, uint8_t* token) 71 { 72 strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX); 73 } 74 75 int token_get_string(struct finsh_token* self, uint8_t* str) 76 { 77 unsigned char *p=str; 78 char ch; 79 80 ch = token_next_char(self); 81 if (is_eof(self)) return -1; 82 83 str[0] = '\0'; 84 85 if ( is_digit(ch) )/*the first character of identifier is not a digit.*/ 86 { 87 token_prev_char(self); 88 return -1; 89 } 90 91 while (!is_separator(ch) && !is_eof(self)) 92 { 93 *p++ = ch; 94 95 ch = token_next_char(self); 96 } 97 self->eof = 0; 98 99 token_prev_char(self); 100 *p = '\0'; 101 102 return 0; 103 } 104 105 /* 106 get next character. 107 */ 108 static char token_next_char(struct finsh_token* self) 109 { 110 if (self->eof) return '\0'; 111 112 if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n') 113 { 114 self->eof = 1; 115 self->position = 0; 116 return '\0'; 117 } 118 119 return self->line[self->position++]; 120 } 121 122 static void token_prev_char(struct finsh_token* self) 123 { 124 if ( self->eof ) return; 125 126 if ( self->position == 0 ) return; 127 else self->position--; 128 } 129 130 static void token_run(struct finsh_token* self) 131 { 132 char ch; 133 134 token_trim_space(self); /* first trim space and tab. */ 135 token_get_string(self, &(self->string[0])); 136 137 if ( is_eof(self) ) /*if it is eof, break;*/ 138 { 139 self->current_token = finsh_token_type_eof; 140 return ; 141 } 142 143 if (self->string[0] != '\0') /*It is a key word or a identifier.*/ 144 { 145 if ( !token_match_name(self, (char*)self->string) ) 146 { 147 self->current_token = finsh_token_type_identifier; 148 } 149 } 150 else/*It is a operator character.*/ 151 { 152 ch = token_next_char(self); 153 154 switch ( ch ) 155 { 156 case '(': 157 self->current_token = finsh_token_type_left_paren; 158 break; 159 160 case ')': 161 self->current_token = finsh_token_type_right_paren; 162 break; 163 164 case ',': 165 self->current_token = finsh_token_type_comma; 166 break; 167 168 case ';': 169 self->current_token = finsh_token_type_semicolon; 170 break; 171 172 case '&': 173 self->current_token = finsh_token_type_and; 174 break; 175 176 case '*': 177 self->current_token = finsh_token_type_mul; 178 break; 179 180 case '+': 181 ch = token_next_char(self); 182 183 if ( ch == '+' ) 184 { 185 self->current_token = finsh_token_type_inc; 186 } 187 else 188 { 189 token_prev_char(self); 190 self->current_token = finsh_token_type_add; 191 } 192 break; 193 194 case '-': 195 ch = token_next_char(self); 196 197 if ( ch == '-' ) 198 { 199 self->current_token = finsh_token_type_dec; 200 } 201 else 202 { 203 token_prev_char(self); 204 self->current_token = finsh_token_type_sub; 205 } 206 break; 207 208 case '/': 209 ch = token_next_char(self); 210 if (ch == '/') 211 { 212 /* line comments, set to end of file */ 213 self->current_token = finsh_token_type_eof; 214 } 215 else 216 { 217 token_prev_char(self); 218 self->current_token = finsh_token_type_div; 219 } 220 break; 221 222 case '<': 223 ch = token_next_char(self); 224 225 if ( ch == '<' ) 226 { 227 self->current_token = finsh_token_type_shl; 228 } 229 else 230 { 231 token_prev_char(self); 232 self->current_token = finsh_token_type_bad; 233 } 234 break; 235 236 case '>': 237 ch = token_next_char(self); 238 239 if ( ch == '>' ) 240 { 241 self->current_token = finsh_token_type_shr; 242 } 243 else 244 { 245 token_prev_char(self); 246 self->current_token = finsh_token_type_bad; 247 } 248 break; 249 250 case '|': 251 self->current_token = finsh_token_type_or; 252 break; 253 254 case '%': 255 self->current_token = finsh_token_type_mod; 256 break; 257 258 case '~': 259 self->current_token = finsh_token_type_bitwise; 260 break; 261 262 case '^': 263 self->current_token = finsh_token_type_xor; 264 break; 265 266 case '=': 267 self->current_token = finsh_token_type_assign; 268 break; 269 270 case '\'': 271 self->value.char_value = token_proc_char(self); 272 self->current_token = finsh_token_type_value_char; 273 break; 274 275 case '"': 276 token_proc_string(self); 277 self->current_token = finsh_token_type_value_string; 278 break; 279 280 default: 281 if ( is_digit(ch) ) 282 { 283 token_prev_char(self); 284 token_proc_number(self); 285 break; 286 } 287 288 finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN); 289 self->current_token = finsh_token_type_bad; 290 291 break; 292 } 293 } 294 } 295 296 static int token_match_name(struct finsh_token* self, const char* str) 297 { 298 int i; 299 300 for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++) 301 { 302 if ( strcmp(finsh_name_table[i].name, str)==0 ) 303 { 304 self->current_token = finsh_name_table[i].type; 305 return 1; 306 } 307 } 308 309 return 0; 310 } 311 312 static void token_trim_space(struct finsh_token* self) 313 { 314 char ch; 315 while ( (ch = token_next_char(self)) ==' ' || 316 ch == '\t' || 317 ch == '\r'); 318 319 token_prev_char(self); 320 } 321 322 static char token_proc_char(struct finsh_token* self) 323 { 324 char ch; 325 char buf[4], *p; 326 327 p = buf; 328 ch = token_next_char(self); 329 330 if ( ch == '\\' ) 331 { 332 ch = token_next_char(self); 333 switch ( ch ) 334 { 335 case 'n': ch = '\n'; break; 336 case 't': ch = '\t'; break; 337 case 'v': ch = '\v'; break; 338 case 'b': ch = '\b'; break; 339 case 'r': ch = '\r'; break; 340 case '\\': ch = '\\'; break; 341 case '\'': ch = '\''; break; 342 default : 343 while ( is_digit(ch) )/*for '\113' char*/ 344 { 345 ch = token_next_char(self); 346 *p++ = ch; 347 } 348 349 token_prev_char(self); 350 *p = '\0'; 351 ch = atoi(p); 352 break; 353 } 354 } 355 356 if ( token_next_char(self) != '\'' ) 357 { 358 token_prev_char(self); 359 finsh_error_set(FINSH_ERROR_EXPECT_CHAR); 360 return ch; 361 } 362 363 return ch; 364 } 365 366 static uint8_t* token_proc_string(struct finsh_token* self) 367 { 368 uint8_t* p; 369 370 for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; ) 371 { 372 char ch = token_next_char(self); 373 374 if ( is_eof(self) ) 375 { 376 finsh_error_set(FINSH_ERROR_UNEXPECT_END); 377 return NULL;; 378 } 379 if ( ch == '\\' ) 380 { 381 ch = token_proc_escape(self); 382 } 383 else if ( ch == '"' )/*end of string.*/ 384 { 385 *p = '\0'; 386 return self->string; 387 } 388 389 *p++ = ch; 390 } 391 392 return NULL; 393 } 394 395 static int token_proc_escape(struct finsh_token* self) 396 { 397 char ch; 398 int result=0; 399 400 ch = token_next_char(self); 401 switch (ch) 402 { 403 case 'n': 404 result = '\n'; 405 break; 406 case 't': 407 result = '\t'; 408 break; 409 case 'v': 410 result = '\v'; 411 break; 412 case 'b': 413 result = '\b'; 414 break; 415 case 'r': 416 result = '\r'; 417 break; 418 case 'f': 419 result = '\f'; 420 break; 421 case 'a': 422 result = '\007'; 423 break; 424 case '"': 425 result = '"'; 426 break; 427 case 'x': 428 case 'X': 429 result = 0; 430 ch = token_next_char(self); 431 while (is_xdigit(ch)) 432 { 433 result = result * 16 + ((ch < 'A') ? (ch - '0') : (ch | 0x20) - 'a' + 10); 434 ch = token_next_char(self); 435 } 436 token_prev_char(self); 437 break; 438 default: 439 if ( (ch - '0') < 8u) 440 { 441 result = 0; 442 while ( (ch - '0') < 8u ) 443 { 444 result = result*8 + ch - '0'; 445 ch = token_next_char(self); 446 } 447 448 token_prev_char(self); 449 } 450 break; 451 } 452 453 return result; 454 } 455 456 /* 457 (0|0x|0X|0b|0B)number+(l|L) 458 */ 459 static void token_proc_number(struct finsh_token* self) 460 { 461 char ch; 462 char *p, buf[128]; 463 long value; 464 465 value = 0; 466 p = buf; 467 468 ch = token_next_char(self); 469 if ( ch == '0' ) 470 { 471 int b; 472 ch = token_next_char(self); 473 if ( ch == 'x' || ch == 'X' )/*it's a hex number*/ 474 { 475 b = 16; 476 ch = token_next_char(self); 477 while ( is_digit(ch) || is_alpha(ch) ) 478 { 479 *p++ = ch; 480 ch = token_next_char(self); 481 } 482 483 *p = '\0'; 484 } 485 else if ( ch == 'b' || ch == 'B' ) 486 { 487 b = 2; 488 ch = token_next_char(self); 489 while ( (ch=='0')||(ch=='1') ) 490 { 491 *p++ = ch; 492 ch = token_next_char(self); 493 } 494 495 *p = '\0'; 496 } 497 else if ( '0' <= ch && ch <= '7' ) 498 { 499 b = 8; 500 while ( '0' <= ch && ch <= '7' ) 501 { 502 *p++ = ch; 503 ch = token_next_char(self); 504 } 505 506 *p = '\0'; 507 } 508 else 509 { 510 token_prev_char(self); 511 512 /* made as 0 value */ 513 self->value.int_value = 0; 514 self->current_token = finsh_token_type_value_int; 515 return; 516 } 517 518 self->value.int_value = token_spec_number(buf, strlen(buf), b); 519 self->current_token = finsh_token_type_value_int; 520 } 521 else 522 { 523 while ( is_digit(ch) ) 524 { 525 value = value*10 + ( ch - '0' ); 526 ch = token_next_char(self); 527 } 528 529 self->value.int_value = value; 530 self->current_token = finsh_token_type_value_int; 531 } 532 533 switch ( ch ) 534 { 535 case 'l': 536 case 'L': 537 self->current_token = finsh_token_type_value_long; 538 break; 539 540 default: 541 token_prev_char(self); 542 break; 543 } 544 } 545 546 /*use 64 bit number*/ 547 #define BN_SIZE 2 548 549 static long token_spec_number(char* string, int length, int b) 550 { 551 char* p; 552 int t; 553 int i, j, shift=1; 554 unsigned int bn[BN_SIZE], v; 555 long d; 556 557 p = string; 558 i = 0; 559 560 switch ( b ) 561 { 562 case 16: shift = 4; 563 break; 564 case 8: shift = 3; 565 break; 566 case 2: shift = 1; 567 break; 568 default: break; 569 } 570 571 for ( j=0; j<BN_SIZE ; j++) bn[j] = 0; 572 573 while ( i<length ) 574 { 575 t = *p++; 576 if ( t>='a' && t <='f' ) 577 { 578 t = t - 'a' +10; 579 } 580 else if ( t >='A' && t <='F' ) 581 { 582 t = t - 'A' +10; 583 } 584 else t = t - '0'; 585 586 for ( j=0; j<BN_SIZE ; j++) 587 { 588 v = bn[j]; 589 bn[j] = (v<<shift) | t; 590 t = v >> (32 - shift); 591 } 592 i++; 593 } 594 595 d = (long)bn[0]; 596 597 return d; 598 } 599