// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package asm implements the parser and instruction generator for the assembler.
// TODO: Split apart?
package asm

import (
	"fmt"
	"io"
	"log"
	"os"
	"strconv"
	"strings"
	"text/scanner"
	"unicode/utf8"

	"cmd/asm/internal/arch"
	"cmd/asm/internal/flags"
	"cmd/asm/internal/lex"
	"cmd/internal/obj"
	"cmd/internal/obj/arm64"
	"cmd/internal/obj/x86"
	"cmd/internal/objabi"
	"cmd/internal/src"
	"cmd/internal/sys"
)

// Parser holds the state needed to parse one assembly source: the token
// reader, error bookkeeping, the operand currently being scanned
// (input/inputPos), label tables, and the list of Progs built so far.
type Parser struct {
	lex           lex.TokenReader
	lineNum       int   // Line number in source file.
	errorLine     int   // Line number of last error.
	errorCount    int   // Number of errors.
	sawCode       bool  // saw code in this file (as opposed to comments and blank lines)
	pc            int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
	input         []lex.Token
	inputPos      int
	pendingLabels []string // Labels to attach to next instruction.
	labels        map[string]*obj.Prog
	toPatch       []Patch
	addr          []obj.Addr
	arch          *arch.Arch
	ctxt          *obj.Link
	firstProg     *obj.Prog
	lastProg      *obj.Prog
	dataAddr      map[string]int64 // Most recent address for DATA for this symbol.
	isJump        bool             // Instruction being assembled is a jump.
	allowABI      bool             // Whether ABI selectors are allowed.
	pkgPrefix     string           // Prefix to add to local symbols.
	errorWriter   io.Writer
}

// Patch pairs an operand address with the name of a label. Entries are
// kept in Parser.toPatch; Parse calls p.patch (defined elsewhere) after a
// successful parse, presumably to resolve them.
type Patch struct {
	addr  *obj.Addr
	label string
}

// NewParser returns a Parser that reads tokens from lexer for the
// architecture ar. Errors are written to os.Stderr. When ctxt is nil the
// package prefix is obj.UnlinkablePkg and ABI selectors are disallowed;
// otherwise both are derived from ctxt.Pkgpath.
func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
	pkgPrefix := obj.UnlinkablePkg
	if ctxt != nil {
		pkgPrefix = objabi.PathToPrefix(ctxt.Pkgpath)
	}
	return &Parser{
		ctxt:        ctxt,
		arch:        ar,
		lex:         lexer,
		labels:      make(map[string]*obj.Prog),
		dataAddr:    make(map[string]int64),
		errorWriter: os.Stderr,
		allowABI:    ctxt != nil && objabi.LookupPkgSpecial(ctxt.Pkgpath).AllowAsmABI,
		pkgPrefix:   pkgPrefix,
	}
}

// panicOnError is enabled when testing to abort execution on the first error
// and turn it into a recoverable panic.
var panicOnError bool

// errorf reports a formatted parse error to p.errorWriter.
//
// If panicOnError is set it panics with the formatted error instead.
// At most one error is reported per source line. When a lexer is
// attached, the message is prefixed with "file:line: " and terminated
// with a newline. After more than 10 errors the process exits through
// log.Fatal unless *flags.AllErrors is set.
func (p *Parser) errorf(format string, args ...interface{}) {
	if panicOnError {
		panic(fmt.Errorf(format, args...))
	}
	if p.lineNum == p.errorLine {
		// Only one error per line.
		return
	}
	p.errorLine = p.lineNum
	if p.lex != nil {
		// Put file and line information on head of message.
		format = "%s:%d: " + format + "\n"
		args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
	}
	fmt.Fprintf(p.errorWriter, format, args...)
	p.errorCount++
	if p.errorCount > 10 && !*flags.AllErrors {
		log.Fatal("too many errors")
	}
}

// pos returns the source position of the current line (column 0),
// registered in the link context's position table.
func (p *Parser) pos() src.XPos {
	return p.ctxt.PosTable.XPos(src.MakePos(p.lex.Base(), uint(p.lineNum), 0))
}

// Parse reads lines until the input is exhausted or a line fails to parse,
// dispatching each one either to a pseudo-op handler or to the instruction
// assembler. It returns (nil, false) if any error was reported; otherwise
// it calls p.patch and returns the first Prog and true.
func (p *Parser) Parse() (*obj.Prog, bool) {
	scratch := make([][]lex.Token, 0, 3)
	for {
		word, cond, operands, ok := p.line(scratch)
		if !ok {
			break
		}
		// Hand the operand storage back to line for reuse on the next iteration.
		scratch = operands

		if p.pseudo(word, operands) {
			continue
		}
		i, present := p.arch.Instructions[word]
		if present {
			p.instruction(i, word, cond, operands)
			continue
		}
		p.errorf("unrecognized instruction %q", word)
	}
	if p.errorCount > 0 {
		return nil, false
	}
	p.patch()
	return p.firstProg, true
}

// ParseSymABIs parses p's assembly code to find text symbol
// definitions and references and writes a symabis file to w.
// It reports whether parsing finished without errors.
func (p *Parser) ParseSymABIs(w io.Writer) bool {
	operands := make([][]lex.Token, 0, 3)
	for {
		word, _, operands1, ok := p.line(operands)
		if !ok {
			break
		}
		// Keep the (possibly grown) operand storage for reuse by the next line.
		operands = operands1

		p.symDefRef(w, word, operands)
	}
	return p.errorCount == 0
}

// nextToken returns the next non-build-comment token from the lexer.
// It reports misplaced //go:build comments but otherwise discards them.
func (p *Parser) nextToken() lex.ScanToken {
	for {
		tok := p.lex.Next()
		if tok == lex.BuildComment {
			if p.sawCode {
				p.errorf("misplaced //go:build comment")
			}
			continue
		}
		if tok != '\n' {
			p.sawCode = true
		}
		if tok == '#' {
			// A leftover wisp of a #include/#define/etc,
			// to let us know that p.sawCode should be true now.
			// Otherwise ignored.
			continue
		}
		return tok
	}
}

// line consumes a single assembly line from p.lex of the form
//
//	{label:} WORD[.cond] [ arg {, arg} ] (';' | '\n')
//
// It adds any labels to p.pendingLabels and returns the word, cond,
// operand list, and true. If there is an error or EOF, it returns
// ok=false.
//
// line may reuse the memory from scratch.
func (p *Parser) line(scratch [][]lex.Token) (word, cond string, operands [][]lex.Token, ok bool) {
next:
	// Skip newlines.
	var tok lex.ScanToken
	for {
		tok = p.nextToken()
		// We save the line number here so error messages from this instruction
		// are labeled with this line. Otherwise we complain after we've absorbed
		// the terminating newline and the line numbers are off by one in errors.
		p.lineNum = p.lex.Line()
		switch tok {
		case '\n', ';':
			continue
		case scanner.EOF:
			return "", "", nil, false
		}
		break
	}
	// First item must be an identifier.
	if tok != scanner.Ident {
		p.errorf("expected identifier, found %q", p.lex.Text())
		return "", "", nil, false // Might as well stop now.
	}
	word, cond = p.lex.Text(), ""
	operands = scratch[:0]
	// Zero or more comma-separated operands, one per loop.
	nesting := 0 // Depth of open '(' / '[' while scanning the current line.
	colon := -1  // Index of the operand that preceded a ':' separator, or -1.
	for tok != '\n' && tok != ';' {
		// Process one operand.
		var items []lex.Token
		if cap(operands) > len(operands) {
			// Reuse scratch items slice.
			items = operands[:cap(operands)][len(operands)][:0]
		} else {
			items = make([]lex.Token, 0, 3)
		}
		for {
			tok = p.nextToken()
			// Suffixes and labels are only recognized directly after the word,
			// before any operand token has been collected.
			if len(operands) == 0 && len(items) == 0 {
				if p.arch.InFamily(sys.ARM, sys.ARM64, sys.AMD64, sys.I386, sys.RISCV64) && tok == '.' {
					// Suffixes: ARM conditionals, RISCV rounding mode or x86 modifiers.
					tok = p.nextToken()
					str := p.lex.Text()
					if tok != scanner.Ident {
						p.errorf("instruction suffix expected identifier, found %s", str)
					}
					cond = cond + "." + str
					continue
				}
				if tok == ':' {
					// Labels.
					p.pendingLabels = append(p.pendingLabels, word)
					goto next
				}
			}
			if tok == scanner.EOF {
				p.errorf("unexpected EOF")
				return "", "", nil, false
			}
			// Split operands on comma. Also, the old syntax on x86 for a "register pair"
			// was AX:DX, for which the new syntax is DX, AX. Note the reordering.
			if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) {
				if tok == ':' {
					// Remember this location so we can swap the operands below.
					if colon >= 0 {
						p.errorf("invalid ':' in operand")
						return word, cond, operands, true
					}
					colon = len(operands)
				}
				break
			}
			if tok == '(' || tok == '[' {
				nesting++
			}
			if tok == ')' || tok == ']' {
				nesting--
			}
			items = append(items, lex.Make(tok, p.lex.Text()))
		}
		if len(items) > 0 {
			operands = append(operands, items)
			if colon >= 0 && len(operands) == colon+2 {
				// AX:DX becomes DX, AX.
				operands[colon], operands[colon+1] = operands[colon+1], operands[colon]
				colon = -1
			}
		} else if len(operands) > 0 || tok == ',' || colon >= 0 {
			// Had a separator with nothing after.
			p.errorf("missing operand")
		}
	}
	return word, cond, operands, true
}

// instruction parses every operand of the instruction named word into
// p.addr and hands the result to asmJump (for jumps) or asmInstruction.
// A negative Reg in a non-jump operand is reported as an illegal use of a
// pseudo-register.
func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) {
	p.addr = p.addr[0:0]
	p.isJump = p.arch.IsJump(word)
	// Note: the loop variable op shadows the opcode parameter inside the loop only.
	for _, op := range operands {
		addr := p.address(op)
		if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo.
			p.errorf("illegal use of pseudo-register in %s", word)
		}
		p.addr = append(p.addr, addr)
	}
	if p.isJump {
		p.asmJump(op, cond, p.addr)
		return
	}
	p.asmInstruction(op, cond, p.addr)
}

// pseudo dispatches word to the matching pseudo-operation handler and
// reports whether word named a pseudo-operation.
func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
	switch word {
	case "DATA":
		p.asmData(operands)
	case "FUNCDATA":
		p.asmFuncData(operands)
	case "GLOBL":
		p.asmGlobl(operands)
	case "PCDATA":
		p.asmPCData(operands)
	case "PCALIGN":
		p.asmPCAlign(operands)
	case "TEXT":
		p.asmText(operands)
	default:
		return false
	}
	return true
}

// symDefRef scans a line for potential text symbol definitions and
// references and writes symabis information to w.
//
// The symabis format is documented at
// cmd/compile/internal/ssagen.ReadSymABIs.
func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) {
	switch word {
	case "TEXT":
		// Defines text symbol in operands[0].
		if len(operands) > 0 {
			p.start(operands[0])
			if name, abi, ok := p.funcAddress(); ok {
				fmt.Fprintf(w, "def %s %s\n", name, abi)
			}
		}
		return
	case "GLOBL", "PCDATA":
		// No text definitions or symbol references.
		// NOTE(review): this case does not return, so the operands are still
		// scanned by the reference loop below — confirm that is intended.
	case "DATA", "FUNCDATA":
		// For DATA, operands[0] is defined symbol.
		// For FUNCDATA, operands[0] is an immediate constant.
		// Remaining operands may have references.
		if len(operands) < 2 {
			return
		}
		operands = operands[1:]
	}
	// Search for symbol references.
	for _, op := range operands {
		p.start(op)
		if name, abi, ok := p.funcAddress(); ok {
			fmt.Fprintf(w, "ref %s %s\n", name, abi)
		}
	}
}

// start makes operand the token sequence being scanned and rewinds the
// scan position to its first token.
func (p *Parser) start(operand []lex.Token) {
	p.input = operand
	p.inputPos = 0
}

// address parses the operand into a link address structure.
355func (p *Parser) address(operand []lex.Token) obj.Addr { 356 p.start(operand) 357 addr := obj.Addr{} 358 p.operand(&addr) 359 return addr 360} 361 362// parseScale converts a decimal string into a valid scale factor. 363func (p *Parser) parseScale(s string) int8 { 364 switch s { 365 case "1", "2", "4", "8": 366 return int8(s[0] - '0') 367 } 368 p.errorf("bad scale: %s", s) 369 return 0 370} 371 372// operand parses a general operand and stores the result in *a. 373func (p *Parser) operand(a *obj.Addr) { 374 //fmt.Printf("Operand: %v\n", p.input) 375 if len(p.input) == 0 { 376 p.errorf("empty operand: cannot happen") 377 return 378 } 379 // General address (with a few exceptions) looks like 380 // $sym±offset(SB)(reg)(index*scale) 381 // Exceptions are: 382 // 383 // R1 384 // offset 385 // $offset 386 // Every piece is optional, so we scan left to right and what 387 // we discover tells us where we are. 388 389 // Prefix: $. 390 var prefix rune 391 switch tok := p.peek(); tok { 392 case '$', '*': 393 prefix = rune(tok) 394 p.next() 395 } 396 397 // Symbol: sym±offset(SB) 398 tok := p.next() 399 name := tok.String() 400 if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) { 401 switch p.arch.Family { 402 case sys.ARM64: 403 // arm64 special operands. 404 if opd := arch.GetARM64SpecialOperand(name); opd != arm64.SPOP_END { 405 a.Type = obj.TYPE_SPECIAL 406 a.Offset = int64(opd) 407 break 408 } 409 fallthrough 410 default: 411 // We have a symbol. 
Parse $sym±offset(symkind) 412 p.symbolReference(a, p.qualifySymbol(name), prefix) 413 } 414 // fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a)) 415 if p.peek() == scanner.EOF { 416 return 417 } 418 } 419 420 // Special register list syntax for arm: [R1,R3-R7] 421 if tok.ScanToken == '[' { 422 if prefix != 0 { 423 p.errorf("illegal use of register list") 424 } 425 p.registerList(a) 426 p.expectOperandEnd() 427 return 428 } 429 430 // Register: R1 431 if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) { 432 if p.atRegisterShift() { 433 // ARM shifted register such as R1<<R2 or R1>>2. 434 a.Type = obj.TYPE_SHIFT 435 a.Offset = p.registerShift(tok.String(), prefix) 436 if p.peek() == '(' { 437 // Can only be a literal register here. 438 p.next() 439 tok := p.next() 440 name := tok.String() 441 if !p.atStartOfRegister(name) { 442 p.errorf("expected register; found %s", name) 443 } 444 a.Reg, _ = p.registerReference(name) 445 p.get(')') 446 } 447 } else if p.atRegisterExtension() { 448 a.Type = obj.TYPE_REG 449 p.registerExtension(a, tok.String(), prefix) 450 p.expectOperandEnd() 451 return 452 } else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok { 453 if scale != 0 { 454 p.errorf("expected simple register reference") 455 } 456 a.Type = obj.TYPE_REG 457 a.Reg = r1 458 if r2 != 0 { 459 // Form is R1:R2. It is on RHS and the second register 460 // needs to go into the LHS. 461 panic("cannot happen (Addr.Reg2)") 462 } 463 } 464 // fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a)) 465 p.expectOperandEnd() 466 return 467 } 468 469 // Constant. 470 haveConstant := false 471 switch tok.ScanToken { 472 case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~': 473 haveConstant = true 474 case '(': 475 // Could be parenthesized expression or (R). Must be something, though. 
476 tok := p.next() 477 if tok.ScanToken == scanner.EOF { 478 p.errorf("missing right parenthesis") 479 return 480 } 481 rname := tok.String() 482 p.back() 483 haveConstant = !p.atStartOfRegister(rname) 484 if !haveConstant { 485 p.back() // Put back the '('. 486 } 487 } 488 if haveConstant { 489 p.back() 490 if p.have(scanner.Float) { 491 if prefix != '$' { 492 p.errorf("floating-point constant must be an immediate") 493 } 494 a.Type = obj.TYPE_FCONST 495 a.Val = p.floatExpr() 496 // fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a)) 497 p.expectOperandEnd() 498 return 499 } 500 if p.have(scanner.String) { 501 if prefix != '$' { 502 p.errorf("string constant must be an immediate") 503 return 504 } 505 str, err := strconv.Unquote(p.get(scanner.String).String()) 506 if err != nil { 507 p.errorf("string parse error: %s", err) 508 } 509 a.Type = obj.TYPE_SCONST 510 a.Val = str 511 // fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a)) 512 p.expectOperandEnd() 513 return 514 } 515 a.Offset = int64(p.expr()) 516 if p.peek() != '(' { 517 switch prefix { 518 case '$': 519 a.Type = obj.TYPE_CONST 520 case '*': 521 a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker. 522 default: 523 a.Type = obj.TYPE_MEM 524 } 525 // fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a)) 526 p.expectOperandEnd() 527 return 528 } 529 // fmt.Printf("offset %d \n", a.Offset) 530 } 531 532 // Register indirection: (reg) or (index*scale). We are on the opening paren. 533 p.registerIndirect(a, prefix) 534 // fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a)) 535 536 p.expectOperandEnd() 537 return 538} 539 540// atStartOfRegister reports whether the parser is at the start of a register definition. 541func (p *Parser) atStartOfRegister(name string) bool { 542 // Simple register: R10. 543 _, present := p.arch.Register[name] 544 if present { 545 return true 546 } 547 // Parenthesized register: R(10). 
548 return p.arch.RegisterPrefix[name] && p.peek() == '(' 549} 550 551// atRegisterShift reports whether we are at the start of an ARM shifted register. 552// We have consumed the register or R prefix. 553func (p *Parser) atRegisterShift() bool { 554 // ARM only. 555 if !p.arch.InFamily(sys.ARM, sys.ARM64) { 556 return false 557 } 558 // R1<<... 559 if lex.IsRegisterShift(p.peek()) { 560 return true 561 } 562 // R(1)<<... Ugly check. TODO: Rethink how we handle ARM register shifts to be 563 // less special. 564 if p.peek() != '(' || len(p.input)-p.inputPos < 4 { 565 return false 566 } 567 return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken) 568} 569 570// atRegisterExtension reports whether we are at the start of an ARM64 extended register. 571// We have consumed the register or R prefix. 572func (p *Parser) atRegisterExtension() bool { 573 // ARM64 only. 574 if p.arch.Family != sys.ARM64 { 575 return false 576 } 577 // R1.xxx 578 return p.peek() == '.' 579} 580 581// registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10). 582func (p *Parser) registerReference(name string) (int16, bool) { 583 r, present := p.arch.Register[name] 584 if present { 585 return r, true 586 } 587 if !p.arch.RegisterPrefix[name] { 588 p.errorf("expected register; found %s", name) 589 return 0, false 590 } 591 p.get('(') 592 tok := p.get(scanner.Int) 593 num, err := strconv.ParseInt(tok.String(), 10, 16) 594 p.get(')') 595 if err != nil { 596 p.errorf("parsing register list: %s", err) 597 return 0, false 598 } 599 r, ok := p.arch.RegisterNumber(name, int16(num)) 600 if !ok { 601 p.errorf("illegal register %s(%d)", name, r) 602 return 0, false 603 } 604 return r, true 605} 606 607// register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB)) 608// including forms involving multiple registers such as R1:R2. 
609func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) { 610 // R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale. 611 r1, ok = p.registerReference(name) 612 if !ok { 613 return 614 } 615 if prefix != 0 && prefix != '*' { // *AX is OK. 616 p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name) 617 } 618 c := p.peek() 619 if c == ':' || c == ',' || c == '+' { 620 // 2nd register; syntax (R1+R2) etc. No two architectures agree. 621 // Check the architectures match the syntax. 622 switch p.next().ScanToken { 623 case ',': 624 if !p.arch.InFamily(sys.ARM, sys.ARM64) { 625 p.errorf("(register,register) not supported on this architecture") 626 return 627 } 628 case '+': 629 if p.arch.Family != sys.PPC64 { 630 p.errorf("(register+register) not supported on this architecture") 631 return 632 } 633 } 634 name := p.next().String() 635 r2, ok = p.registerReference(name) 636 if !ok { 637 return 638 } 639 } 640 if p.peek() == '*' { 641 // Scale 642 p.next() 643 scale = p.parseScale(p.next().String()) 644 } 645 return r1, r2, scale, true 646} 647 648// registerShift parses an ARM/ARM64 shifted register reference and returns the encoded representation. 649// There is known to be a register (current token) and a shift operator (peeked token). 650func (p *Parser) registerShift(name string, prefix rune) int64 { 651 if prefix != 0 { 652 p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name) 653 } 654 // R1 op R2 or r1 op constant. 
655 // op is: 656 // "<<" == 0 657 // ">>" == 1 658 // "->" == 2 659 // "@>" == 3 660 r1, ok := p.registerReference(name) 661 if !ok { 662 return 0 663 } 664 var op int16 665 switch p.next().ScanToken { 666 case lex.LSH: 667 op = 0 668 case lex.RSH: 669 op = 1 670 case lex.ARR: 671 op = 2 672 case lex.ROT: 673 // following instructions on ARM64 support rotate right 674 // AND, ANDS, TST, BIC, BICS, EON, EOR, ORR, MVN, ORN 675 op = 3 676 } 677 tok := p.next() 678 str := tok.String() 679 var count int16 680 switch tok.ScanToken { 681 case scanner.Ident: 682 if p.arch.Family == sys.ARM64 { 683 p.errorf("rhs of shift must be integer: %s", str) 684 } else { 685 r2, ok := p.registerReference(str) 686 if !ok { 687 p.errorf("rhs of shift must be register or integer: %s", str) 688 } 689 count = (r2&15)<<8 | 1<<4 690 } 691 case scanner.Int, '(': 692 p.back() 693 x := int64(p.expr()) 694 if p.arch.Family == sys.ARM64 { 695 if x >= 64 { 696 p.errorf("register shift count too large: %s", str) 697 } 698 count = int16((x & 63) << 10) 699 } else { 700 if x >= 32 { 701 p.errorf("register shift count too large: %s", str) 702 } 703 count = int16((x & 31) << 7) 704 } 705 default: 706 p.errorf("unexpected %s in register shift", tok.String()) 707 } 708 if p.arch.Family == sys.ARM64 { 709 off, err := arch.ARM64RegisterShift(r1, op, count) 710 if err != nil { 711 p.errorf(err.Error()) 712 } 713 return off 714 } else { 715 return int64((r1 & 15) | op<<5 | count) 716 } 717} 718 719// registerExtension parses a register with extension or arrangement. 720// There is known to be a register (current token) and an extension operator (peeked token). 
721func (p *Parser) registerExtension(a *obj.Addr, name string, prefix rune) { 722 if prefix != 0 { 723 p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name) 724 } 725 726 reg, ok := p.registerReference(name) 727 if !ok { 728 p.errorf("unexpected %s in register extension", name) 729 return 730 } 731 732 isIndex := false 733 num := int16(0) 734 isAmount := true // Amount is zero by default 735 ext := "" 736 if p.peek() == lex.LSH { 737 // (Rn)(Rm<<2), the shifted offset register. 738 ext = "LSL" 739 } else { 740 // (Rn)(Rm.UXTW<1), the extended offset register. 741 // Rm.UXTW<<3, the extended register. 742 p.get('.') 743 tok := p.next() 744 ext = tok.String() 745 } 746 if p.peek() == lex.LSH { 747 // parses left shift amount applied after extension: <<Amount 748 p.get(lex.LSH) 749 tok := p.get(scanner.Int) 750 amount, err := strconv.ParseInt(tok.String(), 10, 16) 751 if err != nil { 752 p.errorf("parsing left shift amount: %s", err) 753 } 754 num = int16(amount) 755 } else if p.peek() == '[' { 756 // parses an element: [Index] 757 p.get('[') 758 tok := p.get(scanner.Int) 759 index, err := strconv.ParseInt(tok.String(), 10, 16) 760 p.get(']') 761 if err != nil { 762 p.errorf("parsing element index: %s", err) 763 } 764 isIndex = true 765 isAmount = false 766 num = int16(index) 767 } 768 769 switch p.arch.Family { 770 case sys.ARM64: 771 err := arch.ARM64RegisterExtension(a, ext, reg, num, isAmount, isIndex) 772 if err != nil { 773 p.errorf(err.Error()) 774 } 775 default: 776 p.errorf("register extension not supported on this architecture") 777 } 778} 779 780// qualifySymbol returns name as a package-qualified symbol name. If 781// name starts with a period, qualifySymbol prepends the package 782// prefix. Otherwise it returns name unchanged. 
783func (p *Parser) qualifySymbol(name string) string { 784 if strings.HasPrefix(name, ".") { 785 name = p.pkgPrefix + name 786 } 787 return name 788} 789 790// symbolReference parses a symbol that is known not to be a register. 791func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) { 792 // Identifier is a name. 793 switch prefix { 794 case 0: 795 a.Type = obj.TYPE_MEM 796 case '$': 797 a.Type = obj.TYPE_ADDR 798 case '*': 799 a.Type = obj.TYPE_INDIR 800 } 801 802 // Parse optional <> (indicates a static symbol) or 803 // <ABIxxx> (selecting text symbol with specific ABI). 804 doIssueError := true 805 isStatic, abi := p.symRefAttrs(name, doIssueError) 806 807 if p.peek() == '+' || p.peek() == '-' { 808 a.Offset = int64(p.expr()) 809 } 810 if isStatic { 811 a.Sym = p.ctxt.LookupStatic(name) 812 } else { 813 a.Sym = p.ctxt.LookupABI(name, abi) 814 } 815 if p.peek() == scanner.EOF { 816 if prefix == 0 && p.isJump { 817 // Symbols without prefix or suffix are jump labels. 818 return 819 } 820 p.errorf("illegal or missing addressing mode for symbol %s", name) 821 return 822 } 823 // Expect (SB), (FP), (PC), or (SP) 824 p.get('(') 825 reg := p.get(scanner.Ident).String() 826 p.get(')') 827 p.setPseudoRegister(a, reg, isStatic, prefix) 828} 829 830// setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB). 831func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) { 832 if addr.Reg != 0 { 833 p.errorf("internal error: reg %s already set in pseudo", reg) 834 } 835 switch reg { 836 case "FP": 837 addr.Name = obj.NAME_PARAM 838 case "PC": 839 if prefix != 0 { 840 p.errorf("illegal addressing mode for PC") 841 } 842 addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump. 843 case "SB": 844 addr.Name = obj.NAME_EXTERN 845 if isStatic { 846 addr.Name = obj.NAME_STATIC 847 } 848 case "SP": 849 addr.Name = obj.NAME_AUTO // The pseudo-stack. 
850 default: 851 p.errorf("expected pseudo-register; found %s", reg) 852 } 853 if prefix == '$' { 854 addr.Type = obj.TYPE_ADDR 855 } 856} 857 858// symRefAttrs parses an optional function symbol attribute clause for 859// the function symbol 'name', logging an error for a malformed 860// attribute clause if 'issueError' is true. The return value is a 861// (boolean, ABI) pair indicating that the named symbol is either 862// static or a particular ABI specification. 863// 864// The expected form of the attribute clause is: 865// 866// empty, yielding (false, obj.ABI0) 867// "<>", yielding (true, obj.ABI0) 868// "<ABI0>" yielding (false, obj.ABI0) 869// "<ABIInternal>" yielding (false, obj.ABIInternal) 870// 871// Anything else beginning with "<" logs an error if issueError is 872// true, otherwise returns (false, obj.ABI0). 873func (p *Parser) symRefAttrs(name string, issueError bool) (bool, obj.ABI) { 874 abi := obj.ABI0 875 isStatic := false 876 if p.peek() != '<' { 877 return isStatic, abi 878 } 879 p.next() 880 tok := p.peek() 881 if tok == '>' { 882 isStatic = true 883 } else if tok == scanner.Ident { 884 abistr := p.get(scanner.Ident).String() 885 if !p.allowABI { 886 if issueError { 887 p.errorf("ABI selector only permitted when compiling runtime, reference was to %q", name) 888 } 889 } else { 890 theabi, valid := obj.ParseABI(abistr) 891 if !valid { 892 if issueError { 893 p.errorf("malformed ABI selector %q in reference to %q", 894 abistr, name) 895 } 896 } else { 897 abi = theabi 898 } 899 } 900 } 901 p.get('>') 902 return isStatic, abi 903} 904 905// funcAddress parses an external function address. This is a 906// constrained form of the operand syntax that's always SB-based, 907// non-static, and has at most a simple integer offset: 908// 909// [$|*]sym[<abi>][+Int](SB) 910func (p *Parser) funcAddress() (string, obj.ABI, bool) { 911 switch p.peek() { 912 case '$', '*': 913 // Skip prefix. 
914 p.next() 915 } 916 917 tok := p.next() 918 name := tok.String() 919 if tok.ScanToken != scanner.Ident || p.atStartOfRegister(name) { 920 return "", obj.ABI0, false 921 } 922 name = p.qualifySymbol(name) 923 // Parse optional <> (indicates a static symbol) or 924 // <ABIxxx> (selecting text symbol with specific ABI). 925 noErrMsg := false 926 isStatic, abi := p.symRefAttrs(name, noErrMsg) 927 if isStatic { 928 return "", obj.ABI0, false // This function rejects static symbols. 929 } 930 tok = p.next() 931 if tok.ScanToken == '+' { 932 if p.next().ScanToken != scanner.Int { 933 return "", obj.ABI0, false 934 } 935 tok = p.next() 936 } 937 if tok.ScanToken != '(' { 938 return "", obj.ABI0, false 939 } 940 if reg := p.next(); reg.ScanToken != scanner.Ident || reg.String() != "SB" { 941 return "", obj.ABI0, false 942 } 943 if p.next().ScanToken != ')' || p.peek() != scanner.EOF { 944 return "", obj.ABI0, false 945 } 946 return name, abi, true 947} 948 949// registerIndirect parses the general form of a register indirection. 950// It can be (R1), (R2*scale), (R1)(R2*scale), (R1)(R2.SXTX<<3) or (R1)(R2<<3) 951// where R1 may be a simple register or register pair R:R or (R, R) or (R+R). 952// Or it might be a pseudo-indirection like (FP). 953// We are sitting on the opening parenthesis. 954func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) { 955 p.get('(') 956 tok := p.next() 957 name := tok.String() 958 r1, r2, scale, ok := p.register(name, 0) 959 if !ok { 960 p.errorf("indirect through non-register %s", tok) 961 } 962 p.get(')') 963 a.Type = obj.TYPE_MEM 964 if r1 < 0 { 965 // Pseudo-register reference. 966 if r2 != 0 { 967 p.errorf("cannot use pseudo-register in pair") 968 return 969 } 970 // For SB, SP, and FP, there must be a name here. 0(FP) is not legal. 
971 if name != "PC" && a.Name == obj.NAME_NONE { 972 p.errorf("cannot reference %s without a symbol", name) 973 } 974 p.setPseudoRegister(a, name, false, prefix) 975 return 976 } 977 a.Reg = r1 978 if r2 != 0 { 979 // TODO: Consistency in the encoding would be nice here. 980 if p.arch.InFamily(sys.ARM, sys.ARM64) { 981 // Special form 982 // ARM: destination register pair (R1, R2). 983 // ARM64: register pair (R1, R2) for LDP/STP. 984 if prefix != 0 || scale != 0 { 985 p.errorf("illegal address mode for register pair") 986 return 987 } 988 a.Type = obj.TYPE_REGREG 989 a.Offset = int64(r2) 990 // Nothing may follow 991 return 992 } 993 if p.arch.Family == sys.PPC64 { 994 // Special form for PPC64: (R1+R2); alias for (R1)(R2). 995 if prefix != 0 || scale != 0 { 996 p.errorf("illegal address mode for register+register") 997 return 998 } 999 a.Type = obj.TYPE_MEM 1000 a.Scale = 0 1001 a.Index = r2 1002 // Nothing may follow. 1003 return 1004 } 1005 } 1006 if r2 != 0 { 1007 p.errorf("indirect through register pair") 1008 } 1009 if prefix == '$' { 1010 a.Type = obj.TYPE_ADDR 1011 } 1012 if r1 == arch.RPC && prefix != 0 { 1013 p.errorf("illegal addressing mode for PC") 1014 } 1015 if scale == 0 && p.peek() == '(' { 1016 // General form (R)(R*scale). 1017 p.next() 1018 tok := p.next() 1019 if p.atRegisterExtension() { 1020 p.registerExtension(a, tok.String(), prefix) 1021 } else if p.atRegisterShift() { 1022 // (R1)(R2<<3) 1023 p.registerExtension(a, tok.String(), prefix) 1024 } else { 1025 r1, r2, scale, ok = p.register(tok.String(), 0) 1026 if !ok { 1027 p.errorf("indirect through non-register %s", tok) 1028 } 1029 if r2 != 0 { 1030 p.errorf("unimplemented two-register form") 1031 } 1032 a.Index = r1 1033 if scale != 0 && scale != 1 && (p.arch.Family == sys.ARM64 || 1034 p.arch.Family == sys.PPC64) { 1035 // Support (R1)(R2) (no scaling) and (R1)(R2*1). 
1036 p.errorf("%s doesn't support scaled register format", p.arch.Name) 1037 } else { 1038 a.Scale = int16(scale) 1039 } 1040 } 1041 p.get(')') 1042 } else if scale != 0 { 1043 if p.arch.Family == sys.ARM64 { 1044 p.errorf("arm64 doesn't support scaled register format") 1045 } 1046 // First (R) was missing, all we have is (R*scale). 1047 a.Reg = 0 1048 a.Index = r1 1049 a.Scale = int16(scale) 1050 } 1051} 1052 1053// registerList parses an ARM or ARM64 register list expression, a list of 1054// registers in []. There may be comma-separated ranges or individual 1055// registers, as in [R1,R3-R5] or [V1.S4, V2.S4, V3.S4, V4.S4]. 1056// For ARM, only R0 through R15 may appear. 1057// For ARM64, V0 through V31 with arrangement may appear. 1058// 1059// For 386/AMD64 register list specifies 4VNNIW-style multi-source operand. 1060// For range of 4 elements, Intel manual uses "+3" notation, for example: 1061// 1062// VP4DPWSSDS zmm1{k1}{z}, zmm2+3, m128 1063// 1064// Given asm line: 1065// 1066// VP4DPWSSDS Z5, [Z10-Z13], (AX) 1067// 1068// zmm2 is Z10, and Z13 is the only valid value for it (Z10+3). 1069// Only simple ranges are accepted, like [Z0-Z3]. 1070// 1071// The opening bracket has been consumed. 1072func (p *Parser) registerList(a *obj.Addr) { 1073 if p.arch.InFamily(sys.I386, sys.AMD64) { 1074 p.registerListX86(a) 1075 } else { 1076 p.registerListARM(a) 1077 } 1078} 1079 1080func (p *Parser) registerListARM(a *obj.Addr) { 1081 // One range per loop. 
1082 var maxReg int 1083 var bits uint16 1084 var arrangement int64 1085 switch p.arch.Family { 1086 case sys.ARM: 1087 maxReg = 16 1088 case sys.ARM64: 1089 maxReg = 32 1090 default: 1091 p.errorf("unexpected register list") 1092 } 1093 firstReg := -1 1094 nextReg := -1 1095 regCnt := 0 1096ListLoop: 1097 for { 1098 tok := p.next() 1099 switch tok.ScanToken { 1100 case ']': 1101 break ListLoop 1102 case scanner.EOF: 1103 p.errorf("missing ']' in register list") 1104 return 1105 } 1106 switch p.arch.Family { 1107 case sys.ARM64: 1108 // Vn.T 1109 name := tok.String() 1110 r, ok := p.registerReference(name) 1111 if !ok { 1112 p.errorf("invalid register: %s", name) 1113 } 1114 reg := r - p.arch.Register["V0"] 1115 p.get('.') 1116 tok := p.next() 1117 ext := tok.String() 1118 curArrangement, err := arch.ARM64RegisterArrangement(reg, name, ext) 1119 if err != nil { 1120 p.errorf(err.Error()) 1121 } 1122 if firstReg == -1 { 1123 // only record the first register and arrangement 1124 firstReg = int(reg) 1125 nextReg = firstReg 1126 arrangement = curArrangement 1127 } else if curArrangement != arrangement { 1128 p.errorf("inconsistent arrangement in ARM64 register list") 1129 } else if nextReg != int(reg) { 1130 p.errorf("incontiguous register in ARM64 register list: %s", name) 1131 } 1132 regCnt++ 1133 nextReg = (nextReg + 1) % 32 1134 case sys.ARM: 1135 // Parse the upper and lower bounds. 1136 lo := p.registerNumber(tok.String()) 1137 hi := lo 1138 if p.peek() == '-' { 1139 p.next() 1140 hi = p.registerNumber(p.next().String()) 1141 } 1142 if hi < lo { 1143 lo, hi = hi, lo 1144 } 1145 // Check there are no duplicates in the register list. 
1146 for i := 0; lo <= hi && i < maxReg; i++ { 1147 if bits&(1<<lo) != 0 { 1148 p.errorf("register R%d already in list", lo) 1149 } 1150 bits |= 1 << lo 1151 lo++ 1152 } 1153 default: 1154 p.errorf("unexpected register list") 1155 } 1156 if p.peek() != ']' { 1157 p.get(',') 1158 } 1159 } 1160 a.Type = obj.TYPE_REGLIST 1161 switch p.arch.Family { 1162 case sys.ARM: 1163 a.Offset = int64(bits) 1164 case sys.ARM64: 1165 offset, err := arch.ARM64RegisterListOffset(firstReg, regCnt, arrangement) 1166 if err != nil { 1167 p.errorf(err.Error()) 1168 } 1169 a.Offset = offset 1170 default: 1171 p.errorf("register list not supported on this architecture") 1172 } 1173} 1174 1175func (p *Parser) registerListX86(a *obj.Addr) { 1176 // Accept only [RegA-RegB] syntax. 1177 // Don't use p.get() to provide better error messages. 1178 1179 loName := p.next().String() 1180 lo, ok := p.arch.Register[loName] 1181 if !ok { 1182 if loName == "EOF" { 1183 p.errorf("register list: expected ']', found EOF") 1184 } else { 1185 p.errorf("register list: bad low register in `[%s`", loName) 1186 } 1187 return 1188 } 1189 if tok := p.next().ScanToken; tok != '-' { 1190 p.errorf("register list: expected '-' after `[%s`, found %s", loName, tok) 1191 return 1192 } 1193 hiName := p.next().String() 1194 hi, ok := p.arch.Register[hiName] 1195 if !ok { 1196 p.errorf("register list: bad high register in `[%s-%s`", loName, hiName) 1197 return 1198 } 1199 if tok := p.next().ScanToken; tok != ']' { 1200 p.errorf("register list: expected ']' after `[%s-%s`, found %s", loName, hiName, tok) 1201 } 1202 1203 a.Type = obj.TYPE_REGLIST 1204 a.Reg = lo 1205 a.Offset = x86.EncodeRegisterRange(lo, hi) 1206} 1207 1208// registerNumber is ARM-specific. It returns the number of the specified register. 
1209func (p *Parser) registerNumber(name string) uint16 { 1210 if p.arch.Family == sys.ARM && name == "g" { 1211 return 10 1212 } 1213 if name[0] != 'R' { 1214 p.errorf("expected g or R0 through R15; found %s", name) 1215 return 0 1216 } 1217 r, ok := p.registerReference(name) 1218 if !ok { 1219 return 0 1220 } 1221 reg := r - p.arch.Register["R0"] 1222 if reg < 0 { 1223 // Could happen for an architecture having other registers prefixed by R 1224 p.errorf("expected g or R0 through R15; found %s", name) 1225 return 0 1226 } 1227 return uint16(reg) 1228} 1229 1230// Note: There are two changes in the expression handling here 1231// compared to the old yacc/C implementations. Neither has 1232// much practical consequence because the expressions we 1233// see in assembly code are simple, but for the record: 1234// 1235// 1) Evaluation uses uint64; the old one used int64. 1236// 2) Precedence uses Go rules not C rules. 1237 1238// expr = term | term ('+' | '-' | '|' | '^') term. 1239func (p *Parser) expr() uint64 { 1240 value := p.term() 1241 for { 1242 switch p.peek() { 1243 case '+': 1244 p.next() 1245 value += p.term() 1246 case '-': 1247 p.next() 1248 value -= p.term() 1249 case '|': 1250 p.next() 1251 value |= p.term() 1252 case '^': 1253 p.next() 1254 value ^= p.term() 1255 default: 1256 return value 1257 } 1258 } 1259} 1260 1261// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')' 1262func (p *Parser) floatExpr() float64 { 1263 tok := p.next() 1264 switch tok.ScanToken { 1265 case '(': 1266 v := p.floatExpr() 1267 if p.next().ScanToken != ')' { 1268 p.errorf("missing closing paren") 1269 } 1270 return v 1271 case '+': 1272 return +p.floatExpr() 1273 case '-': 1274 return -p.floatExpr() 1275 case scanner.Float: 1276 return p.atof(tok.String()) 1277 } 1278 p.errorf("unexpected %s evaluating float expression", tok) 1279 return 0 1280} 1281 1282// term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor 1283func (p *Parser) term() 
uint64 { 1284 value := p.factor() 1285 for { 1286 switch p.peek() { 1287 case '*': 1288 p.next() 1289 value *= p.factor() 1290 case '/': 1291 p.next() 1292 if int64(value) < 0 { 1293 p.errorf("divide of value with high bit set") 1294 } 1295 divisor := p.factor() 1296 if divisor == 0 { 1297 p.errorf("division by zero") 1298 } else { 1299 value /= divisor 1300 } 1301 case '%': 1302 p.next() 1303 divisor := p.factor() 1304 if int64(value) < 0 { 1305 p.errorf("modulo of value with high bit set") 1306 } 1307 if divisor == 0 { 1308 p.errorf("modulo by zero") 1309 } else { 1310 value %= divisor 1311 } 1312 case lex.LSH: 1313 p.next() 1314 shift := p.factor() 1315 if int64(shift) < 0 { 1316 p.errorf("negative left shift count") 1317 } 1318 return value << shift 1319 case lex.RSH: 1320 p.next() 1321 shift := p.term() 1322 if int64(shift) < 0 { 1323 p.errorf("negative right shift count") 1324 } 1325 if int64(value) < 0 { 1326 p.errorf("right shift of value with high bit set") 1327 } 1328 value >>= shift 1329 case '&': 1330 p.next() 1331 value &= p.factor() 1332 default: 1333 return value 1334 } 1335 } 1336} 1337 1338// factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')' 1339func (p *Parser) factor() uint64 { 1340 tok := p.next() 1341 switch tok.ScanToken { 1342 case scanner.Int: 1343 return p.atoi(tok.String()) 1344 case scanner.Char: 1345 str, err := strconv.Unquote(tok.String()) 1346 if err != nil { 1347 p.errorf("%s", err) 1348 } 1349 r, w := utf8.DecodeRuneInString(str) 1350 if w == 1 && r == utf8.RuneError { 1351 p.errorf("illegal UTF-8 encoding for character constant") 1352 } 1353 return uint64(r) 1354 case '+': 1355 return +p.factor() 1356 case '-': 1357 return -p.factor() 1358 case '~': 1359 return ^p.factor() 1360 case '(': 1361 v := p.expr() 1362 if p.next().ScanToken != ')' { 1363 p.errorf("missing closing paren") 1364 } 1365 return v 1366 } 1367 p.errorf("unexpected %s evaluating expression", tok) 1368 return 0 1369} 1370 1371// positiveAtoi 
returns an int64 that must be >= 0. 1372func (p *Parser) positiveAtoi(str string) int64 { 1373 value, err := strconv.ParseInt(str, 0, 64) 1374 if err != nil { 1375 p.errorf("%s", err) 1376 } 1377 if value < 0 { 1378 p.errorf("%s overflows int64", str) 1379 } 1380 return value 1381} 1382 1383func (p *Parser) atoi(str string) uint64 { 1384 value, err := strconv.ParseUint(str, 0, 64) 1385 if err != nil { 1386 p.errorf("%s", err) 1387 } 1388 return value 1389} 1390 1391func (p *Parser) atof(str string) float64 { 1392 value, err := strconv.ParseFloat(str, 64) 1393 if err != nil { 1394 p.errorf("%s", err) 1395 } 1396 return value 1397} 1398 1399// EOF represents the end of input. 1400var EOF = lex.Make(scanner.EOF, "EOF") 1401 1402func (p *Parser) next() lex.Token { 1403 if !p.more() { 1404 return EOF 1405 } 1406 tok := p.input[p.inputPos] 1407 p.inputPos++ 1408 return tok 1409} 1410 1411func (p *Parser) back() { 1412 if p.inputPos == 0 { 1413 p.errorf("internal error: backing up before BOL") 1414 } else { 1415 p.inputPos-- 1416 } 1417} 1418 1419func (p *Parser) peek() lex.ScanToken { 1420 if p.more() { 1421 return p.input[p.inputPos].ScanToken 1422 } 1423 return scanner.EOF 1424} 1425 1426func (p *Parser) more() bool { 1427 return p.inputPos < len(p.input) 1428} 1429 1430// get verifies that the next item has the expected type and returns it. 1431func (p *Parser) get(expected lex.ScanToken) lex.Token { 1432 p.expect(expected, expected.String()) 1433 return p.next() 1434} 1435 1436// expectOperandEnd verifies that the parsing state is properly at the end of an operand. 1437func (p *Parser) expectOperandEnd() { 1438 p.expect(scanner.EOF, "end of operand") 1439} 1440 1441// expect verifies that the next item has the expected type. It does not consume it. 
1442func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) { 1443 if p.peek() != expectedToken { 1444 p.errorf("expected %s, found %s", expectedMessage, p.next()) 1445 } 1446} 1447 1448// have reports whether the remaining tokens (including the current one) contain the specified token. 1449func (p *Parser) have(token lex.ScanToken) bool { 1450 for i := p.inputPos; i < len(p.input); i++ { 1451 if p.input[i].ScanToken == token { 1452 return true 1453 } 1454 } 1455 return false 1456} 1457 1458// at reports whether the next tokens are as requested. 1459func (p *Parser) at(next ...lex.ScanToken) bool { 1460 if len(p.input)-p.inputPos < len(next) { 1461 return false 1462 } 1463 for i, r := range next { 1464 if p.input[p.inputPos+i].ScanToken != r { 1465 return false 1466 } 1467 } 1468 return true 1469} 1470