1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package asm implements the parser and instruction generator for the assembler.
6// TODO: Split apart?
7package asm
8
9import (
10	"fmt"
11	"io"
12	"log"
13	"os"
14	"strconv"
15	"strings"
16	"text/scanner"
17	"unicode/utf8"
18
19	"cmd/asm/internal/arch"
20	"cmd/asm/internal/flags"
21	"cmd/asm/internal/lex"
22	"cmd/internal/obj"
23	"cmd/internal/obj/arm64"
24	"cmd/internal/obj/x86"
25	"cmd/internal/objabi"
26	"cmd/internal/src"
27	"cmd/internal/sys"
28)
29
// Parser holds the state used while parsing one assembly source.
//
// Tokens are read from lex. The operand currently being parsed is held in
// input, with inputPos as the read position within it (see start). addr is a
// scratch slice that instruction resets and refills with the addresses of the
// operands of the instruction being assembled. Parse returns firstProg on
// success. Error messages produced by errorf are written to errorWriter.
//
// NOTE(review): labels, toPatch and lastProg are maintained by code outside
// this part of the file; presumably labels maps label names to the Prog they
// mark and toPatch lists forward references resolved by patch — confirm there.
type Parser struct {
	lex           lex.TokenReader
	lineNum       int   // Line number in source file.
	errorLine     int   // Line number of last error.
	errorCount    int   // Number of errors.
	sawCode       bool  // saw code in this file (as opposed to comments and blank lines)
	pc            int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
	input         []lex.Token
	inputPos      int
	pendingLabels []string // Labels to attach to next instruction.
	labels        map[string]*obj.Prog
	toPatch       []Patch
	addr          []obj.Addr
	arch          *arch.Arch
	ctxt          *obj.Link
	firstProg     *obj.Prog
	lastProg      *obj.Prog
	dataAddr      map[string]int64 // Most recent address for DATA for this symbol.
	isJump        bool             // Instruction being assembled is a jump.
	allowABI      bool             // Whether ABI selectors are allowed.
	pkgPrefix     string           // Prefix to add to local symbols.
	errorWriter   io.Writer
}
53
// Patch pairs an operand address with the name of a label.
//
// NOTE(review): presumably these are queued in Parser.toPatch for references
// to labels that are not yet defined and are resolved by patch at the end of
// Parse — the code that fills and consumes them is outside this view.
type Patch struct {
	addr  *obj.Addr
	label string
}
58
59func NewParser(ctxt *obj.Link, ar *arch.Arch, lexer lex.TokenReader) *Parser {
60	pkgPrefix := obj.UnlinkablePkg
61	if ctxt != nil {
62		pkgPrefix = objabi.PathToPrefix(ctxt.Pkgpath)
63	}
64	return &Parser{
65		ctxt:        ctxt,
66		arch:        ar,
67		lex:         lexer,
68		labels:      make(map[string]*obj.Prog),
69		dataAddr:    make(map[string]int64),
70		errorWriter: os.Stderr,
71		allowABI:    ctxt != nil && objabi.LookupPkgSpecial(ctxt.Pkgpath).AllowAsmABI,
72		pkgPrefix:   pkgPrefix,
73	}
74}
75
// panicOnError is enabled when testing to abort execution on the first error
// and turn it into a recoverable panic.
// While it is set, errorf panics with the formatted error instead of
// writing it to the parser's error writer.
var panicOnError bool
79
80func (p *Parser) errorf(format string, args ...interface{}) {
81	if panicOnError {
82		panic(fmt.Errorf(format, args...))
83	}
84	if p.lineNum == p.errorLine {
85		// Only one error per line.
86		return
87	}
88	p.errorLine = p.lineNum
89	if p.lex != nil {
90		// Put file and line information on head of message.
91		format = "%s:%d: " + format + "\n"
92		args = append([]interface{}{p.lex.File(), p.lineNum}, args...)
93	}
94	fmt.Fprintf(p.errorWriter, format, args...)
95	p.errorCount++
96	if p.errorCount > 10 && !*flags.AllErrors {
97		log.Fatal("too many errors")
98	}
99}
100
101func (p *Parser) pos() src.XPos {
102	return p.ctxt.PosTable.XPos(src.MakePos(p.lex.Base(), uint(p.lineNum), 0))
103}
104
105func (p *Parser) Parse() (*obj.Prog, bool) {
106	scratch := make([][]lex.Token, 0, 3)
107	for {
108		word, cond, operands, ok := p.line(scratch)
109		if !ok {
110			break
111		}
112		scratch = operands
113
114		if p.pseudo(word, operands) {
115			continue
116		}
117		i, present := p.arch.Instructions[word]
118		if present {
119			p.instruction(i, word, cond, operands)
120			continue
121		}
122		p.errorf("unrecognized instruction %q", word)
123	}
124	if p.errorCount > 0 {
125		return nil, false
126	}
127	p.patch()
128	return p.firstProg, true
129}
130
131// ParseSymABIs parses p's assembly code to find text symbol
132// definitions and references and writes a symabis file to w.
133func (p *Parser) ParseSymABIs(w io.Writer) bool {
134	operands := make([][]lex.Token, 0, 3)
135	for {
136		word, _, operands1, ok := p.line(operands)
137		if !ok {
138			break
139		}
140		operands = operands1
141
142		p.symDefRef(w, word, operands)
143	}
144	return p.errorCount == 0
145}
146
147// nextToken returns the next non-build-comment token from the lexer.
148// It reports misplaced //go:build comments but otherwise discards them.
149func (p *Parser) nextToken() lex.ScanToken {
150	for {
151		tok := p.lex.Next()
152		if tok == lex.BuildComment {
153			if p.sawCode {
154				p.errorf("misplaced //go:build comment")
155			}
156			continue
157		}
158		if tok != '\n' {
159			p.sawCode = true
160		}
161		if tok == '#' {
162			// A leftover wisp of a #include/#define/etc,
163			// to let us know that p.sawCode should be true now.
164			// Otherwise ignored.
165			continue
166		}
167		return tok
168	}
169}
170
// line consumes a single assembly line from p.lex of the form
//
//	{label:} WORD[.cond] [ arg {, arg} ] (';' | '\n')
//
// It adds any labels to p.pendingLabels and returns the word, cond,
// operand list, and true. If there is an error or EOF, it returns
// ok=false.
//
// Not every reported error yields ok=false: a second ':' separator in the
// operand list and a missing operand are reported through p.errorf but the
// line is still returned with ok=true, so the caller keeps going.
//
// Each returned operand is the slice of tokens between separators. Commas and
// colons inside parentheses or brackets do not separate operands.
//
// line may reuse the memory from scratch.
func (p *Parser) line(scratch [][]lex.Token) (word, cond string, operands [][]lex.Token, ok bool) {
next:
	// Skip newlines.
	var tok lex.ScanToken
	for {
		tok = p.nextToken()
		// We save the line number here so error messages from this instruction
		// are labeled with this line. Otherwise we complain after we've absorbed
		// the terminating newline and the line numbers are off by one in errors.
		p.lineNum = p.lex.Line()
		switch tok {
		case '\n', ';':
			continue
		case scanner.EOF:
			return "", "", nil, false
		}
		break
	}
	// First item must be an identifier.
	if tok != scanner.Ident {
		p.errorf("expected identifier, found %q", p.lex.Text())
		return "", "", nil, false // Might as well stop now.
	}
	word, cond = p.lex.Text(), ""
	operands = scratch[:0]
	// Zero or more comma-separated operands, one per loop.
	nesting := 0 // Depth of open '(' and '[' in the current operand.
	colon := -1  // Index of the operand that preceded a ':' separator, or -1.
	for tok != '\n' && tok != ';' {
		// Process one operand.
		var items []lex.Token
		if cap(operands) > len(operands) {
			// Reuse scratch items slice.
			items = operands[:cap(operands)][len(operands)][:0]
		} else {
			items = make([]lex.Token, 0, 3)
		}
		for {
			tok = p.nextToken()
			// Suffixes and labels can only follow the word directly,
			// before any operand token has been collected.
			if len(operands) == 0 && len(items) == 0 {
				if p.arch.InFamily(sys.ARM, sys.ARM64, sys.AMD64, sys.I386, sys.RISCV64) && tok == '.' {
					// Suffixes: ARM conditionals, RISCV rounding mode or x86 modifiers.
					tok = p.nextToken()
					str := p.lex.Text()
					if tok != scanner.Ident {
						p.errorf("instruction suffix expected identifier, found %s", str)
					}
					cond = cond + "." + str
					continue
				}
				if tok == ':' {
					// Labels.
					// The word was a label; record it and start over to
					// read the rest of the line as a fresh line.
					p.pendingLabels = append(p.pendingLabels, word)
					goto next
				}
			}
			if tok == scanner.EOF {
				p.errorf("unexpected EOF")
				return "", "", nil, false
			}
			// Split operands on comma. Also, the old syntax on x86 for a "register pair"
			// was AX:DX, for which the new syntax is DX, AX. Note the reordering.
			if tok == '\n' || tok == ';' || (nesting == 0 && (tok == ',' || tok == ':')) {
				if tok == ':' {
					// Remember this location so we can swap the operands below.
					if colon >= 0 {
						p.errorf("invalid ':' in operand")
						return word, cond, operands, true
					}
					colon = len(operands)
				}
				break
			}
			if tok == '(' || tok == '[' {
				nesting++
			}
			if tok == ')' || tok == ']' {
				nesting--
			}
			items = append(items, lex.Make(tok, p.lex.Text()))
		}
		if len(items) > 0 {
			operands = append(operands, items)
			if colon >= 0 && len(operands) == colon+2 {
				// AX:DX becomes DX, AX.
				operands[colon], operands[colon+1] = operands[colon+1], operands[colon]
				colon = -1
			}
		} else if len(operands) > 0 || tok == ',' || colon >= 0 {
			// Had a separator with nothing after.
			p.errorf("missing operand")
		}
	}
	return word, cond, operands, true
}
275
276func (p *Parser) instruction(op obj.As, word, cond string, operands [][]lex.Token) {
277	p.addr = p.addr[0:0]
278	p.isJump = p.arch.IsJump(word)
279	for _, op := range operands {
280		addr := p.address(op)
281		if !p.isJump && addr.Reg < 0 { // Jumps refer to PC, a pseudo.
282			p.errorf("illegal use of pseudo-register in %s", word)
283		}
284		p.addr = append(p.addr, addr)
285	}
286	if p.isJump {
287		p.asmJump(op, cond, p.addr)
288		return
289	}
290	p.asmInstruction(op, cond, p.addr)
291}
292
293func (p *Parser) pseudo(word string, operands [][]lex.Token) bool {
294	switch word {
295	case "DATA":
296		p.asmData(operands)
297	case "FUNCDATA":
298		p.asmFuncData(operands)
299	case "GLOBL":
300		p.asmGlobl(operands)
301	case "PCDATA":
302		p.asmPCData(operands)
303	case "PCALIGN":
304		p.asmPCAlign(operands)
305	case "TEXT":
306		p.asmText(operands)
307	default:
308		return false
309	}
310	return true
311}
312
313// symDefRef scans a line for potential text symbol definitions and
314// references and writes symabis information to w.
315//
316// The symabis format is documented at
317// cmd/compile/internal/ssagen.ReadSymABIs.
318func (p *Parser) symDefRef(w io.Writer, word string, operands [][]lex.Token) {
319	switch word {
320	case "TEXT":
321		// Defines text symbol in operands[0].
322		if len(operands) > 0 {
323			p.start(operands[0])
324			if name, abi, ok := p.funcAddress(); ok {
325				fmt.Fprintf(w, "def %s %s\n", name, abi)
326			}
327		}
328		return
329	case "GLOBL", "PCDATA":
330		// No text definitions or symbol references.
331	case "DATA", "FUNCDATA":
332		// For DATA, operands[0] is defined symbol.
333		// For FUNCDATA, operands[0] is an immediate constant.
334		// Remaining operands may have references.
335		if len(operands) < 2 {
336			return
337		}
338		operands = operands[1:]
339	}
340	// Search for symbol references.
341	for _, op := range operands {
342		p.start(op)
343		if name, abi, ok := p.funcAddress(); ok {
344			fmt.Fprintf(w, "ref %s %s\n", name, abi)
345		}
346	}
347}
348
349func (p *Parser) start(operand []lex.Token) {
350	p.input = operand
351	p.inputPos = 0
352}
353
354// address parses the operand into a link address structure.
355func (p *Parser) address(operand []lex.Token) obj.Addr {
356	p.start(operand)
357	addr := obj.Addr{}
358	p.operand(&addr)
359	return addr
360}
361
362// parseScale converts a decimal string into a valid scale factor.
363func (p *Parser) parseScale(s string) int8 {
364	switch s {
365	case "1", "2", "4", "8":
366		return int8(s[0] - '0')
367	}
368	p.errorf("bad scale: %s", s)
369	return 0
370}
371
// operand parses a general operand and stores the result in *a.
//
// The tokens come from p.input, as set up by start. An optional '$' or '*'
// prefix is consumed first; the first token after it decides which form
// is parsed:
//
//   - an identifier that is not a register: a symbol reference (on arm64,
//     possibly a special operand instead), optionally followed by a
//     register indirection
//   - '[': a register list
//   - a register: a plain, shifted or extended register
//   - a constant or parenthesized expression: a float, string or integer
//     constant, or an offset followed by a register indirection
//   - otherwise: a register indirection
//
// Errors are reported through p.errorf; on error *a may be left only
// partially filled in.
func (p *Parser) operand(a *obj.Addr) {
	//fmt.Printf("Operand: %v\n", p.input)
	if len(p.input) == 0 {
		p.errorf("empty operand: cannot happen")
		return
	}
	// General address (with a few exceptions) looks like
	//	$sym±offset(SB)(reg)(index*scale)
	// Exceptions are:
	//
	//	R1
	//	offset
	//	$offset
	// Every piece is optional, so we scan left to right and what
	// we discover tells us where we are.

	// Prefix: $ or *.
	var prefix rune
	switch tok := p.peek(); tok {
	case '$', '*':
		prefix = rune(tok)
		p.next()
	}

	// Symbol: sym±offset(SB)
	tok := p.next()
	name := tok.String()
	if tok.ScanToken == scanner.Ident && !p.atStartOfRegister(name) {
		switch p.arch.Family {
		case sys.ARM64:
			// arm64 special operands.
			if opd := arch.GetARM64SpecialOperand(name); opd != arm64.SPOP_END {
				a.Type = obj.TYPE_SPECIAL
				a.Offset = int64(opd)
				break
			}
			fallthrough
		default:
			// We have a symbol. Parse $sym±offset(symkind)
			p.symbolReference(a, p.qualifySymbol(name), prefix)
		}
		// fmt.Printf("SYM %s\n", obj.Dconv(&emptyProg, 0, a))
		if p.peek() == scanner.EOF {
			return
		}
		// Tokens remain after the symbol; they are handled by the
		// register indirection code at the end of the function.
	}

	// Special register list syntax for arm: [R1,R3-R7]
	if tok.ScanToken == '[' {
		if prefix != 0 {
			p.errorf("illegal use of register list")
		}
		p.registerList(a)
		p.expectOperandEnd()
		return
	}

	// Register: R1
	if tok.ScanToken == scanner.Ident && p.atStartOfRegister(name) {
		if p.atRegisterShift() {
			// ARM shifted register such as R1<<R2 or R1>>2.
			a.Type = obj.TYPE_SHIFT
			a.Offset = p.registerShift(tok.String(), prefix)
			if p.peek() == '(' {
				// Can only be a literal register here.
				p.next()
				tok := p.next()
				name := tok.String()
				if !p.atStartOfRegister(name) {
					p.errorf("expected register; found %s", name)
				}
				a.Reg, _ = p.registerReference(name)
				p.get(')')
			}
		} else if p.atRegisterExtension() {
			a.Type = obj.TYPE_REG
			p.registerExtension(a, tok.String(), prefix)
			p.expectOperandEnd()
			return
		} else if r1, r2, scale, ok := p.register(tok.String(), prefix); ok {
			if scale != 0 {
				p.errorf("expected simple register reference")
			}
			a.Type = obj.TYPE_REG
			a.Reg = r1
			if r2 != 0 {
				// Form is R1:R2. It is on RHS and the second register
				// needs to go into the LHS.
				panic("cannot happen (Addr.Reg2)")
			}
		}
		// fmt.Printf("REG %s\n", obj.Dconv(&emptyProg, 0, a))
		p.expectOperandEnd()
		return
	}

	// Constant.
	haveConstant := false
	switch tok.ScanToken {
	case scanner.Int, scanner.Float, scanner.String, scanner.Char, '+', '-', '~':
		haveConstant = true
	case '(':
		// Could be parenthesized expression or (R). Must be something, though.
		tok := p.next()
		if tok.ScanToken == scanner.EOF {
			p.errorf("missing right parenthesis")
			return
		}
		rname := tok.String()
		p.back()
		haveConstant = !p.atStartOfRegister(rname)
		if !haveConstant {
			p.back() // Put back the '('.
		}
	}
	if haveConstant {
		// Put back the first token of the constant so the expression
		// parsers below see it.
		p.back()
		if p.have(scanner.Float) {
			if prefix != '$' {
				p.errorf("floating-point constant must be an immediate")
			}
			a.Type = obj.TYPE_FCONST
			a.Val = p.floatExpr()
			// fmt.Printf("FCONST %s\n", obj.Dconv(&emptyProg, 0, a))
			p.expectOperandEnd()
			return
		}
		if p.have(scanner.String) {
			if prefix != '$' {
				p.errorf("string constant must be an immediate")
				return
			}
			str, err := strconv.Unquote(p.get(scanner.String).String())
			if err != nil {
				p.errorf("string parse error: %s", err)
			}
			a.Type = obj.TYPE_SCONST
			a.Val = str
			// fmt.Printf("SCONST %s\n", obj.Dconv(&emptyProg, 0, a))
			p.expectOperandEnd()
			return
		}
		a.Offset = int64(p.expr())
		if p.peek() != '(' {
			// A bare constant: the prefix decides how it is classified.
			switch prefix {
			case '$':
				a.Type = obj.TYPE_CONST
			case '*':
				a.Type = obj.TYPE_INDIR // Can appear but is illegal, will be rejected by the linker.
			default:
				a.Type = obj.TYPE_MEM
			}
			// fmt.Printf("CONST %d %s\n", a.Offset, obj.Dconv(&emptyProg, 0, a))
			p.expectOperandEnd()
			return
		}
		// fmt.Printf("offset %d \n", a.Offset)
	}

	// Register indirection: (reg) or (index*scale). We are on the opening paren.
	p.registerIndirect(a, prefix)
	// fmt.Printf("DONE %s\n", p.arch.Dconv(&emptyProg, 0, a))

	p.expectOperandEnd()
	return
}
539
540// atStartOfRegister reports whether the parser is at the start of a register definition.
541func (p *Parser) atStartOfRegister(name string) bool {
542	// Simple register: R10.
543	_, present := p.arch.Register[name]
544	if present {
545		return true
546	}
547	// Parenthesized register: R(10).
548	return p.arch.RegisterPrefix[name] && p.peek() == '('
549}
550
551// atRegisterShift reports whether we are at the start of an ARM shifted register.
552// We have consumed the register or R prefix.
553func (p *Parser) atRegisterShift() bool {
554	// ARM only.
555	if !p.arch.InFamily(sys.ARM, sys.ARM64) {
556		return false
557	}
558	// R1<<...
559	if lex.IsRegisterShift(p.peek()) {
560		return true
561	}
562	// R(1)<<...   Ugly check. TODO: Rethink how we handle ARM register shifts to be
563	// less special.
564	if p.peek() != '(' || len(p.input)-p.inputPos < 4 {
565		return false
566	}
567	return p.at('(', scanner.Int, ')') && lex.IsRegisterShift(p.input[p.inputPos+3].ScanToken)
568}
569
570// atRegisterExtension reports whether we are at the start of an ARM64 extended register.
571// We have consumed the register or R prefix.
572func (p *Parser) atRegisterExtension() bool {
573	// ARM64 only.
574	if p.arch.Family != sys.ARM64 {
575		return false
576	}
577	// R1.xxx
578	return p.peek() == '.'
579}
580
581// registerReference parses a register given either the name, R10, or a parenthesized form, SPR(10).
582func (p *Parser) registerReference(name string) (int16, bool) {
583	r, present := p.arch.Register[name]
584	if present {
585		return r, true
586	}
587	if !p.arch.RegisterPrefix[name] {
588		p.errorf("expected register; found %s", name)
589		return 0, false
590	}
591	p.get('(')
592	tok := p.get(scanner.Int)
593	num, err := strconv.ParseInt(tok.String(), 10, 16)
594	p.get(')')
595	if err != nil {
596		p.errorf("parsing register list: %s", err)
597		return 0, false
598	}
599	r, ok := p.arch.RegisterNumber(name, int16(num))
600	if !ok {
601		p.errorf("illegal register %s(%d)", name, r)
602		return 0, false
603	}
604	return r, true
605}
606
607// register parses a full register reference where there is no symbol present (as in 4(R0) or R(10) but not sym(SB))
608// including forms involving multiple registers such as R1:R2.
609func (p *Parser) register(name string, prefix rune) (r1, r2 int16, scale int8, ok bool) {
610	// R1 or R(1) R1:R2 R1,R2 R1+R2, or R1*scale.
611	r1, ok = p.registerReference(name)
612	if !ok {
613		return
614	}
615	if prefix != 0 && prefix != '*' { // *AX is OK.
616		p.errorf("prefix %c not allowed for register: %c%s", prefix, prefix, name)
617	}
618	c := p.peek()
619	if c == ':' || c == ',' || c == '+' {
620		// 2nd register; syntax (R1+R2) etc. No two architectures agree.
621		// Check the architectures match the syntax.
622		switch p.next().ScanToken {
623		case ',':
624			if !p.arch.InFamily(sys.ARM, sys.ARM64) {
625				p.errorf("(register,register) not supported on this architecture")
626				return
627			}
628		case '+':
629			if p.arch.Family != sys.PPC64 {
630				p.errorf("(register+register) not supported on this architecture")
631				return
632			}
633		}
634		name := p.next().String()
635		r2, ok = p.registerReference(name)
636		if !ok {
637			return
638		}
639	}
640	if p.peek() == '*' {
641		// Scale
642		p.next()
643		scale = p.parseScale(p.next().String())
644	}
645	return r1, r2, scale, true
646}
647
648// registerShift parses an ARM/ARM64 shifted register reference and returns the encoded representation.
649// There is known to be a register (current token) and a shift operator (peeked token).
650func (p *Parser) registerShift(name string, prefix rune) int64 {
651	if prefix != 0 {
652		p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name)
653	}
654	// R1 op R2 or r1 op constant.
655	// op is:
656	//	"<<" == 0
657	//	">>" == 1
658	//	"->" == 2
659	//	"@>" == 3
660	r1, ok := p.registerReference(name)
661	if !ok {
662		return 0
663	}
664	var op int16
665	switch p.next().ScanToken {
666	case lex.LSH:
667		op = 0
668	case lex.RSH:
669		op = 1
670	case lex.ARR:
671		op = 2
672	case lex.ROT:
673		// following instructions on ARM64 support rotate right
674		// AND, ANDS, TST, BIC, BICS, EON, EOR, ORR, MVN, ORN
675		op = 3
676	}
677	tok := p.next()
678	str := tok.String()
679	var count int16
680	switch tok.ScanToken {
681	case scanner.Ident:
682		if p.arch.Family == sys.ARM64 {
683			p.errorf("rhs of shift must be integer: %s", str)
684		} else {
685			r2, ok := p.registerReference(str)
686			if !ok {
687				p.errorf("rhs of shift must be register or integer: %s", str)
688			}
689			count = (r2&15)<<8 | 1<<4
690		}
691	case scanner.Int, '(':
692		p.back()
693		x := int64(p.expr())
694		if p.arch.Family == sys.ARM64 {
695			if x >= 64 {
696				p.errorf("register shift count too large: %s", str)
697			}
698			count = int16((x & 63) << 10)
699		} else {
700			if x >= 32 {
701				p.errorf("register shift count too large: %s", str)
702			}
703			count = int16((x & 31) << 7)
704		}
705	default:
706		p.errorf("unexpected %s in register shift", tok.String())
707	}
708	if p.arch.Family == sys.ARM64 {
709		off, err := arch.ARM64RegisterShift(r1, op, count)
710		if err != nil {
711			p.errorf(err.Error())
712		}
713		return off
714	} else {
715		return int64((r1 & 15) | op<<5 | count)
716	}
717}
718
719// registerExtension parses a register with extension or arrangement.
720// There is known to be a register (current token) and an extension operator (peeked token).
721func (p *Parser) registerExtension(a *obj.Addr, name string, prefix rune) {
722	if prefix != 0 {
723		p.errorf("prefix %c not allowed for shifted register: $%s", prefix, name)
724	}
725
726	reg, ok := p.registerReference(name)
727	if !ok {
728		p.errorf("unexpected %s in register extension", name)
729		return
730	}
731
732	isIndex := false
733	num := int16(0)
734	isAmount := true // Amount is zero by default
735	ext := ""
736	if p.peek() == lex.LSH {
737		// (Rn)(Rm<<2), the shifted offset register.
738		ext = "LSL"
739	} else {
740		// (Rn)(Rm.UXTW<1), the extended offset register.
741		// Rm.UXTW<<3, the extended register.
742		p.get('.')
743		tok := p.next()
744		ext = tok.String()
745	}
746	if p.peek() == lex.LSH {
747		// parses left shift amount applied after extension: <<Amount
748		p.get(lex.LSH)
749		tok := p.get(scanner.Int)
750		amount, err := strconv.ParseInt(tok.String(), 10, 16)
751		if err != nil {
752			p.errorf("parsing left shift amount: %s", err)
753		}
754		num = int16(amount)
755	} else if p.peek() == '[' {
756		// parses an element: [Index]
757		p.get('[')
758		tok := p.get(scanner.Int)
759		index, err := strconv.ParseInt(tok.String(), 10, 16)
760		p.get(']')
761		if err != nil {
762			p.errorf("parsing element index: %s", err)
763		}
764		isIndex = true
765		isAmount = false
766		num = int16(index)
767	}
768
769	switch p.arch.Family {
770	case sys.ARM64:
771		err := arch.ARM64RegisterExtension(a, ext, reg, num, isAmount, isIndex)
772		if err != nil {
773			p.errorf(err.Error())
774		}
775	default:
776		p.errorf("register extension not supported on this architecture")
777	}
778}
779
780// qualifySymbol returns name as a package-qualified symbol name. If
781// name starts with a period, qualifySymbol prepends the package
782// prefix. Otherwise it returns name unchanged.
783func (p *Parser) qualifySymbol(name string) string {
784	if strings.HasPrefix(name, ".") {
785		name = p.pkgPrefix + name
786	}
787	return name
788}
789
790// symbolReference parses a symbol that is known not to be a register.
791func (p *Parser) symbolReference(a *obj.Addr, name string, prefix rune) {
792	// Identifier is a name.
793	switch prefix {
794	case 0:
795		a.Type = obj.TYPE_MEM
796	case '$':
797		a.Type = obj.TYPE_ADDR
798	case '*':
799		a.Type = obj.TYPE_INDIR
800	}
801
802	// Parse optional <> (indicates a static symbol) or
803	// <ABIxxx> (selecting text symbol with specific ABI).
804	doIssueError := true
805	isStatic, abi := p.symRefAttrs(name, doIssueError)
806
807	if p.peek() == '+' || p.peek() == '-' {
808		a.Offset = int64(p.expr())
809	}
810	if isStatic {
811		a.Sym = p.ctxt.LookupStatic(name)
812	} else {
813		a.Sym = p.ctxt.LookupABI(name, abi)
814	}
815	if p.peek() == scanner.EOF {
816		if prefix == 0 && p.isJump {
817			// Symbols without prefix or suffix are jump labels.
818			return
819		}
820		p.errorf("illegal or missing addressing mode for symbol %s", name)
821		return
822	}
823	// Expect (SB), (FP), (PC), or (SP)
824	p.get('(')
825	reg := p.get(scanner.Ident).String()
826	p.get(')')
827	p.setPseudoRegister(a, reg, isStatic, prefix)
828}
829
830// setPseudoRegister sets the NAME field of addr for a pseudo-register reference such as (SB).
831func (p *Parser) setPseudoRegister(addr *obj.Addr, reg string, isStatic bool, prefix rune) {
832	if addr.Reg != 0 {
833		p.errorf("internal error: reg %s already set in pseudo", reg)
834	}
835	switch reg {
836	case "FP":
837		addr.Name = obj.NAME_PARAM
838	case "PC":
839		if prefix != 0 {
840			p.errorf("illegal addressing mode for PC")
841		}
842		addr.Type = obj.TYPE_BRANCH // We set the type and leave NAME untouched. See asmJump.
843	case "SB":
844		addr.Name = obj.NAME_EXTERN
845		if isStatic {
846			addr.Name = obj.NAME_STATIC
847		}
848	case "SP":
849		addr.Name = obj.NAME_AUTO // The pseudo-stack.
850	default:
851		p.errorf("expected pseudo-register; found %s", reg)
852	}
853	if prefix == '$' {
854		addr.Type = obj.TYPE_ADDR
855	}
856}
857
858// symRefAttrs parses an optional function symbol attribute clause for
859// the function symbol 'name', logging an error for a malformed
860// attribute clause if 'issueError' is true. The return value is a
861// (boolean, ABI) pair indicating that the named symbol is either
862// static or a particular ABI specification.
863//
864// The expected form of the attribute clause is:
865//
866// empty,           yielding (false, obj.ABI0)
867// "<>",            yielding (true,  obj.ABI0)
868// "<ABI0>"         yielding (false, obj.ABI0)
869// "<ABIInternal>"  yielding (false, obj.ABIInternal)
870//
871// Anything else beginning with "<" logs an error if issueError is
872// true, otherwise returns (false, obj.ABI0).
873func (p *Parser) symRefAttrs(name string, issueError bool) (bool, obj.ABI) {
874	abi := obj.ABI0
875	isStatic := false
876	if p.peek() != '<' {
877		return isStatic, abi
878	}
879	p.next()
880	tok := p.peek()
881	if tok == '>' {
882		isStatic = true
883	} else if tok == scanner.Ident {
884		abistr := p.get(scanner.Ident).String()
885		if !p.allowABI {
886			if issueError {
887				p.errorf("ABI selector only permitted when compiling runtime, reference was to %q", name)
888			}
889		} else {
890			theabi, valid := obj.ParseABI(abistr)
891			if !valid {
892				if issueError {
893					p.errorf("malformed ABI selector %q in reference to %q",
894						abistr, name)
895				}
896			} else {
897				abi = theabi
898			}
899		}
900	}
901	p.get('>')
902	return isStatic, abi
903}
904
905// funcAddress parses an external function address. This is a
906// constrained form of the operand syntax that's always SB-based,
907// non-static, and has at most a simple integer offset:
908//
909//	[$|*]sym[<abi>][+Int](SB)
910func (p *Parser) funcAddress() (string, obj.ABI, bool) {
911	switch p.peek() {
912	case '$', '*':
913		// Skip prefix.
914		p.next()
915	}
916
917	tok := p.next()
918	name := tok.String()
919	if tok.ScanToken != scanner.Ident || p.atStartOfRegister(name) {
920		return "", obj.ABI0, false
921	}
922	name = p.qualifySymbol(name)
923	// Parse optional <> (indicates a static symbol) or
924	// <ABIxxx> (selecting text symbol with specific ABI).
925	noErrMsg := false
926	isStatic, abi := p.symRefAttrs(name, noErrMsg)
927	if isStatic {
928		return "", obj.ABI0, false // This function rejects static symbols.
929	}
930	tok = p.next()
931	if tok.ScanToken == '+' {
932		if p.next().ScanToken != scanner.Int {
933			return "", obj.ABI0, false
934		}
935		tok = p.next()
936	}
937	if tok.ScanToken != '(' {
938		return "", obj.ABI0, false
939	}
940	if reg := p.next(); reg.ScanToken != scanner.Ident || reg.String() != "SB" {
941		return "", obj.ABI0, false
942	}
943	if p.next().ScanToken != ')' || p.peek() != scanner.EOF {
944		return "", obj.ABI0, false
945	}
946	return name, abi, true
947}
948
// registerIndirect parses the general form of a register indirection.
// It can be (R1), (R2*scale), (R1)(R2*scale), (R1)(R2.SXTX<<3) or (R1)(R2<<3)
// where R1 may be a simple register or register pair R:R or (R, R) or (R+R).
// Or it might be a pseudo-indirection like (FP).
// We are sitting on the opening parenthesis.
//
// The result is stored in *a, whose Offset and symbol fields may already
// have been filled in by the caller. prefix is the operand prefix ('$', '*'
// or 0). Errors are reported through p.errorf.
func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) {
	p.get('(')
	tok := p.next()
	name := tok.String()
	// The prefix applies to the operand as a whole, so the register
	// itself is parsed without one.
	r1, r2, scale, ok := p.register(name, 0)
	if !ok {
		p.errorf("indirect through non-register %s", tok)
	}
	p.get(')')
	a.Type = obj.TYPE_MEM
	if r1 < 0 {
		// Pseudo-register reference.
		if r2 != 0 {
			p.errorf("cannot use pseudo-register in pair")
			return
		}
		// For SB, SP, and FP, there must be a name here. 0(FP) is not legal.
		if name != "PC" && a.Name == obj.NAME_NONE {
			p.errorf("cannot reference %s without a symbol", name)
		}
		p.setPseudoRegister(a, name, false, prefix)
		return
	}
	a.Reg = r1
	if r2 != 0 {
		// TODO: Consistency in the encoding would be nice here.
		if p.arch.InFamily(sys.ARM, sys.ARM64) {
			// Special form
			// ARM: destination register pair (R1, R2).
			// ARM64: register pair (R1, R2) for LDP/STP.
			if prefix != 0 || scale != 0 {
				p.errorf("illegal address mode for register pair")
				return
			}
			a.Type = obj.TYPE_REGREG
			a.Offset = int64(r2)
			// Nothing may follow
			return
		}
		if p.arch.Family == sys.PPC64 {
			// Special form for PPC64: (R1+R2); alias for (R1)(R2).
			if prefix != 0 || scale != 0 {
				p.errorf("illegal address mode for register+register")
				return
			}
			a.Type = obj.TYPE_MEM
			a.Scale = 0
			a.Index = r2
			// Nothing may follow.
			return
		}
	}
	// Any other architecture has no register pair form.
	if r2 != 0 {
		p.errorf("indirect through register pair")
	}
	if prefix == '$' {
		a.Type = obj.TYPE_ADDR
	}
	if r1 == arch.RPC && prefix != 0 {
		p.errorf("illegal addressing mode for PC")
	}
	if scale == 0 && p.peek() == '(' {
		// General form (R)(R*scale).
		p.next()
		tok := p.next()
		if p.atRegisterExtension() {
			p.registerExtension(a, tok.String(), prefix)
		} else if p.atRegisterShift() {
			// (R1)(R2<<3)
			p.registerExtension(a, tok.String(), prefix)
		} else {
			r1, r2, scale, ok = p.register(tok.String(), 0)
			if !ok {
				p.errorf("indirect through non-register %s", tok)
			}
			if r2 != 0 {
				p.errorf("unimplemented two-register form")
			}
			a.Index = r1
			if scale != 0 && scale != 1 && (p.arch.Family == sys.ARM64 ||
				p.arch.Family == sys.PPC64) {
				// Support (R1)(R2) (no scaling) and (R1)(R2*1).
				p.errorf("%s doesn't support scaled register format", p.arch.Name)
			} else {
				a.Scale = int16(scale)
			}
		}
		p.get(')')
	} else if scale != 0 {
		if p.arch.Family == sys.ARM64 {
			p.errorf("arm64 doesn't support scaled register format")
		}
		// First (R) was missing, all we have is (R*scale).
		a.Reg = 0
		a.Index = r1
		a.Scale = int16(scale)
	}
}
1052
1053// registerList parses an ARM or ARM64 register list expression, a list of
1054// registers in []. There may be comma-separated ranges or individual
1055// registers, as in [R1,R3-R5] or [V1.S4, V2.S4, V3.S4, V4.S4].
1056// For ARM, only R0 through R15 may appear.
1057// For ARM64, V0 through V31 with arrangement may appear.
1058//
1059// For 386/AMD64 register list specifies 4VNNIW-style multi-source operand.
1060// For range of 4 elements, Intel manual uses "+3" notation, for example:
1061//
1062//	VP4DPWSSDS zmm1{k1}{z}, zmm2+3, m128
1063//
1064// Given asm line:
1065//
1066//	VP4DPWSSDS Z5, [Z10-Z13], (AX)
1067//
1068// zmm2 is Z10, and Z13 is the only valid value for it (Z10+3).
1069// Only simple ranges are accepted, like [Z0-Z3].
1070//
1071// The opening bracket has been consumed.
1072func (p *Parser) registerList(a *obj.Addr) {
1073	if p.arch.InFamily(sys.I386, sys.AMD64) {
1074		p.registerListX86(a)
1075	} else {
1076		p.registerListARM(a)
1077	}
1078}
1079
1080func (p *Parser) registerListARM(a *obj.Addr) {
1081	// One range per loop.
1082	var maxReg int
1083	var bits uint16
1084	var arrangement int64
1085	switch p.arch.Family {
1086	case sys.ARM:
1087		maxReg = 16
1088	case sys.ARM64:
1089		maxReg = 32
1090	default:
1091		p.errorf("unexpected register list")
1092	}
1093	firstReg := -1
1094	nextReg := -1
1095	regCnt := 0
1096ListLoop:
1097	for {
1098		tok := p.next()
1099		switch tok.ScanToken {
1100		case ']':
1101			break ListLoop
1102		case scanner.EOF:
1103			p.errorf("missing ']' in register list")
1104			return
1105		}
1106		switch p.arch.Family {
1107		case sys.ARM64:
1108			// Vn.T
1109			name := tok.String()
1110			r, ok := p.registerReference(name)
1111			if !ok {
1112				p.errorf("invalid register: %s", name)
1113			}
1114			reg := r - p.arch.Register["V0"]
1115			p.get('.')
1116			tok := p.next()
1117			ext := tok.String()
1118			curArrangement, err := arch.ARM64RegisterArrangement(reg, name, ext)
1119			if err != nil {
1120				p.errorf(err.Error())
1121			}
1122			if firstReg == -1 {
1123				// only record the first register and arrangement
1124				firstReg = int(reg)
1125				nextReg = firstReg
1126				arrangement = curArrangement
1127			} else if curArrangement != arrangement {
1128				p.errorf("inconsistent arrangement in ARM64 register list")
1129			} else if nextReg != int(reg) {
1130				p.errorf("incontiguous register in ARM64 register list: %s", name)
1131			}
1132			regCnt++
1133			nextReg = (nextReg + 1) % 32
1134		case sys.ARM:
1135			// Parse the upper and lower bounds.
1136			lo := p.registerNumber(tok.String())
1137			hi := lo
1138			if p.peek() == '-' {
1139				p.next()
1140				hi = p.registerNumber(p.next().String())
1141			}
1142			if hi < lo {
1143				lo, hi = hi, lo
1144			}
1145			// Check there are no duplicates in the register list.
1146			for i := 0; lo <= hi && i < maxReg; i++ {
1147				if bits&(1<<lo) != 0 {
1148					p.errorf("register R%d already in list", lo)
1149				}
1150				bits |= 1 << lo
1151				lo++
1152			}
1153		default:
1154			p.errorf("unexpected register list")
1155		}
1156		if p.peek() != ']' {
1157			p.get(',')
1158		}
1159	}
1160	a.Type = obj.TYPE_REGLIST
1161	switch p.arch.Family {
1162	case sys.ARM:
1163		a.Offset = int64(bits)
1164	case sys.ARM64:
1165		offset, err := arch.ARM64RegisterListOffset(firstReg, regCnt, arrangement)
1166		if err != nil {
1167			p.errorf(err.Error())
1168		}
1169		a.Offset = offset
1170	default:
1171		p.errorf("register list not supported on this architecture")
1172	}
1173}
1174
1175func (p *Parser) registerListX86(a *obj.Addr) {
1176	// Accept only [RegA-RegB] syntax.
1177	// Don't use p.get() to provide better error messages.
1178
1179	loName := p.next().String()
1180	lo, ok := p.arch.Register[loName]
1181	if !ok {
1182		if loName == "EOF" {
1183			p.errorf("register list: expected ']', found EOF")
1184		} else {
1185			p.errorf("register list: bad low register in `[%s`", loName)
1186		}
1187		return
1188	}
1189	if tok := p.next().ScanToken; tok != '-' {
1190		p.errorf("register list: expected '-' after `[%s`, found %s", loName, tok)
1191		return
1192	}
1193	hiName := p.next().String()
1194	hi, ok := p.arch.Register[hiName]
1195	if !ok {
1196		p.errorf("register list: bad high register in `[%s-%s`", loName, hiName)
1197		return
1198	}
1199	if tok := p.next().ScanToken; tok != ']' {
1200		p.errorf("register list: expected ']' after `[%s-%s`, found %s", loName, hiName, tok)
1201	}
1202
1203	a.Type = obj.TYPE_REGLIST
1204	a.Reg = lo
1205	a.Offset = x86.EncodeRegisterRange(lo, hi)
1206}
1207
1208// registerNumber is ARM-specific. It returns the number of the specified register.
1209func (p *Parser) registerNumber(name string) uint16 {
1210	if p.arch.Family == sys.ARM && name == "g" {
1211		return 10
1212	}
1213	if name[0] != 'R' {
1214		p.errorf("expected g or R0 through R15; found %s", name)
1215		return 0
1216	}
1217	r, ok := p.registerReference(name)
1218	if !ok {
1219		return 0
1220	}
1221	reg := r - p.arch.Register["R0"]
1222	if reg < 0 {
1223		// Could happen for an architecture having other registers prefixed by R
1224		p.errorf("expected g or R0 through R15; found %s", name)
1225		return 0
1226	}
1227	return uint16(reg)
1228}
1229
1230// Note: There are two changes in the expression handling here
1231// compared to the old yacc/C implementations. Neither has
1232// much practical consequence because the expressions we
1233// see in assembly code are simple, but for the record:
1234//
1235// 1) Evaluation uses uint64; the old one used int64.
1236// 2) Precedence uses Go rules not C rules.
1237
1238// expr = term | term ('+' | '-' | '|' | '^') term.
1239func (p *Parser) expr() uint64 {
1240	value := p.term()
1241	for {
1242		switch p.peek() {
1243		case '+':
1244			p.next()
1245			value += p.term()
1246		case '-':
1247			p.next()
1248			value -= p.term()
1249		case '|':
1250			p.next()
1251			value |= p.term()
1252		case '^':
1253			p.next()
1254			value ^= p.term()
1255		default:
1256			return value
1257		}
1258	}
1259}
1260
1261// floatExpr = fconst | '-' floatExpr | '+' floatExpr | '(' floatExpr ')'
1262func (p *Parser) floatExpr() float64 {
1263	tok := p.next()
1264	switch tok.ScanToken {
1265	case '(':
1266		v := p.floatExpr()
1267		if p.next().ScanToken != ')' {
1268			p.errorf("missing closing paren")
1269		}
1270		return v
1271	case '+':
1272		return +p.floatExpr()
1273	case '-':
1274		return -p.floatExpr()
1275	case scanner.Float:
1276		return p.atof(tok.String())
1277	}
1278	p.errorf("unexpected %s evaluating float expression", tok)
1279	return 0
1280}
1281
1282// term = factor | factor ('*' | '/' | '%' | '>>' | '<<' | '&') factor
1283func (p *Parser) term() uint64 {
1284	value := p.factor()
1285	for {
1286		switch p.peek() {
1287		case '*':
1288			p.next()
1289			value *= p.factor()
1290		case '/':
1291			p.next()
1292			if int64(value) < 0 {
1293				p.errorf("divide of value with high bit set")
1294			}
1295			divisor := p.factor()
1296			if divisor == 0 {
1297				p.errorf("division by zero")
1298			} else {
1299				value /= divisor
1300			}
1301		case '%':
1302			p.next()
1303			divisor := p.factor()
1304			if int64(value) < 0 {
1305				p.errorf("modulo of value with high bit set")
1306			}
1307			if divisor == 0 {
1308				p.errorf("modulo by zero")
1309			} else {
1310				value %= divisor
1311			}
1312		case lex.LSH:
1313			p.next()
1314			shift := p.factor()
1315			if int64(shift) < 0 {
1316				p.errorf("negative left shift count")
1317			}
1318			return value << shift
1319		case lex.RSH:
1320			p.next()
1321			shift := p.term()
1322			if int64(shift) < 0 {
1323				p.errorf("negative right shift count")
1324			}
1325			if int64(value) < 0 {
1326				p.errorf("right shift of value with high bit set")
1327			}
1328			value >>= shift
1329		case '&':
1330			p.next()
1331			value &= p.factor()
1332		default:
1333			return value
1334		}
1335	}
1336}
1337
1338// factor = const | '+' factor | '-' factor | '~' factor | '(' expr ')'
1339func (p *Parser) factor() uint64 {
1340	tok := p.next()
1341	switch tok.ScanToken {
1342	case scanner.Int:
1343		return p.atoi(tok.String())
1344	case scanner.Char:
1345		str, err := strconv.Unquote(tok.String())
1346		if err != nil {
1347			p.errorf("%s", err)
1348		}
1349		r, w := utf8.DecodeRuneInString(str)
1350		if w == 1 && r == utf8.RuneError {
1351			p.errorf("illegal UTF-8 encoding for character constant")
1352		}
1353		return uint64(r)
1354	case '+':
1355		return +p.factor()
1356	case '-':
1357		return -p.factor()
1358	case '~':
1359		return ^p.factor()
1360	case '(':
1361		v := p.expr()
1362		if p.next().ScanToken != ')' {
1363			p.errorf("missing closing paren")
1364		}
1365		return v
1366	}
1367	p.errorf("unexpected %s evaluating expression", tok)
1368	return 0
1369}
1370
1371// positiveAtoi returns an int64 that must be >= 0.
1372func (p *Parser) positiveAtoi(str string) int64 {
1373	value, err := strconv.ParseInt(str, 0, 64)
1374	if err != nil {
1375		p.errorf("%s", err)
1376	}
1377	if value < 0 {
1378		p.errorf("%s overflows int64", str)
1379	}
1380	return value
1381}
1382
1383func (p *Parser) atoi(str string) uint64 {
1384	value, err := strconv.ParseUint(str, 0, 64)
1385	if err != nil {
1386		p.errorf("%s", err)
1387	}
1388	return value
1389}
1390
1391func (p *Parser) atof(str string) float64 {
1392	value, err := strconv.ParseFloat(str, 64)
1393	if err != nil {
1394		p.errorf("%s", err)
1395	}
1396	return value
1397}
1398
// EOF represents the end of input. It is the token next returns when no
// tokens remain; it is built by lex.Make from scanner.EOF and the text "EOF".
var EOF = lex.Make(scanner.EOF, "EOF")
1401
1402func (p *Parser) next() lex.Token {
1403	if !p.more() {
1404		return EOF
1405	}
1406	tok := p.input[p.inputPos]
1407	p.inputPos++
1408	return tok
1409}
1410
1411func (p *Parser) back() {
1412	if p.inputPos == 0 {
1413		p.errorf("internal error: backing up before BOL")
1414	} else {
1415		p.inputPos--
1416	}
1417}
1418
1419func (p *Parser) peek() lex.ScanToken {
1420	if p.more() {
1421		return p.input[p.inputPos].ScanToken
1422	}
1423	return scanner.EOF
1424}
1425
1426func (p *Parser) more() bool {
1427	return p.inputPos < len(p.input)
1428}
1429
1430// get verifies that the next item has the expected type and returns it.
1431func (p *Parser) get(expected lex.ScanToken) lex.Token {
1432	p.expect(expected, expected.String())
1433	return p.next()
1434}
1435
1436// expectOperandEnd verifies that the parsing state is properly at the end of an operand.
1437func (p *Parser) expectOperandEnd() {
1438	p.expect(scanner.EOF, "end of operand")
1439}
1440
1441// expect verifies that the next item has the expected type. It does not consume it.
1442func (p *Parser) expect(expectedToken lex.ScanToken, expectedMessage string) {
1443	if p.peek() != expectedToken {
1444		p.errorf("expected %s, found %s", expectedMessage, p.next())
1445	}
1446}
1447
1448// have reports whether the remaining tokens (including the current one) contain the specified token.
1449func (p *Parser) have(token lex.ScanToken) bool {
1450	for i := p.inputPos; i < len(p.input); i++ {
1451		if p.input[i].ScanToken == token {
1452			return true
1453		}
1454	}
1455	return false
1456}
1457
1458// at reports whether the next tokens are as requested.
1459func (p *Parser) at(next ...lex.ScanToken) bool {
1460	if len(p.input)-p.inputPos < len(next) {
1461		return false
1462	}
1463	for i, r := range next {
1464		if p.input[p.inputPos+i].ScanToken != r {
1465			return false
1466		}
1467	}
1468	return true
1469}
1470