// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package x86

import (
	"fmt"
	"math"

	"cmd/compile/internal/base"
	"cmd/compile/internal/ir"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/ssagen"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
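	// Walk the values backwards, tracking whether flags are live.
	// A MOVLconst emitted while flags are live is marked so that it is
	// not later rewritten into an XORL, which would clobber the flags.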
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && v.Op == ssa.Op386MOVLconst {
			// The "mark" is any non-nil Aux value.
			v.Aux = ssa.AuxMark
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() {
		switch t.Size() {
		case 1:
			return x86.AMOVBLZX
		case 2:
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		switch t.Size() {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		default:
			panic(fmt.Sprintf("bad float register width %d:%s", t.Size(), t))
		}
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
//
//	dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

func ssaGenValue(s *ssagen.State, v *ssa.Value) {
	switch v.Op {
	case ssa.Op386ADDL:
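		// Prefer the 2-operand ADDL when the destination aliases one of
		// the inputs; otherwise use LEAL to add without an extra move.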
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			p := s.Prog(x86.ALEAL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.Op386SUBL,
		ssa.Op386MULL,
		ssa.Op386ANDL,
		ssa.Op386ORL,
		ssa.Op386XORL,
		ssa.Op386SHLL,
		ssa.Op386SHRL, ssa.Op386SHRW, ssa.Op386SHRB,
		ssa.Op386SARL, ssa.Op386SARW, ssa.Op386SARB,
		ssa.Op386ROLL, ssa.Op386ROLW, ssa.Op386ROLB,
		ssa.Op386ADDSS, ssa.Op386ADDSD, ssa.Op386SUBSS, ssa.Op386SUBSD,
		ssa.Op386MULSS, ssa.Op386MULSD, ssa.Op386DIVSS, ssa.Op386DIVSD,
		ssa.Op386PXOR,
		ssa.Op386ADCL,
		ssa.Op386SBBL:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())

	case ssa.Op386ADDLcarry, ssa.Op386SUBLcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		opregreg(s, v.Op.Asm(), v.Reg0(), v.Args[1].Reg())

	case ssa.Op386ADDLconstcarry, ssa.Op386SUBLconstcarry:
		// output 0 is carry/borrow, output 1 is the low 32 bits.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()

	case ssa.Op386DIVL, ssa.Op386DIVW,
		ssa.Op386DIVLU, ssa.Op386DIVWU,
		ssa.Op386MODL, ssa.Op386MODW,
		ssa.Op386MODLU, ssa.Op386MODWU:

		// Arg[0] is already in AX as it's the only register we allow
		// and AX is the only output
		x := v.Args[1].Reg()

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1.
		var j *obj.Prog
		if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW ||
			v.Op == ssa.Op386MODL || v.Op == ssa.Op386MODW {

			if ssa.DivisionNeedsFixUp(v) {
				var c *obj.Prog
				switch v.Op {
				case ssa.Op386DIVL, ssa.Op386MODL:
					c = s.Prog(x86.ACMPL)
					j = s.Prog(x86.AJEQ)

				case ssa.Op386DIVW, ssa.Op386MODW:
					c = s.Prog(x86.ACMPW)
					j = s.Prog(x86.AJEQ)
				}
				c.From.Type = obj.TYPE_REG
				c.From.Reg = x
				c.To.Type = obj.TYPE_CONST
				c.To.Offset = -1

				j.To.Type = obj.TYPE_BRANCH
			}
			// sign extend the dividend
			switch v.Op {
			case ssa.Op386DIVL, ssa.Op386MODL:
				s.Prog(x86.ACDQ)
			case ssa.Op386DIVW, ssa.Op386MODW:
				s.Prog(x86.ACWD)
			}
		}

		// for unsigned ints, we sign extend by setting DX = 0
		// signed ints were sign extended above
		if v.Op == ssa.Op386DIVLU || v.Op == ssa.Op386MODLU ||
			v.Op == ssa.Op386DIVWU || v.Op == ssa.Op386MODWU {
			c := s.Prog(x86.AXORL)
			c.From.Type = obj.TYPE_REG
			c.From.Reg = x86.REG_DX
			c.To.Type = obj.TYPE_REG
			c.To.Reg = x86.REG_DX
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x

		// signed division, rest of the check for -1 case
		if j != nil {
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			var n *obj.Prog
			if v.Op == ssa.Op386DIVL || v.Op == ssa.Op386DIVW {
				// n * -1 = -n
				n = s.Prog(x86.ANEGL)
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_AX
			} else {
				// n % -1 == 0
				n = s.Prog(x86.AXORL)
				n.From.Type = obj.TYPE_REG
				n.From.Reg = x86.REG_DX
				n.To.Type = obj.TYPE_REG
				n.To.Reg = x86.REG_DX
			}

			j.To.SetTarget(n)
			j2.To.SetTarget(s.Pc())
		}

	case ssa.Op386HMULL, ssa.Op386HMULLU:
		// the frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant
		// SSA rewrites generate the 64 bit versions

		// Arg[0] is already in AX as it's the only register we allow
		// and DX is the only output we care about (the high bits)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.Op386MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386MULLQU:
		// AX * args[1], high 32 bits in DX (result[0]), low 32 bits in AX (result[1]).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.Op386AVGLU:
		// compute (x+y)/2 unsigned.
		// Do a 32-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 31st bit.
		p := s.Prog(x86.AADDL)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386ADDLconst:
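		// When the output aliases the input, use INCL/DECL for ±1 and a
		// plain ADDL otherwise; when it does not, LEAL adds the constant
		// into the destination without an extra move.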
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				p := s.Prog(x86.AINCL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				p := s.Prog(x86.ADECL)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.Op386MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.AddRestSourceReg(v.Args[0].Reg())

	case ssa.Op386SUBLconst,
		ssa.Op386ADCLconst,
		ssa.Op386SBBLconst,
		ssa.Op386ANDLconst,
		ssa.Op386ORLconst,
		ssa.Op386XORLconst,
		ssa.Op386SHLLconst,
		ssa.Op386SHRLconst, ssa.Op386SHRWconst, ssa.Op386SHRBconst,
		ssa.Op386SARLconst, ssa.Op386SARWconst, ssa.Op386SARBconst,
		ssa.Op386ROLLconst, ssa.Op386ROLWconst, ssa.Op386ROLBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SBBLcarrymask:
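		// SBBL x, x computes x - x - carry = -carry, yielding 0 if the
		// carry flag is clear and ^0 (all ones) if it is set.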
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.Op386LEAL1, ssa.Op386LEAL2, ssa.Op386LEAL4, ssa.Op386LEAL8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAL)
		switch v.Op {
		case ssa.Op386LEAL1:
			p.From.Scale = 1
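			// SP cannot be used as an index register, so swap base and
			// index (legal here because the scale is 1).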
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386LEAL2:
			p.From.Scale = 2
		case ssa.Op386LEAL4:
			p.From.Scale = 4
		case ssa.Op386LEAL8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386LEAL:
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386CMPL, ssa.Op386CMPW, ssa.Op386CMPB,
		ssa.Op386TESTL, ssa.Op386TESTW, ssa.Op386TESTB:
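		// Note the operand order: CMP and TEST take Args[0] in From and
		// Args[1] in To, the reverse of the usual 2-operand convention.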
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.Op386UCOMISS, ssa.Op386UCOMISD:
		// Go assembler has swapped operands for UCOMISx relative to CMP,
		// must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.Op386CMPLconst, ssa.Op386CMPWconst, ssa.Op386CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.Op386TESTLconst, ssa.Op386TESTWconst, ssa.Op386TESTBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.Op386CMPLload, ssa.Op386CMPWload, ssa.Op386CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.Op386CMPLconstload, ssa.Op386CMPWconstload, ssa.Op386CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.From, v, sc.Off64())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val64()
	case ssa.Op386MOVLconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}

		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst, ssa.Op386MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.Op386MOVSSconst1, ssa.Op386MOVSDconst1:
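		// Load the address of a read-only symbol holding the constant;
		// a following MOVSSconst2/MOVSDconst2 loads the value from it.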
		p := s.Prog(x86.ALEAL)
		p.From.Type = obj.TYPE_MEM
		p.From.Name = obj.NAME_EXTERN
		f := math.Float64frombits(uint64(v.AuxInt))
		if v.Op == ssa.Op386MOVSDconst1 {
			p.From.Sym = base.Ctxt.Float64Sym(f)
		} else {
			p.From.Sym = base.Ctxt.Float32Sym(float32(f))
		}
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSconst2, ssa.Op386MOVSDconst2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386MOVSSload, ssa.Op386MOVSDload, ssa.Op386MOVLload, ssa.Op386MOVWload, ssa.Op386MOVBload, ssa.Op386MOVBLSXload, ssa.Op386MOVWLSXload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1,
		ssa.Op386MOVSDloadidx8, ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4, ssa.Op386MOVWloadidx2:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBloadidx1, ssa.Op386MOVWloadidx1, ssa.Op386MOVLloadidx1, ssa.Op386MOVSSloadidx1, ssa.Op386MOVSDloadidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.From.Scale = 1
		case ssa.Op386MOVSDloadidx8:
			p.From.Scale = 8
		case ssa.Op386MOVLloadidx4, ssa.Op386MOVSSloadidx4:
			p.From.Scale = 4
		case ssa.Op386MOVWloadidx2:
			p.From.Scale = 2
		}
		p.From.Reg = r
		p.From.Index = i
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
		ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		p.From.Index = v.Args[2].Reg()
		p.From.Scale = 4
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
		ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
		ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
		ssa.Op386MULSDload, ssa.Op386MULSSload, ssa.Op386DIVSSload, ssa.Op386DIVSDload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		ssagen.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386MOVSSstore, ssa.Op386MOVSDstore, ssa.Op386MOVLstore, ssa.Op386MOVWstore, ssa.Op386MOVBstore,
		ssa.Op386ADDLmodify, ssa.Op386SUBLmodify, ssa.Op386ANDLmodify, ssa.Op386ORLmodify, ssa.Op386XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
	case ssa.Op386ADDLconstmodify:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386ANDLconstmodify, ssa.Op386ORLconstmodify, ssa.Op386XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off64()
		val := sc.Val64()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, off)
	case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
		ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
		ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		switch v.Op {
		case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1:
			if i == x86.REG_SP {
				r, i = i, r
			}
			p.To.Scale = 1
		case ssa.Op386MOVSDstoreidx8:
			p.To.Scale = 8
		case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
			ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
			p.To.Scale = 4
		case ssa.Op386MOVWstoreidx2:
			p.To.Scale = 2
		}
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux(&p.To, v)
	case ssa.Op386MOVLstoreconst, ssa.Op386MOVWstoreconst, ssa.Op386MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386ADDLconstmodifyidx4:
		sc := v.AuxValAndOff()
		val := sc.Val()
		if val == 1 || val == -1 {
			var p *obj.Prog
			if val == 1 {
				p = s.Prog(x86.AINCL)
			} else {
				p = s.Prog(x86.ADECL)
			}
			off := sc.Off64()
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			p.To.Scale = 4
			p.To.Index = v.Args[1].Reg()
			ssagen.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
	case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
		ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val64()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.Op386MOVBstoreconstidx1, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVLstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.Op386MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.Op386MOVLstoreconstidx4,
			ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
			p.To.Scale = 4
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		ssagen.AddAux2(&p.To, v, sc.Off64())
	case ssa.Op386MOVWLSX, ssa.Op386MOVBLSX, ssa.Op386MOVWLZX, ssa.Op386MOVBLZX,
		ssa.Op386CVTSL2SS, ssa.Op386CVTSL2SD,
		ssa.Op386CVTTSS2SL, ssa.Op386CVTTSD2SL,
		ssa.Op386CVTSS2SD, ssa.Op386CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
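	// For the Duff's device routines, AuxInt is the offset of the entry
	// point within the assembly routine, which selects how much gets
	// zeroed or copied.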
	case ssa.Op386DUFFZERO:
		p := s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffzero
		p.To.Offset = v.AuxInt
	case ssa.Op386DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = ir.Syms.Duffcopy
		p.To.Offset = v.AuxInt

	case ssa.OpCopy: // TODO: use MOVLreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		ssagen.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		ssagen.AddrAuto(&p.To, v)
	case ssa.Op386LoweredGetClosurePtr:
		// Closure pointer is DX.
		ssagen.CheckLoweredGetClosurePtr(v)
	case ssa.Op386LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(base.Ctxt) {
			// MOVL (TLS), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVL TLS, r
			// MOVL (r)(TLS*1), r
			p := s.Prog(x86.AMOVL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVL)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}

	case ssa.Op386LoweredGetCallerPC:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -4 // PC is stored 4 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on 386, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// AuxInt encodes how many buffer entries we need.
		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]

	case ssa.Op386LoweredPanicBoundsA, ssa.Op386LoweredPanicBoundsB, ssa.Op386LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(8) // space used in callee args area by assembly stubs

	case ssa.Op386LoweredPanicExtendA, ssa.Op386LoweredPanicExtendB, ssa.Op386LoweredPanicExtendC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = ssagen.ExtendCheckFunc[v.AuxInt]
		s.UseArgs(12) // space used in callee args area by assembly stubs

	case ssa.Op386CALLstatic, ssa.Op386CALLclosure, ssa.Op386CALLinter:
		s.Call(v)
	case ssa.Op386CALLtail:
		s.TailCall(v)
	case ssa.Op386NEGL,
		ssa.Op386BSWAPL,
		ssa.Op386NOTL:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386BSFL, ssa.Op386BSFW,
		ssa.Op386BSRL, ssa.Op386BSRW,
		ssa.Op386SQRTSS, ssa.Op386SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.Op386SETEQ, ssa.Op386SETNE,
		ssa.Op386SETL, ssa.Op386SETLE,
		ssa.Op386SETG, ssa.Op386SETGE,
		ssa.Op386SETGF, ssa.Op386SETGEF,
		ssa.Op386SETB, ssa.Op386SETBE,
		ssa.Op386SETORD, ssa.Op386SETNAN,
		ssa.Op386SETA, ssa.Op386SETAE,
		ssa.Op386SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.Op386SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.Op386SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.Op386InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.Op386FlagEQ, ssa.Op386FlagLT_ULT, ssa.Op386FlagLT_UGT, ssa.Op386FlagGT_ULT, ssa.Op386FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
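	// The REP-prefixed string instructions use fixed registers:
	// REP STOSL stores AX at DI, CX times; REP MOVSL copies CX longs
	// from SI to DI. The inputs are pinned to those registers.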
	case ssa.Op386REPSTOSL:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSL)
	case ssa.Op386REPMOVSL:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSL)
	case ssa.Op386LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		ssagen.AddAux(&p.To, v)
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			base.WarnfAt(v.Pos, "generated nil check")
		}
	case ssa.Op386LoweredCtz32:
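		// CTZ(x) is BSF(x); BSF leaves the output undefined (and ZF set)
		// when the input is zero, so patch the result to 32 in that case.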
		// BSFL in, out
		p := s.Prog(x86.ABSFL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

		// JNZ 2(PC)
		p1 := s.Prog(x86.AJNE)
		p1.To.Type = obj.TYPE_BRANCH

		// MOVL $32, out
		p2 := s.Prog(x86.AMOVL)
		p2.From.Type = obj.TYPE_CONST
		p2.From.Offset = 32
		p2.To.Type = obj.TYPE_REG
		p2.To.Reg = v.Reg()

		// NOP (so the JNZ has somewhere to land)
		nop := s.Prog(obj.ANOP)
		p1.To.SetTarget(nop)

	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		ssagen.AddAux(&p.To, v)
	case ssa.OpClobberReg:
		// TODO: implement for clobberdead experiment. Nop is ok for now.
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

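// blockJump maps each conditional block kind to the jump instruction for
// its first successor (asm) and the inverted jump used to branch to its
// second successor (invasm).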
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.Block386EQ:  {x86.AJEQ, x86.AJNE},
	ssa.Block386NE:  {x86.AJNE, x86.AJEQ},
	ssa.Block386LT:  {x86.AJLT, x86.AJGE},
	ssa.Block386GE:  {x86.AJGE, x86.AJLT},
	ssa.Block386LE:  {x86.AJLE, x86.AJGT},
	ssa.Block386GT:  {x86.AJGT, x86.AJLE},
	ssa.Block386OS:  {x86.AJOS, x86.AJOC},
	ssa.Block386OC:  {x86.AJOC, x86.AJOS},
	ssa.Block386ULT: {x86.AJCS, x86.AJCC},
	ssa.Block386UGE: {x86.AJCC, x86.AJCS},
	ssa.Block386UGT: {x86.AJHI, x86.AJLS},
	ssa.Block386ULE: {x86.AJLS, x86.AJHI},
	ssa.Block386ORD: {x86.AJPC, x86.AJPS},
	ssa.Block386NAN: {x86.AJPS, x86.AJPC},
}

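// eqfJumps and nefJumps encode the two-jump sequences used for the
// floating-point equality blocks: unordered comparisons set the parity
// flag, so == and != need a parity test in addition to the ZF test.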
var eqfJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]ssagen.IndexJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in AX:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit, ssa.BlockRetJmp:
	case ssa.BlockRet:
		s.Prog(obj.ARET)

	case ssa.Block386EQF:
		s.CombJump(b, next, &eqfJumps)

	case ssa.Block386NEF:
		s.CombJump(b, next, &nefJumps)

	case ssa.Block386EQ, ssa.Block386NE,
		ssa.Block386LT, ssa.Block386GE,
		ssa.Block386LE, ssa.Block386GT,
		ssa.Block386OS, ssa.Block386OC,
		ssa.Block386ULT, ssa.Block386UGT,
		ssa.Block386ULE, ssa.Block386UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}
	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}