1// Copyright 2016 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package ssa 6 7import ( 8 "cmd/internal/src" 9 "fmt" 10 "math" 11) 12 13type branch int 14 15const ( 16 unknown branch = iota 17 positive 18 negative 19 // The outedges from a jump table are jumpTable0, 20 // jumpTable0+1, jumpTable0+2, etc. There could be an 21 // arbitrary number so we can't list them all here. 22 jumpTable0 23) 24 25// relation represents the set of possible relations between 26// pairs of variables (v, w). Without a priori knowledge the 27// mask is lt | eq | gt meaning v can be less than, equal to or 28// greater than w. When the execution path branches on the condition 29// `v op w` the set of relations is updated to exclude any 30// relation not possible due to `v op w` being true (or false). 31// 32// E.g. 33// 34// r := relation(...) 35// 36// if v < w { 37// newR := r & lt 38// } 39// if v >= w { 40// newR := r & (eq|gt) 41// } 42// if v != w { 43// newR := r & (lt|gt) 44// } 45type relation uint 46 47const ( 48 lt relation = 1 << iota 49 eq 50 gt 51) 52 53var relationStrings = [...]string{ 54 0: "none", lt: "<", eq: "==", lt | eq: "<=", 55 gt: ">", gt | lt: "!=", gt | eq: ">=", gt | eq | lt: "any", 56} 57 58func (r relation) String() string { 59 if r < relation(len(relationStrings)) { 60 return relationStrings[r] 61 } 62 return fmt.Sprintf("relation(%d)", uint(r)) 63} 64 65// domain represents the domain of a variable pair in which a set 66// of relations is known. For example, relations learned for unsigned 67// pairs cannot be transferred to signed pairs because the same bit 68// representation can mean something else. 69type domain uint 70 71const ( 72 signed domain = 1 << iota 73 unsigned 74 pointer 75 boolean 76) 77 78var domainStrings = [...]string{ 79 "signed", "unsigned", "pointer", "boolean", 80} 81 82func (d domain) String() string { 83 s := "" 84 for i, ds := range domainStrings { 85 if d&(1<<uint(i)) != 0 { 86 if len(s) != 0 { 87 s += "|" 88 } 89 s += ds 90 d &^= 1 << uint(i) 91 } 92 } 93 if d != 0 { 94 if len(s) != 0 { 95 s += "|" 96 } 97 s += fmt.Sprintf("0x%x", uint(d)) 98 } 99 return s 100} 101 102type pair struct { 103 // a pair of values, ordered by ID. 104 // v can be nil, to mean the zero value. 105 // for booleans the zero value (v == nil) is false. 106 v, w *Value 107 d domain 108} 109 110// fact is a pair plus a relation for that pair. 111type fact struct { 112 p pair 113 r relation 114} 115 116// a limit records known upper and lower bounds for a value. 117type limit struct { 118 min, max int64 // min <= value <= max, signed 119 umin, umax uint64 // umin <= value <= umax, unsigned 120} 121 122func (l limit) String() string { 123 return fmt.Sprintf("sm,SM,um,UM=%d,%d,%d,%d", l.min, l.max, l.umin, l.umax) 124} 125 126func (l limit) intersect(l2 limit) limit { 127 if l.min < l2.min { 128 l.min = l2.min 129 } 130 if l.umin < l2.umin { 131 l.umin = l2.umin 132 } 133 if l.max > l2.max { 134 l.max = l2.max 135 } 136 if l.umax > l2.umax { 137 l.umax = l2.umax 138 } 139 return l 140} 141 142var noLimit = limit{math.MinInt64, math.MaxInt64, 0, math.MaxUint64} 143 144// a limitFact is a limit known for a particular value. 145type limitFact struct { 146 vid ID 147 limit limit 148} 149 150// factsTable keeps track of relations between pairs of values. 151// 152// The fact table logic is sound, but incomplete. Outside of a few 153// special cases, it performs no deduction or arithmetic. While there 154// are known decision procedures for this, the ad hoc approach taken 155// by the facts table is effective for real code while remaining very 156// efficient. 157type factsTable struct { 158 // unsat is true if facts contains a contradiction. 159 // 160 // Note that the factsTable logic is incomplete, so if unsat 161 // is false, the assertions in factsTable could be satisfiable 162 // *or* unsatisfiable. 163 unsat bool // true if facts contains a contradiction 164 unsatDepth int // number of unsat checkpoints 165 166 facts map[pair]relation // current known set of relation 167 stack []fact // previous sets of relations 168 169 // order* is a couple of partial order sets that record information 170 // about relations between SSA values in the signed and unsigned 171 // domain. 172 orderS *poset 173 orderU *poset 174 175 // known lower and upper bounds on individual values. 176 limits map[ID]limit 177 limitStack []limitFact // previous entries 178 179 // For each slice s, a map from s to a len(s)/cap(s) value (if any) 180 // TODO: check if there are cases that matter where we have 181 // more than one len(s) for a slice. We could keep a list if necessary. 182 lens map[ID]*Value 183 caps map[ID]*Value 184 185 // zero is a zero-valued constant 186 zero *Value 187} 188 189// checkpointFact is an invalid value used for checkpointing 190// and restoring factsTable. 191var checkpointFact = fact{} 192var checkpointBound = limitFact{} 193 194func newFactsTable(f *Func) *factsTable { 195 ft := &factsTable{} 196 ft.orderS = f.newPoset() 197 ft.orderU = f.newPoset() 198 ft.orderS.SetUnsigned(false) 199 ft.orderU.SetUnsigned(true) 200 ft.facts = make(map[pair]relation) 201 ft.stack = make([]fact, 4) 202 ft.limits = make(map[ID]limit) 203 ft.limitStack = make([]limitFact, 4) 204 ft.zero = f.ConstInt64(f.Config.Types.Int64, 0) 205 return ft 206} 207 208// update updates the set of relations between v and w in domain d 209// restricting it to r. 210func (ft *factsTable) update(parent *Block, v, w *Value, d domain, r relation) { 211 if parent.Func.pass.debug > 2 { 212 parent.Func.Warnl(parent.Pos, "parent=%s, update %s %s %s", parent, v, w, r) 213 } 214 // No need to do anything else if we already found unsat. 215 if ft.unsat { 216 return 217 } 218 219 // Self-fact. It's wasteful to register it into the facts 220 // table, so just note whether it's satisfiable 221 if v == w { 222 if r&eq == 0 { 223 ft.unsat = true 224 } 225 return 226 } 227 228 if d == signed || d == unsigned { 229 var ok bool 230 order := ft.orderS 231 if d == unsigned { 232 order = ft.orderU 233 } 234 switch r { 235 case lt: 236 ok = order.SetOrder(v, w) 237 case gt: 238 ok = order.SetOrder(w, v) 239 case lt | eq: 240 ok = order.SetOrderOrEqual(v, w) 241 case gt | eq: 242 ok = order.SetOrderOrEqual(w, v) 243 case eq: 244 ok = order.SetEqual(v, w) 245 case lt | gt: 246 ok = order.SetNonEqual(v, w) 247 default: 248 panic("unknown relation") 249 } 250 if !ok { 251 if parent.Func.pass.debug > 2 { 252 parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r) 253 } 254 ft.unsat = true 255 return 256 } 257 } else { 258 if lessByID(w, v) { 259 v, w = w, v 260 r = reverseBits[r] 261 } 262 263 p := pair{v, w, d} 264 oldR, ok := ft.facts[p] 265 if !ok { 266 if v == w { 267 oldR = eq 268 } else { 269 oldR = lt | eq | gt 270 } 271 } 272 // No changes compared to information already in facts table. 273 if oldR == r { 274 return 275 } 276 ft.stack = append(ft.stack, fact{p, oldR}) 277 ft.facts[p] = oldR & r 278 // If this relation is not satisfiable, mark it and exit right away 279 if oldR&r == 0 { 280 if parent.Func.pass.debug > 2 { 281 parent.Func.Warnl(parent.Pos, "unsat %s %s %s", v, w, r) 282 } 283 ft.unsat = true 284 return 285 } 286 } 287 288 // Extract bounds when comparing against constants 289 if v.isGenericIntConst() { 290 v, w = w, v 291 r = reverseBits[r] 292 } 293 if v != nil && w.isGenericIntConst() { 294 // Note: all the +1/-1 below could overflow/underflow. Either will 295 // still generate correct results, it will just lead to imprecision. 296 // In fact if there is overflow/underflow, the corresponding 297 // code is unreachable because the known range is outside the range 298 // of the value's type. 299 old, ok := ft.limits[v.ID] 300 if !ok { 301 old = noLimit 302 if v.isGenericIntConst() { 303 switch d { 304 case signed: 305 old.min, old.max = v.AuxInt, v.AuxInt 306 if v.AuxInt >= 0 { 307 old.umin, old.umax = uint64(v.AuxInt), uint64(v.AuxInt) 308 } 309 case unsigned: 310 old.umin = v.AuxUnsigned() 311 old.umax = old.umin 312 if int64(old.umin) >= 0 { 313 old.min, old.max = int64(old.umin), int64(old.umin) 314 } 315 } 316 } 317 } 318 lim := noLimit 319 switch d { 320 case signed: 321 c := w.AuxInt 322 switch r { 323 case lt: 324 lim.max = c - 1 325 case lt | eq: 326 lim.max = c 327 case gt | eq: 328 lim.min = c 329 case gt: 330 lim.min = c + 1 331 case lt | gt: 332 lim = old 333 if c == lim.min { 334 lim.min++ 335 } 336 if c == lim.max { 337 lim.max-- 338 } 339 case eq: 340 lim.min = c 341 lim.max = c 342 } 343 if lim.min >= 0 { 344 // int(x) >= 0 && int(x) >= N ⇒ uint(x) >= N 345 lim.umin = uint64(lim.min) 346 } 347 if lim.max != noLimit.max && old.min >= 0 && lim.max >= 0 { 348 // 0 <= int(x) <= N ⇒ 0 <= uint(x) <= N 349 // This is for a max update, so the lower bound 350 // comes from what we already know (old). 351 lim.umax = uint64(lim.max) 352 } 353 case unsigned: 354 uc := w.AuxUnsigned() 355 switch r { 356 case lt: 357 lim.umax = uc - 1 358 case lt | eq: 359 lim.umax = uc 360 case gt | eq: 361 lim.umin = uc 362 case gt: 363 lim.umin = uc + 1 364 case lt | gt: 365 lim = old 366 if uc == lim.umin { 367 lim.umin++ 368 } 369 if uc == lim.umax { 370 lim.umax-- 371 } 372 case eq: 373 lim.umin = uc 374 lim.umax = uc 375 } 376 // We could use the contrapositives of the 377 // signed implications to derive signed facts, 378 // but it turns out not to matter. 379 } 380 ft.limitStack = append(ft.limitStack, limitFact{v.ID, old}) 381 lim = old.intersect(lim) 382 ft.limits[v.ID] = lim 383 if v.Block.Func.pass.debug > 2 { 384 v.Block.Func.Warnl(parent.Pos, "parent=%s, new limits %s %s %s %s", parent, v, w, r, lim.String()) 385 } 386 if lim.min > lim.max || lim.umin > lim.umax { 387 ft.unsat = true 388 return 389 } 390 } 391 392 // Derived facts below here are only about numbers. 393 if d != signed && d != unsigned { 394 return 395 } 396 397 // Additional facts we know given the relationship between len and cap. 398 // 399 // TODO: Since prove now derives transitive relations, it 400 // should be sufficient to learn that len(w) <= cap(w) at the 401 // beginning of prove where we look for all len/cap ops. 402 if v.Op == OpSliceLen && r< == 0 && ft.caps[v.Args[0].ID] != nil { 403 // len(s) > w implies cap(s) > w 404 // len(s) >= w implies cap(s) >= w 405 // len(s) == w implies cap(s) >= w 406 ft.update(parent, ft.caps[v.Args[0].ID], w, d, r|gt) 407 } 408 if w.Op == OpSliceLen && r> == 0 && ft.caps[w.Args[0].ID] != nil { 409 // same, length on the RHS. 410 ft.update(parent, v, ft.caps[w.Args[0].ID], d, r|lt) 411 } 412 if v.Op == OpSliceCap && r> == 0 && ft.lens[v.Args[0].ID] != nil { 413 // cap(s) < w implies len(s) < w 414 // cap(s) <= w implies len(s) <= w 415 // cap(s) == w implies len(s) <= w 416 ft.update(parent, ft.lens[v.Args[0].ID], w, d, r|lt) 417 } 418 if w.Op == OpSliceCap && r< == 0 && ft.lens[w.Args[0].ID] != nil { 419 // same, capacity on the RHS. 420 ft.update(parent, v, ft.lens[w.Args[0].ID], d, r|gt) 421 } 422 423 // Process fence-post implications. 424 // 425 // First, make the condition > or >=. 426 if r == lt || r == lt|eq { 427 v, w = w, v 428 r = reverseBits[r] 429 } 430 switch r { 431 case gt: 432 if x, delta := isConstDelta(v); x != nil && delta == 1 { 433 // x+1 > w ⇒ x >= w 434 // 435 // This is useful for eliminating the 436 // growslice branch of append. 437 ft.update(parent, x, w, d, gt|eq) 438 } else if x, delta := isConstDelta(w); x != nil && delta == -1 { 439 // v > x-1 ⇒ v >= x 440 ft.update(parent, v, x, d, gt|eq) 441 } 442 case gt | eq: 443 if x, delta := isConstDelta(v); x != nil && delta == -1 { 444 // x-1 >= w && x > min ⇒ x > w 445 // 446 // Useful for i > 0; s[i-1]. 447 lim, ok := ft.limits[x.ID] 448 if ok && ((d == signed && lim.min > opMin[v.Op]) || (d == unsigned && lim.umin > 0)) { 449 ft.update(parent, x, w, d, gt) 450 } 451 } else if x, delta := isConstDelta(w); x != nil && delta == 1 { 452 // v >= x+1 && x < max ⇒ v > x 453 lim, ok := ft.limits[x.ID] 454 if ok && ((d == signed && lim.max < opMax[w.Op]) || (d == unsigned && lim.umax < opUMax[w.Op])) { 455 ft.update(parent, v, x, d, gt) 456 } 457 } 458 } 459 460 // Process: x+delta > w (with delta constant) 461 // Only signed domain for now (useful for accesses to slices in loops). 462 if r == gt || r == gt|eq { 463 if x, delta := isConstDelta(v); x != nil && d == signed { 464 if parent.Func.pass.debug > 1 { 465 parent.Func.Warnl(parent.Pos, "x+d %s w; x:%v %v delta:%v w:%v d:%v", r, x, parent.String(), delta, w.AuxInt, d) 466 } 467 underflow := true 468 if l, has := ft.limits[x.ID]; has && delta < 0 { 469 if (x.Type.Size() == 8 && l.min >= math.MinInt64-delta) || 470 (x.Type.Size() == 4 && l.min >= math.MinInt32-delta) { 471 underflow = false 472 } 473 } 474 if delta < 0 && !underflow { 475 // If delta < 0 and x+delta cannot underflow then x > x+delta (that is, x > v) 476 ft.update(parent, x, v, signed, gt) 477 } 478 if !w.isGenericIntConst() { 479 // If we know that x+delta > w but w is not constant, we can derive: 480 // if delta < 0 and x+delta cannot underflow, then x > w 481 // This is useful for loops with bounds "len(slice)-K" (delta = -K) 482 if delta < 0 && !underflow { 483 ft.update(parent, x, w, signed, r) 484 } 485 } else { 486 // With w,delta constants, we want to derive: x+delta > w ⇒ x > w-delta 487 // 488 // We compute (using integers of the correct size): 489 // min = w - delta 490 // max = MaxInt - delta 491 // 492 // And we prove that: 493 // if min<max: min < x AND x <= max 494 // if min>max: min < x OR x <= max 495 // 496 // This is always correct, even in case of overflow. 497 // 498 // If the initial fact is x+delta >= w instead, the derived conditions are: 499 // if min<max: min <= x AND x <= max 500 // if min>max: min <= x OR x <= max 501 // 502 // Notice the conditions for max are still <=, as they handle overflows. 503 var min, max int64 504 var vmin, vmax *Value 505 switch x.Type.Size() { 506 case 8: 507 min = w.AuxInt - delta 508 max = int64(^uint64(0)>>1) - delta 509 510 vmin = parent.NewValue0I(parent.Pos, OpConst64, parent.Func.Config.Types.Int64, min) 511 vmax = parent.NewValue0I(parent.Pos, OpConst64, parent.Func.Config.Types.Int64, max) 512 513 case 4: 514 min = int64(int32(w.AuxInt) - int32(delta)) 515 max = int64(int32(^uint32(0)>>1) - int32(delta)) 516 517 vmin = parent.NewValue0I(parent.Pos, OpConst32, parent.Func.Config.Types.Int32, min) 518 vmax = parent.NewValue0I(parent.Pos, OpConst32, parent.Func.Config.Types.Int32, max) 519 520 case 2: 521 min = int64(int16(w.AuxInt) - int16(delta)) 522 max = int64(int16(^uint16(0)>>1) - int16(delta)) 523 524 vmin = parent.NewValue0I(parent.Pos, OpConst16, parent.Func.Config.Types.Int16, min) 525 vmax = parent.NewValue0I(parent.Pos, OpConst16, parent.Func.Config.Types.Int16, max) 526 527 case 1: 528 min = int64(int8(w.AuxInt) - int8(delta)) 529 max = int64(int8(^uint8(0)>>1) - int8(delta)) 530 531 vmin = parent.NewValue0I(parent.Pos, OpConst8, parent.Func.Config.Types.Int8, min) 532 vmax = parent.NewValue0I(parent.Pos, OpConst8, parent.Func.Config.Types.Int8, max) 533 534 default: 535 panic("unimplemented") 536 } 537 538 if min < max { 539 // Record that x > min and max >= x 540 ft.update(parent, x, vmin, d, r) 541 ft.update(parent, vmax, x, d, r|eq) 542 } else { 543 // We know that either x>min OR x<=max. factsTable cannot record OR conditions, 544 // so let's see if we can already prove that one of them is false, in which case 545 // the other must be true 546 if l, has := ft.limits[x.ID]; has { 547 if l.max <= min { 548 if r&eq == 0 || l.max < min { 549 // x>min (x>=min) is impossible, so it must be x<=max 550 ft.update(parent, vmax, x, d, r|eq) 551 } 552 } else if l.min > max { 553 // x<=max is impossible, so it must be x>min 554 ft.update(parent, x, vmin, d, r) 555 } 556 } 557 } 558 } 559 } 560 } 561 562 // Look through value-preserving extensions. 563 // If the domain is appropriate for the pre-extension Type, 564 // repeat the update with the pre-extension Value. 565 if isCleanExt(v) { 566 switch { 567 case d == signed && v.Args[0].Type.IsSigned(): 568 fallthrough 569 case d == unsigned && !v.Args[0].Type.IsSigned(): 570 ft.update(parent, v.Args[0], w, d, r) 571 } 572 } 573 if isCleanExt(w) { 574 switch { 575 case d == signed && w.Args[0].Type.IsSigned(): 576 fallthrough 577 case d == unsigned && !w.Args[0].Type.IsSigned(): 578 ft.update(parent, v, w.Args[0], d, r) 579 } 580 } 581} 582 583var opMin = map[Op]int64{ 584 OpAdd64: math.MinInt64, OpSub64: math.MinInt64, 585 OpAdd32: math.MinInt32, OpSub32: math.MinInt32, 586} 587 588var opMax = map[Op]int64{ 589 OpAdd64: math.MaxInt64, OpSub64: math.MaxInt64, 590 OpAdd32: math.MaxInt32, OpSub32: math.MaxInt32, 591} 592 593var opUMax = map[Op]uint64{ 594 OpAdd64: math.MaxUint64, OpSub64: math.MaxUint64, 595 OpAdd32: math.MaxUint32, OpSub32: math.MaxUint32, 596} 597 598// isNonNegative reports whether v is known to be non-negative. 599func (ft *factsTable) isNonNegative(v *Value) bool { 600 if isNonNegative(v) { 601 return true 602 } 603 604 var max int64 605 switch v.Type.Size() { 606 case 1: 607 max = math.MaxInt8 608 case 2: 609 max = math.MaxInt16 610 case 4: 611 max = math.MaxInt32 612 case 8: 613 max = math.MaxInt64 614 default: 615 panic("unexpected integer size") 616 } 617 618 // Check if the recorded limits can prove that the value is positive 619 620 if l, has := ft.limits[v.ID]; has && (l.min >= 0 || l.umax <= uint64(max)) { 621 return true 622 } 623 624 // Check if v = x+delta, and we can use x's limits to prove that it's positive 625 if x, delta := isConstDelta(v); x != nil { 626 if l, has := ft.limits[x.ID]; has { 627 if delta > 0 && l.min >= -delta && l.max <= max-delta { 628 return true 629 } 630 if delta < 0 && l.min >= -delta { 631 return true 632 } 633 } 634 } 635 636 // Check if v is a value-preserving extension of a non-negative value. 637 if isCleanExt(v) && ft.isNonNegative(v.Args[0]) { 638 return true 639 } 640 641 // Check if the signed poset can prove that the value is >= 0 642 return ft.orderS.OrderedOrEqual(ft.zero, v) 643} 644 645// checkpoint saves the current state of known relations. 646// Called when descending on a branch. 647func (ft *factsTable) checkpoint() { 648 if ft.unsat { 649 ft.unsatDepth++ 650 } 651 ft.stack = append(ft.stack, checkpointFact) 652 ft.limitStack = append(ft.limitStack, checkpointBound) 653 ft.orderS.Checkpoint() 654 ft.orderU.Checkpoint() 655} 656 657// restore restores known relation to the state just 658// before the previous checkpoint. 659// Called when backing up on a branch. 660func (ft *factsTable) restore() { 661 if ft.unsatDepth > 0 { 662 ft.unsatDepth-- 663 } else { 664 ft.unsat = false 665 } 666 for { 667 old := ft.stack[len(ft.stack)-1] 668 ft.stack = ft.stack[:len(ft.stack)-1] 669 if old == checkpointFact { 670 break 671 } 672 if old.r == lt|eq|gt { 673 delete(ft.facts, old.p) 674 } else { 675 ft.facts[old.p] = old.r 676 } 677 } 678 for { 679 old := ft.limitStack[len(ft.limitStack)-1] 680 ft.limitStack = ft.limitStack[:len(ft.limitStack)-1] 681 if old.vid == 0 { // checkpointBound 682 break 683 } 684 if old.limit == noLimit { 685 delete(ft.limits, old.vid) 686 } else { 687 ft.limits[old.vid] = old.limit 688 } 689 } 690 ft.orderS.Undo() 691 ft.orderU.Undo() 692} 693 694func lessByID(v, w *Value) bool { 695 if v == nil && w == nil { 696 // Should not happen, but just in case. 697 return false 698 } 699 if v == nil { 700 return true 701 } 702 return w != nil && v.ID < w.ID 703} 704 705var ( 706 reverseBits = [...]relation{0, 4, 2, 6, 1, 5, 3, 7} 707 708 // maps what we learn when the positive branch is taken. 709 // For example: 710 // OpLess8: {signed, lt}, 711 // v1 = (OpLess8 v2 v3). 712 // If v1 branch is taken then we learn that the rangeMask 713 // can be at most lt. 714 domainRelationTable = map[Op]struct { 715 d domain 716 r relation 717 }{ 718 OpEq8: {signed | unsigned, eq}, 719 OpEq16: {signed | unsigned, eq}, 720 OpEq32: {signed | unsigned, eq}, 721 OpEq64: {signed | unsigned, eq}, 722 OpEqPtr: {pointer, eq}, 723 724 OpNeq8: {signed | unsigned, lt | gt}, 725 OpNeq16: {signed | unsigned, lt | gt}, 726 OpNeq32: {signed | unsigned, lt | gt}, 727 OpNeq64: {signed | unsigned, lt | gt}, 728 OpNeqPtr: {pointer, lt | gt}, 729 730 OpLess8: {signed, lt}, 731 OpLess8U: {unsigned, lt}, 732 OpLess16: {signed, lt}, 733 OpLess16U: {unsigned, lt}, 734 OpLess32: {signed, lt}, 735 OpLess32U: {unsigned, lt}, 736 OpLess64: {signed, lt}, 737 OpLess64U: {unsigned, lt}, 738 739 OpLeq8: {signed, lt | eq}, 740 OpLeq8U: {unsigned, lt | eq}, 741 OpLeq16: {signed, lt | eq}, 742 OpLeq16U: {unsigned, lt | eq}, 743 OpLeq32: {signed, lt | eq}, 744 OpLeq32U: {unsigned, lt | eq}, 745 OpLeq64: {signed, lt | eq}, 746 OpLeq64U: {unsigned, lt | eq}, 747 748 // For these ops, the negative branch is different: we can only 749 // prove signed/GE (signed/GT) if we can prove that arg0 is non-negative. 750 // See the special case in addBranchRestrictions. 751 OpIsInBounds: {signed | unsigned, lt}, // 0 <= arg0 < arg1 752 OpIsSliceInBounds: {signed | unsigned, lt | eq}, // 0 <= arg0 <= arg1 753 } 754) 755 756// cleanup returns the posets to the free list 757func (ft *factsTable) cleanup(f *Func) { 758 for _, po := range []*poset{ft.orderS, ft.orderU} { 759 // Make sure it's empty as it should be. A non-empty poset 760 // might cause errors and miscompilations if reused. 761 if checkEnabled { 762 if err := po.CheckEmpty(); err != nil { 763 f.Fatalf("poset not empty after function %s: %v", f.Name, err) 764 } 765 } 766 f.retPoset(po) 767 } 768} 769 770// prove removes redundant BlockIf branches that can be inferred 771// from previous dominating comparisons. 772// 773// By far, the most common redundant pair are generated by bounds checking. 774// For example for the code: 775// 776// a[i] = 4 777// foo(a[i]) 778// 779// The compiler will generate the following code: 780// 781// if i >= len(a) { 782// panic("not in bounds") 783// } 784// a[i] = 4 785// if i >= len(a) { 786// panic("not in bounds") 787// } 788// foo(a[i]) 789// 790// The second comparison i >= len(a) is clearly redundant because if the 791// else branch of the first comparison is executed, we already know that i < len(a). 792// The code for the second panic can be removed. 793// 794// prove works by finding contradictions and trimming branches whose 795// conditions are unsatisfiable given the branches leading up to them. 796// It tracks a "fact table" of branch conditions. For each branching 797// block, it asserts the branch conditions that uniquely dominate that 798// block, and then separately asserts the block's branch condition and 799// its negation. If either leads to a contradiction, it can trim that 800// successor. 801func prove(f *Func) { 802 // Find induction variables. Currently, findIndVars 803 // is limited to one induction variable per block. 804 var indVars map[*Block]indVar 805 for _, v := range findIndVar(f) { 806 ind := v.ind 807 if len(ind.Args) != 2 { 808 // the rewrite code assumes there is only ever two parents to loops 809 panic("unexpected induction with too many parents") 810 } 811 812 nxt := v.nxt 813 if !(ind.Uses == 2 && // 2 used by comparison and next 814 nxt.Uses == 1) { // 1 used by induction 815 // ind or nxt is used inside the loop, add it for the facts table 816 if indVars == nil { 817 indVars = make(map[*Block]indVar) 818 } 819 indVars[v.entry] = v 820 continue 821 } else { 822 // Since this induction variable is not used for anything but counting the iterations, 823 // no point in putting it into the facts table. 824 } 825 826 // try to rewrite to a downward counting loop checking against start if the 827 // loop body does not depends on ind or nxt and end is known before the loop. 828 // This reduce pressure on the register allocator because this do not need 829 // to use end on each iteration anymore. We compare against the start constant instead. 830 // That means this code: 831 // 832 // loop: 833 // ind = (Phi (Const [x]) nxt), 834 // if ind < end 835 // then goto enter_loop 836 // else goto exit_loop 837 // 838 // enter_loop: 839 // do something without using ind nor nxt 840 // nxt = inc + ind 841 // goto loop 842 // 843 // exit_loop: 844 // 845 // is rewritten to: 846 // 847 // loop: 848 // ind = (Phi end nxt) 849 // if (Const [x]) < ind 850 // then goto enter_loop 851 // else goto exit_loop 852 // 853 // enter_loop: 854 // do something without using ind nor nxt 855 // nxt = ind - inc 856 // goto loop 857 // 858 // exit_loop: 859 // 860 // this is better because it only require to keep ind then nxt alive while looping, 861 // while the original form keeps ind then nxt and end alive 862 start, end := v.min, v.max 863 if v.flags&indVarCountDown != 0 { 864 start, end = end, start 865 } 866 867 if !(start.Op == OpConst8 || start.Op == OpConst16 || start.Op == OpConst32 || start.Op == OpConst64) { 868 // if start is not a constant we would be winning nothing from inverting the loop 869 continue 870 } 871 if end.Op == OpConst8 || end.Op == OpConst16 || end.Op == OpConst32 || end.Op == OpConst64 { 872 // TODO: if both start and end are constants we should rewrite such that the comparison 873 // is against zero and nxt is ++ or -- operation 874 // That means: 875 // for i := 2; i < 11; i += 2 { 876 // should be rewritten to: 877 // for i := 5; 0 < i; i-- { 878 continue 879 } 880 881 if end.Block == ind.Block { 882 // we can't rewrite loops where the condition depends on the loop body 883 // this simple check is forced to work because if this is true a Phi in ind.Block must exists 884 continue 885 } 886 887 check := ind.Block.Controls[0] 888 // invert the check 889 check.Args[0], check.Args[1] = check.Args[1], check.Args[0] 890 891 // swap start and end in the loop 892 for i, v := range check.Args { 893 if v != end { 894 continue 895 } 896 897 check.SetArg(i, start) 898 goto replacedEnd 899 } 900 panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end)) 901 replacedEnd: 902 903 for i, v := range ind.Args { 904 if v != start { 905 continue 906 } 907 908 ind.SetArg(i, end) 909 goto replacedStart 910 } 911 panic(fmt.Sprintf("unreachable, ind: %v, start: %v, end: %v", ind, start, end)) 912 replacedStart: 913 914 if nxt.Args[0] != ind { 915 // unlike additions subtractions are not commutative so be sure we get it right 916 nxt.Args[0], nxt.Args[1] = nxt.Args[1], nxt.Args[0] 917 } 918 919 switch nxt.Op { 920 case OpAdd8: 921 nxt.Op = OpSub8 922 case OpAdd16: 923 nxt.Op = OpSub16 924 case OpAdd32: 925 nxt.Op = OpSub32 926 case OpAdd64: 927 nxt.Op = OpSub64 928 case OpSub8: 929 nxt.Op = OpAdd8 930 case OpSub16: 931 nxt.Op = OpAdd16 932 case OpSub32: 933 nxt.Op = OpAdd32 934 case OpSub64: 935 nxt.Op = OpAdd64 936 default: 937 panic("unreachable") 938 } 939 940 if f.pass.debug > 0 { 941 f.Warnl(ind.Pos, "Inverted loop iteration") 942 } 943 } 944 945 ft := newFactsTable(f) 946 ft.checkpoint() 947 948 var lensVars map[*Block][]*Value 949 var logicVars map[*Block][]*Value 950 951 // Find length and capacity ops. 952 for _, b := range f.Blocks { 953 for _, v := range b.Values { 954 if v.Uses == 0 { 955 // We don't care about dead values. 956 // (There can be some that are CSEd but not removed yet.) 957 continue 958 } 959 switch v.Op { 960 case OpStringLen: 961 ft.update(b, v, ft.zero, signed, gt|eq) 962 case OpSliceLen: 963 if ft.lens == nil { 964 ft.lens = map[ID]*Value{} 965 } 966 // Set all len Values for the same slice as equal in the poset. 967 // The poset handles transitive relations, so Values related to 968 // any OpSliceLen for this slice will be correctly related to others. 969 if l, ok := ft.lens[v.Args[0].ID]; ok { 970 ft.update(b, v, l, signed, eq) 971 } else { 972 ft.lens[v.Args[0].ID] = v 973 } 974 ft.update(b, v, ft.zero, signed, gt|eq) 975 if v.Args[0].Op == OpSliceMake { 976 if lensVars == nil { 977 lensVars = make(map[*Block][]*Value) 978 } 979 lensVars[b] = append(lensVars[b], v) 980 } 981 case OpSliceCap: 982 if ft.caps == nil { 983 ft.caps = map[ID]*Value{} 984 } 985 // Same as case OpSliceLen above, but for slice cap. 986 if c, ok := ft.caps[v.Args[0].ID]; ok { 987 ft.update(b, v, c, signed, eq) 988 } else { 989 ft.caps[v.Args[0].ID] = v 990 } 991 ft.update(b, v, ft.zero, signed, gt|eq) 992 if v.Args[0].Op == OpSliceMake { 993 if lensVars == nil { 994 lensVars = make(map[*Block][]*Value) 995 } 996 lensVars[b] = append(lensVars[b], v) 997 } 998 case OpCtz64, OpCtz32, OpCtz16, OpCtz8, OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8: 999 ft.update(b, v, ft.zero, signed, gt|eq) 1000 // TODO: we could also do <= 64/32/16/8, if that helped. 1001 case OpAnd64, OpAnd32, OpAnd16, OpAnd8: 1002 ft.update(b, v, v.Args[1], unsigned, lt|eq) 1003 ft.update(b, v, v.Args[0], unsigned, lt|eq) 1004 for i := 0; i < 2; i++ { 1005 if isNonNegative(v.Args[i]) { 1006 ft.update(b, v, v.Args[i], signed, lt|eq) 1007 ft.update(b, v, ft.zero, signed, gt|eq) 1008 } 1009 } 1010 if logicVars == nil { 1011 logicVars = make(map[*Block][]*Value) 1012 } 1013 logicVars[b] = append(logicVars[b], v) 1014 case OpOr64, OpOr32, OpOr16, OpOr8: 1015 // TODO: investigate how to always add facts without much slowdown, see issue #57959. 1016 if v.Args[0].isGenericIntConst() { 1017 ft.update(b, v, v.Args[0], unsigned, gt|eq) 1018 } 1019 if v.Args[1].isGenericIntConst() { 1020 ft.update(b, v, v.Args[1], unsigned, gt|eq) 1021 } 1022 case OpDiv64u, OpDiv32u, OpDiv16u, OpDiv8u, 1023 OpRsh8Ux64, OpRsh8Ux32, OpRsh8Ux16, OpRsh8Ux8, 1024 OpRsh16Ux64, OpRsh16Ux32, OpRsh16Ux16, OpRsh16Ux8, 1025 OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8, 1026 OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8: 1027 ft.update(b, v, v.Args[0], unsigned, lt|eq) 1028 case OpMod64u, OpMod32u, OpMod16u, OpMod8u: 1029 ft.update(b, v, v.Args[0], unsigned, lt|eq) 1030 ft.update(b, v, v.Args[1], unsigned, lt) 1031 case OpPhi: 1032 // Determine the min and max value of OpPhi composed entirely of integer constants. 1033 // 1034 // For example, for an OpPhi: 1035 // 1036 // v1 = OpConst64 [13] 1037 // v2 = OpConst64 [7] 1038 // v3 = OpConst64 [42] 1039 // 1040 // v4 = OpPhi(v1, v2, v3) 1041 // 1042 // We can prove: 1043 // 1044 // v4 >= 7 && v4 <= 42 1045 // 1046 // TODO(jake-ciolek): Handle nested constant OpPhi's 1047 sameConstOp := true 1048 min := 0 1049 max := 0 1050 1051 if !v.Args[min].isGenericIntConst() { 1052 break 1053 } 1054 1055 for k := range v.Args { 1056 if v.Args[k].Op != v.Args[min].Op { 1057 sameConstOp = false 1058 break 1059 } 1060 if v.Args[k].AuxInt < v.Args[min].AuxInt { 1061 min = k 1062 } 1063 if v.Args[k].AuxInt > v.Args[max].AuxInt { 1064 max = k 1065 } 1066 } 1067 1068 if sameConstOp { 1069 ft.update(b, v, v.Args[min], signed, gt|eq) 1070 ft.update(b, v, v.Args[max], signed, lt|eq) 1071 } 1072 // One might be tempted to create a v >= ft.zero relation for 1073 // all OpPhi's composed of only provably-positive values 1074 // but that bloats up the facts table for a very negligible gain. 1075 // In Go itself, very few functions get improved (< 5) at a cost of 5-7% total increase 1076 // of compile time. 1077 } 1078 } 1079 } 1080 1081 // current node state 1082 type walkState int 1083 const ( 1084 descend walkState = iota 1085 simplify 1086 ) 1087 // work maintains the DFS stack. 1088 type bp struct { 1089 block *Block // current handled block 1090 state walkState // what's to do 1091 } 1092 work := make([]bp, 0, 256) 1093 work = append(work, bp{ 1094 block: f.Entry, 1095 state: descend, 1096 }) 1097 1098 idom := f.Idom() 1099 sdom := f.Sdom() 1100 1101 // DFS on the dominator tree. 1102 // 1103 // For efficiency, we consider only the dominator tree rather 1104 // than the entire flow graph. On the way down, we consider 1105 // incoming branches and accumulate conditions that uniquely 1106 // dominate the current block. If we discover a contradiction, 1107 // we can eliminate the entire block and all of its children. 1108 // On the way back up, we consider outgoing branches that 1109 // haven't already been considered. This way we consider each 1110 // branch condition only once. 1111 for len(work) > 0 { 1112 node := work[len(work)-1] 1113 work = work[:len(work)-1] 1114 parent := idom[node.block.ID] 1115 branch := getBranch(sdom, parent, node.block) 1116 1117 switch node.state { 1118 case descend: 1119 ft.checkpoint() 1120 1121 // Entering the block, add the block-depending facts that we collected 1122 // at the beginning: induction variables and lens/caps of slices. 1123 if iv, ok := indVars[node.block]; ok { 1124 addIndVarRestrictions(ft, parent, iv) 1125 } 1126 if lens, ok := lensVars[node.block]; ok { 1127 for _, v := range lens { 1128 switch v.Op { 1129 case OpSliceLen: 1130 ft.update(node.block, v, v.Args[0].Args[1], signed, eq) 1131 case OpSliceCap: 1132 ft.update(node.block, v, v.Args[0].Args[2], signed, eq) 1133 } 1134 } 1135 } 1136 1137 if branch != unknown { 1138 addBranchRestrictions(ft, parent, branch) 1139 // After we add the branch restriction, re-check the logic operations in the parent block, 1140 // it may give us more info to omit some branches 1141 if logic, ok := logicVars[parent]; ok { 1142 for _, v := range logic { 1143 // we only have OpAnd for now 1144 ft.update(parent, v, v.Args[1], unsigned, lt|eq) 1145 ft.update(parent, v, v.Args[0], unsigned, lt|eq) 1146 for i := 0; i < 2; i++ { 1147 if isNonNegative(v.Args[i]) { 1148 ft.update(parent, v, v.Args[i], signed, lt|eq) 1149 ft.update(parent, v, ft.zero, signed, gt|eq) 1150 } 1151 } 1152 } 1153 } 1154 if ft.unsat { 1155 // node.block is unreachable. 1156 // Remove it and don't visit 1157 // its children. 1158 removeBranch(parent, branch) 1159 ft.restore() 1160 break 1161 } 1162 // Otherwise, we can now commit to 1163 // taking this branch. We'll restore 1164 // ft when we unwind. 1165 } 1166 1167 // Add inductive facts for phis in this block. 1168 addLocalInductiveFacts(ft, node.block) 1169 1170 work = append(work, bp{ 1171 block: node.block, 1172 state: simplify, 1173 }) 1174 for s := sdom.Child(node.block); s != nil; s = sdom.Sibling(s) { 1175 work = append(work, bp{ 1176 block: s, 1177 state: descend, 1178 }) 1179 } 1180 1181 case simplify: 1182 simplifyBlock(sdom, ft, node.block) 1183 ft.restore() 1184 } 1185 } 1186 1187 ft.restore() 1188 1189 ft.cleanup(f) 1190} 1191 1192// getBranch returns the range restrictions added by p 1193// when reaching b. p is the immediate dominator of b. 1194func getBranch(sdom SparseTree, p *Block, b *Block) branch { 1195 if p == nil { 1196 return unknown 1197 } 1198 switch p.Kind { 1199 case BlockIf: 1200 // If p and p.Succs[0] are dominators it means that every path 1201 // from entry to b passes through p and p.Succs[0]. We care that 1202 // no path from entry to b passes through p.Succs[1]. If p.Succs[0] 1203 // has one predecessor then (apart from the degenerate case), 1204 // there is no path from entry that can reach b through p.Succs[1]. 1205 // TODO: how about p->yes->b->yes, i.e. a loop in yes. 1206 if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 { 1207 return positive 1208 } 1209 if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 { 1210 return negative 1211 } 1212 case BlockJumpTable: 1213 // TODO: this loop can lead to quadratic behavior, as 1214 // getBranch can be called len(p.Succs) times. 1215 for i, e := range p.Succs { 1216 if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 { 1217 return jumpTable0 + branch(i) 1218 } 1219 } 1220 } 1221 return unknown 1222} 1223 1224// addIndVarRestrictions updates the factsTables ft with the facts 1225// learned from the induction variable indVar which drives the loop 1226// starting in Block b. 1227func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) { 1228 d := signed 1229 if ft.isNonNegative(iv.min) && ft.isNonNegative(iv.max) { 1230 d |= unsigned 1231 } 1232 1233 if iv.flags&indVarMinExc == 0 { 1234 addRestrictions(b, ft, d, iv.min, iv.ind, lt|eq) 1235 } else { 1236 addRestrictions(b, ft, d, iv.min, iv.ind, lt) 1237 } 1238 1239 if iv.flags&indVarMaxInc == 0 { 1240 addRestrictions(b, ft, d, iv.ind, iv.max, lt) 1241 } else { 1242 addRestrictions(b, ft, d, iv.ind, iv.max, lt|eq) 1243 } 1244} 1245 1246// addBranchRestrictions updates the factsTables ft with the facts learned when 1247// branching from Block b in direction br. 1248func addBranchRestrictions(ft *factsTable, b *Block, br branch) { 1249 c := b.Controls[0] 1250 switch { 1251 case br == negative: 1252 addRestrictions(b, ft, boolean, nil, c, eq) 1253 case br == positive: 1254 addRestrictions(b, ft, boolean, nil, c, lt|gt) 1255 case br >= jumpTable0: 1256 idx := br - jumpTable0 1257 val := int64(idx) 1258 if v, off := isConstDelta(c); v != nil { 1259 // Establish the bound on the underlying value we're switching on, 1260 // not on the offset-ed value used as the jump table index. 1261 c = v 1262 val -= off 1263 } 1264 old, ok := ft.limits[c.ID] 1265 if !ok { 1266 old = noLimit 1267 } 1268 ft.limitStack = append(ft.limitStack, limitFact{c.ID, old}) 1269 if val < old.min || val > old.max || uint64(val) < old.umin || uint64(val) > old.umax { 1270 ft.unsat = true 1271 if b.Func.pass.debug > 2 { 1272 b.Func.Warnl(b.Pos, "block=%s outedge=%d %s=%d unsat", b, idx, c, val) 1273 } 1274 } else { 1275 ft.limits[c.ID] = limit{val, val, uint64(val), uint64(val)} 1276 if b.Func.pass.debug > 2 { 1277 b.Func.Warnl(b.Pos, "block=%s outedge=%d %s=%d", b, idx, c, val) 1278 } 1279 } 1280 default: 1281 panic("unknown branch") 1282 } 1283 if tr, has := domainRelationTable[c.Op]; has { 1284 // When we branched from parent we learned a new set of 1285 // restrictions. Update the factsTable accordingly. 1286 d := tr.d 1287 if d == signed && ft.isNonNegative(c.Args[0]) && ft.isNonNegative(c.Args[1]) { 1288 d |= unsigned 1289 } 1290 switch c.Op { 1291 case OpIsInBounds, OpIsSliceInBounds: 1292 // 0 <= a0 < a1 (or 0 <= a0 <= a1) 1293 // 1294 // On the positive branch, we learn: 1295 // signed: 0 <= a0 < a1 (or 0 <= a0 <= a1) 1296 // unsigned: a0 < a1 (or a0 <= a1) 1297 // 1298 // On the negative branch, we learn (0 > a0 || 1299 // a0 >= a1). In the unsigned domain, this is 1300 // simply a0 >= a1 (which is the reverse of the 1301 // positive branch, so nothing surprising). 1302 // But in the signed domain, we can't express the || 1303 // condition, so check if a0 is non-negative instead, 1304 // to be able to learn something. 1305 switch br { 1306 case negative: 1307 d = unsigned 1308 if ft.isNonNegative(c.Args[0]) { 1309 d |= signed 1310 } 1311 addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r^(lt|gt|eq)) 1312 case positive: 1313 addRestrictions(b, ft, signed, ft.zero, c.Args[0], lt|eq) 1314 addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r) 1315 } 1316 default: 1317 switch br { 1318 case negative: 1319 addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r^(lt|gt|eq)) 1320 case positive: 1321 addRestrictions(b, ft, d, c.Args[0], c.Args[1], tr.r) 1322 } 1323 } 1324 1325 } 1326} 1327 1328// addRestrictions updates restrictions from the immediate 1329// dominating block (p) using r. 1330func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r relation) { 1331 if t == 0 { 1332 // Trivial case: nothing to do. 1333 // Should not happen, but just in case. 1334 return 1335 } 1336 for i := domain(1); i <= t; i <<= 1 { 1337 if t&i == 0 { 1338 continue 1339 } 1340 ft.update(parent, v, w, i, r) 1341 } 1342} 1343 1344// addLocalInductiveFacts adds inductive facts when visiting b, where 1345// b is a join point in a loop. In contrast with findIndVar, this 1346// depends on facts established for b, which is why it happens when 1347// visiting b. 1348// 1349// TODO: It would be nice to combine this with findIndVar. 1350func addLocalInductiveFacts(ft *factsTable, b *Block) { 1351 // This looks for a specific pattern of induction: 1352 // 1353 // 1. i1 = OpPhi(min, i2) in b 1354 // 2. i2 = i1 + 1 1355 // 3. i2 < max at exit from b.Preds[1] 1356 // 4. min < max 1357 // 1358 // If all of these conditions are true, then i1 < max and i1 >= min. 1359 1360 // To ensure this is a loop header node. 1361 if len(b.Preds) != 2 { 1362 return 1363 } 1364 1365 for _, i1 := range b.Values { 1366 if i1.Op != OpPhi { 1367 continue 1368 } 1369 1370 // Check for conditions 1 and 2. This is easy to do 1371 // and will throw out most phis. 1372 min, i2 := i1.Args[0], i1.Args[1] 1373 if i1q, delta := isConstDelta(i2); i1q != i1 || delta != 1 { 1374 continue 1375 } 1376 1377 // Try to prove condition 3. We can't just query the 1378 // fact table for this because we don't know what the 1379 // facts of b.Preds[1] are (in general, b.Preds[1] is 1380 // a loop-back edge, so we haven't even been there 1381 // yet). As a conservative approximation, we look for 1382 // this condition in the predecessor chain until we 1383 // hit a join point. 1384 uniquePred := func(b *Block) *Block { 1385 if len(b.Preds) == 1 { 1386 return b.Preds[0].b 1387 } 1388 return nil 1389 } 1390 pred, child := b.Preds[1].b, b 1391 for ; pred != nil; pred, child = uniquePred(pred), pred { 1392 if pred.Kind != BlockIf { 1393 continue 1394 } 1395 control := pred.Controls[0] 1396 1397 br := unknown 1398 if pred.Succs[0].b == child { 1399 br = positive 1400 } 1401 if pred.Succs[1].b == child { 1402 if br != unknown { 1403 continue 1404 } 1405 br = negative 1406 } 1407 if br == unknown { 1408 continue 1409 } 1410 1411 tr, has := domainRelationTable[control.Op] 1412 if !has { 1413 continue 1414 } 1415 r := tr.r 1416 if br == negative { 1417 // Negative branch taken to reach b. 1418 // Complement the relations. 1419 r = (lt | eq | gt) ^ r 1420 } 1421 1422 // Check for i2 < max or max > i2. 1423 var max *Value 1424 if r == lt && control.Args[0] == i2 { 1425 max = control.Args[1] 1426 } else if r == gt && control.Args[1] == i2 { 1427 max = control.Args[0] 1428 } else { 1429 continue 1430 } 1431 1432 // Check condition 4 now that we have a 1433 // candidate max. For this we can query the 1434 // fact table. We "prove" min < max by showing 1435 // that min >= max is unsat. (This may simply 1436 // compare two constants; that's fine.) 1437 ft.checkpoint() 1438 ft.update(b, min, max, tr.d, gt|eq) 1439 proved := ft.unsat 1440 ft.restore() 1441 1442 if proved { 1443 // We know that min <= i1 < max. 1444 if b.Func.pass.debug > 0 { 1445 printIndVar(b, i1, min, max, 1, 0) 1446 } 1447 ft.update(b, min, i1, tr.d, lt|eq) 1448 ft.update(b, i1, max, tr.d, lt) 1449 } 1450 } 1451 } 1452} 1453 1454var ctzNonZeroOp = map[Op]Op{OpCtz8: OpCtz8NonZero, OpCtz16: OpCtz16NonZero, OpCtz32: OpCtz32NonZero, OpCtz64: OpCtz64NonZero} 1455var mostNegativeDividend = map[Op]int64{ 1456 OpDiv16: -1 << 15, 1457 OpMod16: -1 << 15, 1458 OpDiv32: -1 << 31, 1459 OpMod32: -1 << 31, 1460 OpDiv64: -1 << 63, 1461 OpMod64: -1 << 63} 1462 1463// simplifyBlock simplifies some constant values in b and evaluates 1464// branches to non-uniquely dominated successors of b. 1465func simplifyBlock(sdom SparseTree, ft *factsTable, b *Block) { 1466 for _, v := range b.Values { 1467 switch v.Op { 1468 case OpSlicemask: 1469 // Replace OpSlicemask operations in b with constants where possible. 1470 x, delta := isConstDelta(v.Args[0]) 1471 if x == nil { 1472 break 1473 } 1474 // slicemask(x + y) 1475 // if x is larger than -y (y is negative), then slicemask is -1. 1476 lim, ok := ft.limits[x.ID] 1477 if !ok { 1478 break 1479 } 1480 if lim.umin > uint64(-delta) { 1481 if v.Args[0].Op == OpAdd64 { 1482 v.reset(OpConst64) 1483 } else { 1484 v.reset(OpConst32) 1485 } 1486 if b.Func.pass.debug > 0 { 1487 b.Func.Warnl(v.Pos, "Proved slicemask not needed") 1488 } 1489 v.AuxInt = -1 1490 } 1491 case OpCtz8, OpCtz16, OpCtz32, OpCtz64: 1492 // On some architectures, notably amd64, we can generate much better 1493 // code for CtzNN if we know that the argument is non-zero. 1494 // Capture that information here for use in arch-specific optimizations. 1495 x := v.Args[0] 1496 lim, ok := ft.limits[x.ID] 1497 if !ok { 1498 break 1499 } 1500 if lim.umin > 0 || lim.min > 0 || lim.max < 0 { 1501 if b.Func.pass.debug > 0 { 1502 b.Func.Warnl(v.Pos, "Proved %v non-zero", v.Op) 1503 } 1504 v.Op = ctzNonZeroOp[v.Op] 1505 } 1506 case OpRsh8x8, OpRsh8x16, OpRsh8x32, OpRsh8x64, 1507 OpRsh16x8, OpRsh16x16, OpRsh16x32, OpRsh16x64, 1508 OpRsh32x8, OpRsh32x16, OpRsh32x32, OpRsh32x64, 1509 OpRsh64x8, OpRsh64x16, OpRsh64x32, OpRsh64x64: 1510 // Check whether, for a >> b, we know that a is non-negative 1511 // and b is all of a's bits except the MSB. If so, a is shifted to zero. 1512 bits := 8 * v.Type.Size() 1513 if v.Args[1].isGenericIntConst() && v.Args[1].AuxInt >= bits-1 && ft.isNonNegative(v.Args[0]) { 1514 if b.Func.pass.debug > 0 { 1515 b.Func.Warnl(v.Pos, "Proved %v shifts to zero", v.Op) 1516 } 1517 switch bits { 1518 case 64: 1519 v.reset(OpConst64) 1520 case 32: 1521 v.reset(OpConst32) 1522 case 16: 1523 v.reset(OpConst16) 1524 case 8: 1525 v.reset(OpConst8) 1526 default: 1527 panic("unexpected integer size") 1528 } 1529 v.AuxInt = 0 1530 break // Be sure not to fallthrough - this is no longer OpRsh. 1531 } 1532 // If the Rsh hasn't been replaced with 0, still check if it is bounded. 1533 fallthrough 1534 case OpLsh8x8, OpLsh8x16, OpLsh8x32, OpLsh8x64, 1535 OpLsh16x8, OpLsh16x16, OpLsh16x32, OpLsh16x64, 1536 OpLsh32x8, OpLsh32x16, OpLsh32x32, OpLsh32x64, 1537 OpLsh64x8, OpLsh64x16, OpLsh64x32, OpLsh64x64, 1538 OpRsh8Ux8, OpRsh8Ux16, OpRsh8Ux32, OpRsh8Ux64, 1539 OpRsh16Ux8, OpRsh16Ux16, OpRsh16Ux32, OpRsh16Ux64, 1540 OpRsh32Ux8, OpRsh32Ux16, OpRsh32Ux32, OpRsh32Ux64, 1541 OpRsh64Ux8, OpRsh64Ux16, OpRsh64Ux32, OpRsh64Ux64: 1542 // Check whether, for a << b, we know that b 1543 // is strictly less than the number of bits in a. 1544 by := v.Args[1] 1545 lim, ok := ft.limits[by.ID] 1546 if !ok { 1547 break 1548 } 1549 bits := 8 * v.Args[0].Type.Size() 1550 if lim.umax < uint64(bits) || (lim.max < bits && ft.isNonNegative(by)) { 1551 v.AuxInt = 1 // see shiftIsBounded 1552 if b.Func.pass.debug > 0 { 1553 b.Func.Warnl(v.Pos, "Proved %v bounded", v.Op) 1554 } 1555 } 1556 case OpDiv16, OpDiv32, OpDiv64, OpMod16, OpMod32, OpMod64: 1557 // On amd64 and 386 fix-up code can be avoided if we know 1558 // the divisor is not -1 or the dividend > MinIntNN. 1559 // Don't modify AuxInt on other architectures, 1560 // as that can interfere with CSE. 1561 // TODO: add other architectures? 1562 if b.Func.Config.arch != "386" && b.Func.Config.arch != "amd64" { 1563 break 1564 } 1565 divr := v.Args[1] 1566 divrLim, divrLimok := ft.limits[divr.ID] 1567 divd := v.Args[0] 1568 divdLim, divdLimok := ft.limits[divd.ID] 1569 if (divrLimok && (divrLim.max < -1 || divrLim.min > -1)) || 1570 (divdLimok && divdLim.min > mostNegativeDividend[v.Op]) { 1571 // See DivisionNeedsFixUp in rewrite.go. 1572 // v.AuxInt = 1 means we have proved both that the divisor is not -1 1573 // and that the dividend is not the most negative integer, 1574 // so we do not need to add fix-up code. 1575 v.AuxInt = 1 1576 if b.Func.pass.debug > 0 { 1577 b.Func.Warnl(v.Pos, "Proved %v does not need fix-up", v.Op) 1578 } 1579 } 1580 } 1581 // Fold provable constant results. 1582 // Helps in cases where we reuse a value after branching on its equality. 1583 for i, arg := range v.Args { 1584 switch arg.Op { 1585 case OpConst64, OpConst32, OpConst16, OpConst8: 1586 continue 1587 } 1588 lim, ok := ft.limits[arg.ID] 1589 if !ok { 1590 continue 1591 } 1592 1593 var constValue int64 1594 typ := arg.Type 1595 bits := 8 * typ.Size() 1596 switch { 1597 case lim.min == lim.max: 1598 constValue = lim.min 1599 case lim.umin == lim.umax: 1600 // truncate then sign extand 1601 switch bits { 1602 case 64: 1603 constValue = int64(lim.umin) 1604 case 32: 1605 constValue = int64(int32(lim.umin)) 1606 case 16: 1607 constValue = int64(int16(lim.umin)) 1608 case 8: 1609 constValue = int64(int8(lim.umin)) 1610 default: 1611 panic("unexpected integer size") 1612 } 1613 default: 1614 continue 1615 } 1616 var c *Value 1617 f := b.Func 1618 switch bits { 1619 case 64: 1620 c = f.ConstInt64(typ, constValue) 1621 case 32: 1622 c = f.ConstInt32(typ, int32(constValue)) 1623 case 16: 1624 c = f.ConstInt16(typ, int16(constValue)) 1625 case 8: 1626 c = f.ConstInt8(typ, int8(constValue)) 1627 default: 1628 panic("unexpected integer size") 1629 } 1630 v.SetArg(i, c) 1631 if b.Func.pass.debug > 1 { 1632 b.Func.Warnl(v.Pos, "Proved %v's arg %d (%v) is constant %d", v, i, arg, constValue) 1633 } 1634 } 1635 } 1636 1637 if b.Kind != BlockIf { 1638 return 1639 } 1640 1641 // Consider outgoing edges from this block. 1642 parent := b 1643 for i, branch := range [...]branch{positive, negative} { 1644 child := parent.Succs[i].b 1645 if getBranch(sdom, parent, child) != unknown { 1646 // For edges to uniquely dominated blocks, we 1647 // already did this when we visited the child. 1648 continue 1649 } 1650 // For edges to other blocks, this can trim a branch 1651 // even if we couldn't get rid of the child itself. 1652 ft.checkpoint() 1653 addBranchRestrictions(ft, parent, branch) 1654 unsat := ft.unsat 1655 ft.restore() 1656 if unsat { 1657 // This branch is impossible, so remove it 1658 // from the block. 1659 removeBranch(parent, branch) 1660 // No point in considering the other branch. 1661 // (It *is* possible for both to be 1662 // unsatisfiable since the fact table is 1663 // incomplete. We could turn this into a 1664 // BlockExit, but it doesn't seem worth it.) 1665 break 1666 } 1667 } 1668} 1669 1670func removeBranch(b *Block, branch branch) { 1671 c := b.Controls[0] 1672 if b.Func.pass.debug > 0 { 1673 verb := "Proved" 1674 if branch == positive { 1675 verb = "Disproved" 1676 } 1677 if b.Func.pass.debug > 1 { 1678 b.Func.Warnl(b.Pos, "%s %s (%s)", verb, c.Op, c) 1679 } else { 1680 b.Func.Warnl(b.Pos, "%s %s", verb, c.Op) 1681 } 1682 } 1683 if c != nil && c.Pos.IsStmt() == src.PosIsStmt && c.Pos.SameFileAndLine(b.Pos) { 1684 // attempt to preserve statement marker. 1685 b.Pos = b.Pos.WithIsStmt() 1686 } 1687 if branch == positive || branch == negative { 1688 b.Kind = BlockFirst 1689 b.ResetControls() 1690 if branch == positive { 1691 b.swapSuccessors() 1692 } 1693 } else { 1694 // TODO: figure out how to remove an entry from a jump table 1695 } 1696} 1697 1698// isNonNegative reports whether v is known to be greater or equal to zero. 1699func isNonNegative(v *Value) bool { 1700 if !v.Type.IsInteger() { 1701 v.Fatalf("isNonNegative bad type: %v", v.Type) 1702 } 1703 // TODO: return true if !v.Type.IsSigned() 1704 // SSA isn't type-safe enough to do that now (issue 37753). 1705 // The checks below depend only on the pattern of bits. 1706 1707 switch v.Op { 1708 case OpConst64: 1709 return v.AuxInt >= 0 1710 1711 case OpConst32: 1712 return int32(v.AuxInt) >= 0 1713 1714 case OpConst16: 1715 return int16(v.AuxInt) >= 0 1716 1717 case OpConst8: 1718 return int8(v.AuxInt) >= 0 1719 1720 case OpStringLen, OpSliceLen, OpSliceCap, 1721 OpZeroExt8to64, OpZeroExt16to64, OpZeroExt32to64, 1722 OpZeroExt8to32, OpZeroExt16to32, OpZeroExt8to16, 1723 OpCtz64, OpCtz32, OpCtz16, OpCtz8, 1724 OpCtz64NonZero, OpCtz32NonZero, OpCtz16NonZero, OpCtz8NonZero, 1725 OpBitLen64, OpBitLen32, OpBitLen16, OpBitLen8: 1726 return true 1727 1728 case OpRsh64Ux64, OpRsh32Ux64: 1729 by := v.Args[1] 1730 return by.Op == OpConst64 && by.AuxInt > 0 1731 1732 case OpRsh64x64, OpRsh32x64, OpRsh8x64, OpRsh16x64, OpRsh32x32, OpRsh64x32, 1733 OpSignExt32to64, OpSignExt16to64, OpSignExt8to64, OpSignExt16to32, OpSignExt8to32: 1734 return isNonNegative(v.Args[0]) 1735 1736 case OpAnd64, OpAnd32, OpAnd16, OpAnd8: 1737 return isNonNegative(v.Args[0]) || isNonNegative(v.Args[1]) 1738 1739 case OpMod64, OpMod32, OpMod16, OpMod8, 1740 OpDiv64, OpDiv32, OpDiv16, OpDiv8, 1741 OpOr64, OpOr32, OpOr16, OpOr8, 1742 OpXor64, OpXor32, OpXor16, OpXor8: 1743 return isNonNegative(v.Args[0]) && isNonNegative(v.Args[1]) 1744 1745 // We could handle OpPhi here, but the improvements from doing 1746 // so are very minor, and it is neither simple nor cheap. 1747 } 1748 return false 1749} 1750 1751// isConstDelta returns non-nil if v is equivalent to w+delta (signed). 1752func isConstDelta(v *Value) (w *Value, delta int64) { 1753 cop := OpConst64 1754 switch v.Op { 1755 case OpAdd32, OpSub32: 1756 cop = OpConst32 1757 case OpAdd16, OpSub16: 1758 cop = OpConst16 1759 case OpAdd8, OpSub8: 1760 cop = OpConst8 1761 } 1762 switch v.Op { 1763 case OpAdd64, OpAdd32, OpAdd16, OpAdd8: 1764 if v.Args[0].Op == cop { 1765 return v.Args[1], v.Args[0].AuxInt 1766 } 1767 if v.Args[1].Op == cop { 1768 return v.Args[0], v.Args[1].AuxInt 1769 } 1770 case OpSub64, OpSub32, OpSub16, OpSub8: 1771 if v.Args[1].Op == cop { 1772 aux := v.Args[1].AuxInt 1773 if aux != -aux { // Overflow; too bad 1774 return v.Args[0], -aux 1775 } 1776 } 1777 } 1778 return nil, 0 1779} 1780 1781// isCleanExt reports whether v is the result of a value-preserving 1782// sign or zero extension. 1783func isCleanExt(v *Value) bool { 1784 switch v.Op { 1785 case OpSignExt8to16, OpSignExt8to32, OpSignExt8to64, 1786 OpSignExt16to32, OpSignExt16to64, OpSignExt32to64: 1787 // signed -> signed is the only value-preserving sign extension 1788 return v.Args[0].Type.IsSigned() && v.Type.IsSigned() 1789 1790 case OpZeroExt8to16, OpZeroExt8to32, OpZeroExt8to64, 1791 OpZeroExt16to32, OpZeroExt16to64, OpZeroExt32to64: 1792 // unsigned -> signed/unsigned are value-preserving zero extensions 1793 return !v.Args[0].Type.IsSigned() 1794 } 1795 return false 1796} 1797