diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go
index 33c0b196110e36..dc46de9becae40 100644
--- a/src/cmd/internal/obj/arm64/asm7.go
+++ b/src/cmd/internal/obj/arm64/asm7.go
@@ -33,6 +33,7 @@ package arm64
 import (
 	"cmd/internal/obj"
 	"cmd/internal/objabi"
+	"encoding/binary"
 	"fmt"
 	"log"
 	"math"
@@ -1099,133 +1100,57 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
 	p.To.Offset &= 0xffffffff // extrasize is no longer needed
 
-	bflag := 1
+	// Process literal pool and allocate initial program counter for each Prog, before
+	// generating branch veneers.
 	pc := int64(0)
 	p.Pc = pc
-	var m int
-	var o *Optab
 	for p = p.Link; p != nil; p = p.Link {
 		p.Pc = pc
-		o = c.oplook(p)
-		m = o.size(c.ctxt, p)
-		if m == 0 {
-			switch p.As {
-			case obj.APCALIGN, obj.APCALIGNMAX:
-				m = obj.AlignmentPadding(int32(pc), p, ctxt, cursym)
-				break
-			case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
-				continue
-			default:
-				c.ctxt.Diag("zero-width instruction\n%v", p)
-			}
-		}
-		pc += int64(m)
-
-		if o.flag&LFROM != 0 {
-			c.addpool(p, &p.From)
-		}
-		if o.flag&LTO != 0 {
-			c.addpool(p, &p.To)
-		}
-		if c.blitrl != nil {
-			c.checkpool(p)
-		}
+		c.addLiteralsToPool(p)
+		pc += int64(c.asmsizeBytes(p))
 	}
 
-	c.cursym.Size = pc
-
 	/*
 	 * if any procedure is large enough to
 	 * generate a large SBRA branch, then
 	 * generate extra passes putting branches
 	 * around jmps to fix. this is rare.
 	 */
-	for bflag != 0 {
-		bflag = 0
+	changed := true
+	for changed {
+		changed = false
 		pc = 0
 		for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
 			p.Pc = pc
-			o = c.oplook(p)
-
-			/* very large branches */
-			if (o.flag&BRANCH14BITS != 0 || o.flag&BRANCH19BITS != 0) && p.To.Target() != nil {
-				otxt := p.To.Target().Pc - pc
-				var toofar bool
-				if o.flag&BRANCH14BITS != 0 { // branch instruction encodes 14 bits
-					toofar = otxt <= -(1<<15)+10 || otxt >= (1<<15)-10
-				} else if o.flag&BRANCH19BITS != 0 { // branch instruction encodes 19 bits
-					toofar = otxt <= -(1<<20)+10 || otxt >= (1<<20)-10
-				}
-				if toofar {
-					q := c.newprog()
-					q.Link = p.Link
-					p.Link = q
-					q.As = AB
-					q.To.Type = obj.TYPE_BRANCH
-					q.To.SetTarget(p.To.Target())
-					p.To.SetTarget(q)
-					q = c.newprog()
-					q.Link = p.Link
-					p.Link = q
-					q.As = AB
-					q.To.Type = obj.TYPE_BRANCH
-					q.To.SetTarget(q.Link.Link)
-					bflag = 1
-				}
-			}
-			m = o.size(c.ctxt, p)
-
-			if m == 0 {
-				switch p.As {
-				case obj.APCALIGN, obj.APCALIGNMAX:
-					m = obj.AlignmentPaddingLength(int32(pc), p, ctxt)
-					break
-				case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
-					continue
-				default:
-					c.ctxt.Diag("zero-width instruction\n%v", p)
-				}
-			}
-
-			pc += int64(m)
+			changed = changed || c.fixUpLongBranch(p)
+			pc += int64(c.asmsizeBytes(p))
 		}
 	}
 
-	pc += -pc & (funcAlign - 1)
-	c.cursym.Size = pc
-
 	/*
 	 * lay out the code, emitting code and data relocations.
 	 */
-	c.cursym.Grow(c.cursym.Size)
-	bp := c.cursym.P
-	psz := int32(0)
-	var i int
-	var out [6]uint32
+	buf := codeBuffer{&c.cursym.P}
+
 	for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
 		c.pc = p.Pc
-		o = c.oplook(p)
-		sz := o.size(c.ctxt, p)
-		if sz > 4*len(out) {
-			log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p)
-		}
-		if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
+		switch p.As {
+		case obj.APCALIGN, obj.APCALIGNMAX:
 			v := obj.AlignmentPaddingLength(int32(p.Pc), p, c.ctxt)
-			for i = 0; i < int(v/4); i++ {
+			for i := 0; i < int(v/4); i++ {
 				// emit ANOOP instruction by the padding size
-				c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_NOOP)
-				bp = bp[4:]
-				psz += 4
-			}
-		} else {
-			c.asmout(p, o, out[:])
-			for i = 0; i < sz/4; i++ {
-				c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
-				bp = bp[4:]
-				psz += 4
+				buf.emit(OP_NOOP)
 			}
+		case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
+			continue
+		default:
+			var out [6]uint32
+			count := c.asmout(p, out[:])
+			buf.emit(out[:count]...)
 		}
 	}
+	buf.finish()
+	c.cursym.Size = int64(len(c.cursym.P))
 
 	// Mark nonpreemptible instruction sequences.
 	// We use REGTMP as a scratch register during call injection,
@@ -1244,6 +1169,92 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
 	}
 }
 
+type codeBuffer struct {
+	data *[]byte
+}
+
+func (cb *codeBuffer) pc() int64 {
+	return int64(len(*cb.data))
+}
+
+// Write a sequence of opcodes into the code buffer.
+func (cb *codeBuffer) emit(op ...uint32) {
+	for _, o := range op {
+		*cb.data = binary.LittleEndian.AppendUint32(*cb.data, o)
+	}
+}
+
+// Completes the code buffer for the function by padding the buffer to function alignment
+// with zero values.
+func (cb *codeBuffer) finish() {
+	for len(*cb.data)%funcAlign > 0 {
+		*cb.data = append(*cb.data, 0)
+	}
+}
+
+// Return the size of the assembled Prog, in bytes.
+func (c *ctxt7) asmsizeBytes(p *obj.Prog) int {
+	switch p.As {
+	case obj.APCALIGN, obj.APCALIGNMAX:
+		return obj.AlignmentPadding(int32(p.Pc), p, c.ctxt, c.cursym)
+	case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
+		return 0
+	default:
+		o := c.oplook(p)
+		return o.size(c.ctxt, p)
+	}
+}
+
+// Modify the Prog list if the Prog is a branch with a large offset that cannot be
+// encoded in the instruction. Return true if a modification was made, false if not.
+func (c *ctxt7) fixUpLongBranch(p *obj.Prog) bool {
+	var toofar bool
+
+	o := c.oplook(p)
+
+	/* very large branches */
+	if (o.flag&BRANCH14BITS != 0 || o.flag&BRANCH19BITS != 0) && p.To.Target() != nil {
+		otxt := p.To.Target().Pc - p.Pc
+		if o.flag&BRANCH14BITS != 0 { // branch instruction encodes 14 bits
+			toofar = otxt <= -(1<<15)+10 || otxt >= (1<<15)-10
+		} else if o.flag&BRANCH19BITS != 0 { // branch instruction encodes 19 bits
+			toofar = otxt <= -(1<<20)+10 || otxt >= (1<<20)-10
+		}
+		if toofar {
+			q := c.newprog()
+			q.Link = p.Link
+			p.Link = q
+			q.As = AB
+			q.To.Type = obj.TYPE_BRANCH
+			q.To.SetTarget(p.To.Target())
+			p.To.SetTarget(q)
+			q = c.newprog()
+			q.Link = p.Link
+			p.Link = q
+			q.As = AB
+			q.To.Type = obj.TYPE_BRANCH
+			q.To.SetTarget(q.Link.Link)
+		}
+	}
+
+	return toofar
+}
+
+// Adds literal values from the Prog into the literal pool if necessary.
+func (c *ctxt7) addLiteralsToPool(p *obj.Prog) {
+	o := c.oplook(p)
+
+	if o.flag&LFROM != 0 {
+		c.addpool(p, &p.From)
+	}
+	if o.flag&LTO != 0 {
+		c.addpool(p, &p.To)
+	}
+	if c.blitrl != nil {
+		c.checkpool(p)
+	}
+}
+
 // isUnsafePoint returns whether p is an unsafe point.
 func (c *ctxt7) isUnsafePoint(p *obj.Prog) bool {
 	// If p explicitly uses REGTMP, it's unsafe to preempt, because the
@@ -3456,7 +3467,9 @@ func (c *ctxt7) checkShiftAmount(p *obj.Prog, a *obj.Addr) {
 	}
 }
 
-func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
+func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
+	o := c.oplook(p)
+
 	var os [5]uint32
 	o1 := uint32(0)
 	o2 := uint32(0)
@@ -5896,6 +5909,8 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 	out[2] = o3
 	out[3] = o4
 	out[4] = o5
+
+	return int(o.size(c.ctxt, p) / 4)
 }
 
 func (c *ctxt7) addrRelocType(p *obj.Prog) objabi.RelocType {
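
Note for reviewers: the core of this change is replacing the hand-rolled bp/psz byte-slice bookkeeping with the codeBuffer helper, which appends 32-bit opcodes little-endian and zero-pads to function alignment. The following is a minimal standalone sketch of that same pattern, compilable outside the Go tree; the local funcAlign constant and the two hard-coded AArch64 opcodes are illustrative assumptions (upstream asm7.go defines funcAlign = 16), not part of the diff itself.

package main

import (
	"encoding/binary"
	"fmt"
)

// funcAlign stands in for the arm64 function-alignment constant
// (16 bytes in the upstream assembler) -- an assumption for this sketch.
const funcAlign = 16

// codeBuffer mirrors the helper from the diff: it holds a pointer to a
// byte slice and appends opcodes to it in little-endian order.
type codeBuffer struct {
	data *[]byte
}

// pc reports the current offset, i.e. where the next opcode will land.
func (cb *codeBuffer) pc() int64 {
	return int64(len(*cb.data))
}

// emit appends each 32-bit opcode as 4 little-endian bytes.
func (cb *codeBuffer) emit(op ...uint32) {
	for _, o := range op {
		*cb.data = binary.LittleEndian.AppendUint32(*cb.data, o)
	}
}

// finish zero-pads the buffer out to function alignment.
func (cb *codeBuffer) finish() {
	for len(*cb.data)%funcAlign > 0 {
		*cb.data = append(*cb.data, 0)
	}
}

func main() {
	var code []byte
	buf := codeBuffer{&code}
	buf.emit(0xd503201f) // AArch64 NOP encoding
	buf.emit(0xd65f03c0) // AArch64 RET encoding
	buf.finish()
	// Two instructions (8 bytes) padded to 16; prints "16 0".
	fmt.Println(buf.pc(), len(code)%funcAlign)
}

One design point worth noting: because codeBuffer grows the slice itself via binary.LittleEndian.AppendUint32, span7 no longer needs to precompute cursym.Size before layout or call cursym.Grow; the size is simply len(c.cursym.P) once buf.finish() has padded the buffer.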