Skip to content

Commit

Permalink
cmd/internal/obj/arm64: Add helpers for span7 passes
Browse files Browse the repository at this point in the history
Adds helper functions for the literal pooling, large branch handling
and code emission stages of the span7 assembler pass. This hides the
implementation of the current assembler from the general workflow in
span7 to make the implementation easier to change in future.

Updates golang#44734

Change-Id: I8859956b23ad4faebeeff6df28051b098ef90fed
Reviewed-on: https://go-review.googlesource.com/c/go/+/595755
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
  • Loading branch information
snickolls-arm authored and cherrymui committed Sep 6, 2024
1 parent 8cd550a commit 557211c
Showing 1 changed file with 115 additions and 100 deletions.
215 changes: 115 additions & 100 deletions src/cmd/internal/obj/arm64/asm7.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ package arm64
import (
"cmd/internal/obj"
"cmd/internal/objabi"
"encoding/binary"
"fmt"
"log"
"math"
Expand Down Expand Up @@ -1099,133 +1100,57 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
c := ctxt7{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset & 0xffffffff), extrasize: int32(p.To.Offset >> 32)}
p.To.Offset &= 0xffffffff // extrasize is no longer needed

bflag := 1
// Process literal pool and allocate initial program counter for each Prog, before
// generating branch veneers.
pc := int64(0)
p.Pc = pc
var m int
var o *Optab
for p = p.Link; p != nil; p = p.Link {
p.Pc = pc
o = c.oplook(p)
m = o.size(c.ctxt, p)
if m == 0 {
switch p.As {
case obj.APCALIGN, obj.APCALIGNMAX:
m = obj.AlignmentPadding(int32(pc), p, ctxt, cursym)
break
case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
continue
default:
c.ctxt.Diag("zero-width instruction\n%v", p)
}
}
pc += int64(m)

if o.flag&LFROM != 0 {
c.addpool(p, &p.From)
}
if o.flag&LTO != 0 {
c.addpool(p, &p.To)
}
if c.blitrl != nil {
c.checkpool(p)
}
c.addLiteralsToPool(p)
pc += int64(c.asmsizeBytes(p))
}

c.cursym.Size = pc

/*
* if any procedure is large enough to
* generate a large SBRA branch, then
* generate extra passes putting branches
* around jmps to fix. this is rare.
*/
for bflag != 0 {
bflag = 0
changed := true
for changed {
changed = false
pc = 0
for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
p.Pc = pc
o = c.oplook(p)

/* very large branches */
if (o.flag&BRANCH14BITS != 0 || o.flag&BRANCH19BITS != 0) && p.To.Target() != nil {
otxt := p.To.Target().Pc - pc
var toofar bool
if o.flag&BRANCH14BITS != 0 { // branch instruction encodes 14 bits
toofar = otxt <= -(1<<15)+10 || otxt >= (1<<15)-10
} else if o.flag&BRANCH19BITS != 0 { // branch instruction encodes 19 bits
toofar = otxt <= -(1<<20)+10 || otxt >= (1<<20)-10
}
if toofar {
q := c.newprog()
q.Link = p.Link
p.Link = q
q.As = AB
q.To.Type = obj.TYPE_BRANCH
q.To.SetTarget(p.To.Target())
p.To.SetTarget(q)
q = c.newprog()
q.Link = p.Link
p.Link = q
q.As = AB
q.To.Type = obj.TYPE_BRANCH
q.To.SetTarget(q.Link.Link)
bflag = 1
}
}
m = o.size(c.ctxt, p)

if m == 0 {
switch p.As {
case obj.APCALIGN, obj.APCALIGNMAX:
m = obj.AlignmentPaddingLength(int32(pc), p, ctxt)
break
case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
continue
default:
c.ctxt.Diag("zero-width instruction\n%v", p)
}
}

pc += int64(m)
changed = changed || c.fixUpLongBranch(p)
pc += int64(c.asmsizeBytes(p))
}
}

pc += -pc & (funcAlign - 1)
c.cursym.Size = pc

/*
* lay out the code, emitting code and data relocations.
*/
c.cursym.Grow(c.cursym.Size)
bp := c.cursym.P
psz := int32(0)
var i int
var out [6]uint32
buf := codeBuffer{&c.cursym.P}

for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
c.pc = p.Pc
o = c.oplook(p)
sz := o.size(c.ctxt, p)
if sz > 4*len(out) {
log.Fatalf("out array in span7 is too small, need at least %d for %v", sz/4, p)
}
if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
switch p.As {
case obj.APCALIGN, obj.APCALIGNMAX:
v := obj.AlignmentPaddingLength(int32(p.Pc), p, c.ctxt)
for i = 0; i < int(v/4); i++ {
for i := 0; i < int(v/4); i++ {
// emit ANOOP instruction by the padding size
c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_NOOP)
bp = bp[4:]
psz += 4
}
} else {
c.asmout(p, o, out[:])
for i = 0; i < sz/4; i++ {
c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
bp = bp[4:]
psz += 4
buf.emit(OP_NOOP)
}
case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
continue
default:
var out [6]uint32
count := c.asmout(p, out[:])
buf.emit(out[:count]...)
}
}
buf.finish()
c.cursym.Size = int64(len(c.cursym.P))

// Mark nonpreemptible instruction sequences.
// We use REGTMP as a scratch register during call injection,
Expand All @@ -1244,6 +1169,92 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
}
}

type codeBuffer struct {
data *[]byte
}

func (cb *codeBuffer) pc() int64 {
return int64(len(*cb.data))
}

// Write a sequence of opcodes into the code buffer.
func (cb *codeBuffer) emit(op ...uint32) {
for _, o := range op {
*cb.data = binary.LittleEndian.AppendUint32(*cb.data, o)
}
}

// Completes the code buffer for the function by padding the buffer to function alignment
// with zero values.
func (cb *codeBuffer) finish() {
for len(*cb.data)%funcAlign > 0 {
*cb.data = append(*cb.data, 0)
}
}

// Return the size of the assembled Prog, in bytes.
func (c *ctxt7) asmsizeBytes(p *obj.Prog) int {
switch p.As {
case obj.APCALIGN, obj.APCALIGNMAX:
return obj.AlignmentPadding(int32(p.Pc), p, c.ctxt, c.cursym)
case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
return 0
default:
o := c.oplook(p)
return o.size(c.ctxt, p)
}
}

// Modify the Prog list if the Prog is a branch with a large offset that cannot be
// encoded in the instruction. Return true if a modification was made, false if not.
func (c *ctxt7) fixUpLongBranch(p *obj.Prog) bool {
var toofar bool

o := c.oplook(p)

/* very large branches */
if (o.flag&BRANCH14BITS != 0 || o.flag&BRANCH19BITS != 0) && p.To.Target() != nil {
otxt := p.To.Target().Pc - p.Pc
if o.flag&BRANCH14BITS != 0 { // branch instruction encodes 14 bits
toofar = otxt <= -(1<<15)+10 || otxt >= (1<<15)-10
} else if o.flag&BRANCH19BITS != 0 { // branch instruction encodes 19 bits
toofar = otxt <= -(1<<20)+10 || otxt >= (1<<20)-10
}
if toofar {
q := c.newprog()
q.Link = p.Link
p.Link = q
q.As = AB
q.To.Type = obj.TYPE_BRANCH
q.To.SetTarget(p.To.Target())
p.To.SetTarget(q)
q = c.newprog()
q.Link = p.Link
p.Link = q
q.As = AB
q.To.Type = obj.TYPE_BRANCH
q.To.SetTarget(q.Link.Link)
}
}

return toofar
}

// Adds literal values from the Prog into the literal pool if necessary.
func (c *ctxt7) addLiteralsToPool(p *obj.Prog) {
o := c.oplook(p)

if o.flag&LFROM != 0 {
c.addpool(p, &p.From)
}
if o.flag&LTO != 0 {
c.addpool(p, &p.To)
}
if c.blitrl != nil {
c.checkpool(p)
}
}

// isUnsafePoint returns whether p is an unsafe point.
func (c *ctxt7) isUnsafePoint(p *obj.Prog) bool {
// If p explicitly uses REGTMP, it's unsafe to preempt, because the
Expand Down Expand Up @@ -3456,7 +3467,9 @@ func (c *ctxt7) checkShiftAmount(p *obj.Prog, a *obj.Addr) {
}
}

func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
func (c *ctxt7) asmout(p *obj.Prog, out []uint32) (count int) {
o := c.oplook(p)

var os [5]uint32
o1 := uint32(0)
o2 := uint32(0)
Expand Down Expand Up @@ -5896,6 +5909,8 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
out[2] = o3
out[3] = o4
out[4] = o5

return int(o.size(c.ctxt, p) / 4)
}

func (c *ctxt7) addrRelocType(p *obj.Prog) objabi.RelocType {
Expand Down

0 comments on commit 557211c

Please sign in to comment.