Skip to content

Commit

Permalink
cmd/compile: wire up bits.Reverse intrinsics for loong64
Browse files Browse the repository at this point in the history
Micro-benchmark results on Loongson 3A5000 and 3A6000:

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A6000 @ 2500.00MHz
          |  CL 624576   |               this CL                |
          |    sec/op    |    sec/op     vs base                |
Reverse     2.8130n ± 0%   0.8008n ± 0%  -71.53% (p=0.000 n=20)
Reverse8    0.7014n ± 0%   0.4040n ± 0%  -42.40% (p=0.000 n=20)
Reverse16   1.2975n ± 0%   0.6632n ± 1%  -48.89% (p=0.000 n=20)
Reverse32   2.7520n ± 0%   0.4042n ± 0%  -85.31% (p=0.000 n=20)
Reverse64   2.8970n ± 0%   0.4041n ± 0%  -86.05% (p=0.000 n=20)
geomean      1.828n        0.5116n       -72.01%

goos: linux
goarch: loong64
pkg: math/bits
cpu: Loongson-3A5000 @ 2500.00MHz
          |  CL 624576   |               this CL                |
          |    sec/op    |    sec/op     vs base                |
Reverse     4.0050n ± 0%   0.8011n ± 0%  -80.00% (p=0.000 n=20)
Reverse8    0.8010n ± 0%   0.5210n ± 1%  -34.96% (p=0.000 n=20)
Reverse16   1.6160n ± 0%   0.6008n ± 0%  -62.82% (p=0.000 n=20)
Reverse32   3.8550n ± 0%   0.5179n ± 0%  -86.57% (p=0.000 n=20)
Reverse64   3.8050n ± 0%   0.5177n ± 0%  -86.40% (p=0.000 n=20)
geomean      2.378n        0.5828n       -75.49%

Updates #59120

This patch is a copy of CL 483656.
Co-authored-by: WANG Xuerui <git@xen0n.name>

Change-Id: I98681091763279279c8404bd0295785f13ea1c8e
Reviewed-on: https://go-review.googlesource.com/c/go/+/624276
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
  • Loading branch information
sophie-zhao authored and abner-chenc committed Nov 11, 2024
1 parent 4b89120 commit 583d750
Show file tree
Hide file tree
Showing 8 changed files with 118 additions and 5 deletions.
3 changes: 3 additions & 0 deletions src/cmd/compile/internal/loong64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpLOONG64REVB2H,
ssa.OpLOONG64REVB2W,
ssa.OpLOONG64REVBV,
ssa.OpLOONG64BITREV4B,
ssa.OpLOONG64BITREVW,
ssa.OpLOONG64BITREVV,
ssa.OpLOONG64ABSD:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/LOONG64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@
(BitLen64 <t> x) => (NEGV <t> (SUBVconst <t> [64] (CLZV <t> x)))
(BitLen32 <t> x) => (NEGV <t> (SUBVconst <t> [32] (CLZW <t> x)))
(Bswap(16|32|64) ...) => (REVB(2H|2W|V) ...)
(BitRev8 ...) => (BITREV4B ...)
(BitRev16 <t> x) => (REVB2H (BITREV4B <t> x))
(BitRev32 ...) => (BITREVW ...)
(BitRev64 ...) => (BITREVV ...)

// math package intrinsics
(Sqrt ...) => (SQRTD ...)
Expand Down
4 changes: 4 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ func init() {
{name: "REVB2W", argLength: 1, reg: gp11, asm: "REVB2W"}, // Swap bytes: 0x1122334455667788 -> 0x4433221188776655
{name: "REVBV", argLength: 1, reg: gp11, asm: "REVBV"}, // Swap bytes: 0x1122334455667788 -> 0x8877665544332211

{name: "BITREV4B", argLength: 1, reg: gp11, asm: "BITREV4B"}, // Reverse the bits of each byte inside a 32-bit arg[0]
{name: "BITREVW", argLength: 1, reg: gp11, asm: "BITREVW"}, // Reverse the bits in a 32-bit arg[0]
{name: "BITREVV", argLength: 1, reg: gp11, asm: "BITREVV"}, // Reverse the bits in a 64-bit arg[0]

{name: "FMINF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMINF", commutative: true, typ: "Float32"}, // min(arg0, arg1), float32
{name: "FMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1), float64
{name: "FMAXF", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXF", commutative: true, typ: "Float32"}, // max(arg0, arg1), float32
Expand Down
42 changes: 42 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 26 additions & 0 deletions src/cmd/compile/internal/ssa/rewriteLOONG64.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 5 additions & 5 deletions src/cmd/compile/internal/ssagen/intrinsics.go
Original file line number Diff line number Diff line change
Expand Up @@ -946,27 +946,27 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
},
sys.ARM64)
sys.ARM64, sys.Loong64)
addF("math/bits", "Reverse32",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0])
},
sys.ARM64)
sys.ARM64, sys.Loong64)
addF("math/bits", "Reverse16",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0])
},
sys.ARM64)
sys.ARM64, sys.Loong64)
addF("math/bits", "Reverse8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0])
},
sys.ARM64)
sys.ARM64, sys.Loong64)
addF("math/bits", "Reverse",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
},
sys.ARM64)
sys.ARM64, sys.Loong64)
addF("math/bits", "RotateLeft8",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1])
Expand Down
5 changes: 5 additions & 0 deletions src/cmd/compile/internal/ssagen/intrinsics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,11 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"loong64", "math/bits", "Len16"}: struct{}{},
{"loong64", "math/bits", "Len32"}: struct{}{},
{"loong64", "math/bits", "Len64"}: struct{}{},
{"loong64", "math/bits", "Reverse"}: struct{}{},
{"loong64", "math/bits", "Reverse8"}: struct{}{},
{"loong64", "math/bits", "Reverse16"}: struct{}{},
{"loong64", "math/bits", "Reverse32"}: struct{}{},
{"loong64", "math/bits", "Reverse64"}: struct{}{},
{"loong64", "math/bits", "RotateLeft"}: struct{}{},
{"loong64", "math/bits", "RotateLeft32"}: struct{}{},
{"loong64", "math/bits", "RotateLeft64"}: struct{}{},
Expand Down
29 changes: 29 additions & 0 deletions test/codegen/mathbits.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,35 @@ func OnesCount8(n uint8) int {
return bits.OnesCount8(n)
}

// ------------------ //
// bits.Reverse //
// ------------------ //

// Reverse wraps bits.Reverse on a uint so the code generated for the call
// can be inspected. The directive in the body names the loong64 instruction
// (BITREVV) expected for it.
func Reverse(n uint) uint {
// loong64:"BITREVV"
return bits.Reverse(n)
}

// Reverse64 wraps bits.Reverse64 so the code generated for the call can be
// inspected. The directive in the body names the loong64 instruction
// (BITREVV) expected for it.
func Reverse64(n uint64) uint64 {
// loong64:"BITREVV"
return bits.Reverse64(n)
}

// Reverse32 wraps bits.Reverse32 so the code generated for the call can be
// inspected. The directive in the body names the loong64 instruction
// (BITREVW) expected for it.
func Reverse32(n uint32) uint32 {
// loong64:"BITREVW"
return bits.Reverse32(n)
}

// Reverse16 wraps bits.Reverse16 so the code generated for the call can be
// inspected. Two loong64 instructions are expected, BITREV4B and REVB2H,
// matching the BitRev16 lowering to (REVB2H (BITREV4B x)).
func Reverse16(n uint16) uint16 {
// loong64:"BITREV4B","REVB2H"
return bits.Reverse16(n)
}

// Reverse8 wraps bits.Reverse8 so the code generated for the call can be
// inspected. The directive in the body names the loong64 instruction
// (BITREV4B) expected for it.
func Reverse8(n uint8) uint8 {
// loong64:"BITREV4B"
return bits.Reverse8(n)
}

// ----------------------- //
// bits.ReverseBytes //
// ----------------------- //
Expand Down

0 comments on commit 583d750

Please sign in to comment.