Thanks to visit codestin.com
Credit goes to github.com

Skip to content

64-bit integer arithmetic performance optimization #1082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions compiler/gopherjspkg/fs_vfsdata.go

Large diffs are not rendered by default.

290 changes: 145 additions & 145 deletions compiler/natives/fs_vfsdata.go

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions compiler/natives/src/math/big/big_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,7 @@ func TestBytes(t *testing.T) {
func TestModSqrt(t *testing.T) {
t.Skip("slow")
}

// TestLinkerGC overrides the upstream math/big test of the same name.
// Per the skip message, the upstream test is specific to the gc
// toolchain's linker and does not apply to GopherJS output.
func TestLinkerGC(t *testing.T) {
	t.Skip("The test is specific to GC's linker.")
}
83 changes: 83 additions & 0 deletions compiler/natives/src/math/bits/bits.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,86 @@ var (
overflowError error = _err("runtime error: integer overflow")
divideError error = _err("runtime error: integer divide by zero")
)

// Mul32 returns the full 64-bit product of x and y as a pair of 32-bit
// halves, so that the product equals hi<<32 + lo.
// Avoid slow 64-bit integers for better performance. Adapted from Mul64().
func Mul32(x, y uint32) (hi, lo uint32) {
	const lowHalf = 1<<16 - 1 // mask selecting the low 16 bits

	xLo, xHi := x&lowHalf, x>>16
	yLo, yHi := y&lowHalf, y>>16

	// Schoolbook multiplication over 16-bit limbs; propagate carries from
	// the cross products into the high word by hand.
	mid := xHi*yLo + (xLo*yLo)>>16
	carry := mid>>16 + (mid&lowHalf+xLo*yHi)>>16

	hi = xHi*yHi + carry
	lo = x * y // the low 32 bits wrap exactly like the true product's low half
	return hi, lo
}

// Add32 returns the 32-bit sum of x, y and an incoming carry (which the
// Add64 contract this is adapted from requires to be 0 or 1), together
// with the outgoing carry bit.
// Avoid slow 64-bit integers for better performance. Adapted from Add64().
func Add32(x, y, carry uint32) (sum, carryOut uint32) {
	sum = x + y + carry
	// A carry out occurred iff both inputs had the top bit set, or exactly
	// one did and the (wrapped) sum's top bit came out clear.
	carryOut = (x&y | (x|y)&^sum) >> 31
	return sum, carryOut
}

// Div32 returns the quotient and remainder of the 64-bit value hi<<32+lo
// divided by y: quo = (hi<<32+lo)/y, rem = (hi<<32+lo)%y.
// It panics if y == 0 (division by zero) or if y <= hi (the quotient
// would not fit in 32 bits).
func Div32(hi, lo, y uint32) (quo, rem uint32) {
	// Avoid slow 64-bit integers for better performance. Adapted from Div64().
	const (
		two16  = 1 << 16
		mask16 = two16 - 1
	)
	if y == 0 {
		panic(divideError)
	}
	if y <= hi {
		panic(overflowError)
	}

	// Normalize: shift the divisor left so its top bit is set, and shift
	// the dividend by the same amount, so the quotient-digit estimates
	// below are accurate (Knuth Algorithm D, as in the stdlib's Div64).
	s := uint(LeadingZeros32(y))
	y <<= s

	// Split the (shifted) divisor and dividend into 16-bit digits.
	yn1 := y >> 16
	yn0 := y & mask16
	// When s == 0, lo>>(32-s) is lo>>32, which is 0 for a uint32 in Go,
	// so un16 degenerates to hi as intended.
	un16 := hi<<s | lo>>(32-s)
	un10 := lo << s
	un1 := un10 >> 16
	un0 := un10 & mask16
	// Estimate the high quotient digit and its partial remainder.
	q1 := un16 / yn1
	rhat := un16 - q1*yn1

	// Correct the estimate downward (at most twice) while it is provably
	// too large; once rhat reaches two16 no further correction can apply.
	for q1 >= two16 || q1*yn0 > two16*rhat+un1 {
		q1--
		rhat += yn1
		if rhat >= two16 {
			break
		}
	}

	// Subtract off the high digit's contribution, then repeat the digit
	// estimation for the low quotient digit.
	un21 := un16*two16 + un1 - q1*y
	q0 := un21 / yn1
	rhat = un21 - q0*yn1

	for q0 >= two16 || q0*yn0 > two16*rhat+un0 {
		q0--
		rhat += yn1
		if rhat >= two16 {
			break
		}
	}

	// Combine the two 16-bit quotient digits; shift the remainder back
	// down by the normalization amount.
	return q1*two16 + q0, (un21*two16 + un0 - q0*y) >> s
}

// Rem32 returns the remainder of the 64-bit value hi<<32+lo divided by y.
// Unlike Div32 it cannot panic on quotient overflow; it still panics
// (inside Div32) when y == 0.
func Rem32(hi, lo, y uint32) uint32 {
	// We scale down hi so that hi < y, then use Div32 to compute the
	// rem with the guarantee that it won't panic on quotient overflow.
	// Given that
	//   hi ≡ hi%y (mod y)
	// we have
	//   hi<<32 + lo ≡ (hi%y)<<32 + lo (mod y)
	_, rem := Div32(hi%y, lo, y)
	return rem
}
54 changes: 37 additions & 17 deletions compiler/prelude/numeric.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,23 +80,43 @@ var $shiftRightUint64 = function(x, y) {
};

// $mul64 multiplies two GopherJS-boxed 64-bit integers (objects carrying
// 32-bit $high and $low halves) and returns the product as a new value of
// x's constructor type.
//
// NOTE(review): this span is a rendered diff hunk with the +/- markers
// stripped, so it contains TWO function bodies back to back. Per the file
// header ("54 changes: 37 additions & 17 deletions"), the first 17 lines
// below are the implementation removed by the PR and the following 37 lines
// are its replacement — confirm against the original diff. As concatenated
// here it is not a single valid function.
var $mul64 = function(x, y) {
	// --- removed implementation: bit-by-bit shift-and-add over y ---
	var high = 0, low = 0;
	if ((y.$low & 1) !== 0) {
		high = x.$high;
		low = x.$low;
	}
	for (var i = 1; i < 32; i++) {
		if ((y.$low & 1<<i) !== 0) {
			high += x.$high << i | x.$low >>> (32 - i);
			low += (x.$low << i) >>> 0;
		}
	}
	for (var i = 0; i < 32; i++) {
		if ((y.$high & 1<<i) !== 0) {
			high += x.$low << i;
		}
	}
	return new x.constructor(high, low);
	// --- added implementation: schoolbook multiplication over 16-bit
	// limbs with manual carry propagation, avoiding per-bit loops ---
	var x48 = x.$high >>> 16;
	var x32 = x.$high & 0xFFFF;
	var x16 = x.$low >>> 16;
	var x00 = x.$low & 0xFFFF;

	var y48 = y.$high >>> 16;
	var y32 = y.$high & 0xFFFF;
	var y16 = y.$low >>> 16;
	var y00 = y.$low & 0xFFFF;

	// z48..z00 accumulate the partial products, one 16-bit limb each.
	var z48 = 0, z32 = 0, z16 = 0, z00 = 0;
	z00 += x00 * y00;
	z16 += z00 >>> 16;
	z00 &= 0xFFFF;
	z16 += x16 * y00;
	z32 += z16 >>> 16;
	z16 &= 0xFFFF;
	z16 += x00 * y16;
	z32 += z16 >>> 16;
	z16 &= 0xFFFF;
	z32 += x32 * y00;
	z48 += z32 >>> 16;
	z32 &= 0xFFFF;
	z32 += x16 * y16;
	z48 += z32 >>> 16;
	z32 &= 0xFFFF;
	z32 += x00 * y32;
	z48 += z32 >>> 16;
	z32 &= 0xFFFF;
	// The topmost limb absorbs every cross product that lands at bit 48;
	// overflow past bit 63 is discarded, matching wrap-around semantics.
	z48 += x48 * y00 + x32 * y16 + x16 * y32 + x00 * y48;
	z48 &= 0xFFFF;

	// >>> 0 coerces the reassembled words to unsigned 32-bit values.
	var hi = ((z48 << 16) | z32) >>> 0;
	var lo = ((z16 << 16) | z00) >>> 0;

	var r = new x.constructor(hi, lo);
	return r;
};

var $div64 = function(x, y, returnRemainder) {
Expand Down
2 changes: 1 addition & 1 deletion compiler/prelude/prelude_min.go

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions tests/js_test.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//go:build js
// +build js
//go:build js && !wasm
// +build js,!wasm

package tests_test

Expand Down
95 changes: 95 additions & 0 deletions tests/numeric_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package tests

import (
"math/rand"
"runtime"
"testing"
"testing/quick"
)

// naiveMul64 computes the 64-bit product of x and y without ever using the
// multiplication operator, serving as an independent reference to validate
// the compiler's multiplication implementation.
func naiveMul64(x, y uint64) uint64 {
	var product uint64
	for bit := 0; bit < 64; bit++ {
		// Long multiplication in base 2: add x<<bit for every set bit of y.
		if (y>>bit)&1 != 0 {
			product += x << bit
		}
	}
	return product
}

// TestMul64 cross-checks the compiler's 64-bit multiplication against the
// multiplication-free naiveMul64 reference on randomized inputs, covering
// both unsigned and signed operands.
func TestMul64(t *testing.T) {
	cfg := &quick.Config{
		Rand:          rand.New(rand.NewSource(0x5EED)), // Fixed seed for reproducibility.
		MaxCountScale: 10000,
	}
	if testing.Short() {
		cfg.MaxCountScale = 1000
	}

	t.Run("unsigned", func(t *testing.T) {
		compiled := func(x, y uint64) uint64 { return x * y }
		if err := quick.CheckEqual(compiled, naiveMul64, cfg); err != nil {
			t.Error(err)
		}
	})
	t.Run("signed", func(t *testing.T) {
		// GopherJS represents 64-bit signed integers in two's-complement form,
		// so bitwise multiplication is identical for signed and unsigned values;
		// naiveMul64() therefore works as the reference for both, given the
		// appropriate type conversions.
		compiled := func(x, y int64) int64 { return x * y }
		reference := func(x, y int64) int64 { return int64(naiveMul64(uint64(x), uint64(y))) }
		if err := quick.CheckEqual(compiled, reference, cfg); err != nil {
			t.Error(err)
		}
	})
}

// BenchmarkMul64 measures 64-bit multiplication throughput for unsigned and
// signed operands, alongside a "noop" baseline that captures the pure cost
// of the array loads.
func BenchmarkMul64(b *testing.B) {
	// Prepare a randomized set of multipliers to make sure the benchmark doesn't
	// get too specific for a single value. The trade-off is that the cost of
	// loading from an array gets mixed into the result, but it is good enough for
	// relative comparisons.
	const size = 1024
	rng := rand.New(rand.NewSource(0x5EED))
	var (
		uxs, uys [size]uint64
		sxs, sys [size]int64
	)
	for i := range uxs {
		uxs[i] = rng.Uint64()
		uys[i] = rng.Uint64()
		// Int63 yields 63 random bits; the Int63n(2)<<63 term randomizes
		// the sign bit to cover negative operands too.
		sxs[i] = rng.Int63() | (rng.Int63n(2) << 63)
		sys[i] = rng.Int63() | (rng.Int63n(2) << 63)
	}

	b.Run("noop", func(b *testing.B) {
		// Baseline: gauges the cost of the array load operations without any
		// multiplications.
		for i := 0; i < b.N; i++ {
			runtime.KeepAlive(uys[i%size])
			runtime.KeepAlive(uxs[i%size])
		}
	})
	b.Run("unsigned", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			product := uxs[i%size] * uys[i%size]
			runtime.KeepAlive(product)
		}
	})
	b.Run("signed", func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			product := sxs[i%size] * sys[i%size]
			runtime.KeepAlive(product)
		}
	})
}