0%

取余操作%的优化

取余操作的优化

a%b 这个取余操作据说会比较慢, 如果这个b刚好是2的n次方, 那么有一种优化方式是使用 a & (2^n-1)
也就是说 a & (b -1) 因为 a = x * b + y(余数), 那么 ab-1(低位每位都是1) 取& 就是获得了 y的值.

具体这个方式能快多少? 测了就知道:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
// main.go
func doPercent(n int, divisor int) int{
return n % divisor
}

func doAnd(n int, divisor int) int {
return n & (divisor -1)
}

// main_test.go
func TestMod(t *testing.T) {
testCase := []struct{
N int
Div int
Expect int
}{
{
N: 4,
Div: 2,
Expect: 0,
},
{
N: 15,
Div: 8,
Expect: 7,
},
{
N: 5,
Div: 8,
Expect: 5,
},
{
N: 5,
Div: 1,
Expect: 0,
},
{
N: 0,
Div: 2,
Expect: 0,
},
{
N: 1<<31,
Div: 1<<30,
Expect: 0,
},

}
for _, c := range testCase {
t.Run(fmt.Sprintf("n: %v, div: %v, expect: %v", c.N, c.Div,c.Expect), func(t *testing.T) {
assert.Equal(t, c.Expect, doPercent(c.N,c.Div))
assert.Equal(t, c.Expect, doAnd(c.N,c.Div))
})

}
}

func BenchmarkMod(b *testing.B) {
testCase := []struct{
N int
Div int
Expect int
}{
{
N: 4,
Div: 2,
Expect: 0,
},
{
N: 15,
Div: 8,
Expect: 7,
},
{
N: 1<<13+ 1<< 10 + 12345,
Div: 8,
Expect: 7,
},
{
N: 1<<30 + 13,
Div: 1<<16,
Expect: 0,
},

}
for _, c := range testCase {
b.Run(fmt.Sprintf("test do percent with n: %v, div: %v",c.N,c.Div),func (b *testing.B) {
for i := 0; i < b.N; i++ {
doPercent(c.N,c.Div)
}
})
b.Run(fmt.Sprintf("test do and with n: %v, div: %v",c.N,c.Div),func (b *testing.B) {
for i := 0; i < b.N; i++ {
doAnd(c.N,c.Div)
}
})
}

}

benchmark结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
goos: linux
goarch: amd64
pkg: test
BenchmarkMod/test_do_percent_with_n:_4,_div:_2-8 984819216 1.18 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_and_with_n:_4,_div:_2-8 1000000000 0.592 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_percent_with_n:_15,_div:_8-8 876655876 1.21 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_and_with_n:_15,_div:_8-8 1000000000 0.665 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_percent_with_n:_21561,_div:_8-8 1000000000 1.21 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_and_with_n:_21561,_div:_8-8 1000000000 0.574 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_percent_with_n:_1073741837,_div:_65536-8 1000000000 1.21 ns/op 0 B/op 0 allocs/op
BenchmarkMod/test_do_and_with_n:_1073741837,_div:_65536-8 1000000000 0.620 ns/op 0 B/op 0 allocs/op
PASS
ok test 7.878s

可见效率吊打使用%啊! 性能优化真的是与算法关系+系统体系结构关系太大了.