0%

计算一个数的二进制表示中1的个数算法比较

计算一个数的二进制表示中1的个数算法比较

计算一个数的二进制中1的个数, 这是一个经典的面试题, 网上有各种算法. 自己面试也被问过, 没看过的只会想到循环的方法求解. 但是这个效率就不高了.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
// 笨办法
func findBitCount1(x uint64) int{
var res int
for x > 0 {
if x & (x-1) == (x-1) {
res++
}
x = x >> 1
}
return res
}

func findBitCount2(x uint64) int{
x = x & 0x5555555555555555 + x >> 1 & 0x5555555555555555
x = x & 0x3333333333333333 + x >> 2 & 0x3333333333333333
x = x & 0x0f0f0f0f0f0f0f0f + x >> 4 & 0x0f0f0f0f0f0f0f0f
x = x & 0x00ff00ff00ff00ff + x >> 8 & 0x00ff00ff00ff00ff
x = x & 0x0000ffff0000ffff + x >> 16 & 0x0000ffff0000ffff
x = x & 0x00000000ffffffff + x >> 32 & 0x00000000ffffffff
return int(x)
}

func findBitCount3(x uint64) int{
x = x & 0x5555555555555555 + x >> 1 & 0x5555555555555555
x = x & 0x3333333333333333 + x >> 2 & 0x3333333333333333
x = x & 0x0f0f0f0f0f0f0f0f + x >> 4 & 0x0f0f0f0f0f0f0f0f
x = x >> 8
x = x >> 16
x = x >> 32
// & (1<<7 - 1) 确保1的个数不会超过64个
return int(x) & (1<<7 - 1)
}

测试代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
func BenchmarkFindBitCount(b *testing.B) {
cases := []struct{
Num uint64
Ept int
}{
{Num: uint64(0), Ept: 0},
{Num:uint64(1<<64-1), Ept:64},
{Num:uint64(64), Ept:1},
{Num:uint64(1<<32), Ept:1},
{Num:uint64(167381424443), Ept:23},
}
for _, c := range cases {
b.Run(fmt.Sprintf("Func1 with Num:%v", c.Num), func(b *testing.B){
for i := 0; i <b.N; i++ {
findBitCount1(c.Num)
}
})
b.Run(fmt.Sprintf("Func2 with Num:%v", c.Num), func(b *testing.B){
for i := 0; i <b.N; i++ {
findBitCount2(c.Num)
}
})
b.Run(fmt.Sprintf("Func3 with Num:%v", c.Num), func(b *testing.B){
for i := 0; i <b.N; i++ {
findBitCount3(c.Num)
}
})
b.Run(fmt.Sprintf("OnesCount64 with Num:%v", c.Num), func(b *testing.B){
for i := 0; i <b.N; i++ {
bits.OnesCount64(c.Num)
}
})
}
}

测试结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
goos: linux
goarch: amd64
pkg: test
BenchmarkFindBitCount/Func1_with_Num:0-8 397047744 3.07 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func2_with_Num:0-8 1000000000 0.634 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func3_with_Num:0-8 1000000000 0.587 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/OnesCount64_with_Num:0-8 1000000000 0.634 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func1_with_Num:18446744073709551615-8 13153808 91.1 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func2_with_Num:18446744073709551615-8 1000000000 0.588 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func3_with_Num:18446744073709551615-8 1000000000 0.581 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/OnesCount64_with_Num:18446744073709551615-8 1000000000 0.570 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func1_with_Num:64-8 86061858 14.1 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func2_with_Num:64-8 1000000000 0.587 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func3_with_Num:64-8 1000000000 0.585 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/OnesCount64_with_Num:64-8 1000000000 0.567 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func1_with_Num:4294967296-8 25344757 48.3 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func2_with_Num:4294967296-8 1000000000 0.575 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func3_with_Num:4294967296-8 1000000000 0.610 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/OnesCount64_with_Num:4294967296-8 1000000000 0.569 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func1_with_Num:167381424443-8 16672856 71.8 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func2_with_Num:167381424443-8 1000000000 0.598 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/Func3_with_Num:167381424443-8 1000000000 0.584 ns/op 0 B/op 0 allocs/op
BenchmarkFindBitCount/OnesCount64_with_Num:167381424443-8 1000000000 0.577 ns/op 0 B/op 0 allocs/op
PASS
ok test 16.403s

可见标准库还是挺快的, 其中有些代码可以省略的, 但是没有, 注释有其原因说明:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
133  // OnesCount64 returns the number of one bits ("population count") in x.
134 func OnesCount64(x uint64) int {
135 // Implementation: Parallel summing of adjacent bits.
136 // See "Hacker's Delight", Chap. 5: Counting Bits.
137 // The following pattern shows the general approach:
138 //
139 // x = x>>1&(m0&m) + x&(m0&m)
140 // x = x>>2&(m1&m) + x&(m1&m)
141 // x = x>>4&(m2&m) + x&(m2&m)
142 // x = x>>8&(m3&m) + x&(m3&m)
143 // x = x>>16&(m4&m) + x&(m4&m)
144 // x = x>>32&(m5&m) + x&(m5&m)
145 // return int(x)
146 //
147 // Masking (& operations) can be left away when there's no
148 // danger that a field's sum will carry over into the next
149 // field: Since the result cannot be > 64, 8 bits is enough
150 // and we can ignore the masks for the shifts by 8 and up.
151 // Per "Hacker's Delight", the first line can be simplified
152 // more, but it saves at best one instruction, so we leave
153 // it alone for clarity.
154 const m = 1<<64 - 1
155 x = x>>1&(m0&m) + x&(m0&m)
156 x = x>>2&(m1&m) + x&(m1&m)
157 x = (x>>4 + x) & (m2 & m)
158 x += x >> 8
159 x += x >> 16
160 x += x >> 32
161 return int(x) & (1<<7 - 1)
162 }