blob: 97349f1b27a122c63f18653033a47fcc13db00ea [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"encoding/binary"
"fmt"
"math/bits"
"unsafe"
"github.com/apache/arrow/go/v6/arrow"
"github.com/apache/arrow/go/v6/arrow/bitutil"
)
// BitRun represents a run of bits with the same value of length Len
// with Set representing if the group of bits were 1 or 0.
type BitRun struct {
Len int64
Set bool
}
// BitRunReader is an interface that is usable by multiple callers to provide
// multiple types of bit run readers such as a reverse reader and so on.
//
// It's a convenience interface for counting contiguous set/unset bits in a bitmap.
// In places where BitBlockCounter can be used, then it would be preferred to use that
// as it would be faster than using BitRunReader.
type BitRunReader interface {
NextRun() BitRun
}
func (b BitRun) String() string {
return fmt.Sprintf("{Length: %d, set=%t}", b.Len, b.Set)
}
type bitRunReader struct {
bitmap []byte
pos int64
length int64
word uint64
curRunBitSet bool
}
// NewBitRunReader returns a reader for the given bitmap, offset and length that
// grabs runs of the same value bit at a time for easy iteration.
func NewBitRunReader(bitmap []byte, offset int64, length int64) BitRunReader {
ret := &bitRunReader{
bitmap: bitmap[offset/8:],
pos: offset % 8,
length: (offset % 8) + length,
}
if length == 0 {
return ret
}
ret.curRunBitSet = bitutil.BitIsNotSet(bitmap, int(offset))
bitsRemaining := length + ret.pos
ret.loadWord(bitsRemaining)
ret.word = ret.word &^ LeastSignificantBitMask(ret.pos)
return ret
}
// NextRun returns a new BitRun containing the number of contiguous bits with the
// same value. Len == 0 indicates the end of the bitmap.
func (b *bitRunReader) NextRun() BitRun {
if b.pos >= b.length {
return BitRun{0, false}
}
// This implementation relies on a efficient implementations of
// CountTrailingZeros and assumes that runs are more often then
// not. The logic is to incrementally find the next bit change
// from the current position. This is done by zeroing all
// bits in word_ up to position_ and using the TrailingZeroCount
// to find the index of the next set bit.
// The runs alternate on each call, so flip the bit.
b.curRunBitSet = !b.curRunBitSet
start := b.pos
startOffset := start & 63
// Invert the word for proper use of CountTrailingZeros and
// clear bits so CountTrailingZeros can do it magic.
b.word = ^b.word &^ LeastSignificantBitMask(startOffset)
// Go forward until the next change from unset to set.
newbits := int64(bits.TrailingZeros64(b.word)) - startOffset
b.pos += newbits
if IsMultipleOf64(b.pos) && b.pos < b.length {
b.advanceUntilChange()
}
return BitRun{b.pos - start, b.curRunBitSet}
}
func (b *bitRunReader) advanceUntilChange() {
newbits := int64(0)
for {
b.bitmap = b.bitmap[arrow.Uint64SizeBytes:]
b.loadNextWord()
newbits = int64(bits.TrailingZeros64(b.word))
b.pos += newbits
if !IsMultipleOf64(b.pos) || b.pos >= b.length || newbits <= 0 {
break
}
}
}
func (b *bitRunReader) loadNextWord() {
b.loadWord(b.length - b.pos)
}
func (b *bitRunReader) loadWord(bitsRemaining int64) {
b.word = 0
if bitsRemaining >= 64 {
b.word = binary.LittleEndian.Uint64(b.bitmap)
} else {
nbytes := bitutil.BytesForBits(bitsRemaining)
wordptr := (*(*[8]byte)(unsafe.Pointer(&b.word)))[:]
copy(wordptr, b.bitmap[:nbytes])
bitutil.SetBitTo(wordptr, int(bitsRemaining), bitutil.BitIsNotSet(wordptr, int(bitsRemaining-1)))
// reset the value to little endian for big endian architectures
b.word = ToLEUint64(b.word)
}
// Two cases:
// 1. For unset, CountTrailingZeros works naturally so we don't
// invert the word.
// 2. Otherwise invert so we can use CountTrailingZeros.
if b.curRunBitSet {
b.word = ^b.word
}
}