blob: 63992b861e4e53e447b851661d2b228267c1445f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"encoding/binary"
"math/bits"
"github.com/apache/arrow/go/v6/arrow/bitutil"
)
// IsMultipleOf64 returns whether v is a multiple of 64.
func IsMultipleOf64(v int64) bool { return v&63 == 0 }
// LeastSignificantBitMask returns a bit mask to return the least significant
// bits for a value starting from the bit index passed in. ie: if you want a
// mask for the 4 least significant bits, you call LeastSignificantBitMask(4)
func LeastSignificantBitMask(index int64) uint64 {
return (uint64(1) << index) - 1
}
// SetBitRun describes a run of contiguous set bits in a bitmap with Pos being
// the starting position of the run and Length being the number of bits.
type SetBitRun struct {
Pos int64
Length int64
}
// AtEnd returns true if this bit run is the end of the set by checking
// that the length is 0.
func (s SetBitRun) AtEnd() bool {
return s.Length == 0
}
// Equal returns whether rhs is the same run as s
func (s SetBitRun) Equal(rhs SetBitRun) bool {
return s.Pos == rhs.Pos && s.Length == rhs.Length
}
// SetBitRunReader is an interface for reading groups of contiguous set bits
// from a bitmap. The interface allows us to create different reader implementations
// that share the same interface easily such as a reverse set reader.
type SetBitRunReader interface {
// NextRun will return the next run of contiguous set bits in the bitmap
NextRun() SetBitRun
// Reset allows re-using the reader by providing a new bitmap, offset and length. The arguments
// match the New function for the reader being used.
Reset([]byte, int64, int64)
// VisitSetBitRuns calls visitFn for each set in a loop starting from the current position
// it's roughly equivalent to simply looping, calling NextRun and calling visitFn on the run
// for each run.
VisitSetBitRuns(visitFn VisitFn) error
}
type baseSetBitRunReader struct {
bitmap []byte
pos int64
length int64
remaining int64
curWord uint64
curNumBits int32
reversed bool
firstBit uint64
}
// NewSetBitRunReader returns a SetBitRunReader for the bitmap starting at startOffset which will read
// numvalues bits.
func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
return newBaseSetBitRunReader(validBits, startOffset, numValues, false)
}
// NewReverseSetBitRunReader returns a SetBitRunReader like NewSetBitRunReader, except it will
// return runs starting from the end of the bitmap until it reaches startOffset rather than starting
// at startOffset and reading from there. The SetBitRuns will still operate the same, so Pos
// will still be the position of the "left-most" bit of the run or the "start" of the run. It
// just returns runs starting from the end instead of starting from the beginning.
func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
return newBaseSetBitRunReader(validBits, startOffset, numValues, true)
}
func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader {
ret := &baseSetBitRunReader{reversed: reverse}
ret.Reset(bitmap, startOffset, length)
return ret
}
func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) {
br.bitmap = bitmap
br.length = length
br.remaining = length
br.curNumBits = 0
br.curWord = 0
if !br.reversed {
br.pos = startOffset / 8
br.firstBit = 1
bitOffset := int8(startOffset % 8)
if length > 0 && bitOffset != 0 {
br.curNumBits = int32(MinInt(int(length), int(8-bitOffset)))
br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits))
}
return
}
br.pos = (startOffset + length) / 8
br.firstBit = uint64(0x8000000000000000)
endBitOffset := int8((startOffset + length) % 8)
if length > 0 && endBitOffset != 0 {
br.pos++
br.curNumBits = int32(MinInt(int(length), int(endBitOffset)))
br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits))
}
}
func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 {
if br.reversed {
return word << nbits
}
return word >> nbits
}
func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 {
if br.reversed {
return int32(bits.LeadingZeros64(word))
}
return int32(bits.TrailingZeros64(word))
}
func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 {
var word [8]byte
nbytes := bitutil.BytesForBits(numBits)
if br.reversed {
br.pos -= nbytes
copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes])
return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits)
}
copy(word[:], br.bitmap[br.pos:br.pos+nbytes])
br.pos += nbytes
return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits)
}
func (br *baseSetBitRunReader) findCurrentRun() SetBitRun {
nzeros := br.countFirstZeros(br.curWord)
if nzeros >= br.curNumBits {
br.remaining -= int64(br.curNumBits)
br.curWord = 0
br.curNumBits = 0
return SetBitRun{0, 0}
}
br.curWord = br.consumeBits(br.curWord, nzeros)
br.curNumBits -= nzeros
br.remaining -= int64(nzeros)
pos := br.position()
numOnes := br.countFirstZeros(^br.curWord)
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits -= numOnes
br.remaining -= int64(numOnes)
return SetBitRun{pos, int64(numOnes)}
}
func (br *baseSetBitRunReader) position() int64 {
if br.reversed {
return br.remaining
}
return br.length - br.remaining
}
func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun {
if br.reversed {
run.Pos -= run.Length
}
return run
}
func (br *baseSetBitRunReader) loadFull() (ret uint64) {
if br.reversed {
br.pos -= 8
}
ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8])
if !br.reversed {
br.pos += 8
}
return
}
func (br *baseSetBitRunReader) skipNextZeros() {
for br.remaining >= 64 {
br.curWord = br.loadFull()
nzeros := br.countFirstZeros(br.curWord)
if nzeros < 64 {
br.curWord = br.consumeBits(br.curWord, nzeros)
br.curNumBits = 64 - nzeros
br.remaining -= int64(nzeros)
return
}
br.remaining -= 64
}
// run of zeros continues in last bitmap word
if br.remaining > 0 {
br.curWord = br.loadPartial(0, br.remaining)
br.curNumBits = int32(br.remaining)
nzeros := int32(MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord))))
br.curWord = br.consumeBits(br.curWord, nzeros)
br.curNumBits -= nzeros
br.remaining -= int64(nzeros)
}
}
func (br *baseSetBitRunReader) countNextOnes() int64 {
var length int64
if ^br.curWord != 0 {
numOnes := br.countFirstZeros(^br.curWord)
br.remaining -= int64(numOnes)
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits -= numOnes
if br.curNumBits != 0 {
return int64(numOnes)
}
length = int64(numOnes)
} else {
br.remaining -= 64
br.curNumBits = 0
length = 64
}
for br.remaining >= 64 {
br.curWord = br.loadFull()
numOnes := br.countFirstZeros(^br.curWord)
length += int64(numOnes)
br.remaining -= int64(numOnes)
if numOnes < 64 {
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits = 64 - numOnes
return length
}
}
if br.remaining > 0 {
br.curWord = br.loadPartial(0, br.remaining)
br.curNumBits = int32(br.remaining)
numOnes := br.countFirstZeros(^br.curWord)
br.curWord = br.consumeBits(br.curWord, numOnes)
br.curNumBits -= numOnes
br.remaining -= int64(numOnes)
length += int64(numOnes)
}
return length
}
func (br *baseSetBitRunReader) NextRun() SetBitRun {
var (
pos int64 = 0
length int64 = 0
)
if br.curNumBits != 0 {
run := br.findCurrentRun()
if run.Length != 0 && br.curNumBits != 0 {
return br.adjustRun(run)
}
pos = run.Pos
length = run.Length
}
if length == 0 {
// we didn't get any ones in curWord, so we can skip any zeros
// in the following words
br.skipNextZeros()
if br.remaining == 0 {
return SetBitRun{0, 0}
}
pos = br.position()
} else if br.curNumBits == 0 {
if br.remaining >= 64 {
br.curWord = br.loadFull()
br.curNumBits = 64
} else if br.remaining > 0 {
br.curWord = br.loadPartial(0, br.remaining)
br.curNumBits = int32(br.remaining)
} else {
return br.adjustRun(SetBitRun{pos, length})
}
if (br.curWord & br.firstBit) == 0 {
return br.adjustRun(SetBitRun{pos, length})
}
}
length += br.countNextOnes()
return br.adjustRun(SetBitRun{pos, length})
}
// VisitFn is a callback function for visiting runs of contiguous bits
type VisitFn func(pos int64, length int64) error
func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error {
for {
run := br.NextRun()
if run.Length == 0 {
break
}
if err := visitFn(run.Pos, run.Length); err != nil {
return err
}
}
return nil
}
// VisitSetBitRuns is just a convenience function for calling NewSetBitRunReader and then VisitSetBitRuns
func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error {
if bitmap == nil {
return visitFn(0, length)
}
rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
for {
run := rdr.NextRun()
if run.Length == 0 {
break
}
if err := visitFn(run.Pos, run.Length); err != nil {
return err
}
}
return nil
}