| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package utils |
| |
| import ( |
| "encoding/binary" |
| "math/bits" |
| |
| "github.com/apache/arrow/go/v6/arrow/bitutil" |
| ) |
| |
| // IsMultipleOf64 returns whether v is a multiple of 64. |
| func IsMultipleOf64(v int64) bool { return v&63 == 0 } |
| |
| // LeastSignificantBitMask returns a bit mask to return the least significant |
| // bits for a value starting from the bit index passed in. ie: if you want a |
| // mask for the 4 least significant bits, you call LeastSignificantBitMask(4) |
| func LeastSignificantBitMask(index int64) uint64 { |
| return (uint64(1) << index) - 1 |
| } |
| |
| // SetBitRun describes a run of contiguous set bits in a bitmap with Pos being |
| // the starting position of the run and Length being the number of bits. |
| type SetBitRun struct { |
| Pos int64 |
| Length int64 |
| } |
| |
| // AtEnd returns true if this bit run is the end of the set by checking |
| // that the length is 0. |
| func (s SetBitRun) AtEnd() bool { |
| return s.Length == 0 |
| } |
| |
| // Equal returns whether rhs is the same run as s |
| func (s SetBitRun) Equal(rhs SetBitRun) bool { |
| return s.Pos == rhs.Pos && s.Length == rhs.Length |
| } |
| |
| // SetBitRunReader is an interface for reading groups of contiguous set bits |
| // from a bitmap. The interface allows us to create different reader implementations |
| // that share the same interface easily such as a reverse set reader. |
| type SetBitRunReader interface { |
| // NextRun will return the next run of contiguous set bits in the bitmap |
| NextRun() SetBitRun |
| // Reset allows re-using the reader by providing a new bitmap, offset and length. The arguments |
| // match the New function for the reader being used. |
| Reset([]byte, int64, int64) |
| // VisitSetBitRuns calls visitFn for each set in a loop starting from the current position |
| // it's roughly equivalent to simply looping, calling NextRun and calling visitFn on the run |
| // for each run. |
| VisitSetBitRuns(visitFn VisitFn) error |
| } |
| |
| type baseSetBitRunReader struct { |
| bitmap []byte |
| pos int64 |
| length int64 |
| remaining int64 |
| curWord uint64 |
| curNumBits int32 |
| reversed bool |
| |
| firstBit uint64 |
| } |
| |
| // NewSetBitRunReader returns a SetBitRunReader for the bitmap starting at startOffset which will read |
| // numvalues bits. |
| func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader { |
| return newBaseSetBitRunReader(validBits, startOffset, numValues, false) |
| } |
| |
| // NewReverseSetBitRunReader returns a SetBitRunReader like NewSetBitRunReader, except it will |
| // return runs starting from the end of the bitmap until it reaches startOffset rather than starting |
| // at startOffset and reading from there. The SetBitRuns will still operate the same, so Pos |
| // will still be the position of the "left-most" bit of the run or the "start" of the run. It |
| // just returns runs starting from the end instead of starting from the beginning. |
| func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader { |
| return newBaseSetBitRunReader(validBits, startOffset, numValues, true) |
| } |
| |
| func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader { |
| ret := &baseSetBitRunReader{reversed: reverse} |
| ret.Reset(bitmap, startOffset, length) |
| return ret |
| } |
| |
| func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { |
| br.bitmap = bitmap |
| br.length = length |
| br.remaining = length |
| br.curNumBits = 0 |
| br.curWord = 0 |
| |
| if !br.reversed { |
| br.pos = startOffset / 8 |
| br.firstBit = 1 |
| |
| bitOffset := int8(startOffset % 8) |
| if length > 0 && bitOffset != 0 { |
| br.curNumBits = int32(MinInt(int(length), int(8-bitOffset))) |
| br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits)) |
| } |
| return |
| } |
| |
| br.pos = (startOffset + length) / 8 |
| br.firstBit = uint64(0x8000000000000000) |
| endBitOffset := int8((startOffset + length) % 8) |
| if length > 0 && endBitOffset != 0 { |
| br.pos++ |
| br.curNumBits = int32(MinInt(int(length), int(endBitOffset))) |
| br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits)) |
| } |
| } |
| |
| func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 { |
| if br.reversed { |
| return word << nbits |
| } |
| return word >> nbits |
| } |
| |
| func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 { |
| if br.reversed { |
| return int32(bits.LeadingZeros64(word)) |
| } |
| return int32(bits.TrailingZeros64(word)) |
| } |
| |
| func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 { |
| var word [8]byte |
| nbytes := bitutil.BytesForBits(numBits) |
| if br.reversed { |
| br.pos -= nbytes |
| copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes]) |
| return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits) |
| } |
| |
| copy(word[:], br.bitmap[br.pos:br.pos+nbytes]) |
| br.pos += nbytes |
| return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits) |
| } |
| |
| func (br *baseSetBitRunReader) findCurrentRun() SetBitRun { |
| nzeros := br.countFirstZeros(br.curWord) |
| if nzeros >= br.curNumBits { |
| br.remaining -= int64(br.curNumBits) |
| br.curWord = 0 |
| br.curNumBits = 0 |
| return SetBitRun{0, 0} |
| } |
| |
| br.curWord = br.consumeBits(br.curWord, nzeros) |
| br.curNumBits -= nzeros |
| br.remaining -= int64(nzeros) |
| pos := br.position() |
| |
| numOnes := br.countFirstZeros(^br.curWord) |
| br.curWord = br.consumeBits(br.curWord, numOnes) |
| br.curNumBits -= numOnes |
| br.remaining -= int64(numOnes) |
| return SetBitRun{pos, int64(numOnes)} |
| } |
| |
| func (br *baseSetBitRunReader) position() int64 { |
| if br.reversed { |
| return br.remaining |
| } |
| return br.length - br.remaining |
| } |
| |
| func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun { |
| if br.reversed { |
| run.Pos -= run.Length |
| } |
| return run |
| } |
| |
| func (br *baseSetBitRunReader) loadFull() (ret uint64) { |
| if br.reversed { |
| br.pos -= 8 |
| } |
| ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8]) |
| if !br.reversed { |
| br.pos += 8 |
| } |
| return |
| } |
| |
| func (br *baseSetBitRunReader) skipNextZeros() { |
| for br.remaining >= 64 { |
| br.curWord = br.loadFull() |
| nzeros := br.countFirstZeros(br.curWord) |
| if nzeros < 64 { |
| br.curWord = br.consumeBits(br.curWord, nzeros) |
| br.curNumBits = 64 - nzeros |
| br.remaining -= int64(nzeros) |
| return |
| } |
| br.remaining -= 64 |
| } |
| // run of zeros continues in last bitmap word |
| if br.remaining > 0 { |
| br.curWord = br.loadPartial(0, br.remaining) |
| br.curNumBits = int32(br.remaining) |
| nzeros := int32(MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) |
| br.curWord = br.consumeBits(br.curWord, nzeros) |
| br.curNumBits -= nzeros |
| br.remaining -= int64(nzeros) |
| } |
| } |
| |
| func (br *baseSetBitRunReader) countNextOnes() int64 { |
| var length int64 |
| if ^br.curWord != 0 { |
| numOnes := br.countFirstZeros(^br.curWord) |
| br.remaining -= int64(numOnes) |
| br.curWord = br.consumeBits(br.curWord, numOnes) |
| br.curNumBits -= numOnes |
| if br.curNumBits != 0 { |
| return int64(numOnes) |
| } |
| length = int64(numOnes) |
| } else { |
| br.remaining -= 64 |
| br.curNumBits = 0 |
| length = 64 |
| } |
| |
| for br.remaining >= 64 { |
| br.curWord = br.loadFull() |
| numOnes := br.countFirstZeros(^br.curWord) |
| length += int64(numOnes) |
| br.remaining -= int64(numOnes) |
| if numOnes < 64 { |
| br.curWord = br.consumeBits(br.curWord, numOnes) |
| br.curNumBits = 64 - numOnes |
| return length |
| } |
| } |
| |
| if br.remaining > 0 { |
| br.curWord = br.loadPartial(0, br.remaining) |
| br.curNumBits = int32(br.remaining) |
| numOnes := br.countFirstZeros(^br.curWord) |
| br.curWord = br.consumeBits(br.curWord, numOnes) |
| br.curNumBits -= numOnes |
| br.remaining -= int64(numOnes) |
| length += int64(numOnes) |
| } |
| return length |
| } |
| |
| func (br *baseSetBitRunReader) NextRun() SetBitRun { |
| var ( |
| pos int64 = 0 |
| length int64 = 0 |
| ) |
| |
| if br.curNumBits != 0 { |
| run := br.findCurrentRun() |
| if run.Length != 0 && br.curNumBits != 0 { |
| return br.adjustRun(run) |
| } |
| pos = run.Pos |
| length = run.Length |
| } |
| |
| if length == 0 { |
| // we didn't get any ones in curWord, so we can skip any zeros |
| // in the following words |
| br.skipNextZeros() |
| if br.remaining == 0 { |
| return SetBitRun{0, 0} |
| } |
| pos = br.position() |
| } else if br.curNumBits == 0 { |
| if br.remaining >= 64 { |
| br.curWord = br.loadFull() |
| br.curNumBits = 64 |
| } else if br.remaining > 0 { |
| br.curWord = br.loadPartial(0, br.remaining) |
| br.curNumBits = int32(br.remaining) |
| } else { |
| return br.adjustRun(SetBitRun{pos, length}) |
| } |
| if (br.curWord & br.firstBit) == 0 { |
| return br.adjustRun(SetBitRun{pos, length}) |
| } |
| } |
| |
| length += br.countNextOnes() |
| return br.adjustRun(SetBitRun{pos, length}) |
| } |
| |
| // VisitFn is a callback function for visiting runs of contiguous bits |
| type VisitFn func(pos int64, length int64) error |
| |
| func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error { |
| for { |
| run := br.NextRun() |
| if run.Length == 0 { |
| break |
| } |
| |
| if err := visitFn(run.Pos, run.Length); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |
| |
| // VisitSetBitRuns is just a convenience function for calling NewSetBitRunReader and then VisitSetBitRuns |
| func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error { |
| if bitmap == nil { |
| return visitFn(0, length) |
| } |
| rdr := NewSetBitRunReader(bitmap, bitmapOffset, length) |
| for { |
| run := rdr.NextRun() |
| if run.Length == 0 { |
| break |
| } |
| |
| if err := visitFn(run.Pos, run.Length); err != nil { |
| return err |
| } |
| } |
| return nil |
| } |