blob: 72d790a4223f275bbaf1439b50d9790877d354c5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bitutil
import (
"math/bits"
"unsafe"
"github.com/apache/arrow/go/v6/arrow/endian"
"github.com/apache/arrow/go/v6/arrow/internal/debug"
)
// BitmapReader is a simple bitmap reader for a byte slice.
type BitmapReader struct {
bitmap []byte
pos int
len int
current byte
byteOffset int
bitOffset int
}
// NewBitmapReader creates and returns a new bitmap reader for the given bitmap
func NewBitmapReader(bitmap []byte, offset, length int) *BitmapReader {
curbyte := byte(0)
if length > 0 && bitmap != nil {
curbyte = bitmap[offset/8]
}
return &BitmapReader{
bitmap: bitmap,
byteOffset: offset / 8,
bitOffset: offset % 8,
current: curbyte,
len: length,
}
}
// Set returns true if the current bit is set
func (b *BitmapReader) Set() bool {
return (b.current & (1 << b.bitOffset)) != 0
}
// NotSet returns true if the current bit is not set
func (b *BitmapReader) NotSet() bool {
return (b.current & (1 << b.bitOffset)) == 0
}
// Next advances the reader to the next bit in the bitmap.
func (b *BitmapReader) Next() {
b.bitOffset++
b.pos++
if b.bitOffset == 8 {
b.bitOffset = 0
b.byteOffset++
if b.pos < b.len {
b.current = b.bitmap[int(b.byteOffset)]
}
}
}
// Pos returns the current bit position in the bitmap that the reader is looking at
func (b *BitmapReader) Pos() int { return b.pos }
// Len returns the total number of bits in the bitmap
func (b *BitmapReader) Len() int { return b.len }
// BitmapWriter is a simple writer for writing bitmaps to byte slices
type BitmapWriter struct {
buf []byte
pos int
length int
curByte uint8
bitMask uint8
byteOffset int
}
// NewBitmapWriter returns a sequential bitwise writer that preserves surrounding
// bit values as it writes.
func NewBitmapWriter(bitmap []byte, start, length int) *BitmapWriter {
ret := &BitmapWriter{
buf: bitmap,
length: length,
byteOffset: start / 8,
bitMask: BitMask[start%8],
}
if length > 0 {
ret.curByte = bitmap[int(ret.byteOffset)]
}
return ret
}
// Reset resets the position and view of the slice to restart writing a bitmap
// to the same byte slice.
func (b *BitmapWriter) Reset(start, length int) {
b.pos = 0
b.byteOffset = start / 8
b.bitMask = BitMask[start%8]
b.length = length
if b.length > 0 {
b.curByte = b.buf[int(b.byteOffset)]
}
}
func (b *BitmapWriter) Pos() int { return b.pos }
func (b *BitmapWriter) Set() { b.curByte |= b.bitMask }
func (b *BitmapWriter) Clear() { b.curByte &= ^b.bitMask }
// Next increments the writer to the next bit for writing.
func (b *BitmapWriter) Next() {
b.bitMask = b.bitMask << 1
b.pos++
if b.bitMask == 0 {
b.bitMask = 0x01
b.buf[b.byteOffset] = b.curByte
b.byteOffset++
if b.pos < b.length {
b.curByte = b.buf[int(b.byteOffset)]
}
}
}
// AppendBools writes a series of booleans to the bitmapwriter and returns
// the number of remaining bytes left in the buffer for writing.
func (b *BitmapWriter) AppendBools(in []bool) int {
space := min(b.length-b.pos, len(in))
if space == 0 {
return 0
}
bitOffset := bits.TrailingZeros32(uint32(b.bitMask))
// location that the first byte needs to be written to for appending
appslice := b.buf[int(b.byteOffset) : b.byteOffset+int(BytesForBits(int64(bitOffset+space)))]
// update everything but curByte
appslice[0] = b.curByte
for i, b := range in[:space] {
if b {
SetBit(appslice, i+bitOffset)
} else {
ClearBit(appslice, i+bitOffset)
}
}
b.pos += space
b.bitMask = BitMask[(bitOffset+space)%8]
b.byteOffset += (bitOffset + space) / 8
b.curByte = appslice[len(appslice)-1]
return space
}
// Finish flushes the final byte out to the byteslice in case it was not already
// on a byte aligned boundary.
func (b *BitmapWriter) Finish() {
if b.length > 0 && (b.bitMask != 0x01 || b.pos < b.length) {
b.buf[int(b.byteOffset)] = b.curByte
}
}
// BitmapWordReader is a reader for bitmaps that reads a word at a time (a word being an 8 byte uint64)
// and then provides functions to grab the individual trailing bytes after the last word
type BitmapWordReader struct {
bitmap []byte
offset int
nwords int
trailingBits int
trailingBytes int
curword uint64
}
// NewBitmapWordReader sets up a word reader, calculates the number of trailing bits and
// number of trailing bytes, along with the number of words.
func NewBitmapWordReader(bitmap []byte, offset, length int) *BitmapWordReader {
bitoffset := offset % 8
byteOffset := offset / 8
bm := &BitmapWordReader{
offset: bitoffset,
bitmap: bitmap[byteOffset : byteOffset+int(BytesForBits(int64(bitoffset+length)))],
// decrement wordcount by 1 as we may touch two adjacent words in one iteration
nwords: length/int(unsafe.Sizeof(uint64(0))*8) - 1,
}
if bm.nwords < 0 {
bm.nwords = 0
}
bm.trailingBits = length - bm.nwords*int(unsafe.Sizeof(uint64(0)))*8
bm.trailingBytes = int(BytesForBits(int64(bm.trailingBits)))
if bm.nwords > 0 {
bm.curword = toFromLEFunc(endian.Native.Uint64(bm.bitmap))
} else {
setLSB(&bm.curword, bm.bitmap[0])
}
return bm
}
// NextWord returns the next full word read from the bitmap, should not be called
// if Words() is 0 as it will step outside of the bounds of the bitmap slice and panic.
//
// We don't perform the bounds checking in order to improve performance.
func (bm *BitmapWordReader) NextWord() uint64 {
bm.bitmap = bm.bitmap[unsafe.Sizeof(bm.curword):]
word := bm.curword
nextWord := toFromLEFunc(endian.Native.Uint64(bm.bitmap))
if bm.offset != 0 {
// combine two adjacent words into one word
// |<------ next ----->|<---- current ---->|
// +-------------+-----+-------------+-----+
// | --- | A | B | --- |
// +-------------+-----+-------------+-----+
// | | offset
// v v
// +-----+-------------+
// | A | B |
// +-----+-------------+
// |<------ word ----->|
word >>= uint64(bm.offset)
word |= nextWord << (int64(unsafe.Sizeof(uint64(0))*8) - int64(bm.offset))
}
bm.curword = nextWord
return word
}
// NextTrailingByte returns the next trailing byte of the bitmap after the last word
// along with the number of valid bits in that byte. When validBits < 8, that
// is the last byte.
//
// If the bitmap ends on a byte alignment, then the last byte can also return 8 valid bits.
// Thus the TrailingBytes function should be used to know how many trailing bytes to read.
func (bm *BitmapWordReader) NextTrailingByte() (val byte, validBits int) {
debug.Assert(bm.trailingBits > 0, "next trailing byte called with no trailing bits")
if bm.trailingBits <= 8 {
// last byte
validBits = bm.trailingBits
bm.trailingBits = 0
rdr := NewBitmapReader(bm.bitmap, bm.offset, validBits)
for i := 0; i < validBits; i++ {
val >>= 1
if rdr.Set() {
val |= 0x80
}
rdr.Next()
}
val >>= (8 - validBits)
return
}
bm.bitmap = bm.bitmap[1:]
nextByte := bm.bitmap[0]
val = getLSB(bm.curword)
if bm.offset != 0 {
val >>= byte(bm.offset)
val |= nextByte << (8 - bm.offset)
}
setLSB(&bm.curword, nextByte)
bm.trailingBits -= 8
bm.trailingBytes--
validBits = 8
return
}
func (bm *BitmapWordReader) Words() int { return bm.nwords }
func (bm *BitmapWordReader) TrailingBytes() int { return bm.trailingBytes }
// BitmapWordWriter is a bitmap writer for writing a full word at a time (a word being
// a uint64). After the last full word is written, PutNextTrailingByte can be used to
// write the remaining trailing bytes.
type BitmapWordWriter struct {
bitmap []byte
offset int
len int
bitMask uint64
currentWord uint64
}
// NewBitmapWordWriter initializes a new bitmap word writer which will start writing
// into the byte slice at bit offset start, expecting to write len bits.
func NewBitmapWordWriter(bitmap []byte, start, len int) *BitmapWordWriter {
ret := &BitmapWordWriter{
bitmap: bitmap[start/8:],
len: len,
offset: start % 8,
bitMask: (uint64(1) << uint64(start%8)) - 1,
}
if ret.offset != 0 {
if ret.len >= int(unsafe.Sizeof(uint64(0))*8) {
ret.currentWord = toFromLEFunc(endian.Native.Uint64(ret.bitmap))
} else if ret.len > 0 {
setLSB(&ret.currentWord, ret.bitmap[0])
}
}
return ret
}
// PutNextWord writes the given word to the bitmap, potentially splitting across
// two adjacent words.
func (bm *BitmapWordWriter) PutNextWord(word uint64) {
sz := int(unsafe.Sizeof(word))
if bm.offset != 0 {
// split one word into two adjacent words, don't touch unused bits
// |<------ word ----->|
// +-----+-------------+
// | A | B |
// +-----+-------------+
// | |
// v v offset
// +-------------+-----+-------------+-----+
// | --- | A | B | --- |
// +-------------+-----+-------------+-----+
// |<------ next ----->|<---- current ---->|
word = (word << uint64(bm.offset)) | (word >> (int64(sz*8) - int64(bm.offset)))
next := toFromLEFunc(endian.Native.Uint64(bm.bitmap[sz:]))
bm.currentWord = (bm.currentWord & bm.bitMask) | (word &^ bm.bitMask)
next = (next &^ bm.bitMask) | (word & bm.bitMask)
endian.Native.PutUint64(bm.bitmap, toFromLEFunc(bm.currentWord))
endian.Native.PutUint64(bm.bitmap[sz:], toFromLEFunc(next))
bm.currentWord = next
} else {
endian.Native.PutUint64(bm.bitmap, toFromLEFunc(word))
}
bm.bitmap = bm.bitmap[sz:]
}
// PutNextTrailingByte writes the number of bits indicated by validBits from b to
// the bitmap.
func (bm *BitmapWordWriter) PutNextTrailingByte(b byte, validBits int) {
curbyte := getLSB(bm.currentWord)
if validBits == 8 {
if bm.offset != 0 {
b = (b << bm.offset) | (b >> (8 - bm.offset))
next := bm.bitmap[1]
curbyte = (curbyte & byte(bm.bitMask)) | (b &^ byte(bm.bitMask))
next = (next &^ byte(bm.bitMask)) | (b & byte(bm.bitMask))
bm.bitmap[0] = curbyte
bm.bitmap[1] = next
bm.currentWord = uint64(next)
} else {
bm.bitmap[0] = b
}
bm.bitmap = bm.bitmap[1:]
} else {
debug.Assert(validBits > 0 && validBits < 8, "invalid valid bits in bitmap word writer")
debug.Assert(BytesForBits(int64(bm.offset+validBits)) <= int64(len(bm.bitmap)), "writing trailiing byte outside of bounds of bitmap")
wr := NewBitmapWriter(bm.bitmap, int(bm.offset), validBits)
for i := 0; i < validBits; i++ {
if b&0x01 != 0 {
wr.Set()
} else {
wr.Clear()
}
wr.Next()
b >>= 1
}
wr.Finish()
}
}
// CopyBitmap copies the bitmap indicated by src, starting at bit offset srcOffset,
// and copying length bits into dst, starting at bit offset dstOffset.
func CopyBitmap(src []byte, srcOffset, length int, dst []byte, dstOffset int) {
if length == 0 {
// if there's nothing to write, end early.
return
}
bitOffset := srcOffset % 8
destBitOffset := dstOffset % 8
// slow path, one of the bitmaps are not byte aligned.
if bitOffset != 0 || destBitOffset != 0 {
rdr := NewBitmapWordReader(src, srcOffset, length)
wr := NewBitmapWordWriter(dst, dstOffset, length)
nwords := rdr.Words()
for nwords > 0 {
nwords--
wr.PutNextWord(rdr.NextWord())
}
nbytes := rdr.TrailingBytes()
for nbytes > 0 {
nbytes--
bt, validBits := rdr.NextTrailingByte()
wr.PutNextTrailingByte(bt, validBits)
}
return
}
// fast path, both are starting with byte-aligned bitmaps
nbytes := int(BytesForBits(int64(length)))
// shift by its byte offset
src = src[srcOffset/8:]
dst = dst[dstOffset/8:]
// Take care of the trailing bits in the last byte
// E.g., if trailing_bits = 5, last byte should be
// - low 3 bits: new bits from last byte of data buffer
// - high 5 bits: old bits from last byte of dest buffer
trailingBits := nbytes*8 - length
trailMask := byte(uint(1)<<(8-trailingBits)) - 1
copy(dst, src[:nbytes-1])
lastData := src[nbytes-1]
dst[nbytes-1] &= ^trailMask
dst[nbytes-1] |= lastData & trailMask
}