blob: 7efc62edb15499f7ee6e341874d72b465f674f27 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils_test
import (
"testing"
"github.com/apache/arrow/go/v6/arrow/bitutil"
"github.com/apache/arrow/go/v6/arrow/memory"
"github.com/apache/arrow/go/v6/parquet/internal/utils"
"github.com/stretchr/testify/assert"
"golang.org/x/exp/rand"
)
const kWordSize = 64
func create(nbytes, offset, length int64) (*memory.Buffer, *utils.BitBlockCounter) {
buf := memory.NewResizableBuffer(memory.DefaultAllocator)
buf.Resize(int(nbytes))
return buf, utils.NewBitBlockCounter(buf.Bytes(), offset, length)
}
func TestOneWordBasics(t *testing.T) {
const nbytes = 1024
buf, counter := create(nbytes, 0, nbytes*8)
defer buf.Release()
var bitsScanned int64
for i := 0; i < nbytes/8; i++ {
block := counter.NextWord()
assert.EqualValues(t, kWordSize, block.Len)
assert.EqualValues(t, 0, block.Popcnt)
bitsScanned += int64(block.Len)
}
assert.EqualValues(t, 1024*8, bitsScanned)
block := counter.NextWord()
assert.Zero(t, block.Len)
assert.Zero(t, block.Popcnt)
assert.True(t, block.NoneSet())
}
func TestFourWordsBasics(t *testing.T) {
const nbytes = 1024
buf, counter := create(nbytes, 0, nbytes*8)
defer buf.Release()
var bitsScanned int64
for i := 0; i < nbytes/32; i++ {
block := counter.NextFourWords()
assert.EqualValues(t, 4*kWordSize, block.Len)
assert.EqualValues(t, 0, block.Popcnt)
bitsScanned += int64(block.Len)
}
assert.EqualValues(t, 1024*8, bitsScanned)
block := counter.NextFourWords()
assert.Zero(t, block.Len)
assert.Zero(t, block.Popcnt)
}
func TestOneWordWithOffsets(t *testing.T) {
checkWithOffset := func(offset int64) {
const (
nwords int64 = 4
totalBytes = nwords*8 + 1
)
// Trim a bit from the end of the bitmap so we can check
// the remainder bits behavior
buf, counter := create(totalBytes, offset, nwords*kWordSize-offset-1)
defer buf.Release()
memory.Set(buf.Bytes(), byte(0xFF))
block := counter.NextWord()
assert.EqualValues(t, kWordSize, block.Len)
assert.EqualValues(t, 64, block.Popcnt)
// add a false value to the next word
bitutil.SetBitTo(buf.Bytes(), kWordSize+int(offset), false)
block = counter.NextWord()
assert.EqualValues(t, 64, block.Len)
assert.EqualValues(t, 63, block.Popcnt)
// Set the next word to all false
bitutil.SetBitsTo(buf.Bytes(), 2*kWordSize+offset, kWordSize, false)
block = counter.NextWord()
assert.EqualValues(t, 64, block.Len)
assert.Zero(t, block.Popcnt)
block = counter.NextWord()
assert.EqualValues(t, kWordSize-offset-1, block.Len)
assert.EqualValues(t, block.Len, block.Popcnt)
assert.True(t, block.AllSet())
// we can keep calling nextword safely
block = counter.NextWord()
assert.Zero(t, block.Len)
assert.Zero(t, block.Popcnt)
}
for offsetI := int64(0); offsetI < 8; offsetI++ {
checkWithOffset(offsetI)
}
}
func TestFourWordsWithOffsets(t *testing.T) {
checkWithOffset := func(offset int64) {
const (
nwords = 17
totalBytes = nwords*8 + 1
)
// trim a bit from the end of the bitmap so we can check the remainder
// bits behavior
buf, counter := create(totalBytes, offset, nwords*kWordSize-offset-1)
// start with all set
memory.Set(buf.Bytes(), 0xFF)
block := counter.NextFourWords()
assert.EqualValues(t, 4*kWordSize, block.Len)
assert.EqualValues(t, block.Len, block.Popcnt)
// add some false values to the next 3 shifted words
bitutil.ClearBit(buf.Bytes(), int(4*kWordSize+offset))
bitutil.ClearBit(buf.Bytes(), int(5*kWordSize+offset))
bitutil.ClearBit(buf.Bytes(), int(6*kWordSize+offset))
block = counter.NextFourWords()
assert.EqualValues(t, 4*kWordSize, block.Len)
assert.EqualValues(t, 253, block.Popcnt)
// set the next two words to all false
bitutil.SetBitsTo(buf.Bytes(), 8*kWordSize+offset, 2*kWordSize, false)
// block is half set
block = counter.NextFourWords()
assert.EqualValues(t, 4*kWordSize, block.Len)
assert.EqualValues(t, 128, block.Popcnt)
// last full block whether offset or no
block = counter.NextFourWords()
assert.EqualValues(t, 4*kWordSize, block.Len)
assert.EqualValues(t, block.Len, block.Popcnt)
// partial block
block = counter.NextFourWords()
assert.EqualValues(t, kWordSize-offset-1, block.Len)
assert.EqualValues(t, block.Len, block.Popcnt)
// we can keep calling NextFourWords safely
block = counter.NextFourWords()
assert.Zero(t, block.Len)
assert.Zero(t, block.Popcnt)
}
for offsetI := int64(0); offsetI < 8; offsetI++ {
checkWithOffset(offsetI)
}
}
func TestFourWordsRandomData(t *testing.T) {
const (
nbytes = 1024
)
buf := make([]byte, nbytes)
r := rand.New(rand.NewSource(0))
r.Read(buf)
checkWithOffset := func(offset int64) {
counter := utils.NewBitBlockCounter(buf, offset, nbytes*8-offset)
for i := 0; i < nbytes/32; i++ {
block := counter.NextFourWords()
assert.EqualValues(t, bitutil.CountSetBits(buf, i*256+int(offset), int(block.Len)), block.Popcnt)
}
}
for offsetI := int64(0); offsetI < 8; offsetI++ {
checkWithOffset(offsetI)
}
}