blob: 4d247b2edb2dd2c3e1c19423f42ce7d1dbe9916a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils_test
import (
"reflect"
"testing"
"github.com/apache/arrow/go/v6/arrow/bitutil"
"github.com/apache/arrow/go/v6/parquet/internal/utils"
"github.com/stretchr/testify/suite"
)
func reverseAny(s interface{}) {
n := reflect.ValueOf(s).Len()
swap := reflect.Swapper(s)
for i, j := 0, n-1; i < j; i, j = i+1, j-1 {
swap(i, j)
}
}
type linearBitRunReader struct {
reader *bitutil.BitmapReader
}
func (l linearBitRunReader) NextRun() utils.BitRun {
r := utils.BitRun{0, l.reader.Set()}
for l.reader.Pos() < l.reader.Len() && l.reader.Set() == r.Set {
r.Len++
l.reader.Next()
}
return r
}
func bitmapFromString(s string) []byte {
maxLen := bitutil.BytesForBits(int64(len(s)))
ret := make([]byte, maxLen)
i := 0
for _, c := range s {
switch c {
case '0':
bitutil.ClearBit(ret, i)
i++
case '1':
bitutil.SetBit(ret, i)
i++
case ' ', '\t', '\r', '\n':
default:
panic("unexpected character for bitmap string")
}
}
actualLen := bitutil.BytesForBits(int64(i))
return ret[:actualLen]
}
func referenceBitRuns(data []byte, offset, length int) (ret []utils.SetBitRun) {
ret = make([]utils.SetBitRun, 0)
reader := linearBitRunReader{bitutil.NewBitmapReader(data, offset, length)}
pos := 0
for pos < length {
br := reader.NextRun()
if br.Set {
ret = append(ret, utils.SetBitRun{int64(pos), br.Len})
}
pos += int(br.Len)
}
return
}
type BitSetRunReaderSuite struct {
suite.Suite
testOffsets []int64
}
func TestBitSetRunReader(t *testing.T) {
suite.Run(t, new(BitSetRunReaderSuite))
}
func (br *BitSetRunReaderSuite) SetupSuite() {
br.testOffsets = []int64{0, 1, 6, 7, 8, 33, 63, 64, 65, 71}
br.T().Parallel()
}
type Range struct {
Offset int64
Len int64
}
func (r Range) EndOffset() int64 { return r.Offset + r.Len }
func (br *BitSetRunReaderSuite) bufferTestRanges(buf []byte) []Range {
bufSize := int64(len(buf) * 8) // in bits
rg := make([]Range, 0)
for _, offset := range br.testOffsets {
for _, lenAdjust := range br.testOffsets {
length := utils.Min(bufSize-offset, lenAdjust)
br.GreaterOrEqual(length, int64(0))
rg = append(rg, Range{offset, length})
length = utils.Min(bufSize-offset, bufSize-lenAdjust)
br.GreaterOrEqual(length, int64(0))
rg = append(rg, Range{offset, length})
}
}
return rg
}
func (br *BitSetRunReaderSuite) assertBitRuns(buf []byte, start, length int64, expected []utils.SetBitRun) {
{
runs := make([]utils.SetBitRun, 0)
reader := utils.NewSetBitRunReader(buf, start, length)
for {
run := reader.NextRun()
if run.Length == 0 {
break
}
runs = append(runs, run)
}
br.Equal(expected, runs)
}
{
runs := make([]utils.SetBitRun, 0)
reader := utils.NewReverseSetBitRunReader(buf, start, length)
for {
run := reader.NextRun()
if run.Length == 0 {
break
}
runs = append(runs, run)
}
reverseAny(expected)
br.Equal(expected, runs)
}
}
func (br *BitSetRunReaderSuite) TestEmpty() {
for _, offset := range br.testOffsets {
br.assertBitRuns(nil, offset, 0, []utils.SetBitRun{})
}
}
func (br *BitSetRunReaderSuite) TestOneByte() {
buffer := bitmapFromString("01101101")
br.assertBitRuns(buffer, 0, 8, []utils.SetBitRun{
{1, 2}, {4, 2}, {7, 1},
})
for _, str := range []string{"01101101", "10110110", "00000000", "11111111"} {
buf := bitmapFromString(str)
for offset := 0; offset < 8; offset++ {
for length := 0; length <= 8-offset; length++ {
expected := referenceBitRuns(buf, offset, length)
br.assertBitRuns(buf, int64(offset), int64(length), expected)
}
}
}
}
func (br *BitSetRunReaderSuite) TestTiny() {
buf := bitmapFromString("11100011 10001110 00111000 11100011 10001110 00111000")
br.assertBitRuns(buf, 0, 48, []utils.SetBitRun{
{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
})
br.assertBitRuns(buf, 0, 46, []utils.SetBitRun{
{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
})
br.assertBitRuns(buf, 0, 45, []utils.SetBitRun{
{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3}, {42, 3},
})
br.assertBitRuns(buf, 0, 42, []utils.SetBitRun{
{0, 3}, {6, 3}, {12, 3}, {18, 3}, {24, 3}, {30, 3}, {36, 3},
})
br.assertBitRuns(buf, 3, 45, []utils.SetBitRun{
{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
})
br.assertBitRuns(buf, 3, 43, []utils.SetBitRun{
{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
})
br.assertBitRuns(buf, 3, 42, []utils.SetBitRun{
{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3}, {39, 3},
})
br.assertBitRuns(buf, 3, 39, []utils.SetBitRun{
{3, 3}, {9, 3}, {15, 3}, {21, 3}, {27, 3}, {33, 3},
})
}
func (br *BitSetRunReaderSuite) TestAllZeros() {
const bufferSize = 256
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
for _, rg := range br.bufferTestRanges(buf) {
br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{})
}
}
func (br *BitSetRunReaderSuite) TestAllOnes() {
const bufferSize = 256
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
bitutil.SetBitsTo(buf, 0, bufferSize, true)
for _, rg := range br.bufferTestRanges(buf) {
if rg.Len > 0 {
br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{{0, rg.Len}})
} else {
br.assertBitRuns(buf, rg.Offset, rg.Len, []utils.SetBitRun{})
}
}
}
func (br *BitSetRunReaderSuite) TestSmall() {
// ones then zeros then ones
const (
bufferSize = 256
onesLen = 64
secondOnesStart = bufferSize - onesLen
)
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
bitutil.SetBitsTo(buf, 0, bufferSize, false)
bitutil.SetBitsTo(buf, 0, onesLen, true)
bitutil.SetBitsTo(buf, secondOnesStart, onesLen, true)
for _, rg := range br.bufferTestRanges(buf) {
expected := []utils.SetBitRun{}
if rg.Offset < onesLen && rg.Len > 0 {
expected = append(expected, utils.SetBitRun{0, utils.Min(onesLen-rg.Offset, rg.Len)})
}
if rg.Offset+rg.Len > secondOnesStart {
expected = append(expected, utils.SetBitRun{secondOnesStart - rg.Offset, rg.Len + rg.Offset - secondOnesStart})
}
br.assertBitRuns(buf, rg.Offset, rg.Len, expected)
}
}
func (br *BitSetRunReaderSuite) TestSingleRun() {
// one single run of ones, at varying places in the buffer
const bufferSize = 512
buf := make([]byte, int(bitutil.BytesForBits(bufferSize)))
for _, onesRg := range br.bufferTestRanges(buf) {
bitutil.SetBitsTo(buf, 0, bufferSize, false)
bitutil.SetBitsTo(buf, onesRg.Offset, onesRg.Len, true)
for _, rg := range br.bufferTestRanges(buf) {
expect := []utils.SetBitRun{}
if rg.Len != 0 && onesRg.Len != 0 && rg.Offset < onesRg.EndOffset() && onesRg.Offset < rg.EndOffset() {
// the two ranges intersect
var (
intersectStart = utils.Max(rg.Offset, onesRg.Offset)
intersectStop = utils.Min(rg.EndOffset(), onesRg.EndOffset())
)
expect = append(expect, utils.SetBitRun{intersectStart - rg.Offset, intersectStop - intersectStart})
}
br.assertBitRuns(buf, rg.Offset, rg.Len, expect)
}
}
}