| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| package utils_test |
| |
| import ( |
| "fmt" |
| "reflect" |
| "strings" |
| "testing" |
| |
| "github.com/apache/arrow/go/v6/arrow/bitutil" |
| "github.com/apache/arrow/go/v6/parquet/internal/utils" |
| "github.com/stretchr/testify/suite" |
| ) |
| |
| func writeSliceToWriter(wr utils.BitmapWriter, values []int) { |
| for _, v := range values { |
| if v != 0 { |
| wr.Set() |
| } else { |
| wr.Clear() |
| } |
| wr.Next() |
| } |
| wr.Finish() |
| } |
| |
| type FirstTimeBitmapWriterSuite struct { |
| suite.Suite |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestNormalOperation() { |
| for _, fb := range []byte{0x00, 0xFF} { |
| { |
| bitmap := []byte{fb, fb, fb, fb} |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 0, 12) |
| writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}) |
| // {0b00110110, 0b1010, 0, 0} |
| f.Equal([]byte{0x36, 0x0a}, bitmap[:2]) |
| } |
| { |
| bitmap := []byte{fb, fb, fb, fb} |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 12) |
| writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1}) |
| // {0b00110110, 0b1010, 0, 0} |
| f.Equal([]byte{0x60 | (fb & 0x0f), 0xa3}, bitmap[:2]) |
| } |
| // Consecutive write chunks |
| { |
| bitmap := []byte{fb, fb, fb, fb} |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 0, 6) |
| writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1}) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 6, 3) |
| writeSliceToWriter(wr, []int{0, 0, 0}) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 9, 3) |
| writeSliceToWriter(wr, []int{1, 0, 1}) |
| } |
| f.Equal([]byte{0x36, 0x0a}, bitmap[:2]) |
| } |
| { |
| bitmap := []byte{fb, fb, fb, fb} |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 0) |
| writeSliceToWriter(wr, []int{}) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 4, 6) |
| writeSliceToWriter(wr, []int{0, 1, 1, 0, 1, 1}) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 10, 3) |
| writeSliceToWriter(wr, []int{0, 0, 0}) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 13, 0) |
| writeSliceToWriter(wr, []int{}) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(bitmap, 13, 3) |
| writeSliceToWriter(wr, []int{1, 0, 1}) |
| } |
| f.Equal([]byte{0x60 | (fb & 0x0f), 0xa3}, bitmap[:2]) |
| } |
| } |
| } |
| |
| func bitmapToString(bitmap []byte, bitCount int64) string { |
| var bld strings.Builder |
| bld.Grow(int(bitCount)) |
| for i := 0; i < int(bitCount); i++ { |
| if bitutil.BitIsSet(bitmap, i) { |
| bld.WriteByte('1') |
| } else { |
| bld.WriteByte('0') |
| } |
| } |
| return bld.String() |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestAppendWordOffsetOverwritesCorrectBits() { |
| check := func(start byte, expectedBits string, offset int64) { |
| validBits := []byte{start} |
| const bitsAfterAppend = 8 |
| wr := utils.NewFirstTimeBitmapWriter(validBits, offset, int64(8*len(validBits))-offset) |
| wr.AppendWord(0xFF, bitsAfterAppend-offset) |
| wr.Finish() |
| f.Equal(expectedBits, bitmapToString(validBits, bitsAfterAppend)) |
| } |
| |
| f.Run("CheckAppend", func() { |
| tests := []struct { |
| expectedBits string |
| offset int64 |
| }{ |
| {"11111111", 0}, |
| {"01111111", 1}, |
| {"00111111", 2}, |
| {"00011111", 3}, |
| {"00001111", 4}, |
| {"00000111", 5}, |
| {"00000011", 6}, |
| {"00000001", 7}, |
| } |
| for _, tt := range tests { |
| f.Run(tt.expectedBits, func() { check(0x00, tt.expectedBits, tt.offset) }) |
| } |
| }) |
| |
| f.Run("CheckWithSet", func() { |
| tests := []struct { |
| expectedBits string |
| offset int64 |
| }{ |
| {"11111111", 1}, |
| {"10111111", 2}, |
| {"10011111", 3}, |
| {"10001111", 4}, |
| {"10000111", 5}, |
| {"10000011", 6}, |
| {"10000001", 7}, |
| } |
| for _, tt := range tests { |
| f.Run(tt.expectedBits, func() { check(0x1, tt.expectedBits, tt.offset) }) |
| } |
| }) |
| |
| f.Run("CheckWithPreceding", func() { |
| tests := []struct { |
| expectedBits string |
| offset int64 |
| }{ |
| {"11111111", 0}, |
| {"11111111", 1}, |
| {"11111111", 2}, |
| {"11111111", 3}, |
| {"11111111", 4}, |
| {"11111111", 5}, |
| {"11111111", 6}, |
| {"11111111", 7}, |
| } |
| for _, tt := range tests { |
| f.Run(fmt.Sprintf("%d", tt.offset), func() { check(0xFF, tt.expectedBits, tt.offset) }) |
| } |
| }) |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestAppendZeroBitsNoImpact() { |
| validBits := []byte{0x00} |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(len(validBits)*8)) |
| wr.AppendWord(0xFF, 0) |
| wr.AppendWord(0xFF, 0) |
| wr.AppendWord(0x01, 1) |
| wr.Finish() |
| f.Equal(uint8(0x2), validBits[0]) |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestAppendLessThanByte() { |
| { |
| validBits := make([]byte, 8) |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 1, 8) |
| wr.AppendWord(0xB, 4) |
| wr.Finish() |
| f.Equal("01101000", bitmapToString(validBits, 8)) |
| } |
| { |
| // test with all bits initially set |
| validBits := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 1, 8) |
| wr.AppendWord(0xB, 4) |
| wr.Finish() |
| f.Equal("11101000", bitmapToString(validBits, 8)) |
| } |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestAppendByteThenMore() { |
| { |
| validBits := make([]byte, 8) |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 0, 9) |
| wr.AppendWord(0xC3, 8) |
| wr.AppendWord(0x01, 1) |
| wr.Finish() |
| f.Equal("110000111", bitmapToString(validBits, 9)) |
| } |
| { |
| // test with all bits initially set |
| validBits := []byte{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF} |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 0, 9) |
| wr.AppendWord(0xC3, 8) |
| wr.AppendWord(0x01, 1) |
| wr.Finish() |
| f.Equal("110000111", bitmapToString(validBits, 9)) |
| } |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestAppendWordShiftBitsCorrectly() { |
| const pattern = 0x9A9A9A9A9A9A9A9A |
| |
| tests := []struct { |
| leadingBits string |
| middleBits string |
| trailingBits string |
| offset int64 |
| presetBufferBits bool |
| }{ |
| {"01011001", "01011001", "00000000", 8, false}, |
| {"00101100", "10101100", "10000000", 9, false}, |
| {"00010110", "01010110", "01000000", 10, false}, |
| {"00001011", "00101011", "00100000", 11, false}, |
| {"00000101", "10010101", "10010000", 12, false}, |
| {"00000010", "11001010", "11001000", 13, false}, |
| {"00000001", "01100101", "01100100", 14, false}, |
| {"00000000", "10110010", "10110010", 15, false}, |
| {"01011001", "01011001", "11111111", 8, true}, |
| {"10101100", "10101100", "10000000", 9, true}, |
| {"11010110", "01010110", "01000000", 10, true}, |
| {"11101011", "00101011", "00100000", 11, true}, |
| {"11110101", "10010101", "10010000", 12, true}, |
| {"11111010", "11001010", "11001000", 13, true}, |
| {"11111101", "01100101", "01100100", 14, true}, |
| {"11111110", "10110010", "10110010", 15, true}, |
| } |
| for _, tt := range tests { |
| f.Run(tt.leadingBits, func() { |
| f.Require().GreaterOrEqual(tt.offset, int64(8)) |
| validBits := make([]byte, 10) |
| if tt.presetBufferBits { |
| for idx := range validBits { |
| validBits[idx] = 0xFF |
| } |
| } |
| |
| validBits[0] = 0x99 |
| wr := utils.NewFirstTimeBitmapWriter(validBits, tt.offset, (9*int64(reflect.TypeOf(uint64(0)).Size()))-tt.offset) |
| wr.AppendWord(pattern, 64) |
| wr.Finish() |
| f.Equal(uint8(0x99), validBits[0]) |
| f.Equal(tt.leadingBits, bitmapToString(validBits[1:], 8)) |
| for x := 2; x < 9; x++ { |
| f.Equal(tt.middleBits, bitmapToString(validBits[x:], 8)) |
| } |
| f.Equal(tt.trailingBits, bitmapToString(validBits[9:], 8)) |
| }) |
| } |
| } |
| |
| func (f *FirstTimeBitmapWriterSuite) TestAppendWordOnlyAppropriateBytesWritten() { |
| validBits := []byte{0x00, 0x00} |
| bitmap := uint64(0x1FF) |
| { |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(8*len(validBits))-1) |
| wr.AppendWord(bitmap, 7) |
| wr.Finish() |
| f.Equal([]byte{0xFE, 0x00}, validBits) |
| } |
| { |
| wr := utils.NewFirstTimeBitmapWriter(validBits, 1, int64(8*len(validBits)-1)) |
| wr.AppendWord(bitmap, 8) |
| wr.Finish() |
| f.Equal([]byte{0xFE, 0x03}, validBits) |
| } |
| } |
| |
| func TestFirstTimeBitmapWriter(t *testing.T) { |
| suite.Run(t, new(FirstTimeBitmapWriterSuite)) |
| } |