blob: 3a934653ec711311e0c4fe24646af34518b84ebb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package encoding
import (
"github.com/apache/arrow/go/v6/arrow/bitutil"
"github.com/apache/arrow/go/v6/parquet"
"github.com/apache/arrow/go/v6/parquet/internal/utils"
"golang.org/x/xerrors"
)
// PlainBooleanDecoder is for the Plain Encoding type, there is no
// dictionary decoding for bools.
//
// Values are read one bit at a time from the embedded decoder's data, so in
// addition to the remaining bytes the decoder tracks how far into the current
// byte it has already read.
type PlainBooleanDecoder struct {
// embedded base decoder; Decode reads and updates its data and nvals fields
decoder
// bitOffset is the bit position, within the first byte of the remaining
// data, of the next value to decode. Zero means the decoder is positioned
// at a byte boundary.
bitOffset int
}
// Type reports the parquet physical type this decoder produces, which is
// always parquet.Types.Boolean.
func (PlainBooleanDecoder) Type() parquet.Type { return parquet.Types.Boolean }
// Decode fills out with bools decoded from the data at the current point
// or until we reach the end of the data.
//
// At most min(len(out), remaining values) bools are produced. Bits are taken
// from each byte starting at the least significant bit. The decoder remembers
// its position inside a partially consumed byte, so consecutive calls may
// request any number of values, including counts that are not multiples of 8.
//
// Returns the number of values decoded
func (dec *PlainBooleanDecoder) Decode(out []bool) (int, error) {
	max := utils.MinInt(len(out), dec.nvals)

	// unalignedExtract copies single bits from the current byte into out,
	// beginning at index i. It stops at the end of that byte or as soon as
	// max values have been written, whichever comes first, so it can never
	// write beyond what the caller asked for. Once every bit of the byte has
	// been consumed the data is moved forward and the bit offset is reset.
	// It returns the next unwritten index of out.
	unalignedExtract := func(i int) int {
		for ; dec.bitOffset < 8 && i < max; i, dec.bitOffset = i+1, dec.bitOffset+1 {
			out[i] = (dec.data[0] & byte(1<<dec.bitOffset)) != 0
		}
		if dec.bitOffset == 8 {
			// this byte is fully consumed, continue with the next one
			dec.bitOffset = 0
			dec.data = dec.data[1:]
		}
		return i
	}

	// if we aren't at a byte boundary, then get bools until we hit
	// a byte boundary with the bit offset (or run out of requested values).
	i := 0
	if dec.bitOffset != 0 {
		i = unalignedExtract(i)
	}

	// determine the number of full bytes worth of bits we can decode
	// given the number of values we still want to decode.
	bitsRemain := max - i
	batch := (bitsRemain / 8) * 8
	if batch > 0 { // only go in here if there's at least one full byte to decode
		// convert all of the whole bytes in one call to the bulk helper
		alignedBytes := bitutil.BytesForBits(int64(batch))
		utils.BytesToBools(dec.data[:alignedBytes], out[i:])

		dec.data = dec.data[alignedBytes:]
		i += int(alignedBytes) * 8
	}

	// grab any trailing bits now that we've got our aligned bytes; this is
	// a no-op when all requested values have already been produced.
	unalignedExtract(i)

	dec.nvals -= max
	return max, nil
}
// DecodeSpaced behaves like Decode, except that room is left in out for null
// entries as described by the validBits bitmap and validBitsOffset.
//
// When nullCount is not positive this is exactly Decode(out). Otherwise the
// len(out)-nullCount non-null values are decoded into the front of out, and
// out is then handed to spacedExpand whose result is returned as the count.
// An error is returned if Decode fails or yields a different number of values
// than expected.
func (dec *PlainBooleanDecoder) DecodeSpaced(out []bool, nullCount int, validBits []byte, validBitsOffset int64) (int, error) {
	if nullCount <= 0 {
		// no nulls, so there is nothing to space out
		return dec.Decode(out)
	}

	nonNull := len(out) - nullCount
	decoded, err := dec.Decode(out[:nonNull])
	switch {
	case err != nil:
		return 0, err
	case decoded != nonNull:
		return decoded, xerrors.New("parquet: boolean decoder: number of values / definition levels read did not match")
	}

	return spacedExpand(out, nullCount, validBits, validBitsOffset), nil
}