blob: eb4c67b0f01365f75395684185f9f8b1291322bc [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package encoding
import (
"github.com/apache/arrow/go/v6/arrow/bitutil"
"github.com/apache/arrow/go/v6/parquet"
"github.com/apache/arrow/go/v6/parquet/internal/utils"
)
const (
	// boolBufSize is the size, in bytes, of the scratch bitmap that booleans
	// are packed into before being appended to the encoder's output.
	boolBufSize = 1024
	// boolsInBuf is the number of boolean values (one bit each) that fit in
	// a scratch bitmap of boolBufSize bytes.
	boolsInBuf = 8 * boolBufSize
)
// PlainBooleanEncoder encodes bools as a bitmap as per the Plain Encoding.
//
// Values are first packed into a fixed-size scratch bitmap; Put appends the
// scratch bytes to the encoder's output whenever the bitmap writer cannot
// take all of the remaining values.
type PlainBooleanEncoder struct {
encoder
// bitsBuffer is the scratch bitmap. It is allocated with boolBufSize bytes
// on the first call to Put.
bitsBuffer []byte
// wr writes bits into bitsBuffer. It is created on the first call to Put
// and is nil until then.
wr utils.BitmapWriter
}
// Type returns parquet.Types.Boolean, the parquet type this encoder handles.
// The receiver is not used, so the result is the same for every instance.
func (PlainBooleanEncoder) Type() parquet.Type {
return parquet.Types.Boolean
}
// Put packs the values of in, one bit per value, into the encoder.
//
// Bits are staged in a scratch bitmap of boolBufSize bytes. The scratch
// bitmap and its writer are created lazily on the first call. Whenever the
// writer accepts fewer values than were offered, the whole scratch bitmap is
// appended to the encoder's output and the writer is rewound before the
// remaining values are offered again.
func (enc *PlainBooleanEncoder) Put(in []bool) {
	if enc.bitsBuffer == nil {
		enc.bitsBuffer = make([]byte, boolBufSize)
	}
	if enc.wr == nil {
		enc.wr = utils.NewBitmapWriter(enc.bitsBuffer, 0, boolsInBuf)
	}

	for {
		// The return value is used as the count of values consumed from in.
		written := enc.wr.AppendBools(in)
		if written >= len(in) {
			return
		}

		// The writer could not take everything: emit the scratch bitmap,
		// start over at bit 0 and continue with the values that are left.
		enc.wr.Finish()
		enc.append(enc.bitsBuffer)
		enc.wr.Reset(0, boolsInBuf)
		in = in[written:]
	}
}
// PutSpaced encodes only the non-null values of in.
//
// validBits is a validity bitmap and validBitsOffset is the bit offset at
// which the entry for in[0] is found. The values are first compacted into a
// temporary slice via spacedCompress, and the compacted prefix is then
// encoded with Put.
func (enc *PlainBooleanEncoder) PutSpaced(in []bool, validBits []byte, validBitsOffset int64) {
	// Sized for the worst case where every value is valid.
	compacted := make([]bool, len(in))
	kept := spacedCompress(in, compacted, validBits, validBitsOffset)
	enc.Put(compacted[:kept])
}
// EstimatedDataEncodedSize returns the number of bytes that have been
// buffered so far: the bytes already in the sink plus the bytes needed to
// hold the bits currently staged in the scratch bitmap.
//
// The bitmap writer is only created by Put, so when it is still nil there
// are no staged bits and only the sink length is reported. This makes the
// method safe to call on an encoder that has not received any values yet.
func (enc *PlainBooleanEncoder) EstimatedDataEncodedSize() int64 {
	if enc.wr == nil {
		return int64(enc.sink.Len())
	}
	staged := bitutil.BytesForBits(int64(enc.wr.Pos()))
	return int64(enc.sink.Len() + int(staged))
}
// FlushValues appends any bits still staged in the scratch bitmap to the
// sink and returns the buffered data. The responsibility is on the caller to
// release the buffer memory.
//
// The returned error is always nil.
func (enc *PlainBooleanEncoder) FlushValues() (Buffer, error) {
	// The bitmap writer is created lazily by Put; if Put was never called
	// there is nothing staged and the sink is finished as-is.
	if enc.wr != nil && enc.wr.Pos() > 0 {
		pending := int(enc.wr.Pos())
		enc.append(enc.bitsBuffer[:bitutil.BytesForBits(int64(pending))])
		// Rewind the writer so that values encoded after this flush start at
		// bit 0 again; otherwise the bits just emitted would be counted and
		// written a second time by the next flush.
		enc.wr.Reset(0, boolsInBuf)
	}
	return enc.sink.Finish(), nil
}