blob: e8041a64c099625d59b9aa616adcd30c3a9b53ec [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compress_test
import (
"bytes"
"io"
"math/rand"
"testing"
"github.com/apache/arrow/go/v14/parquet/compress"
"github.com/stretchr/testify/assert"
)
const (
	// RandomDataSize is the byte length (3 MiB) of the pseudo-random
	// payload the tests in this file request from makeRandomData.
	RandomDataSize = 3 << 20
	// CompressibleDataSize is the byte length (8 MiB) of the repetitive
	// payload the tests in this file request from makeCompressibleData.
	CompressibleDataSize = 8 << 20
)
// makeRandomData returns a freshly allocated slice of size pseudo-random
// bytes. The generator is seeded with the fixed value 1234 on every call,
// so two calls with the same size produce identical contents.
func makeRandomData(size int) []byte {
	buf := make([]byte, size)
	src := rand.NewSource(1234)
	// (*rand.Rand).Read always fills the whole slice and returns a nil
	// error, so its results are intentionally discarded.
	_, _ = rand.New(src).Read(buf)
	return buf
}
// makeCompressibleData returns a slice of exactly size bytes made of a fixed
// sentence repeated back to back, truncated at the end if size is not a
// multiple of the sentence length. A size of 0 yields an empty slice.
func makeCompressibleData(size int) []byte {
	const base = "Apache Arrow is a cross-language development platform for in-memory data"

	out := make([]byte, size)
	// Seed the front of the buffer with (as much as fits of) the sentence.
	filled := copy(out, base)
	// Repeatedly copy the already-filled prefix into the remainder; the
	// filled region doubles each pass until the buffer is full.
	for filled < size {
		filled += copy(out[filled:], out[:filled])
	}
	return out
}
// TestErrorForUnimplemented checks that compress.GetCodec reports an error
// when asked for the Lzo or the Lz4 codec.
func TestErrorForUnimplemented(t *testing.T) {
	unimplemented := []compress.Compression{
		compress.Codecs.Lzo,
		compress.Codecs.Lz4,
	}
	for _, c := range unimplemented {
		_, err := compress.GetCodec(c)
		assert.Error(t, err)
	}
}
// TestCompressDataOneShot round-trips a CompressibleDataSize-byte repetitive
// payload through the one-shot Encode/Decode calls of each listed codec. For
// every codec it checks that Encode and Decode return slices starting at the
// first byte of the caller-supplied buffers, and that the decoded bytes equal
// the input.
func TestCompressDataOneShot(t *testing.T) {
	tests := []struct {
		c compress.Compression
	}{
		{compress.Codecs.Uncompressed},
		{compress.Codecs.Snappy},
		{compress.Codecs.Gzip},
		{compress.Codecs.Brotli},
		{compress.Codecs.Zstd},
		// {compress.Codecs.Lzo},
		// {compress.Codecs.Lz4},
	}

	for _, tt := range tests {
		t.Run(tt.c.String(), func(t *testing.T) {
			codec, err := compress.GetCodec(tt.c)
			// assert.NoError only records the failure; bail out explicitly so
			// a failed lookup is reported as such instead of panicking on the
			// codec method calls below.
			if !assert.NoError(t, err) {
				return
			}

			data := makeCompressibleData(CompressibleDataSize)

			buf := make([]byte, codec.CompressBound(int64(len(data))))
			compressed := codec.Encode(buf, data)
			// Guard the &compressed[0] index below against an empty result.
			if !assert.NotEmpty(t, compressed) {
				return
			}
			// The encoded output must start at the first byte of buf.
			assert.Same(t, &buf[0], &compressed[0])

			out := make([]byte, len(data))
			uncompressed := codec.Decode(out, compressed)
			// Guard the &uncompressed[0] index below against an empty result.
			if !assert.NotEmpty(t, uncompressed) {
				return
			}
			// The decoded output must start at the first byte of out.
			assert.Same(t, &out[0], &uncompressed[0])

			assert.Exactly(t, data, uncompressed)
		})
	}
}
// TestCompressReaderWriter round-trips a RandomDataSize-byte pseudo-random
// payload through the streaming API of each listed codec. The payload is
// written to the codec's writer in chunks of at most 1111 bytes, the writer
// is closed, and everything read back through the codec's reader must equal
// the original payload.
func TestCompressReaderWriter(t *testing.T) {
	tests := []struct {
		c compress.Compression
	}{
		{compress.Codecs.Uncompressed},
		{compress.Codecs.Snappy},
		{compress.Codecs.Gzip},
		{compress.Codecs.Brotli},
		{compress.Codecs.Zstd},
		// {compress.Codecs.Lzo},
		// {compress.Codecs.Lz4},
	}

	for _, tt := range tests {
		t.Run(tt.c.String(), func(t *testing.T) {
			codec, err := compress.GetCodec(tt.c)
			// assert.NoError only records the failure; stop here so a failed
			// lookup does not turn into a panic on the calls below.
			if !assert.NoError(t, err) {
				return
			}

			data := makeRandomData(RandomDataSize)

			var buf bytes.Buffer
			wr := codec.NewWriter(&buf)

			const chunkSize = 1111
			for input := data; len(input) > 0; {
				chunk := input
				if len(chunk) > chunkSize {
					chunk = chunk[:chunkSize]
				}
				n, err := wr.Write(chunk)
				// A failed write may consume zero bytes; without returning
				// here, input would never shrink and the loop would spin
				// forever.
				if !assert.NoError(t, err) {
					return
				}
				input = input[n:]
			}
			// Close can fail while finishing the stream, and reading back an
			// incomplete stream would only produce a misleading mismatch.
			if !assert.NoError(t, wr.Close()) {
				return
			}

			rdr := codec.NewReader(&buf)
			out, err := io.ReadAll(rdr)
			assert.NoError(t, err)
			assert.NoError(t, rdr.Close())
			assert.Exactly(t, data, out)
		})
	}
}