/**
* Copyright 2012 Twitter, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet.hadoop;

import java.io.Closeable;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.hadoop.api.WriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;

/**
* Write records to a Parquet file.
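 *
 * <p>A minimal usage sketch. {@code MyWriteSupport} and the record type {@code MyRecord}
 * are hypothetical, application-provided names used only for illustration; they are not
 * part of this API.
 * <pre>{@code
 *   ParquetWriter<MyRecord> writer = new ParquetWriter<MyRecord>(
 *       new Path("/tmp/data.parquet"),
 *       new MyWriteSupport(),
 *       CompressionCodecName.SNAPPY,
 *       ParquetWriter.DEFAULT_BLOCK_SIZE,
 *       ParquetWriter.DEFAULT_PAGE_SIZE);
 *   try {
 *     for (MyRecord record : records) {
 *       writer.write(record);
 *     }
 *   } finally {
 *     writer.close(); // flushes buffered data and writes the file footer
 *   }
 * }</pre>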
*/
public class ParquetWriter<T> implements Closeable {

  /** Default row group (block) size: 128 MB. */
  public static final int DEFAULT_BLOCK_SIZE = 128 * 1024 * 1024;
  /** Default page size: 1 MB. */
  public static final int DEFAULT_PAGE_SIZE = 1 * 1024 * 1024;

  private final InternalParquetRecordWriter<T> writer;

  /**
   * Create a new ParquetWriter with dictionary encoding and schema validation disabled.
   *
   * @param file the file to create
   * @param writeSupport the implementation to write a record to a RecordConsumer
   * @param compressionCodecName the compression codec to use
   * @param blockSize the block size threshold
   * @param pageSize the page size threshold
   * @throws IOException if the file can not be created
   * @see ParquetWriter#ParquetWriter(Path, WriteSupport, CompressionCodecName, int, int, boolean, boolean)
   */
  public ParquetWriter(Path file, WriteSupport<T> writeSupport,
      CompressionCodecName compressionCodecName, int blockSize, int pageSize) throws IOException {
    this(file, writeSupport, compressionCodecName, blockSize, pageSize, false, false);
  }

  /**
   * Create a new ParquetWriter.
   *
   * @param file the file to create
   * @param writeSupport the implementation to write a record to a RecordConsumer
   * @param compressionCodecName the compression codec to use
   * @param blockSize the block size threshold
   * @param pageSize the page size threshold
   * @param enableDictionary whether to enable dictionary encoding
   * @param validating whether to validate records against the schema while writing
   * @throws IOException if the file can not be created
   */
  public ParquetWriter(
      Path file,
      WriteSupport<T> writeSupport,
      CompressionCodecName compressionCodecName,
      int blockSize,
      int pageSize,
      boolean enableDictionary,
      boolean validating) throws IOException {
    Configuration conf = new Configuration();

    // Let the WriteSupport declare the file schema and any extra metadata for the footer.
    WriteSupport.WriteContext writeContext = writeSupport.init(conf);
    MessageType schema = writeContext.getSchema();

    // Create the target file and write the Parquet header (magic bytes).
    ParquetFileWriter fileWriter = new ParquetFileWriter(conf, schema, file);
    fileWriter.start();

    // Build a compressor for the requested codec and hand everything to the record writer.
    CodecFactory codecFactory = new CodecFactory(conf);
    CodecFactory.BytesCompressor compressor = codecFactory.getCompressor(compressionCodecName, 0);
    this.writer = new InternalParquetRecordWriter<T>(fileWriter, writeSupport, schema,
        writeContext.getExtraMetaData(), blockSize, pageSize, compressor, enableDictionary, validating);
  }

  /**
   * Create a new ParquetWriter with default settings: a 128 MB block size, a 1 MB page size,
   * no compression, and dictionary encoding disabled.
   *
   * @param file the file to create
   * @param writeSupport the implementation to write a record to a RecordConsumer
   * @throws IOException if the file can not be created
   */
  public ParquetWriter(Path file, WriteSupport<T> writeSupport) throws IOException {
    this(file, writeSupport, CompressionCodecName.UNCOMPRESSED, DEFAULT_BLOCK_SIZE, DEFAULT_PAGE_SIZE);
  }

  /**
   * Write the given record to the file, starting a new row group when the buffered data
   * crosses the block size threshold.
   *
   * @param object the record to write
   * @throws IOException if there is an error while writing
   */
  public void write(T object) throws IOException {
    try {
      writer.write(object);
    } catch (InterruptedException e) {
      // Surface interruption of the underlying writer as an IOException to callers.
      throw new IOException(e);
    }
  }

  /**
   * Flush any buffered data, write the file footer, and close the file.
   *
   * @throws IOException if there is an error while writing or closing
   */
  @Override
  public void close() throws IOException {
    try {
      writer.close();
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
}