| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| */ |
| |
| package org.apache.hadoop.io.compress.bzip2; |
| |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.nio.charset.StandardCharsets; |
| |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.io.compress.BZip2Codec; |
| |
| import static org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream.MIN_BLOCKSIZE; |
| import static org.apache.hadoop.util.Preconditions.checkArgument; |
| |
| /** |
| * A writer that simplifies creating BZip2 compressed text data for testing |
| * purposes. |
| */ |
| public final class BZip2TextFileWriter implements Closeable { |
| |
| // Use minimum block size to reduce amount of data to require to be written |
| // to CBZip2OutputStream before a new block is created. |
| private static final int BLOCK_SIZE_100K = MIN_BLOCKSIZE; |
| |
| /** |
| * The amount of bytes of run-length encoded data that needs to be written |
| * to this writer in order for the next byte written starts a new BZip2 block. |
| */ |
| public static final int BLOCK_SIZE = |
| // The + 1 is needed because of how CBZip2OutputStream checks whether the |
| // last offset written is less than allowable block size. Because the last |
| // offset is one less of the amount of bytes written to the block, we need |
| // to write an extra byte to trigger writing a new block. |
| CBZip2OutputStream.getAllowableBlockSize(BLOCK_SIZE_100K) + 1; |
| |
| private final CBZip2OutputStream out; |
| |
| public BZip2TextFileWriter(Path path, Configuration conf) throws IOException { |
| this(path.getFileSystem(conf).create(path)); |
| } |
| |
| public BZip2TextFileWriter(OutputStream rawOut) throws IOException { |
| try { |
| BZip2Codec.writeHeader(rawOut); |
| out = new CBZip2OutputStream(rawOut, BLOCK_SIZE_100K); |
| } catch (Throwable e) { |
| rawOut.close(); |
| throw e; |
| } |
| } |
| |
| public void writeManyRecords(int totalSize, int numRecords, byte[] delimiter) |
| throws IOException { |
| checkArgument(numRecords > 0); |
| checkArgument(delimiter.length > 0); |
| |
| int minRecordSize = totalSize / numRecords; |
| checkArgument(minRecordSize >= delimiter.length); |
| |
| int lastRecordExtraSize = totalSize % numRecords; |
| |
| for (int i = 0; i < numRecords - 1; i++) { |
| writeRecord(minRecordSize, delimiter); |
| } |
| writeRecord(minRecordSize + lastRecordExtraSize, delimiter); |
| } |
| |
| public void writeRecord(int totalSize, byte[] delimiter) throws IOException { |
| checkArgument(delimiter.length > 0); |
| checkArgument(totalSize >= delimiter.length); |
| |
| int contentSize = totalSize - delimiter.length; |
| for (int i = 0; i < contentSize; i++) { |
| // Alternate between characters so that internals of CBZip2OutputStream |
| // cannot condensed the written bytes using run-length encoding. This |
| // allows the caller to use #BLOCK_SIZE in order to know whether the next |
| // write will end just before the end of the current block, or exceed it, |
| // and by how much. |
| out.write(i % 2 == 0 ? 'a' : 'b'); |
| } |
| write(delimiter); |
| } |
| |
| public void write(String bytes) throws IOException { |
| write(bytes.getBytes(StandardCharsets.UTF_8)); |
| } |
| |
| public void write(byte[] bytes) throws IOException { |
| out.write(bytes); |
| } |
| |
| @Override |
| public void close() throws IOException { |
| out.close(); |
| } |
| } |