blob: 915f064c5c40d9612b036072f1a39ab3cb250c68 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.io;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.pig.data.InterSedes;
import org.apache.pig.data.InterSedesFactory;
import org.apache.pig.data.Tuple;
import org.apache.hadoop.io.file.tfile.TFile.Writer;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.conf.Configuration;
/**
 * A record writer used to write data compatible with {@link InterRecordWriter}.
 * It uses the default InterSedes object for serialization.
 * <p>
 * Each record is appended to a TFile as an empty key followed by the
 * serialized tuple; the key passed to {@link #write} is ignored.
 */
public class TFileRecordWriter extends
        RecordWriter<org.apache.hadoop.io.WritableComparable, Tuple> {

    /** Value handed to the TFile writer as its minimum block size argument. */
    private static final int MIN_BLOCK_SIZE = 1024 * 1024;

    /** Zero-length key written in front of every tuple. */
    private final BytesWritable KEY0 = new BytesWritable(new byte[0]);

    /** Serializer used to write each tuple into the value stream. */
    private static final InterSedes sedes = InterSedesFactory.getInterSedesInstance();

    /**
     * the TFile writer that records are appended to
     */
    Writer writer = null;

    /**
     * the underlying file stream the TFile writer writes to
     */
    FSDataOutputStream fileOut = null;

    /**
     * Creates the output file and opens a TFile writer on top of it.
     *
     * @param file  path of the file to create; it is created with
     *              overwrite disabled
     * @param codec compression name passed through to the TFile writer
     * @param conf  configuration used to resolve the file system and
     *              passed through to the TFile writer
     * @throws IOException if the file cannot be created or the TFile writer
     *                     cannot be opened
     */
    public TFileRecordWriter(Path file, String codec, Configuration conf)
            throws IOException {
        FileSystem fs = file.getFileSystem(conf);
        fileOut = fs.create(file, false);
        boolean writerOpened = false;
        try {
            writer = new Writer(fileOut, MIN_BLOCK_SIZE, codec, null, conf);
            writerOpened = true;
        } finally {
            if (!writerOpened) {
                // The caller never receives this object when construction
                // fails, so nobody else could close the stream we just opened.
                try {
                    fileOut.close();
                } catch (IOException ignored) {
                    // Deliberately ignored: the exception that aborted
                    // construction is the one the caller needs to see.
                }
            }
        }
    }

    /**
     * Closes the TFile writer and then the underlying file stream.
     * The stream is closed even when closing the TFile writer fails.
     *
     * @see org.apache.hadoop.mapreduce.RecordWriter#close(org.apache.hadoop.mapreduce.TaskAttemptContext)
     */
    @Override
    public void close(TaskAttemptContext arg0) throws IOException,
            InterruptedException {
        try {
            if (writer != null) {
                writer.close();
            }
        } finally {
            // Must run regardless of the outcome above, otherwise a failing
            // writer.close() would leave the file stream open.
            if (fileOut != null) {
                fileOut.close();
            }
        }
    }

    /**
     * Appends one record: an empty key followed by the serialized tuple.
     *
     * @param wc ignored; only the tuple is written
     * @param t  the tuple to serialize as the record value
     * @throws IOException if writing the key or the value fails
     *
     * @see org.apache.hadoop.mapreduce.RecordWriter#write(java.lang.Object, java.lang.Object)
     */
    @Override
    public void write(WritableComparable wc, Tuple t) throws IOException,
            InterruptedException {
        // Every record gets the same zero-length key.
        DataOutputStream outputKey = writer.prepareAppendKey(KEY0.getLength());
        try {
            outputKey.write(KEY0.getBytes(), 0, KEY0.getLength());
        } finally {
            outputKey.close();
        }

        // we really only want to write the tuple (value) out here;
        // -1 because the serialized length is not known in advance
        DataOutputStream outputValue = writer.prepareAppendValue(-1);
        try {
            sedes.writeDatum(outputValue, t);
        } finally {
            outputValue.close();
        }
    }
}