// blob: 3f303cb1e2f17d8608745ccad2b5cdb480322291 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.builtin;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.pig.LoadCaster;
import org.apache.pig.LoadFunc;
import org.apache.pig.PigConfiguration;
import org.apache.pig.PigException;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextInputFormat;
import org.apache.pig.bzip2r.Bzip2TextInputFormat;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.joda.time.DateTime;
/**
* This load function simply creates a tuple for each line of text that has a
* single chararray field that
* contains the line of text.
*/
public class TextLoader extends LoadFunc implements LoadCaster {
protected RecordReader in = null;
private TupleFactory mTupleFactory = TupleFactory.getInstance();
private String loadLocation;
protected final Log mLog = LogFactory.getLog(getClass());
// it determines whether to depend on pig's own Bzip2TextInputFormat or
// to simply depend on hadoop for handling bzip2 inputs
private boolean bzipinput_usehadoops ;
@Override
public Tuple getNext() throws IOException {
try {
boolean notDone = in.nextKeyValue();
if (!notDone) {
return null;
}
Text value = (Text) in.getCurrentValue();
byte[] ba = value.getBytes();
// make a copy of the bytes representing the input since
// TextInputFormat will reuse the byte array
return mTupleFactory.newTuple(new DataByteArray(ba, 0, value.getLength()));
} catch (InterruptedException e) {
throw new IOException("Error getting input");
}
}
/**
* TextLoader does not support conversion to Boolean
* @throws IOException if the value cannot be cast.
*/
@Override
public Boolean bytesToBoolean(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Boolean.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to Integer
* @throws IOException if the value cannot be cast.
*/
@Override
public Integer bytesToInteger(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Integer.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to Long
* @throws IOException if the value cannot be cast.
*/
@Override
public Long bytesToLong(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Long.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to Float
* @throws IOException if the value cannot be cast.
*/
@Override
public Float bytesToFloat(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Float.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to Double
* @throws IOException if the value cannot be cast.
*/
@Override
public Double bytesToDouble(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Double.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to DateTime
* @throws IOException if the value cannot be cast.
*/
@Override
public DateTime bytesToDateTime(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to DateTime.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* Cast data from bytes to chararray value.
* @param b byte array to be cast.
* @return String value.
* @throws IOException if the value cannot be cast.
*/
@Override
public String bytesToCharArray(byte[] b) throws IOException {
return new String(b);
}
@Override
public Map<String, Object> bytesToMap(byte[] b, ResourceFieldSchema schema) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Map.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to Tuple
* @throws IOException if the value cannot be cast.
*/
@Override
public Tuple bytesToTuple(byte[] b, ResourceFieldSchema schema) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Tuple.";
throw new ExecException(msg, errCode, PigException.BUG);
}
/**
* TextLoader does not support conversion to Bag
* @throws IOException if the value cannot be cast.
*/
public DataBag bytesToBag(byte[] b, ResourceFieldSchema schema) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to Bag.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(DataBag bag) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Bag.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(String s) throws IOException {
return s.getBytes();
}
public byte[] toBytes(Double d) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Double.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(Float f) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Float.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(Boolean b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Boolean.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(Integer i) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Integer.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(Long l) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Long.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(DateTime dt) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from DateTime.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(Map<String, Object> m) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Map.";
throw new ExecException(msg, errCode, PigException.BUG);
}
public byte[] toBytes(Tuple t) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion from Tuple.";
throw new ExecException(msg, errCode, PigException.BUG);
}
@Override
public BigInteger bytesToBigInteger(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to BigInteger.";
throw new ExecException(msg, errCode, PigException.BUG);
}
@Override
public BigDecimal bytesToBigDecimal(byte[] b) throws IOException {
int errCode = 2109;
String msg = "TextLoader does not support conversion to BigDecimal.";
throw new ExecException(msg, errCode, PigException.BUG);
}
@Override
public InputFormat getInputFormat() {
if((loadLocation.endsWith(".bz2") || loadLocation.endsWith(".bz"))
&& !bzipinput_usehadoops ) {
mLog.info("Using Bzip2TextInputFormat");
return new Bzip2TextInputFormat();
} else {
mLog.info("Using PigTextInputFormat");
return new PigTextInputFormat();
}
}
@Override
public LoadCaster getLoadCaster() {
return this;
}
@Override
public void prepareToRead(RecordReader reader, PigSplit split) {
in = reader;
}
@Override
public void setLocation(String location, Job job) throws IOException {
loadLocation = location;
FileInputFormat.setInputPaths(job, location);
bzipinput_usehadoops = job.getConfiguration().getBoolean(
PigConfiguration.PIG_BZIP_USE_HADOOP_INPUTFORMAT,
true );
}
}