blob: 7326f9f36e3bc6369cd5b721eab59c9aeaec3ff4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig;
import java.io.IOException;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.BufferedPositionedInputStream;
public interface LoadFunc {
/**
* This interface is used to implement functions to parse records
* from a dataset.
*
* @author database-systems@research.yahoo
*
*/
/**
* Specifies a portion of an InputStream to read tuples. Because the
* starting and ending offsets may not be on record boundaries it is up to
* the implementor to deal with figuring out the actual starting and ending
* offsets in such a way that an arbitrarily sliced up file will be processed
* in its entirety.
* <p>
* A common way of handling slices in the middle of records is to start at
* the given offset and, if the offset is not zero, skip to the end of the
* first record (which may be a partial record) before reading tuples.
* Reading continues until a tuple has been read that ends at an offset past
* the ending offset.
* <p>
* <b>The load function should not do any buffering on the input stream</b>. Buffering will
* cause the offsets returned by is.getPos() to be unreliable.
*
* @param fileName the name of the file to be read
* @param is the stream representing the file to be processed, and which can also provide its position.
* @param offset the offset to start reading tuples.
* @param end the ending offset for reading.
* @throws IOException
*/
public abstract void bindTo(String fileName, BufferedPositionedInputStream is, long offset, long end) throws IOException;
/**
* Retrieves the next tuple to be processed.
* @return the next tuple to be processed or null if there are no more tuples
* to be processed.
* @throws IOException
*/
public abstract Tuple getNext() throws IOException;
}