blob: 0b1a86f5dc3ab0f95928b47f621347e0d89a0728 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.giraph.graph;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import java.io.IOException;
import java.util.List;
/**
* Use this to load data for a BSP application. Note that the InputSplit must
* also implement Writable. The InputSplits will determine the partitioning of
* vertices across the mappers, so keep that in consideration when implementing
* getSplits().
*
* @param <I> Vertex id
* @param <V> Vertex value
* @param <E> Edge value
*/
@SuppressWarnings("rawtypes")
public abstract class VertexInputFormat<I extends WritableComparable,
V extends Writable, E extends Writable, M extends Writable> {
/**
* Logically split the vertices for a graph processing application.
*
* Each {@link InputSplit} is then assigned to a worker for processing.
*
* <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
* input files are not physically split into chunks. For e.g. a split could
* be <i>&lt;input-file-path, start, offset&gt;</i> tuple. The InputFormat
* also creates the {@link VertexReader} to read the {@link InputSplit}.
*
* Also, the number of workers is a hint given to the developer to try to
* intelligently determine how many splits to create (if this is
* adjustable) at runtime.
*
* @param context Context of the job
* @param numWorkers Number of workers used for this job
* @return an array of {@link InputSplit}s for the job.
*/
public abstract List<InputSplit> getSplits(
JobContext context, int numWorkers)
throws IOException, InterruptedException;
/**
* Create a vertex reader for a given split. The framework will call
* {@link VertexReader#initialize(InputSplit, TaskAttemptContext)} before
* the split is used.
*
* @param split the split to be read
* @param context the information about the task
* @return a new record reader
* @throws IOException
* @throws InterruptedException
*/
public abstract VertexReader<I, V, E, M> createVertexReader(
InputSplit split,
TaskAttemptContext context) throws IOException;
}