blob: db99523458c2344009684361c84d1982286f5f11 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.prospector.plans;
import java.io.IOException;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Map;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.rya.api.domain.RyaStatement;
import org.apache.rya.prospector.domain.IndexEntry;
import org.apache.rya.prospector.domain.IntermediateProspect;
import org.apache.rya.prospector.mr.ProspectorCombiner;
import org.apache.rya.prospector.mr.ProspectorMapper;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
/**
* Contains the methods that perform each of the Map Reduce functions that result
* in the final {@link IndexEntry} values as well as a way to query those values
* once they have been written.
*/
public interface IndexWorkPlan {

    /** The string value of {@link XMLSchema#ANYURI}. */
    String URITYPE = XMLSchema.ANYURI.stringValue();

    /** A {@link LongWritable} holding the value {@code 1}. */
    LongWritable ONE = new LongWritable(1);

    /** A delimiter consisting of a single null character (U+0000). */
    String DELIM = "\u0000";

    /**
     * Invoked by {@link ProspectorMapper} to pull input from an Accumulo Rya
     * instance into the Map Reduce framework.
     * <p>
     * Implementations must use the values of a {@link RyaStatement} to derive
     * a number of {@link IntermediateProspect} and {@code LongWritable} pairs.
     * This is only useful for prospecting jobs that count things. Each
     * {@link IntermediateProspect} will be used as the key within
     * {@link #combine(IntermediateProspect, Iterable)} and
     * {@link #reduce(IntermediateProspect, Iterable, Date, org.apache.hadoop.mapreduce.Reducer.Context)}.
     *
     * @param ryaStatement - The RDF Statement that needs to be mapped.
     * @return A collection of intermediate keys and counts.
     */
    Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement);

    /**
     * Invoked by {@link ProspectorCombiner} to combine the results of
     * {@link ProspectorMapper} before the shuffle operation of the Map Reduce
     * framework.
     *
     * @param prospect - The intermediate prospect that is being combined.
     * @param counts - The counts that need to be combined together.
     * @return A collection containing the combined results.
     */
    Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(
            IntermediateProspect prospect,
            Iterable<LongWritable> counts);

    /**
     * Invoked by {@code ProspectorReducer} to reduce the counts to their final
     * states and write them to output via the {@code context}.
     *
     * @param prospect - The intermediate prospect that is being reduced.
     * @param counts - The counts that need to be reduced.
     * @param timestamp - The timestamp that identifies this Prospector run.
     * @param context - The reducer context the reduced values will be written to.
     * @throws IOException A problem was encountered while writing to the context.
     * @throws InterruptedException Writes to the context were interrupted.
     */
    void reduce(
            IntermediateProspect prospect,
            Iterable<LongWritable> counts,
            Date timestamp,
            Reducer.Context context) throws IOException, InterruptedException;

    /**
     * @return A unique name that indicates which {@link IndexEntry}s came from this plan.
     */
    String getIndexType();

    /**
     * TODO The general purpose of this method is unclear. It is used by the
     * count job to place a null delimiter between the parts of any
     * {@link IndexEntry} whose data section is two different pieces of
     * information joined together.
     *
     * @param indices - Presumably the pieces of information to be joined; TODO confirm.
     * @return Presumably the joined value; TODO confirm.
     */
    String getCompositeValue(List<String> indices);

    /**
     * Searches for {@link IndexEntry}s that have values matching the provided parameters.
     *
     * @param connector - The Accumulo Connector used to find the table holding the data.
     * @param tableName - The name of the table the Prospector results are stored within.
     * @param prospectTimes - Indicates which Prospect runs will be part of the query.
     * @param type - The name of the index the {@link IndexEntry}s are stored within.
     * @param index - The data portion of the {@link IndexEntry}s that may be returned.
     * @param dataType - The data type of the {@link IndexEntry}s that may be returned.
     * @param auths - The authorizations used to search for the entries.
     * @return The {@link IndexEntry}s that match the provided values.
     * @throws TableNotFoundException No table exists for {@code tableName}.
     */
    List<IndexEntry> query(
            Connector connector,
            String tableName,
            List<Long> prospectTimes,
            String type,
            String index,
            String dataType,
            String[] auths) throws TableNotFoundException;
}