extras/rya.prospector/src/main/java/org/apache/rya/prospector/plans/IndexWorkPlan.java - rya - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.rya.prospector.plans;

 import java.io.IOException;
 import java.util.Collection;
 import java.util.Date;
 import java.util.List;
 import java.util.Map;

 import org.apache.accumulo.core.client.Connector;
 import org.apache.accumulo.core.client.TableNotFoundException;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.rya.api.domain.RyaStatement;
 import org.apache.rya.prospector.domain.IndexEntry;
 import org.apache.rya.prospector.domain.IntermediateProspect;
 import org.apache.rya.prospector.mr.ProspectorCombiner;
 import org.apache.rya.prospector.mr.ProspectorMapper;
 import org.eclipse.rdf4j.model.vocabulary.XMLSchema;

 /**
  * Describes one Prospector index: the map, combine and reduce steps that
  * produce its final {@link IndexEntry} values, plus the query used to read
  * those values back after they have been written.
  */
 public interface IndexWorkPlan {

     /** String form of the XML Schema {@code anyURI} datatype. */
     String URITYPE = XMLSchema.ANYURI.stringValue();

     /** A {@link LongWritable} initialised to 1. */
     LongWritable ONE = new LongWritable(1);

     /**
      * The null character (U+0000).
      * NOTE(review): presumably the delimiter that
      * {@link #getCompositeValue(List)} places between composite parts - confirm.
      */
     String DELIM = "\u0000";

     /**
      * Invoked by {@link ProspectorMapper} to pull input from an Accumulo Rya
      * instance into the Map Reduce framework.
      * <p>
      * Implementations must derive a number of {@link IntermediateProspect} and
      * {@code LongWritable} pairs from the values of a {@link RyaStatement}.
      * This is only useful for prospecting jobs that count things. Each
      * {@link IntermediateProspect} is later used as the key within
      * {@link #combine(IntermediateProspect, Iterable)} and
      * {@link #reduce(IntermediateProspect, Iterable, Date, org.apache.hadoop.mapreduce.Reducer.Context)}.
      *
      * @param ryaStatement - The RDF Statement that needs to be mapped.
      * @return A collection of intermediate keys and counts.
      */
     Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement);

     /**
      * Invoked by {@link ProspectorCombiner} to combine the results of
      * {@link ProspectorMapper} before the shuffle operation of the Map Reduce
      * framework.
      *
      * @param prospect - The intermediate prospect that is being combined.
      * @param counts - The counts that need to be combined together.
      * @return A collection containing the combined results.
      */
     Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(
             IntermediateProspect prospect,
             Iterable<LongWritable> counts);

     /**
      * Invoked by {@code ProspectorReducer} to reduce the counts to their final
      * states and write them to output via the {@code context}.
      *
      * @param prospect - The intermediate prospect that is being reduced.
      * @param counts - The counts that need to be reduced.
      * @param timestamp - The timestamp that identifies this Prospector run.
      * @param context - The reducer context the reduced values will be written to.
      * @throws IOException A problem was encountered while writing to the context.
      * @throws InterruptedException Writes to the context were interrupted.
      */
     void reduce(
             IntermediateProspect prospect,
             Iterable<LongWritable> counts,
             Date timestamp,
             Reducer.Context context) throws IOException, InterruptedException;

     /**
      * @return A unique name that indicates which {@link IndexEntry}s came from this plan.
      */
     String getIndexType();

     /**
      * TODO The general purpose of this method is unclear. The count job uses
      *      it to place a null delimiter between the parts of any
      *      {@link IndexEntry} whose data section is two different pieces of
      *      information joined together.
      */
     String getCompositeValue(List<String> indices);

     /**
      * Searches for {@link IndexEntry}s whose values match the provided parameters.
      *
      * @param connector - The Accumulo Connector used to find the table holding the data.
      * @param tableName - The name of the table the Prospector results are stored within.
      * @param prospectTimes - Indicates which Prospect runs will be part of the query.
      * @param type - The name of the index the {@link IndexEntry}s are stored within.
      * @param index - The data portion of the {@link IndexEntry}s that may be returned.
      * @param dataType - The data type of the {@link IndexEntry}s that may be returned.
      * @param auths - The authorizations used to search for the entries.
      * @return The {@link IndexEntry}s that match the provided values.
      * @throws TableNotFoundException No table exists for {@code tableName}.
      */
     List<IndexEntry> query(
             Connector connector,
             String tableName,
             List<Long> prospectTimes,
             String type,
             String index,
             String dataType,
             String[] auths) throws TableNotFoundException;
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.rya.prospector.plans;

	import java.io.IOException;
	import java.util.Collection;
	import java.util.Date;
	import java.util.List;
	import java.util.Map;

	import org.apache.accumulo.core.client.Connector;
	import org.apache.accumulo.core.client.TableNotFoundException;
	import org.apache.hadoop.io.LongWritable;
	import org.apache.hadoop.mapreduce.Reducer;
	import org.apache.rya.api.domain.RyaStatement;
	import org.apache.rya.prospector.domain.IndexEntry;
	import org.apache.rya.prospector.domain.IntermediateProspect;
	import org.apache.rya.prospector.mr.ProspectorCombiner;
	import org.apache.rya.prospector.mr.ProspectorMapper;
	import org.eclipse.rdf4j.model.vocabulary.XMLSchema;

	/**
	 * A plan for building one kind of Prospector index. It supplies the Map
	 * Reduce functions that result in the final {@link IndexEntry} values and a
	 * way to query those values once they have been written.
	 */
	public interface IndexWorkPlan {

	    /** The XML Schema {@code anyURI} datatype rendered as a string. */
	    String URITYPE = XMLSchema.ANYURI.stringValue();

	    /** A {@link LongWritable} constructed with the value 1. */
	    LongWritable ONE = new LongWritable(1);

	    /**
	     * A one-character string holding the null character (U+0000).
	     * NOTE(review): likely the null delimiter mentioned on
	     * {@link #getCompositeValue(List)} - verify against implementations.
	     */
	    String DELIM = "\u0000";

	    /**
	     * Called by {@link ProspectorMapper}. Pulls input from an Accumulo Rya
	     * instance into the Map Reduce framework.
	     * <p>
	     * The values of the supplied {@link RyaStatement} must be used to derive
	     * {@link IntermediateProspect} and {@code LongWritable} pairs, which is
	     * only useful for prospecting jobs that count things. The
	     * {@link IntermediateProspect} half of each pair becomes the key within
	     * {@link #combine(IntermediateProspect, Iterable)} and
	     * {@link #reduce(IntermediateProspect, Iterable, Date, org.apache.hadoop.mapreduce.Reducer.Context)}.
	     *
	     * @param ryaStatement - The RDF Statement that needs to be mapped.
	     * @return A collection of intermediate keys and counts.
	     */
	    Collection<Map.Entry<IntermediateProspect, LongWritable>> map(RyaStatement ryaStatement);

	    /**
	     * Called by {@link ProspectorCombiner}. Combines the results of
	     * {@link ProspectorMapper} before the shuffle operation of the Map
	     * Reduce framework.
	     *
	     * @param prospect - The intermediate prospect that is being combined.
	     * @param counts - The counts that need to be combined together.
	     * @return A collection containing the combined results.
	     */
	    Collection<Map.Entry<IntermediateProspect, LongWritable>> combine(
	            IntermediateProspect prospect,
	            Iterable<LongWritable> counts);

	    /**
	     * Called by {@code ProspectorReducer}. Reduces the counts to their final
	     * states and writes them to output via the {@code context}.
	     *
	     * @param prospect - The intermediate prospect that is being reduced.
	     * @param counts - The counts that need to be reduced.
	     * @param timestamp - The timestamp that identifies this Prospector run.
	     * @param context - The reducer context the reduced values will be written to.
	     * @throws IOException A problem was encountered while writing to the context.
	     * @throws InterruptedException Writes to the context were interrupted.
	     */
	    void reduce(
	            IntermediateProspect prospect,
	            Iterable<LongWritable> counts,
	            Date timestamp,
	            Reducer.Context context) throws IOException, InterruptedException;

	    /**
	     * @return A unique name that indicates which {@link IndexEntry}s came from this plan.
	     */
	    String getIndexType();

	    /**
	     * TODO It is not clear what this is for in general. The count job uses it
	     * to put a null delimiter between the parts of any {@link IndexEntry}
	     * whose data section is two different pieces of information together.
	     */
	    String getCompositeValue(List<String> indices);

	    /**
	     * Looks up the {@link IndexEntry}s that have values matching the provided parameters.
	     *
	     * @param connector - The Accumulo Connector used to find the table holding the data.
	     * @param tableName - The name of the table the Prospector results are stored within.
	     * @param prospectTimes - Indicates which Prospect runs will be part of the query.
	     * @param type - The name of the index the {@link IndexEntry}s are stored within.
	     * @param index - The data portion of the {@link IndexEntry}s that may be returned.
	     * @param dataType - The data type of the {@link IndexEntry}s that may be returned.
	     * @param auths - The authorizations used to search for the entries.
	     * @return The {@link IndexEntry}s that match the provided values.
	     * @throws TableNotFoundException No table exists for {@code tableName}.
	     */
	    List<IndexEntry> query(
	            Connector connector,
	            String tableName,
	            List<Long> prospectTimes,
	            String type,
	            String index,
	            String dataType,
	            String[] auths) throws TableNotFoundException;
	}