blob: 864ccf790203fb7fdb565bf4ee8c19df72858240 [file] [log] [blame]
package org.apache.stanbol.enhancer.engine.disambiguation.freebase.graph;
/*
* Created on Jul 9, 2005
*
* Copyright (c) 2005, the JUNG Project and the Regents of the University
* of California
* All rights reserved.
*
* This software is open-source under the BSD license; see either
* "license.txt" or
* http://jung.sourceforge.net/license.txt for a description.
*/
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.apache.commons.collections15.FunctorException;
import edu.uci.ics.jung.algorithms.shortestpath.Distance;
import edu.uci.ics.jung.algorithms.util.BasicMapEntry;
import edu.uci.ics.jung.algorithms.util.MapBinaryHeap;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.Hypergraph;
/**
* <p>
* Calculates distances in a specified graph, using Dijkstra's single-source-shortest-path algorithm. All edge
* weights in the graph must be nonnegative; if any edge with negative weight is found in the course of
* calculating distances, an <code>IllegalArgumentException</code> will be thrown. (Note: this exception will
* only be thrown when such an edge would be used to update a given tentative distance; the algorithm does not
* check for negative-weight edges "up front".)
*
* <p>
* Distances and partial results are optionally cached (by this instance) for later reference. Thus, if the 10
* closest vertices to a specified source vertex are known, calculating the 20 closest vertices does not
* require starting Dijkstra's algorithm over from scratch.
* </p>
*
* <p>
* Distances are stored as double-precision values. If a vertex is not reachable from the specified source
* vertex, no distance is stored. <b>This is new behavior with version 1.4</b>; the previous behavior was to
* store a value of <code>Double.POSITIVE_INFINITY</code>. This change gives the algorithm an approximate
* complexity of O(kD log k), where k is either the number of requested targets or the number of reachable
* vertices (whichever is smaller), and D is the average degree of a vertex.
* </p>
*
* <p>
* The elements in the maps returned by <code>getDistanceMap</code> are ordered (that is, returned by the
* iterator) by nondecreasing distance from <code>source</code>.
* </p>
*
* <p>
* Users are cautioned that distances calculated should be assumed to be invalidated by changes to the graph,
* and should invoke <code>reset()</code> when appropriate so that the distances can be recalculated.
* </p>
*
* @author Joshua O'Madadhain
* @author Tom Nelson converted to jung2
*/
public class CustomDijkstraDistance<V,E> implements Distance<V> {
protected Hypergraph<V,E> g;
protected Transformer<E,? extends Number> nev;
protected Map<V,SourceData> sourceMap; // a map of source vertices to an instance of SourceData
protected boolean cached;
protected double max_distance;
protected int max_targets;
/**
* <p>
* Creates an instance of <code>DijkstraShortestPath</code> for the specified graph and the specified
* method of extracting weights from edges, which caches results locally if and only if
* <code>cached</code> is <code>true</code>.
*
* @param g
* the graph on which distances will be calculated
* @param nev
* the class responsible for returning weights for edges
* @param cached
* specifies whether the results are to be cached
* @return
*/
public CustomDijkstraDistance(Hypergraph<V,E> g, Transformer<E,? extends Number> nev, boolean cached) {
this.g = g;
this.nev = nev;
this.sourceMap = new HashMap<V,SourceData>();
this.cached = cached;
this.max_distance = Double.POSITIVE_INFINITY;
this.max_targets = Integer.MAX_VALUE;
}
/**
* <p>
* Creates an instance of <code>DijkstraShortestPath</code> for the specified graph and the specified
* method of extracting weights from edges, which caches results locally.
*
* @param g
* the graph on which distances will be calculated
* @param nev
* the class responsible for returning weights for edges
*/
public CustomDijkstraDistance(Hypergraph<V,E> g, Transformer<E,? extends Number> nev) {
this(g, nev, true);
}
/**
* <p>
* Creates an instance of <code>DijkstraShortestPath</code> for the specified unweighted graph (that is,
* all weights 1) which caches results locally.
*
* @param g
* the graph on which distances will be calculated
*/
@SuppressWarnings("unchecked")
public CustomDijkstraDistance(Graph<V,E> g) {
this(g, new ConstantTransformer(1), true);
}
/**
* <p>
* Creates an instance of <code>DijkstraShortestPath</code> for the specified unweighted graph (that is,
* all weights 1) which caches results locally.
*
* @param g
* the graph on which distances will be calculated
* @param cached
* specifies whether the results are to be cached
*/
@SuppressWarnings("unchecked")
public CustomDijkstraDistance(Graph<V,E> g, boolean cached) {
this(g, new ConstantTransformer(1), cached);
}
/**
* Implements Dijkstra's single-source shortest-path algorithm for weighted graphs. Uses a
* <code>MapBinaryHeap</code> as the priority queue, which gives this algorithm a time complexity of O(m
* lg n) (m = # of edges, n = # of vertices). This algorithm will terminate when any of the following have
* occurred (in order of priority):
* <ul>
* <li>the distance to the specified target (if any) has been found
* <li>no more vertices are reachable
* <li>the specified # of distances have been found, or the maximum distance desired has been exceeded
* <li>all distances have been found
* </ul>
*
* @param source
* the vertex from which distances are to be measured
* @param numDests
* the number of distances to measure
* @param targets
* the set of vertices to which distances are to be measured
*/
protected LinkedHashMap<V,Number> singleSourceShortestPath(V source, Collection<V> targets, int numDests) {
SourceData sd = getSourceData(source);
Set<V> to_get = new HashSet<V>();
if (targets != null) {
to_get.addAll(targets);
Set<V> existing_dists = sd.distances.keySet();
for (V o : targets) {
if (existing_dists.contains(o)) to_get.remove(o);
}
}
// if we've exceeded the max distance or max # of distances we're willing to calculate, or
// if we already have all the distances we need,
// terminate
if (sd.reached_max || (targets != null && to_get.isEmpty()) || (sd.distances.size() >= numDests)) {
return sd.distances;
}
while (!sd.unknownVertices.isEmpty() && (sd.distances.size() < numDests || !to_get.isEmpty())) {
Map.Entry<V,Number> p = sd.getNextVertex();
V v = p.getKey();
double v_dist = p.getValue().doubleValue();
to_get.remove(v);
if (v_dist > this.max_distance) {
// we're done; put this vertex back in so that we're not including
// a distance beyond what we specified
sd.restoreVertex(v, v_dist);
sd.reached_max = true;
break;
}
sd.dist_reached = v_dist;
if (sd.distances.size() >= this.max_targets) {
sd.reached_max = true;
break;
}
for (E e : getEdgesToCheck(v)) {
for (V w : g.getIncidentVertices(e)) {
if (!sd.distances.containsKey(w)) {
double edge_weight = nev.transform(e).doubleValue();
if (edge_weight < 0) throw new IllegalArgumentException(
"Edges weights must be non-negative");
double new_dist = v_dist + edge_weight;
if (!sd.estimatedDistances.containsKey(w)) {
sd.createRecord(w, e, new_dist);
} else {
double w_dist = ((Double) sd.estimatedDistances.get(w)).doubleValue();
if (new_dist < w_dist) // update tentative distance & path for w
sd.update(w, e, new_dist);
}
}
}
}
}
return sd.distances;
}
protected SourceData getSourceData(V source) {
SourceData sd = sourceMap.get(source);
if (sd == null) sd = new SourceData(source);
return sd;
}
/**
* Returns the set of edges incident to <code>v</code> that should be tested. By default, this is the set
* of outgoing edges for instances of <code>Graph</code>, the set of incident edges for instances of
* <code>Hypergraph</code>, and is otherwise undefined.
*/
protected Collection<E> getEdgesToCheck(V v) {
if (g instanceof Graph) return ((Graph<V,E>) g).getOutEdges(v);
else return g.getIncidentEdges(v);
}
/**
* Returns the length of a shortest path from the source to the target vertex, or null if the target is
* not reachable from the source. If either vertex is not in the graph for which this instance was
* created, throws <code>IllegalArgumentException</code>.
*
* @see #getDistanceMap(Object)
* @see #getDistanceMap(Object,int)
*/
public Number getDistance(V source, V target) {
if (g.containsVertex(target) == false) throw new IllegalArgumentException("Specified target vertex "
+ target
+ " is not part of graph "
+ g);
if (g.containsVertex(source) == false) throw new IllegalArgumentException("Specified source vertex "
+ source
+ " is not part of graph "
+ g);
Set<V> targets = new HashSet<V>();
targets.add(target);
Map<V,Number> distanceMap = getDistanceMap(source, targets);
return distanceMap.get(target);
}
/**
* Returns a {@code Map} from each element {@code t} of {@code targets} to the shortest-path distance from
* {@code source} to {@code t}.
*/
public Map<V,Number> getDistanceMap(V source, Collection<V> targets) {
if (g.containsVertex(source) == false) throw new IllegalArgumentException("Specified source vertex "
+ source
+ " is not part of graph "
+ g);
if (targets.size() > max_targets) throw new IllegalArgumentException(
"size of target set exceeds maximum " + "number of targets allowed: " + this.max_targets);
Map<V,Number> distanceMap = singleSourceShortestPath(source, targets,
Math.min(g.getVertexCount(), max_targets));
if (!cached) reset(source);
return distanceMap;
}
/**
* <p>
* Returns a <code>LinkedHashMap</code> which maps each vertex in the graph (including the
* <code>source</code> vertex) to its distance from the <code>source</code> vertex. The map's iterator
* will return the elements in order of increasing distance from <code>source</code>.
* </p>
*
* <p>
* The size of the map returned will be the number of vertices reachable from <code>source</code>.
* </p>
*
* @see #getDistanceMap(Object,int)
* @see #getDistance(Object,Object)
* @param source
* the vertex from which distances are measured
*/
public Map<V,Number> getDistanceMap(V source) {
return getDistanceMap(source, Math.min(g.getVertexCount(), max_targets));
}
/**
* <p>
* Returns a <code>LinkedHashMap</code> which maps each of the closest <code>numDist</code> vertices to
* the <code>source</code> vertex in the graph (including the <code>source</code> vertex) to its distance
* from the <code>source</code> vertex. Throws an <code>IllegalArgumentException</code> if
* <code>source</code> is not in this instance's graph, or if <code>numDests</code> is either less than 1
* or greater than the number of vertices in the graph.
* </p>
*
* <p>
* The size of the map returned will be the smaller of <code>numDests</code> and the number of vertices
* reachable from <code>source</code>.
*
* @see #getDistanceMap(Object)
* @see #getDistance(Object,Object)
* @param source
* the vertex from which distances are measured
* @param numDests
* the number of vertices for which to measure distances
*/
public LinkedHashMap<V,Number> getDistanceMap(V source, int numDests) {
if (g.getVertices().contains(source) == false) {
throw new IllegalArgumentException("Specified source vertex " + source + " is not part of graph "
+ g);
}
if (numDests < 1 || numDests > g.getVertexCount()) throw new IllegalArgumentException(
"numDests must be >= 1 " + "and <= g.numVertices()");
if (numDests > max_targets) throw new IllegalArgumentException("numDests must be <= the maximum "
+ "number of targets allowed: "
+ this.max_targets);
LinkedHashMap<V,Number> distanceMap = singleSourceShortestPath(source, null, numDests);
if (!cached) reset(source);
return distanceMap;
}
/**
* Allows the user to specify the maximum distance that this instance will calculate. Any vertices past
* this distance will effectively be unreachable from the source, in the sense that the algorithm will not
* calculate the distance to any vertices which are farther away than this distance. A negative value for
* <code>max_dist</code> will ensure that no further distances are calculated.
*
* <p>
* This can be useful for limiting the amount of time and space used by this algorithm if the graph is
* very large.
* </p>
*
* <p>
* Note: if this instance has already calculated distances greater than <code>max_dist</code>, and the
* results are cached, those results will still be valid and available; this limit applies only to
* subsequent distance calculations.
* </p>
*
* @see #setMaxTargets(int)
*/
public void setMaxDistance(double max_dist) {
this.max_distance = max_dist;
for (V v : sourceMap.keySet()) {
SourceData sd = sourceMap.get(v);
sd.reached_max = (this.max_distance <= sd.dist_reached) || (sd.distances.size() >= max_targets);
}
}
/**
* Allows the user to specify the maximum number of target vertices per source vertex for which this
* instance will calculate distances. Once this threshold is reached, any further vertices will
* effectively be unreachable from the source, in the sense that the algorithm will not calculate the
* distance to any more vertices. A negative value for <code>max_targets</code> will ensure that no
* further distances are calculated.
*
* <p>
* This can be useful for limiting the amount of time and space used by this algorithm if the graph is
* very large.
* </p>
*
* <p>
* Note: if this instance has already calculated distances to a greater number of targets than
* <code>max_targets</code>, and the results are cached, those results will still be valid and available;
* this limit applies only to subsequent distance calculations.
* </p>
*
* @see #setMaxDistance(double)
*/
public void setMaxTargets(int max_targets) {
this.max_targets = max_targets;
for (V v : sourceMap.keySet()) {
SourceData sd = sourceMap.get(v);
sd.reached_max = (this.max_distance <= sd.dist_reached) || (sd.distances.size() >= max_targets);
}
}
/**
* Clears all stored distances for this instance. Should be called whenever the graph is modified (edge
* weights changed or edges added/removed). If the user knows that some currently calculated distances are
* unaffected by a change, <code>reset(V)</code> may be appropriate instead.
*
* @see #reset(Object)
*/
public void reset() {
sourceMap = new HashMap<V,SourceData>();
}
/**
* Specifies whether or not this instance of <code>DijkstraShortestPath</code> should cache its results
* (final and partial) for future reference.
*
* @param enable
* <code>true</code> if the results are to be cached, and <code>false</code> otherwise
*/
public void enableCaching(boolean enable) {
this.cached = enable;
}
/**
* Clears all stored distances for the specified source vertex <code>source</code>. Should be called
* whenever the stored distances from this vertex are invalidated by changes to the graph.
*
* @see #reset()
*/
public void reset(V source) {
sourceMap.put(source, null);
}
/**
* Compares according to distances, so that the BinaryHeap knows how to order the tree.
*/
protected static class VertexComparator<V> implements Comparator<V> {
private Map<V,Number> distances;
protected VertexComparator(Map<V,Number> distances) {
this.distances = distances;
}
public int compare(V o1, V o2) {
return ((Double) distances.get(o1)).compareTo((Double) distances.get(o2));
}
}
/**
* For a given source vertex, holds the estimated and final distances, tentative and final assignments of
* incoming edges on the shortest path from the source vertex, and a priority queue (ordered by estimated
* distance) of the vertices for which distances are unknown.
*
* @author Joshua O'Madadhain
*/
protected class SourceData {
protected LinkedHashMap<V,Number> distances;
protected Map<V,Number> estimatedDistances;
protected MapBinaryHeap<V> unknownVertices;
protected boolean reached_max = false;
protected double dist_reached = 0;
protected SourceData(V source) {
distances = new LinkedHashMap<V,Number>();
estimatedDistances = new HashMap<V,Number>();
unknownVertices = new MapBinaryHeap<V>(new VertexComparator<V>(estimatedDistances));
sourceMap.put(source, this);
// initialize priority queue
estimatedDistances.put(source, new Double(0)); // distance from source to itself is 0
unknownVertices.add(source);
reached_max = false;
dist_reached = 0;
}
protected Map.Entry<V,Number> getNextVertex() {
V v = unknownVertices.remove();
Double dist = (Double) estimatedDistances.remove(v);
distances.put(v, dist);
return new BasicMapEntry<V,Number>(v, dist);
}
protected void update(V dest, E tentative_edge, double new_dist) {
estimatedDistances.put(dest, new_dist);
unknownVertices.update(dest);
}
protected void createRecord(V w, E e, double new_dist) {
estimatedDistances.put(w, new_dist);
unknownVertices.add(w);
}
protected void restoreVertex(V v, double dist) {
estimatedDistances.put(v, dist);
unknownVertices.add(v);
distances.remove(v);
}
}
public static interface Transformer<I,O> {
/**
* Transforms the input object (leaving it unchanged) into some output object.
*
* @param input
* the object to be transformed, should be left unchanged
* @return a transformed object
* @throws ClassCastException
* (runtime) if the input is the wrong class
* @throws IllegalArgumentException
* (runtime) if the input is invalid
* @throws FunctorException
* (runtime) if the transform cannot be completed
*/
public O transform(I input);
}
public static class ConstantTransformer<T> implements Transformer<Object,T> {
private T constant;
public ConstantTransformer(T constantToReturn) {
super();
constant = constantToReturn;
}
public T transform(Object input) {
return constant;
}
}
}