// blob: 72eee73a5ce31a07aa084bbb3fbe1d0798a1b07a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.hadoop.structure;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.tinkerpop.gremlin.hadoop.Constants;
import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopEdgeIterator;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopVertexIterator;
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Transaction;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.util.ElementHelper;
import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
import org.apache.tinkerpop.gremlin.util.iterator.IteratorUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.Iterator;
import java.util.Locale;
import java.util.Optional;
import java.util.stream.Stream;
/**
 * A {@link Graph} implementation backed by a {@link HadoopConfiguration}.
 * <p>
 * Element access is scan-based: {@link #vertices(Object...)} and {@link #edges(Object...)} iterate a
 * {@link HadoopVertexIterator} / {@link HadoopEdgeIterator} and filter the results by id. Vertex additions,
 * graph variables and transactions are not supported and the corresponding methods always throw.
 * Graph computers are obtained through {@link #compute(Class)} or {@link #compute()}.
 *
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
@Graph.OptIn(Graph.OptIn.SUITE_STRUCTURE_STANDARD)
@Graph.OptIn(Graph.OptIn.SUITE_PROCESS_STANDARD)
@Graph.OptIn(Graph.OptIn.SUITE_PROCESS_COMPUTER)
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_matchXa_hasXname_GarciaX__a_0writtenBy_b__a_0sungBy_bX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_matchXa_0sungBy_b__a_0sungBy_c__b_writtenBy_d__c_writtenBy_e__d_hasXname_George_HarisonX__e_hasXname_Bob_MarleyXX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_matchXa_0sungBy_b__a_0writtenBy_c__b_writtenBy_d__c_sungBy_d__d_hasXname_GarciaXX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_matchXa_0sungBy_b__a_0writtenBy_c__b_writtenBy_dX_whereXc_sungBy_dX_whereXd_hasXname_GarciaXX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_matchXa_knows_b__c_knows_bX",
        reason = "Giraph does a hard kill on failure and stops threads which stops test cases. Exception handling semantics are correct though.",
        computers = {"org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_matchXa_created_b__c_created_bX_selectXa_b_cX_byXnameX",
        reason = "Giraph does a hard kill on failure and stops threads which stops test cases. Exception handling semantics are correct though.",
        computers = {"org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$Traversals",
        method = "g_V_out_asXcX_matchXb_knows_a__c_created_eX_selectXcX",
        reason = "Giraph does a hard kill on failure and stops threads which stops test cases. Exception handling semantics are correct though.",
        computers = {"org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.CountTest$Traversals",
        method = "g_V_both_both_count",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.CountTest$Traversals",
        method = "g_V_repeatXoutX_timesX3X_count",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.CountTest$Traversals",
        method = "g_V_repeatXoutX_timesX8X_count",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.CountTest$Traversals",
        method = "g_V_repeatXoutX_timesX5X_asXaX_outXwrittenByX_asXbX_selectXa_bX_count",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.ProfileTest$Traversals",
        method = "grateful_V_out_out_profile",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.")
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.ProfileTest$Traversals",
        method = "grateful_V_out_out_profileXmetricsX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.")
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.GroupTest",
        method = "g_V_hasLabelXsongX_groupXaX_byXnameX_byXproperties_groupCount_byXlabelXX_out_capXaX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.GroupTest",
        method = "g_V_outXfollowedByX_group_byXsongTypeX_byXbothE_group_byXlabelX_byXweight_sumXX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.GroupTest",
        method = "g_V_repeatXbothXfollowedByXX_timesX2X_group_byXsongTypeX_byXcountX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.sideEffect.GroupTest",
        method = "g_V_repeatXbothXfollowedByXX_timesX2X_groupXaX_byXsongTypeX_byXcountX_capXaX",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.computer.GraphComputerTest",
        method = "shouldStartAndEndWorkersForVertexProgramAndMapReduce",
        reason = "Spark executes map and combine in a lazy fashion and thus, fails the blocking aspect of this test",
        computers = {"org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.TraversalInterruptionTest",
        method = "*",
        reason = "The interruption model in the test can't guarantee interruption at the right time with HadoopGraph.")
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.TraversalInterruptionComputerTest",
        method = "*",
        reason = "This test makes use of a sideEffect to enforce when a thread interruption is triggered and thus isn't applicable to HadoopGraph",
        computers = {"org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer", "org.apache.tinkerpop.gremlin.giraph.process.computer.GiraphGraphComputer"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$CountMatchTraversals",
        method = "g_V_matchXa_followedBy_count_isXgtX10XX_b__a_0followedBy_count_isXgtX10XX_bX_count",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
@Graph.OptOut(
        test = "org.apache.tinkerpop.gremlin.process.traversal.step.map.MatchTest$GreedyMatchTraversals",
        method = "g_V_matchXa_followedBy_count_isXgtX10XX_b__a_0followedBy_count_isXgtX10XX_bX_count",
        reason = "Hadoop-Gremlin is OLAP-oriented and for OLTP operations, linear-scan joins are required. This particular tests takes many minutes to execute.",
        computers = {"ALL"})
public final class HadoopGraph implements Graph {

    /** Logger shared by this graph implementation. */
    public static final Logger LOGGER = LoggerFactory.getLogger(HadoopGraph.class);

    /** Fallback configuration used when none is supplied; it only names this class as the graph implementation. */
    private static final Configuration EMPTY_CONFIGURATION = new BaseConfiguration() {{
        this.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    }};

    /** The configuration this graph was opened with, wrapped as a {@link HadoopConfiguration}. */
    protected final HadoopConfiguration configuration;

    private HadoopGraph(final Configuration configuration) {
        this.configuration = new HadoopConfiguration(configuration);
    }

    /**
     * Opens a graph using the built-in empty configuration.
     *
     * @return a new graph instance
     */
    public static HadoopGraph open() {
        return HadoopGraph.open(EMPTY_CONFIGURATION);
    }

    /**
     * Opens a graph using the supplied configuration.
     *
     * @param configuration the configuration to use; {@code null} falls back to the built-in empty configuration
     * @return a new graph instance
     */
    public static HadoopGraph open(final Configuration configuration) {
        return new HadoopGraph(Optional.ofNullable(configuration).orElse(EMPTY_CONFIGURATION));
    }

    /**
     * Opens a graph using a properties file.
     *
     * @param configurationFile location of the properties file, must not be {@code null}
     * @return a new graph instance
     * @throws ConfigurationException if the properties file cannot be loaded
     */
    public static HadoopGraph open(final String configurationFile) throws ConfigurationException {
        if (null == configurationFile) throw Graph.Exceptions.argumentCanNotBeNull("configurationFile");
        return open(new PropertiesConfiguration(configurationFile));
    }

    /**
     * Always fails: vertex additions are not supported by this graph.
     */
    @Override
    public Vertex addVertex(final Object... keyValues) {
        throw Exceptions.vertexAdditionsNotSupported();
    }

    /**
     * Instantiates the requested graph computer through its {@code (HadoopGraph)} constructor.
     *
     * @param graphComputerClass the computer to create; must be an {@link AbstractHadoopGraphComputer} subtype
     * @return a new computer bound to this graph
     * @throws IllegalArgumentException if the class is {@code null}, is not a Hadoop graph computer, or cannot
     *                                  be instantiated
     */
    @Override
    public <C extends GraphComputer> C compute(final Class<C> graphComputerClass) {
        if (null == graphComputerClass)
            throw Graph.Exceptions.argumentCanNotBeNull("graphComputerClass");
        // reject unsupported computers up front so the dedicated exception is not re-wrapped below
        if (!AbstractHadoopGraphComputer.class.isAssignableFrom(graphComputerClass))
            throw Graph.Exceptions.graphDoesNotSupportProvidedGraphComputer(graphComputerClass);
        try {
            return graphComputerClass.getConstructor(HadoopGraph.class).newInstance(this);
        } catch (final InvocationTargetException e) {
            // the reflective wrapper has no message of its own; surface the constructor's actual failure
            final Throwable failure = null == e.getCause() ? e : e.getCause();
            throw new IllegalArgumentException(failure.getMessage(), e);
        } catch (final Exception e) {
            throw new IllegalArgumentException(e.getMessage(), e);
        }
    }

    /**
     * Creates the graph computer named by {@link Constants#GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER}.
     *
     * @return a new computer bound to this graph
     * @throws IllegalArgumentException if no default graph computer is configured
     * @throws IllegalStateException    if the configured class cannot be loaded or instantiated
     */
    @Override
    public GraphComputer compute() {
        if (!this.configuration.containsKey(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER))
            throw new IllegalArgumentException("There is no default GraphComputer for HadoopGraph. Use HadoopGraph.compute(class) or gremlin.hadoop.defaultGraphComputer to specify the GraphComputer to use.");
        try {
            final String computerClassName = this.configuration.getString(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER);
            // compute(Class) verifies the type at runtime, so a wrongly configured class is still rejected there
            @SuppressWarnings("unchecked")
            final Class<? extends GraphComputer> computerClass = (Class<? extends GraphComputer>) Class.forName(computerClassName);
            return this.compute(computerClass);
        } catch (final Exception e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Always fails: graph variables are not supported by this graph.
     */
    @Override
    public Variables variables() {
        throw Exceptions.variablesNotSupported();
    }

    /**
     * @return the configuration this graph was opened with
     */
    @Override
    public HadoopConfiguration configuration() {
        return this.configuration;
    }

    /**
     * Renders the graph as {@code reader->writer} using the lower-cased simple names of the configured graph
     * reader and writer classes, or {@code no-reader} / {@code no-writer} when the respective key is absent.
     */
    @Override
    public String toString() {
        final String readerName = this.configuration.containsKey(Constants.GREMLIN_HADOOP_GRAPH_READER) ?
                this.configuration.getGraphReader().getSimpleName() :
                "no-reader";
        final String writerName = this.configuration.containsKey(Constants.GREMLIN_HADOOP_GRAPH_WRITER) ?
                this.configuration.getGraphWriter().getSimpleName() :
                "no-writer";
        // Locale.ROOT keeps the rendering stable regardless of the JVM's default locale
        return StringFactory.graphString(this, readerName.toLowerCase(Locale.ROOT) + "->" + writerName.toLowerCase(Locale.ROOT));
    }

    /**
     * Clears this graph's configuration.
     */
    @Override
    public void close() {
        this.configuration.clear();
    }

    /**
     * Always fails: transactions are not supported by this graph.
     */
    @Override
    public Transaction tx() {
        throw Exceptions.transactionsNotSupported();
    }

    /**
     * Returns the vertices matching the given arguments.
     *
     * @param vertexIds nothing (all vertices), a list of {@link Vertex} instances (returned as-is), or a list
     *                  of ids that all share the same class (vertices are scanned and filtered by id)
     * @return an iterator over the selected vertices
     * @throws IllegalStateException if the underlying vertex iterator cannot be created
     */
    @Override
    public Iterator<Vertex> vertices(final Object... vertexIds) {
        try {
            if (0 == vertexIds.length)
                return new HadoopVertexIterator(this);

            // the first argument decides how the whole list is interpreted; a uniform list is expected
            if (vertexIds[0] instanceof Vertex) {
                if (!Stream.of(vertexIds).allMatch(id -> id instanceof Vertex))
                    throw Graph.Exceptions.idArgsMustBeEitherIdOrElement();
                // no need to get the vertices again, so just flip them back - some implementations may want to
                // treat this as a refresh operation. that's not necessary for hadoopgraph.
                return Stream.of(vertexIds).map(id -> (Vertex) id).iterator();
            }

            final Class<?> firstClass = vertexIds[0].getClass();
            if (!Stream.of(vertexIds).map(Object::getClass).allMatch(firstClass::equals))
                throw Graph.Exceptions.idArgsMustBeEitherIdOrElement(); // todo: change exception to be ids of the same type
            return IteratorUtils.filter(new HadoopVertexIterator(this), vertex -> ElementHelper.idExists(vertex.id(), vertexIds));
        } catch (final IOException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * Returns the edges matching the given arguments.
     *
     * @param edgeIds nothing (all edges), a list of {@link Edge} instances (returned as-is), or a list of ids
     *                that all share the same class (edges are scanned and filtered by id)
     * @return an iterator over the selected edges
     * @throws IllegalStateException if the underlying edge iterator cannot be created
     */
    @Override
    public Iterator<Edge> edges(final Object... edgeIds) {
        try {
            if (0 == edgeIds.length)
                return new HadoopEdgeIterator(this);

            // the first argument decides how the whole list is interpreted; a uniform list is expected
            if (edgeIds[0] instanceof Edge) {
                if (!Stream.of(edgeIds).allMatch(id -> id instanceof Edge))
                    throw Graph.Exceptions.idArgsMustBeEitherIdOrElement();
                // no need to get the edges again, so just flip them back - some implementations may want to
                // treat this as a refresh operation. that's not necessary for hadoopgraph.
                return Stream.of(edgeIds).map(id -> (Edge) id).iterator();
            }

            final Class<?> firstClass = edgeIds[0].getClass();
            if (!Stream.of(edgeIds).map(Object::getClass).allMatch(firstClass::equals))
                throw Graph.Exceptions.idArgsMustBeEitherIdOrElement(); // todo: change exception to be ids of the same type
            return IteratorUtils.filter(new HadoopEdgeIterator(this), edge -> ElementHelper.idExists(edge.id(), edgeIds));
        } catch (final IOException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }

    /**
     * @return a fresh {@link HadoopGraphFeatures} describing what this graph supports
     */
    @Override
    public Features features() {
        return new HadoopGraphFeatures();
    }

    /**
     * Feature set advertised by {@link HadoopGraph}: every feature overridden here is reported as unsupported.
     */
    public static class HadoopGraphFeatures implements Features {

        /** Graph-level features: no transactions and no variables of any value type. */
        @Override
        public GraphFeatures graph() {
            return new GraphFeatures() {
                @Override
                public boolean supportsTransactions() {
                    return false;
                }

                @Override
                public boolean supportsThreadedTransactions() {
                    return false;
                }

                @Override
                public Features.VariableFeatures variables() {
                    return new Features.VariableFeatures() {
                        @Override
                        public boolean supportsVariables() {
                            return false;
                        }

                        @Override
                        public boolean supportsBooleanValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsByteValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsDoubleValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsFloatValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsIntegerValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsLongValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsMapValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsMixedListValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsBooleanArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsByteArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsDoubleArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsFloatArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsIntegerArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsStringArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsLongArrayValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsSerializableValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsStringValues() {
                            return false;
                        }

                        @Override
                        public boolean supportsUniformListValues() {
                            return false;
                        }
                    };
                }
            };
        }

        /** Edge features: no additions, removals, property changes or custom ids. */
        @Override
        public EdgeFeatures edge() {
            return new EdgeFeatures() {
                @Override
                public boolean supportsAddEdges() {
                    return false;
                }

                @Override
                public boolean supportsRemoveEdges() {
                    return false;
                }

                @Override
                public boolean supportsAddProperty() {
                    return false;
                }

                @Override
                public boolean supportsRemoveProperty() {
                    return false;
                }

                @Override
                public boolean supportsCustomIds() {
                    return false;
                }
            };
        }

        /** Vertex features: no additions, removals, property changes or custom ids. */
        @Override
        public VertexFeatures vertex() {
            return new VertexFeatures() {
                @Override
                public boolean supportsAddVertices() {
                    return false;
                }

                @Override
                public boolean supportsRemoveVertices() {
                    return false;
                }

                @Override
                public boolean supportsAddProperty() {
                    return false;
                }

                @Override
                public boolean supportsRemoveProperty() {
                    return false;
                }

                @Override
                public boolean supportsCustomIds() {
                    return false;
                }

                @Override
                public Features.VertexPropertyFeatures properties() {
                    return new Features.VertexPropertyFeatures() {
                        @Override
                        public boolean supportsRemoveProperty() {
                            return false;
                        }

                        @Override
                        public boolean supportsCustomIds() {
                            return false;
                        }
                    };
                }
            };
        }

        @Override
        public String toString() {
            return StringFactory.featureString(this);
        }
    }
}