// blob: da63560ddc124198816404f560b5de756dbc61cc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization;
import org.apache.commons.configuration.Configuration;
import org.apache.tinkerpop.gremlin.TestHelper;
import org.apache.tinkerpop.gremlin.hadoop.Constants;
import org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat;
import org.apache.tinkerpop.gremlin.process.computer.traversal.step.map.TraversalVertexProgramStep;
import org.apache.tinkerpop.gremlin.process.computer.traversal.strategy.optimization.MessagePassingReductionStrategy;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.GraphTraversalSource;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
import org.apache.tinkerpop.gremlin.spark.AbstractSparkTest;
import org.apache.tinkerpop.gremlin.spark.process.computer.SparkGraphComputer;
import org.apache.tinkerpop.gremlin.spark.process.computer.SparkHadoopGraphProvider;
import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD;
import org.apache.tinkerpop.gremlin.structure.Column;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.T;
import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoPool;
import org.apache.tinkerpop.gremlin.structure.io.gryo.GryoVersion;
import org.apache.tinkerpop.gremlin.structure.util.GraphFactory;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.Supplier;
import static org.apache.tinkerpop.gremlin.structure.Column.keys;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
* @author Marko A. Rodriguez (http://markorodriguez.com)
*/
@RunWith(Parameterized.class)
public class SparkSingleIterationStrategyTest extends AbstractSparkTest {
/**
 * Supplies the parameter rows for this parameterized test: one row per Gryo version,
 * each made of a display label and the matching {@link GryoVersion} constant.
 *
 * @return the rows, in the order (label, version)
 */
@Parameterized.Parameters(name = "expect({0})")
public static Iterable<Object[]> data() {
    final Object[] gryoV1Row = {"V1d0", GryoVersion.V1_0};
    final Object[] gryoV3Row = {"V3d0", GryoVersion.V3_0};
    return Arrays.asList(gryoV1Row, gryoV3Row);
}
/** Label taken from column 0 of a {@code data()} row; it is the {@code {0}} in the test display name. */
@Parameterized.Parameter(0)
public String name;

/** Gryo version taken from column 1 of a {@code data()} row; written into the graph configuration by the test. */
@Parameterized.Parameter(1)
public GryoVersion version;
/**
 * Checks which traversals get flagged for single-iteration execution on Spark.
 *
 * <p>The test runs in two phases against the same configuration. In both phases
 * {@link SparkInterceptorStrategy} is removed and {@link SparkSingleIterationStrategy} is asserted to be
 * present; the phases differ only in whether {@link MessagePassingReductionStrategy} is removed (phase 1)
 * or explicitly added (phase 2). Each {@code test(true, ...)} call expects the skip-partitioner and
 * skip-graph-cache flags to be set on the compiled traversal, each {@code test(false, ...)} call expects
 * them to be absent/false. Calls that pass a {@code Long} additionally compare it with the first result.
 *
 * @throws Exception if setting up the test data directory or evaluating a traversal fails
 */
@Test
public void shouldSuccessfullyEvaluateSingleIterationTraversals() throws Exception {
    final String outputLocation = TestHelper.makeTestDataDirectory(SparkSingleIterationStrategyTest.class, UUID.randomUUID().toString());
    final Configuration configuration = getBaseConfiguration();
    // NOTE(review): the input data set is always the v3d0 file, even when `version` is V1_0 -- confirm this is intended.
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, outputLocation);
    configuration.setProperty(Constants.GREMLIN_HADOOP_DEFAULT_GRAPH_COMPUTER, SparkGraphComputer.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty(GryoPool.CONFIG_IO_GRYO_VERSION, version.name());

    /////////// PHASE 1: WITHOUT MessagePassingReductionStrategy, FEWER TRAVERSALS ARE EXPECTED TO BE SINGLE-ITERATION
    Graph graph = GraphFactory.open(configuration);
    GraphTraversalSource g = graph.traversal().withComputer().withoutStrategies(SparkInterceptorStrategy.class, MessagePassingReductionStrategy.class);
    // Strategy set-up sanity checks: both the registered strategies and the strategies actually applied to a traversal.
    assertFalse(g.getStrategies().toList().contains(SparkInterceptorStrategy.instance()));
    assertFalse(g.V().count().explain().getStrategyTraversals().stream().anyMatch(pair -> pair.getValue0() instanceof SparkInterceptorStrategy));
    assertFalse(g.getStrategies().toList().contains(MessagePassingReductionStrategy.instance()));
    assertFalse(g.V().count().explain().getStrategyTraversals().stream().anyMatch(pair -> pair.getValue0() instanceof MessagePassingReductionStrategy));
    assertTrue(g.getStrategies().toList().contains(SparkSingleIterationStrategy.instance()));
    assertTrue(g.V().count().explain().getStrategyTraversals().stream().anyMatch(pair -> pair.getValue0() instanceof SparkSingleIterationStrategy));
    // expected to be flagged single-iteration
    test(true, g.V().limit(10));
    test(true, g.V().values("age").groupCount());
    test(true, g.V().groupCount().by(__.out().count()));
    test(true, g.V().outE());
    test(true, 6L, g.V().count());
    test(true, 6L, g.V().out().count());
    test(true, 6L, g.V().outE().inV().count());
    // expected NOT to be flagged single-iteration
    test(false, 6L, g.V().local(__.inE()).count());
    test(false, g.V().outE().inV());
    test(false, g.V().both());
    test(false, 12L, g.V().both().count());
    test(false, g.V().out().id());
    test(false, 2L, g.V().out().out().count());
    test(false, 6L, g.V().in().count());
    test(false, 6L, g.V().inE().count());

    /////////// PHASE 2: WITH MessagePassingReductionStrategy, MORE TRAVERSALS ARE EXPECTED TO BE SINGLE-ITERATION
    graph = GraphFactory.open(configuration);
    g = graph.traversal().withComputer().withoutStrategies(SparkInterceptorStrategy.class).withStrategies(MessagePassingReductionStrategy.instance());
    assertFalse(g.getStrategies().toList().contains(SparkInterceptorStrategy.instance()));
    assertFalse(g.V().count().explain().getStrategyTraversals().stream().anyMatch(pair -> pair.getValue0() instanceof SparkInterceptorStrategy));
    assertTrue(g.getStrategies().toList().contains(MessagePassingReductionStrategy.instance()));
    assertTrue(g.V().count().explain().getStrategyTraversals().stream().anyMatch(pair -> pair.getValue0() instanceof MessagePassingReductionStrategy));
    assertTrue(g.getStrategies().toList().contains(SparkSingleIterationStrategy.instance()));
    assertTrue(g.V().count().explain().getStrategyTraversals().stream().anyMatch(pair -> pair.getValue0() instanceof SparkSingleIterationStrategy));
    // expected to be flagged single-iteration
    test(true, g.V().limit(10));
    test(true, g.V().values("age").groupCount());
    test(true, g.V().groupCount().by(__.out().count()));
    test(true, g.V().outE());
    test(true, 6L, g.V().outE().values("weight").count());
    test(true, 6L, g.V().inE().values("weight").count());
    test(true, 12L, g.V().bothE().values("weight").count());
    test(true, g.V().bothE().values("weight"));
    test(true, g.V().bothE().values("weight").limit(2));
    test(true, 6L, g.V().count());
    test(true, 6L, g.V().id().count());
    test(true, 6L, g.V().identity().outE().identity().count());
    test(true, 6L, g.V().identity().outE().has("weight").count());
    test(true, 6L, g.V().out().count());
    test(true, 6L, g.V().outE().inV().count());
    test(true, 6L, g.V().outE().inV().id().count());
    test(true, 2L, g.V().outE().inV().id().groupCount().select(Column.values).unfold().dedup().count());
    test(true, g.V().out().id());
    test(true, 6L, g.V().outE().valueMap().count());
    test(true, g.V().outE().valueMap());
    test(true, 6L, g.V().inE().valueMap().count());
    test(true, g.V().inE().valueMap());
    test(true, 12L, g.V().bothE().valueMap().count());
    test(true, g.V().bothE().valueMap());
    test(true, 6L, g.V().inE().id().count());
    test(true, 6L, g.V().outE().count());
    test(true, 4L, g.V().outE().inV().id().dedup().count());
    test(true, 4L, g.V().filter(__.in()).count());
    test(true, 6L, g.V().sideEffect(__.in()).count());
    test(true, 6L, g.V().map(__.constant("hello")).count());
    test(true, g.V().groupCount());
    test(true, g.V().groupCount("x"));
    test(true, g.V().groupCount("x").cap("x"));
    test(true, g.V().id().groupCount("x").cap("x"));
    test(true, g.V().outE().groupCount());
    test(true, g.V().outE().groupCount().by("weight"));
    test(true, g.V().inE().id().groupCount());
    test(true, g.V().inE().values("weight").groupCount());
    test(true, 6L, g.V().outE().outV().count());
    test(true, g.V().out().id().groupCount("x"));
    test(true, g.V().inE().values("weight").groupCount("x"));
    test(true, 6L, g.V().in().count());
    test(true, 12L, g.V().both().count());
    test(true, 6L, g.V().flatMap(__.in()).count());
    test(true, 4L, g.V().map(__.in()).count());
    test(true, 6L, g.V().inE().count());
    test(true, 4L, g.V().outE().inV().dedup().count());
    // expected NOT to be flagged single-iteration
    test(false, 6L, g.V().as("a").outE().inV().as("b").id().dedup("a", "b").by(T.id).count());
    test(false, 6L, g.V().local(__.inE()).count());
    test(false, 4L, g.V().outE().inV().dedup().by("name").count());
    test(false, 6L, g.V().local(__.in()).count());
    test(false, g.V().outE().inV());
    test(false, g.V().both());
    test(false, g.V().outE().inV().dedup());
    test(false, 2L, g.V().out().out().count());
    test(false, 6L, g.V().as("a").map(__.both()).select("a").count());
    test(false, g.V().out().values("name"));
    test(false, g.V().out().properties("name"));
    test(false, g.V().out().valueMap());
    test(false, 6L, g.V().as("a").outE().inV().values("name").as("b").dedup("a", "b").count());
    test(false, 2L, g.V().outE().inV().groupCount().select(Column.values).unfold().dedup().count());
    test(false, g.V().out().groupCount("x"));
    test(false, g.V().out().groupCount("x").cap("x"));
    test(false, 6L, g.V().both().groupCount("x").cap("x").select(keys).unfold().count());
    test(false, g.V().outE().inV().groupCount());
    test(false, g.V().outE().unfold().inV().groupCount());
    test(false, g.V().outE().inV().groupCount().by("name"));
    test(false, g.V().outE().inV().tree());
    test(false, g.V().outE().inV().id().tree());
    test(false, g.V().inE().groupCount());
    test(false, g.V().inE().groupCount().by("weight"));
    test(false, g.V().in().values("name").groupCount());
    test(false, g.V().out().groupCount("x"));
    test(false, g.V().in().groupCount("x"));
    test(false, g.V().both().groupCount("x").cap("x"));
}
/**
 * Convenience overload for traversals whose result value is not checked: only the single-iteration
 * expectation is verified (the traversal is still handed to the three-argument overload).
 *
 * @param singleIteration the single-iteration expectation passed through to the three-argument overload
 * @param traversal       the traversal to verify
 * @param <R>             the traversal's result type
 */
private static <R> void test(boolean singleIteration, final Traversal<?, R> traversal) {
    // A null expected result tells the three-argument overload to skip the result comparison.
    final R noExpectedResult = null;
    test(singleIteration, noExpectedResult, traversal);
}
/**
 * Applies strategies to {@code traversal}, then asserts that the computer configuration of its first
 * {@link TraversalVertexProgramStep} carries the expected skip-partitioner and skip-graph-cache values
 * (a missing entry counts as {@code false}). Afterwards the traversal is executed and, if
 * {@code expectedResult} is non-null, its first result is compared with it.
 *
 * <p>Every failure message includes the compiled traversal so a failing call site can be identified
 * without relying on stack-trace line numbers.
 *
 * @param singleIteration expected value of both skip flags
 * @param expectedResult  expected first result, or {@code null} to skip the result comparison
 * @param traversal       the traversal to compile, inspect and execute
 * @param <R>             the traversal's result type
 * @throws AssertionError if no {@link TraversalVertexProgramStep} is found, a flag differs from
 *                        {@code singleIteration}, or the first result differs from (or is missing for)
 *                        a non-null {@code expectedResult}
 */
private static <R> void test(boolean singleIteration, R expectedResult, final Traversal<?, R> traversal) {
    traversal.asAdmin().applyStrategies();
    // Captured after strategy application so failure messages show the compiled form.
    final String compiled = String.valueOf(traversal);
    final Map<String, Object> configuration = TraversalHelper.getFirstStepOfAssignableClass(TraversalVertexProgramStep.class, traversal.asAdmin())
            .orElseThrow(() -> new AssertionError("No TraversalVertexProgramStep found in " + compiled))
            .getComputer()
            .getConfiguration();
    assertEquals("Unexpected " + Constants.GREMLIN_SPARK_SKIP_PARTITIONER + " for " + compiled,
            singleIteration, configuration.getOrDefault(Constants.GREMLIN_SPARK_SKIP_PARTITIONER, false));
    assertEquals("Unexpected " + Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE + " for " + compiled,
            singleIteration, configuration.getOrDefault(Constants.GREMLIN_SPARK_SKIP_GRAPH_CACHE, false));
    // Always execute the traversal, even when no result value is checked.
    final List<R> result = traversal.toList();
    if (null != expectedResult) {
        if (result.isEmpty()) {
            // Report an assertion failure instead of an IndexOutOfBoundsException from get(0).
            throw new AssertionError("Expected <" + expectedResult + "> but got no results from " + compiled);
        }
        assertEquals("Unexpected first result for " + compiled, expectedResult, result.get(0));
    }
}
}