blob: 069d57e6b5cebb7617552d4cf9c766f07e7d4411 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.process.traversal.step.map;
import org.apache.commons.configuration.Configuration;
import org.apache.tinkerpop.gremlin.process.computer.KeyValue;
import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
import org.apache.tinkerpop.gremlin.process.computer.traversal.TraversalVertexProgram;
import org.apache.tinkerpop.gremlin.process.traversal.Step;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.process.traversal.TraversalEngine;
import org.apache.tinkerpop.gremlin.process.traversal.Traverser;
import org.apache.tinkerpop.gremlin.process.traversal.dsl.graph.__;
import org.apache.tinkerpop.gremlin.process.traversal.step.Barrier;
import org.apache.tinkerpop.gremlin.process.traversal.step.EngineDependent;
import org.apache.tinkerpop.gremlin.process.traversal.step.MapReducer;
import org.apache.tinkerpop.gremlin.process.traversal.step.TraversalParent;
import org.apache.tinkerpop.gremlin.process.traversal.step.util.BulkSet;
import org.apache.tinkerpop.gremlin.process.traversal.step.util.GroupStepHelper;
import org.apache.tinkerpop.gremlin.process.traversal.step.util.ReducingBarrierStep;
import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement;
import org.apache.tinkerpop.gremlin.process.traversal.traverser.util.TraverserSet;
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalHelper;
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalMatrix;
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalUtil;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Supplier;
/**
 * A reducing barrier step that groups incoming traversers into a {@code Map<K, V>}.
 * <p>
 * Up to two child traversals may be registered through {@link #addLocalChild}: the first
 * becomes the key traversal, the second the value traversal. The value traversal is also
 * split (via {@code GroupStepHelper.splitOnBarrierStep}) into a pre-reduce part
 * ({@code valueTraversal}) and a reduce part ({@code reduceTraversal}).
 * <p>
 * When {@link #onEngine} reports a computer engine, {@link #processNextStart()} bypasses the
 * reducing barrier and emits {@code Object[]{key, values}} pairs that are later consumed by
 * {@link GroupMapReduce}.
 *
 * @author Marko A. Rodriguez (http://markorodriguez.com)
 */
public final class GroupStep<S, K, V> extends ReducingBarrierStep<S, Map<K, V>> implements MapReducer, EngineDependent, TraversalParent {

    // Tracks which child traversal addLocalChild() expects next:
    // 'k' = key traversal, 'v' = value traversal, 'x' = both already supplied.
    private char state = 'k';
    // null until a key traversal is supplied through addLocalChild().
    private Traversal.Admin<S, K> keyTraversal = null;
    private Traversal.Admin<S, ?> valueTraversal = this.integrateChild(__.identity().asAdmin());      // used in OLAP
    private Traversal.Admin<?, V> reduceTraversal = this.integrateChild(__.fold().asAdmin());         // used in OLAP
    private Traversal.Admin<S, V> valueReduceTraversal = this.integrateChild(__.fold().asAdmin());    // used in OLTP
    // true when the step runs on a computer engine; see onEngine() and processNextStart().
    private boolean byPass = false;

    /**
     * Creates the step with a {@code GroupStepHelper.GroupMapSupplier} as seed supplier and a
     * {@link GroupBiFunction} bound to this step as the reducing function.
     *
     * @param traversal the traversal this step belongs to
     */
    public GroupStep(final Traversal.Admin traversal) {
        super(traversal);
        this.setSeedSupplier((Supplier) new GroupStepHelper.GroupMapSupplier());
        this.setBiFunction(new GroupBiFunction(this));
    }

    /**
     * Records whether the step is executed by a computer engine; if so,
     * {@link #processNextStart()} takes the bypass path.
     *
     * @param traversalEngine the engine executing the traversal
     */
    @Override
    public void onEngine(final TraversalEngine traversalEngine) {
        this.byPass = traversalEngine.isComputer();
    }

    /**
     * Returns the child traversals of this step: the key traversal (only if one was set),
     * followed by the combined value/reduce traversal, the value traversal and the reduce traversal.
     *
     * @return a freshly allocated list of the child traversals
     */
    @Override
    public List<Traversal.Admin<?, ?>> getLocalChildren() {
        final List<Traversal.Admin<?, ?>> children = new ArrayList<>(4);
        if (null != this.keyTraversal)
            children.add((Traversal.Admin) this.keyTraversal);
        children.add(this.valueReduceTraversal);
        children.add(this.valueTraversal);
        children.add(this.reduceTraversal);
        return children;
    }

    /**
     * Registers the next child traversal. The first call sets the key traversal; the second call
     * sets the value traversal (converted by {@code GroupStepHelper.convertValueTraversal}) and
     * derives the value and reduce traversals from elements 0 and 1 of
     * {@code GroupStepHelper.splitOnBarrierStep}.
     *
     * @param kvTraversal the key traversal (first call) or value traversal (second call)
     * @throws IllegalStateException if both the key and value traversals have already been set
     */
    @Override
    public void addLocalChild(final Traversal.Admin<?, ?> kvTraversal) {
        if ('k' == this.state) {
            this.keyTraversal = this.integrateChild(kvTraversal);
            this.state = 'v';
        } else if ('v' == this.state) {
            this.valueReduceTraversal = this.integrateChild(GroupStepHelper.convertValueTraversal(kvTraversal));
            final List<Traversal.Admin<?, ?>> splitTraversal = GroupStepHelper.splitOnBarrierStep(this.valueReduceTraversal);
            this.valueTraversal = this.integrateChild(splitTraversal.get(0));
            this.reduceTraversal = this.integrateChild(splitTraversal.get(1));
            this.state = 'x';
        } else {
            throw new IllegalStateException("The key and value traversals for group()-step have already been set: " + this);
        }
    }

    /**
     * @return the requirements computed by {@code getSelfAndChildRequirements} for
     * {@code OBJECT} and {@code BULK}
     */
    @Override
    public Set<TraverserRequirement> getRequirements() {
        return this.getSelfAndChildRequirements(TraverserRequirement.OBJECT, TraverserRequirement.BULK);
    }

    /**
     * Clones the step, giving the clone its own copies of every child traversal and its own
     * {@link GroupBiFunction} so that no per-key state is shared with this instance.
     *
     * @return the cloned step
     */
    @Override
    public GroupStep<S, K, V> clone() {
        final GroupStep<S, K, V> clone = (GroupStep<S, K, V>) super.clone();
        if (null != this.keyTraversal)
            clone.keyTraversal = clone.integrateChild(this.keyTraversal.clone());
        clone.valueReduceTraversal = clone.integrateChild(this.valueReduceTraversal.clone());
        clone.valueTraversal = clone.integrateChild(this.valueTraversal.clone());
        clone.reduceTraversal = clone.integrateChild(this.reduceTraversal.clone());
        clone.setBiFunction(new GroupBiFunction<>((GroupStep) clone));
        return clone;
    }

    /**
     * @return the hash of the combined value/reduce traversal, XOR-ed with the hash of the key
     * traversal when one is set
     */
    @Override
    public int hashCode() {
        int result = this.valueReduceTraversal.hashCode();
        if (this.keyTraversal != null) result ^= this.keyTraversal.hashCode();
        return result;
    }

    /**
     * @return a new {@link GroupMapReduce} configured from this step
     */
    @Override
    public MapReduce<K, Collection<?>, K, V, Map<K, V>> getMapReduce() {
        return new GroupMapReduce<>(this);
    }

    /**
     * In bypass mode, takes the next start, computes its key, runs it through the value traversal
     * and emits a traverser holding {@code Object[]{key, BulkSet-of-values}}; the reduction is left
     * to {@link GroupMapReduce}. Otherwise delegates to the reducing barrier of the superclass.
     *
     * @return the next traverser produced by this step
     */
    @Override
    public Traverser<Map<K, V>> processNextStart() {
        if (this.byPass) {
            final Traverser.Admin<S> traverser = this.starts.next();
            final K key = TraversalUtil.applyNullable(traverser, this.keyTraversal);
            this.valueTraversal.addStart(traverser);
            final BulkSet<?> value = this.valueTraversal.toBulkSet();
            // The emitted pair layout (index 0 = key, index 1 = values) is what GroupMapReduce.map() reads.
            return traverser.asAdmin().split(new Object[]{key, value}, (Step) this);
        } else {
            return super.processNextStart();
        }
    }

    @Override
    public String toString() {
        return StringFactory.stepString(this, this.keyTraversal, this.valueReduceTraversal);
    }

    ///////////

    /**
     * The reducing function used outside bypass mode: keeps one clone of the value/reduce
     * traversal per key and feeds each incoming traverser to the clone of its key.
     */
    private static class GroupBiFunction<S, K, V> implements BiFunction<Map<K, Traversal.Admin<S, V>>, Traverser.Admin<S>, Map<K, Traversal.Admin<S, V>>>, Serializable {

        // Once more than this many traversers were added for a key since the last flush,
        // the first Barrier step of that key's traversal is asked to process its starts.
        private static final int FLUSH_THRESHOLD = 10000;

        private final GroupStep<S, K, V> groupStep;
        // Number of traversers added per key since that key's traversal was created or last flushed.
        private final Map<K, Integer> counters = new HashMap<>();

        private GroupBiFunction(final GroupStep<S, K, V> groupStep) {
            this.groupStep = groupStep;
        }

        /**
         * Adds the traverser to the per-key traversal stored in the seed map, creating that
         * traversal (a clone of the step's value/reduce traversal) on first sight of the key.
         *
         * @param mutatingSeed the map of key to per-key traversal; mutated in place
         * @param traverser    the incoming traverser
         * @return {@code mutatingSeed}
         */
        @Override
        public Map<K, Traversal.Admin<S, V>> apply(final Map<K, Traversal.Admin<S, V>> mutatingSeed, final Traverser.Admin<S> traverser) {
            final K key = TraversalUtil.applyNullable(traverser, this.groupStep.keyTraversal);
            Traversal.Admin<S, V> traversal = mutatingSeed.get(key);
            if (null == traversal) {
                traversal = this.groupStep.valueReduceTraversal.clone();
                this.counters.put(key, 0);
                mutatingSeed.put(key, traversal);
            }
            traversal.addStart(traverser);
            // merge() tolerates a key that has no counter entry (it starts at 1) instead of unboxing null.
            final int count = this.counters.merge(key, 1, Integer::sum);
            if (count > FLUSH_THRESHOLD) {
                this.counters.put(key, 0);
                TraversalHelper.getFirstStepOfAssignableClass(Barrier.class, traversal).ifPresent(Barrier::processAllStarts);
            }
            return mutatingSeed;
        }
    }

    ///////////

    /**
     * The map-reduce job that completes the grouping when the step ran in bypass mode: the map
     * stage emits the {@code (key, values)} pairs found on halted traversers and the reduce stage
     * runs the values of each key through a clone of the step's reduce traversal.
     */
    public static final class GroupMapReduce<S, K, V> implements MapReduce<K, Collection<?>, K, V, Map<K, V>> {

        public static final String GROUP_BY_STEP_STEP_ID = "gremlin.groupStep.stepId";

        private String groupStepId;
        private Traversal.Admin<?, V> reduceTraversal;

        // No-arg constructor; state is expected to be filled in by loadState().
        // NOTE(review): presumably required for reflective instantiation -- confirm against callers.
        private GroupMapReduce() {

        }

        /**
         * @param step the group step whose id and reduce traversal (cloned) are captured
         */
        public GroupMapReduce(final GroupStep<S, K, V> step) {
            this.groupStepId = step.getId();
            this.reduceTraversal = step.reduceTraversal.clone();
        }

        /**
         * Stores the default map-reduce state plus the id of the originating group step.
         */
        @Override
        public void storeState(final Configuration configuration) {
            MapReduce.super.storeState(configuration);
            configuration.setProperty(GROUP_BY_STEP_STEP_ID, this.groupStepId);
        }

        /**
         * Restores the step id from the configuration and re-derives the reduce traversal by
         * looking the step up by id in the traversal loaded from the configuration.
         */
        @Override
        public void loadState(final Graph graph, final Configuration configuration) {
            this.groupStepId = configuration.getString(GROUP_BY_STEP_STEP_ID);
            this.reduceTraversal = ((GroupStep) new TraversalMatrix<>(TraversalVertexProgram.getTraversal(graph, configuration)).getStepById(this.groupStepId)).reduceTraversal.clone();
        }

        /**
         * @return {@code true} for every stage except {@code COMBINE}
         */
        @Override
        public boolean doStage(final Stage stage) {
            return !stage.equals(Stage.COMBINE);
        }

        /**
         * Emits, for every halted traverser on the vertex, element 0 of its {@code Object[]} as
         * key and element 1 as the collection of values.
         */
        @Override
        public void map(final Vertex vertex, final MapEmitter<K, Collection<?>> emitter) {
            vertex.<TraverserSet<Object[]>>property(TraversalVertexProgram.HALTED_TRAVERSERS).ifPresent(traverserSet -> traverserSet.forEach(traverser -> {
                final Object[] objects = traverser.get();
                emitter.emit((K) objects[0], (Collection<?>) objects[1]);
            }));
        }

        /**
         * Feeds every value collection of the key into a fresh clone of the reduce traversal
         * (each element as a traverser of bulk 1), asking the first Barrier step to process its
         * starts after each collection, and emits the traversal's next result for the key.
         */
        @Override
        public void reduce(final K key, final Iterator<Collection<?>> values, final ReduceEmitter<K, V> emitter) {
            Traversal.Admin<?, V> reduceTraversalClone = this.reduceTraversal.clone();
            while (values.hasNext()) {
                reduceTraversalClone.addStarts(reduceTraversalClone.getTraverserGenerator().generateIterator(values.next().iterator(), (Step) reduceTraversalClone.getStartStep(), 1L));
                TraversalHelper.getFirstStepOfAssignableClass(Barrier.class, reduceTraversalClone).ifPresent(Barrier::processAllStarts);
            }
            emitter.emit(key, reduceTraversalClone.next());
        }

        /**
         * @return a {@code HashMap} holding every emitted key/value pair
         */
        @Override
        public Map<K, V> generateFinalResult(final Iterator<KeyValue<K, V>> keyValues) {
            final Map<K, V> map = new HashMap<>();
            keyValues.forEachRemaining(keyValue -> map.put(keyValue.getKey(), keyValue.getValue()));
            return map;
        }

        @Override
        public String getMemoryKey() {
            return REDUCING;
        }

        /**
         * @return a copy of this job with its own clone of the reduce traversal
         * @throws IllegalStateException if cloning is not supported
         */
        @Override
        public GroupMapReduce<S, K, V> clone() {
            try {
                final GroupMapReduce<S, K, V> clone = (GroupMapReduce<S, K, V>) super.clone();
                clone.reduceTraversal = this.reduceTraversal.clone();
                return clone;
            } catch (final CloneNotSupportedException e) {
                throw new IllegalStateException(e.getMessage(), e);
            }
        }

        @Override
        public String toString() {
            return StringFactory.mapReduceString(this, this.getMemoryKey());
        }
    }
}