/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.tinkerpop.gremlin.server.op.traversal;
import com.codahale.metrics.Timer;
import io.netty.channel.ChannelException;
import io.netty.channel.ChannelHandlerContext;
import org.apache.tinkerpop.gremlin.driver.MessageSerializer;
import org.apache.tinkerpop.gremlin.driver.Tokens;
import org.apache.tinkerpop.gremlin.driver.message.RequestMessage;
import org.apache.tinkerpop.gremlin.driver.message.ResponseMessage;
import org.apache.tinkerpop.gremlin.driver.message.ResponseStatusCode;
import org.apache.tinkerpop.gremlin.jsr223.JavaTranslator;
import org.apache.tinkerpop.gremlin.process.traversal.Bytecode;
import org.apache.tinkerpop.gremlin.process.traversal.Failure;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.process.traversal.TraversalSource;
import org.apache.tinkerpop.gremlin.process.traversal.util.BytecodeHelper;
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalInterruptedException;
import org.apache.tinkerpop.gremlin.server.Context;
import org.apache.tinkerpop.gremlin.server.GraphManager;
import org.apache.tinkerpop.gremlin.server.GremlinServer;
import org.apache.tinkerpop.gremlin.server.OpProcessor;
import org.apache.tinkerpop.gremlin.server.Settings;
import org.apache.tinkerpop.gremlin.server.auth.AuthenticatedUser;
import org.apache.tinkerpop.gremlin.server.handler.Frame;
import org.apache.tinkerpop.gremlin.server.handler.StateKey;
import org.apache.tinkerpop.gremlin.server.op.AbstractOpProcessor;
import org.apache.tinkerpop.gremlin.server.op.OpProcessorException;
import org.apache.tinkerpop.gremlin.server.util.MetricManager;
import org.apache.tinkerpop.gremlin.server.util.TraverserIterator;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.util.TemporaryException;
import org.apache.tinkerpop.gremlin.util.function.ThrowingConsumer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.script.Bindings;
import javax.script.ScriptException;
import javax.script.SimpleBindings;
import java.lang.reflect.UndeclaredThrowableException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import static com.codahale.metrics.MetricRegistry.name;

/**
* Simple {@link OpProcessor} implementation that iterates remotely submitted serialized {@link Traversal} objects.
*
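* <p>
* A request typically reaches this processor when a client submits a traversal over a remote connection, which
* serializes the traversal to {@link Bytecode} under the {@code bytecode} op. A minimal sketch of such a client
* follows (the host, port and {@code g} alias are illustrative, not requirements of this class):
* <pre>{@code
* GraphTraversalSource g = AnonymousTraversalSource.traversal().withRemote(
*         DriverRemoteConnection.using("localhost", 8182, "g"));
* List<Object> names = g.V().values("name").limit(3).toList();
* }</pre>
*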
* @author Stephen Mallette (http://stephen.genoprime.com)
*/
public class TraversalOpProcessor extends AbstractOpProcessor {
private static final Logger logger = LoggerFactory.getLogger(TraversalOpProcessor.class);
private static final Logger auditLogger = LoggerFactory.getLogger(GremlinServer.AUDIT_LOGGER_NAME);
public static final String OP_PROCESSOR_NAME = "traversal";
public static final Timer traversalOpTimer = MetricManager.INSTANCE.getTimer(name(GremlinServer.class, "op", "traversal"));
private static final Bindings EMPTY_BINDINGS = new SimpleBindings();
public TraversalOpProcessor() {
super(false);
}
@Override
public String getName() {
return OP_PROCESSOR_NAME;
}
@Override
public void close() throws Exception {
// do nothing = no resources to release
}
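
/**
 * Selects the {@link ThrowingConsumer} that will process the request. Only the {@code bytecode} op is
 * recognized by this processor and its message is validated before the operation is returned.
 */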
@Override
public ThrowingConsumer<Context> select(final Context context) throws OpProcessorException {
final RequestMessage message = context.getRequestMessage();
logger.debug("Selecting processor for RequestMessage {}", message);
final ThrowingConsumer<Context> op;
switch (message.getOp()) {
case Tokens.OPS_BYTECODE:
validateTraversalSourceAlias(context, message, validateTraversalRequest(message));
op = this::iterateBytecodeTraversal;
break;
case Tokens.OPS_INVALID:
final String msgInvalid = String.format("Message could not be parsed. Check the format of the request. [%s]", message);
throw new OpProcessorException(msgInvalid, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_MALFORMED_REQUEST).statusMessage(msgInvalid).create());
default:
final String msgDefault = String.format("Message with op code [%s] is not recognized.", message.getOp());
throw new OpProcessorException(msgDefault, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_MALFORMED_REQUEST).statusMessage(msgDefault).create());
}
return op;
}
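
/**
 * Confirms that the single alias supplied with the request maps to a {@link TraversalSource} that is actually
 * configured on the server.
 */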
private static void validateTraversalSourceAlias(final Context ctx, final RequestMessage message, final Map<String, String> aliases) throws OpProcessorException {
final String traversalSourceBindingForAlias = aliases.values().iterator().next();
if (!ctx.getGraphManager().getTraversalSourceNames().contains(traversalSourceBindingForAlias)) {
final String msg = String.format("The traversal source [%s] for alias [%s] is not configured on the server.", traversalSourceBindingForAlias, Tokens.VAL_TRAVERSAL_SOURCE_ALIAS);
throw new OpProcessorException(msg, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_INVALID_REQUEST_ARGUMENTS).statusMessage(msg).create());
}
}
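
/**
 * Ensures that the request contains a {@code gremlin} argument of type {@link Bytecode} and returns the
 * validated alias map.
 */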
private static Map<String, String> validateTraversalRequest(final RequestMessage message) throws OpProcessorException {
if (!message.optionalArgs(Tokens.ARGS_GREMLIN).isPresent()) {
final String msg = String.format("A message with [%s] op code requires a [%s] argument.", Tokens.OPS_BYTECODE, Tokens.ARGS_GREMLIN);
throw new OpProcessorException(msg, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_INVALID_REQUEST_ARGUMENTS).statusMessage(msg).create());
}
// matches functionality in the UnifiedHandler
if (!(message.optionalArgs(Tokens.ARGS_GREMLIN).get() instanceof Bytecode)) {
final String msg = String.format("A message with [%s] op code requires a [%s] argument that is of type %s.",
Tokens.OPS_BYTECODE, Tokens.ARGS_GREMLIN, Bytecode.class.getSimpleName());
throw new OpProcessorException(msg, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_INVALID_REQUEST_ARGUMENTS).statusMessage(msg).create());
}
return validatedAliases(message).get();
}
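
/**
 * Ensures that the request contains an {@code aliases} argument holding exactly one traversal source alias.
 */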
private static Optional<Map<String, String>> validatedAliases(final RequestMessage message) throws OpProcessorException {
final Optional<Map<String, String>> aliases = message.optionalArgs(Tokens.ARGS_ALIASES);
if (!aliases.isPresent()) {
final String msg = String.format("A message with [%s] op code requires a [%s] argument.", Tokens.OPS_BYTECODE, Tokens.ARGS_ALIASES);
throw new OpProcessorException(msg, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_INVALID_REQUEST_ARGUMENTS).statusMessage(msg).create());
}
if (aliases.get().size() != 1 || !aliases.get().containsKey(Tokens.VAL_TRAVERSAL_SOURCE_ALIAS)) {
final String msg = String.format("A message with [%s] op code requires the [%s] argument to be a Map containing one alias assignment named '%s'.",
Tokens.OPS_BYTECODE, Tokens.ARGS_ALIASES, Tokens.VAL_TRAVERSAL_SOURCE_ALIAS);
throw new OpProcessorException(msg, ResponseMessage.build(message).code(ResponseStatusCode.REQUEST_ERROR_INVALID_REQUEST_ARGUMENTS).statusMessage(msg).create());
}
return aliases;
}
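
/**
 * The operation selected for the {@code bytecode} op: reconstructs a {@link Traversal} from the submitted
 * {@link Bytecode}, schedules its iteration on the {@code GremlinExecutor} thread pool and arranges a timeout
 * for the execution if one applies to the request.
 */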
private void iterateBytecodeTraversal(final Context context) throws Exception {
final RequestMessage msg = context.getRequestMessage();
final Settings settings = context.getSettings();
logger.debug("Traversal request {} for in thread {}", msg.getRequestId(), Thread.currentThread().getName());
// validateTraversalRequest() ensures that this is of type Bytecode
final Object bytecodeObj = msg.getArgs().get(Tokens.ARGS_GREMLIN);
final Bytecode bytecode = (Bytecode) bytecodeObj;
// earlier validation in selection of this op method should free us to cast this without worry
final Map<String, String> aliases = (Map<String, String>) msg.optionalArgs(Tokens.ARGS_ALIASES).get();
// timeout override - use the value supplied on the request when present, otherwise fall back to the
// server-configured evaluationTimeout
final Map<String, Object> args = msg.getArgs();
final long seto = args.containsKey(Tokens.ARGS_EVAL_TIMEOUT) ?
((Number) args.get(Tokens.ARGS_EVAL_TIMEOUT)).longValue() : context.getSettings().getEvaluationTimeout();
final GraphManager graphManager = context.getGraphManager();
final String traversalSourceName = aliases.entrySet().iterator().next().getValue();
final TraversalSource g = graphManager.getTraversalSource(traversalSourceName);
final Traversal.Admin<?, ?> traversal;
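// reconstruct the Traversal from the submitted bytecode. traversals that contain lambdas must be evaluated
// by the GremlinExecutor in the language declared for the lambda, while everything else can be translated
// directly with JavaTranslator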
try {
final Optional<String> lambdaLanguage = BytecodeHelper.getLambdaLanguage(bytecode);
if (!lambdaLanguage.isPresent())
traversal = JavaTranslator.of(g).translate(bytecode);
else
traversal = context.getGremlinExecutor().eval(bytecode, EMPTY_BINDINGS, lambdaLanguage.get(), traversalSourceName);
} catch (ScriptException ex) {
logger.error("Traversal contains a lambda that cannot be compiled", ex);
throw new OpProcessorException("Traversal contains a lambda that cannot be compiled",
ResponseMessage.build(msg).code(ResponseStatusCode.SERVER_ERROR_EVALUATION)
.statusMessage(ex.getMessage())
.statusAttributeException(ex).create());
} catch (Exception ex) {
logger.error("Could not deserialize the Traversal instance", ex);
throw new OpProcessorException("Could not deserialize the Traversal instance",
ResponseMessage.build(msg).code(ResponseStatusCode.SERVER_ERROR_SERIALIZATION)
.statusMessage(ex.getMessage())
.statusAttributeException(ex).create());
}
if (settings.enableAuditLog) {
AuthenticatedUser user = context.getChannelHandlerContext().channel().attr(StateKey.AUTHENTICATED_USER).get();
if (null == user) { // This is expected when using the AllowAllAuthenticator
user = AuthenticatedUser.ANONYMOUS_USER;
}
String address = context.getChannelHandlerContext().channel().remoteAddress().toString();
if (address.startsWith("/") && address.length() > 1) address = address.substring(1);
auditLogger.info("User {} with address {} requested: {}", user.getName(), address, bytecode);
}
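// iterate and serialize the traversal on a worker thread from the GremlinExecutor pool so that the Netty
// event loop is never blocked by result iteration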
final Timer.Context timerContext = traversalOpTimer.time();
final FutureTask<Void> evalFuture = new FutureTask<>(() -> {
context.setStartedResponse();
final Graph graph = g.getGraph();
try {
beforeProcessing(graph, context);
try {
// compile the traversal - without it getEndStep() has nothing in it
traversal.applyStrategies();
handleIterator(context, new TraverserIterator(traversal), graph);
} catch (Exception ex) {
Throwable t = ex;
if (ex instanceof UndeclaredThrowableException)
t = t.getCause();
// if any exception in the chain is TemporaryException or Failure then we should respond with the
// right error code so that the client knows to retry
final Optional<Throwable> possibleSpecialException = determineIfSpecialException(ex);
if (possibleSpecialException.isPresent()) {
final Throwable special = possibleSpecialException.get();
final ResponseMessage.Builder specialResponseMsg = ResponseMessage.build(msg).
statusMessage(special.getMessage()).
statusAttributeException(special);
if (special instanceof TemporaryException) {
specialResponseMsg.code(ResponseStatusCode.SERVER_ERROR_TEMPORARY);
} else if (special instanceof Failure) {
final Failure failure = (Failure) special;
specialResponseMsg.code(ResponseStatusCode.SERVER_ERROR_FAIL_STEP).
statusAttribute(Tokens.STATUS_ATTRIBUTE_FAIL_STEP_MESSAGE, failure.format());
}
context.writeAndFlush(specialResponseMsg.create());
} else if (t instanceof InterruptedException || t instanceof TraversalInterruptedException) {
graphManager.onQueryError(msg, t);
final String errorMessage = String.format("A timeout occurred during traversal evaluation of [%s] - consider increasing the limit given to evaluationTimeout", msg);
logger.warn(errorMessage);
context.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.SERVER_ERROR_TIMEOUT)
.statusMessage(errorMessage)
.statusAttributeException(ex).create());
} else {
logger.warn(String.format("Exception processing a Traversal on iteration for request [%s].", msg.getRequestId()), ex);
context.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.SERVER_ERROR)
.statusMessage(ex.getMessage())
.statusAttributeException(ex).create());
}
onError(graph, context, ex);
}
} catch (Throwable t) {
onError(graph, context, t);
// if any exception in the chain is TemporaryException or Failure then we should respond with the
// right error code so that the client knows to retry
final Optional<Throwable> possibleSpecialException = determineIfSpecialException(t);
if (possibleSpecialException.isPresent()) {
final Throwable special = possibleSpecialException.get();
final ResponseMessage.Builder specialResponseMsg = ResponseMessage.build(msg).
statusMessage(special.getMessage()).
statusAttributeException(special);
if (special instanceof TemporaryException) {
specialResponseMsg.code(ResponseStatusCode.SERVER_ERROR_TEMPORARY);
} else if (special instanceof Failure) {
final Failure failure = (Failure) special;
specialResponseMsg.code(ResponseStatusCode.SERVER_ERROR_FAIL_STEP).
statusAttribute(Tokens.STATUS_ATTRIBUTE_FAIL_STEP_MESSAGE, failure.format());
}
context.writeAndFlush(specialResponseMsg.create());
} else {
logger.warn(String.format("Exception processing a Traversal on request [%s].", msg.getRequestId()), t);
context.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.SERVER_ERROR)
.statusMessage(t.getMessage())
.statusAttributeException(t).create());
if (t instanceof Error) {
// re-throw any errors to be handled by and set as the result of evalFuture
throw t;
}
}
} finally {
timerContext.stop();
}
return null;
});
try {
final Future<?> executionFuture = context.getGremlinExecutor().getExecutorService().submit(evalFuture);
if (seto > 0) {
// Schedule a timeout in the thread pool for future execution
context.getScheduledExecutorService().schedule(() -> {
executionFuture.cancel(true);
if (!context.getStartedResponse()) {
context.sendTimeoutResponse();
}
}, seto, TimeUnit.MILLISECONDS);
}
} catch (RejectedExecutionException ree) {
context.writeAndFlush(ResponseMessage.build(msg).code(ResponseStatusCode.TOO_MANY_REQUESTS)
.statusMessage("Rate limiting").create());
}
}
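
/**
 * Called before iteration begins. Notifies the {@link GraphManager} that the query is starting and rolls back
 * any open transaction so that iteration starts against a fresh one.
 */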
protected void beforeProcessing(final Graph graph, final Context ctx) {
final GraphManager graphManager = ctx.getGraphManager();
final RequestMessage msg = ctx.getRequestMessage();
graphManager.beforeQueryStart(msg);
if (graph.features().graph().supportsTransactions() && graph.tx().isOpen()) graph.tx().rollback();
}
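
/**
 * Called when iteration fails. Notifies the {@link GraphManager} of the error and rolls back any open
 * transaction.
 */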
protected void onError(final Graph graph, final Context ctx, Throwable error) {
final GraphManager graphManager = ctx.getGraphManager();
final RequestMessage msg = ctx.getRequestMessage();
graphManager.onQueryError(msg, error);
if (graph.features().graph().supportsTransactions() && graph.tx().isOpen()) graph.tx().rollback();
}
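
/**
 * Called once iteration and serialization have completed successfully. Notifies the {@link GraphManager} and
 * commits any open transaction.
 */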
protected void onTraversalSuccess(final Graph graph, final Context ctx) {
final GraphManager graphManager = ctx.getGraphManager();
final RequestMessage msg = ctx.getRequestMessage();
graphManager.onQuerySuccess(msg);
if (graph.features().graph().supportsTransactions() && graph.tx().isOpen()) graph.tx().commit();
}
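
/**
 * Streams the results of the traversal back to the client in batches of {@code resultIterationBatchSize}
 * (or the size requested by the client), committing the transaction via
 * {@link #onTraversalSuccess(Graph, Context)} once iteration completes and pausing writes while the channel
 * is not writable.
 */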
protected void handleIterator(final Context context, final Iterator itty, final Graph graph) throws InterruptedException {
final ChannelHandlerContext nettyContext = context.getChannelHandlerContext();
final RequestMessage msg = context.getRequestMessage();
final Settings settings = context.getSettings();
final MessageSerializer<?> serializer = nettyContext.channel().attr(StateKey.SERIALIZER).get();
final boolean useBinary = nettyContext.channel().attr(StateKey.USE_BINARY).get();
boolean warnOnce = false;
// we have an empty iterator - happens on stuff like: g.V().iterate()
if (!itty.hasNext()) {
final Map<String, Object> attributes = generateStatusAttributes(nettyContext, msg, ResponseStatusCode.NO_CONTENT, itty, settings);
// as there is nothing left to iterate if we are transaction managed then we should execute a
// commit here before we send back a NO_CONTENT which implies success
onTraversalSuccess(graph, context);
context.writeAndFlush(ResponseMessage.build(msg)
.code(ResponseStatusCode.NO_CONTENT)
.statusAttributes(attributes)
.create());
return;
}
// the batch size can be overridden by the request
final int resultIterationBatchSize = (Integer) msg.optionalArgs(Tokens.ARGS_BATCH_SIZE)
.orElse(settings.resultIterationBatchSize);
List<Object> aggregate = new ArrayList<>(resultIterationBatchSize);
// use an external control to manage the loop as opposed to just checking hasNext() in the while. this
// prevents situations where auto transactions create a new transaction after calls to commit() within
// the loop on calls to hasNext().
boolean hasMore = itty.hasNext();
while (hasMore) {
if (Thread.interrupted()) throw new InterruptedException();
// check if an implementation needs to force flush the aggregated results before the iteration batch
// size is reached.
final boolean forceFlush = isForceFlushed(nettyContext, msg, itty);
// have to check the aggregate size because it is possible that the channel is not writeable (below)
// so iterating next() if the message is not written and flushed would bump the aggregate size beyond
// the expected resultIterationBatchSize. Total serialization time for the response remains in
// effect so if the client is "slow" it may simply timeout.
//
// there is a need to check hasNext() on the iterator because if the channel is not writeable the
// previous pass through the while loop will have next()'d the iterator and if it is "done" then a
// NoSuchElementException will raise its head. also need a check to ensure that this iteration doesn't
// require a forced flush which can be forced by sub-classes.
//
// this could be placed inside the isWriteable() portion of the if-then below but it seems better to
// allow iteration to continue into a batch if that is possible rather than just doing nothing at all
// while waiting for the client to catch up
if (aggregate.size() < resultIterationBatchSize && itty.hasNext() && !forceFlush) aggregate.add(itty.next());
// Don't keep executor busy if client has already given up; there is no way to catch up if the channel is
// not active, and hence we should break the loop.
if (!nettyContext.channel().isActive()) {
onError(graph, context, new ChannelException("Channel is not active - cannot write any more results"));
break;
}
// send back a page of results if batch size is met or if it's the end of the results being iterated.
// also check writeability of the channel to prevent OOME for slow clients.
//
// clients might decide to close the Netty channel to the server with a CloseWebsocketFrame after errors
// like CorruptedFrameException. On the server, although the channel gets closed, there might be some
// executor threads waiting for watermark to clear which will not clear in these cases since client has
// already given up on these requests. This leads to these executors waiting for the client to consume
// results till the timeout. checking for isActive() should help prevent that.
if (nettyContext.channel().isActive() && nettyContext.channel().isWritable()) {
if (forceFlush || aggregate.size() == resultIterationBatchSize || !itty.hasNext()) {
final ResponseStatusCode code = itty.hasNext() ? ResponseStatusCode.PARTIAL_CONTENT : ResponseStatusCode.SUCCESS;
// serialize here because in sessionless requests the serialization must occur in the same
// thread as the eval. as eval occurs in the GremlinExecutor there's no way to get back to the
// thread that processed the eval of the script, so we have to push serialization down into that thread
final Map<String, Object> metadata = generateResultMetaData(nettyContext, msg, code, itty, settings);
final Map<String, Object> statusAttrb = generateStatusAttributes(nettyContext, msg, code, itty, settings);
Frame frame = null;
try {
frame = makeFrame(context, msg, serializer, useBinary, aggregate, code,
metadata, statusAttrb);
} catch (Exception ex) {
// a frame may use a Bytebuf which is a countable release - if it does not get written
// downstream it needs to be released here
if (frame != null) frame.tryRelease();
// exception is handled in makeFrame() - serialization error gets written back to driver
// at that point
onError(graph, context, ex);
break;
}
// track whether there is anything left in the iterator because it needs to be accessed after
// the transaction could be closed - in that case a call to hasNext() could open a new transaction
// unintentionally
hasMore = itty.hasNext();
try {
// only need to reset the aggregation list if there's more stuff to write
if (hasMore)
aggregate = new ArrayList<>(resultIterationBatchSize);
else {
// iteration and serialization are both complete which means this finished successfully. note that
// errors internal to script eval or timeout will rollback given GremlinServer's global configurations.
// local errors will get rolled back below because the exceptions aren't thrown in those cases to be
// caught by the GremlinExecutor for global rollback logic. this only needs to be committed if
// there are no more items to iterate and serialization is complete
onTraversalSuccess(graph, context);
}
} catch (Exception ex) {
// a frame may use a Bytebuf which is a countable release - if it does not get written
// downstream it needs to be released here
if (frame != null) frame.tryRelease();
throw ex;
}
if (!hasMore) iterateComplete(nettyContext, msg, itty);
// the flush is called after the commit has potentially occurred. in this way, if a commit was
// required then it will be 100% complete before the client receives it. the "frame" at this point
// should have completely detached objects from the transaction (i.e. serialization has occurred)
// so a new one should not be opened on the flush down the netty pipeline
context.writeAndFlush(code, frame);
}
} else {
// don't keep triggering this warning over and over again for the same request
if (!warnOnce) {
logger.warn("Pausing response writing as writeBufferHighWaterMark exceeded on {} - writing will continue once client has caught up", msg);
warnOnce = true;
}
// since the client is lagging we can hold here for a period of time for the client to catch up.
// this isn't blocking the IO thread - just a worker.
TimeUnit.MILLISECONDS.sleep(10);
}
}
}
}