blob: 73981c04d410a7292de357d9f63b6f3b3d241ffa [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.query
import org.apache.atlas.query.Expressions._
import org.apache.atlas.typesystem.types.{TypeSystem, DataTypes}
import org.apache.atlas.typesystem.types.DataTypes.TypeCategory
import org.joda.time.format.ISODateTimeFormat
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
/**
 * A source of integers that are handed out one at a time.
 */
trait IntSequence {
  /** Returns the next integer of this sequence. */
  def next: Int
}
/**
 * The outcome of translating a DSL expression into Gremlin.
 *
 * @param expr         the expression the query text was generated from
 * @param queryStr     the generated Gremlin text
 * @param resultMaping for a query with a select list, maps each output column alias to the
 *                     source it is read from and its position among that source's
 *                     expressions; `null` when the query has no select list
 */
case class GremlinQuery(expr: Expression, queryStr: String, resultMaping: Map[String, (String, Int)]) {
  /** True when a result mapping is present (i.e. `resultMaping` is not `null`). */
  def hasSelectList = Option(resultMaping).isDefined

  /** True when the translated expression is a [[PathExpression]]. */
  def isPathExpresion = expr match {
    case _: PathExpression => true
    case _ => false
  }
}
trait SelectExpressionHandling {
/**
 * To aid Gremlin query generation, makes sure the input of every SelectExpression is an
 * AliasExpression and rewrites child-less fields of the select list into back references
 * to that alias.
 */
class AddAliasToSelectInput extends PartialFunction[Expression, Expression] {

  // Incremented for every SelectExpression visited, aliased or not; used to name
  // generated aliases (`_src<n>`).
  private var idx = 0

  def isDefinedAt(e: Expression) = true

  /**
   * Attaches a BackReference to `aliasE` as the child of every FieldExpression that has
   * no child; every other expression is returned untouched.
   */
  class DecorateFieldWithAlias(aliasE: AliasExpression)
    extends PartialFunction[Expression, Expression] {

    def isDefinedAt(e: Expression) = true

    def apply(e: Expression) = e match {
      case FieldExpression(name, info, None) =>
        val backRef = BackReference(aliasE.alias, aliasE.child, None)
        FieldExpression(name, info, Some(backRef))
      case _ => e
    }
  }

  def apply(e: Expression) = e match {
    case SelectExpression(input, selList) =>
      idx += 1
      // Reuse an existing alias on the input; otherwise wrap the input in a generated one.
      val aliasE = input match {
        case existing@AliasExpression(_, _) => existing
        case unaliased => AliasExpression(unaliased, s"_src$idx")
      }
      val decorator = new DecorateFieldWithAlias(aliasE)
      SelectExpression(aliasE, selList.map(_.transformUp(decorator)))
    case _ => e
  }
}
/**
 * Collects the distinct source names referenced inside `e`: the alias of every
 * BackReference and the class name of every ClassExpression found while traversing it.
 */
def getSelectExpressionSrc(e: Expression): List[String] = {
  val found = ArrayBuffer[String]()
  e.traverseUp {
    case ClassExpression(clsName) => found += clsName
    case BackReference(alias, _, _) => found += alias
  }
  // Going through a Set removes duplicates.
  val distinctSrcs = found.toSet
  distinctSrcs.toList
}
/**
 * Validation rule for SelectExpressions: every entry of the select list may reference at
 * most one source. Throws a GremlinTranslationException for the first entry that
 * references more than one.
 */
def validateSelectExprHaveOneSrc: PartialFunction[Expression, Unit] = {
  case SelectExpression(_, selList) =>
    selList
      .find(entry => getSelectExpressionSrc(entry).size > 1)
      .foreach { offending =>
        throw new GremlinTranslationException(offending, "Only one src allowed in a Select Expression")
      }
}
/**
 * Groups the select-list expressions of `sel` by the first source each one references.
 * Sources appear in the order they are first encountered, and the expressions of a source
 * keep their select-list order.
 */
def groupSelectExpressionsBySrc(sel: SelectExpression): mutable.LinkedHashMap[String, List[Expression]] = {
  val bySrc = mutable.LinkedHashMap[String, List[Expression]]()
  for (aliased <- sel.selectListWithAlias) {
    val selected = aliased.child
    val src = getSelectExpressionSrc(selected)(0)
    bySrc(src) = bySrc.getOrElse(src, List()) :+ selected
  }
  bySrc
}
/**
 * For each output column of the SelectExpression, computes the source it maps to and the
 * position of the column's expression within the list of expressions grouped under that
 * source.
 *
 * @param sel the select expression whose aliased select list is mapped
 * @return a map from output column alias to `(source name, position)`
 */
def buildResultMapping(sel: SelectExpression): Map[String, (String, Int)] = {
  val exprsBySrc = groupSelectExpressionsBySrc(sel)
  val mapping = new mutable.HashMap[String, (String, Int)]
  for (column <- sel.selectListWithAlias) {
    val columnExpr = column.child
    val src = getSelectExpressionSrc(columnExpr)(0)
    val position = exprsBySrc(src).indexOf(columnExpr)
    mapping(column.alias) = (src, position)
  }
  mapping.toMap
}
}
/**
 * Raised when an expression cannot be translated to Gremlin.
 *
 * @param expr   the expression that could not be translated
 * @param reason explanation appended to the "Unsupported Gremlin translation" message
 */
class GremlinTranslationException(expr: Expression, reason: String)
  extends ExpressionException(expr, s"Unsupported Gremlin translation: $reason")
class GremlinTranslator(expr: Expression,
gPersistenceBehavior: GraphPersistenceStrategies)
extends SelectExpressionHandling {
// Statements placed before the main query by genFullQuery; typeTestExpression appends here.
val preStatements = ArrayBuffer.empty[String]
// Statements placed after the main query by genFullQuery.
val postStatements = ArrayBuffer.empty[String]
/**
 * Rewrite rule: a filter whose condition is not already a LogicalExpression gets that
 * condition wrapped in a single-child "and".
 */
val wrapAndRule: PartialFunction[Expression, Expression] = {
  case FilterExpression(input, cond) if !cond.isInstanceOf[LogicalExpression] =>
    val conjunction = new LogicalExpression("and", List(cond))
    FilterExpression(input, conjunction)
}
/**
 * Validation rule for comparisons. Checked in this order:
 *  - the left-hand side must be a FieldExpression;
 *  - the right-hand side must be a Literal or a ListLiteral;
 *  - a ListLiteral right-hand side is only accepted with "=" or "!=".
 * The first violated check raises a GremlinTranslationException.
 */
val validateComparisonForm: PartialFunction[Expression, Unit] = {
  case c@ComparisonExpression(op, left, right) =>
    val rhsIsList = right.isInstanceOf[ListLiteral[_]]
    val rhsIsScalar = right.isInstanceOf[Literal[_]]
    val isEqualityOp = op.equals("=") || op.equals("!=")

    if (!left.isInstanceOf[FieldExpression]) {
      throw new GremlinTranslationException(c, s"lhs of comparison is not a field")
    }
    if (!(rhsIsScalar || rhsIsList)) {
      throw new GremlinTranslationException(c, s"rhs of comparison is not a literal")
    }
    if (rhsIsList && !isEqualityOp) {
      throw new GremlinTranslationException(c, s"operation not supported with list literal")
    }
    ()
}
// Translator-wide sequence: starts at -1, so the first call to `next` yields 0.
val counter = new IntSequence {
  var i: Int = -1

  def next: Int = {
    i = i + 1
    i
  }
}
/**
 * Rewrite rule that makes sure the input of every LoopExpression is aliased. A loop whose
 * input is already an AliasExpression is returned as is; otherwise the input is wrapped in
 * an alias named `_loop<n>`, with `n` drawn from `c`.
 *
 * @param c the sequence supplying alias suffixes; defaults to this translator's `counter`
 */
def addAliasToLoopInput(c: IntSequence = counter): PartialFunction[Expression, Expression] = {
  case loop@LoopExpression(inputExpr, loopExpr, t) =>
    inputExpr match {
      case AliasExpression(_, _) => loop
      case _ =>
        val aliasedInput = AliasExpression(inputExpr, s"_loop${c.next}")
        LoopExpression(aliasedInput, loopExpr, t)
    }
}
/**
 * Rewrite rule that replaces `topE` by the result of its `instance()` call when `topE`
 * is a logical, comparison or has-field (unary) expression. Only a node that
 * `fastEquals` `topE` is rewritten.
 *
 * @param topE the top-level expression of the query being translated
 */
def instanceClauseToTop(topE : Expression) : PartialFunction[Expression, Expression] = {
  case logical : LogicalExpression if logical.fastEquals(topE) =>
    logical.instance()
  case comparison : ComparisonExpression if comparison.fastEquals(topE) =>
    comparison.instance()
  case hasField : hasFieldUnaryExpression if hasField.fastEquals(topE) =>
    hasField.instance()
}
/**
 * Rewrite rule that turns every TraitExpression into a validated select over the trait
 * (aliased "theTrait", output as "traitDetails") and its instance (aliased "theInstance",
 * output as "instanceInfo").
 *
 * @param topE not consulted by the rule (see the note inside)
 */
def traitClauseWithInstanceForTop(topE : Expression) : PartialFunction[Expression, Expression] = {
  // Deliberately no `fastEquals topE` guard: such a check kept a trait expression from being
  // handled when it is a child of another expression, e.g. `trait as t limit 2`.
  case te : TraitExpression =>
    val aliasedTrait = te.as("theTrait")
    val aliasedInstance = aliasedTrait.traitInstance().as("theInstance")
    val projection = aliasedInstance.select(
      id("theTrait").as("traitDetails"),
      id("theInstance").as("instanceInfo"))
    QueryProcessor.validate(projection)
}
/**
 * Asks the persistence strategy for the statements that test for `typeName`. All but the
 * last statement are appended to `preStatements`; the last one is returned.
 */
def typeTestExpression(typeName : String) : String = {
  val generated = gPersistenceBehavior.typeTestExpression(typeName, counter)
  val preparatory = generated.init
  preStatements ++= preparatory
  generated.last
}
/**
 * Renders `expr` as a fragment of a Gremlin pipeline, recursing into child expressions.
 *
 * Class and trait tests are produced through `typeTestExpression`, which may append to
 * `preStatements` as a side effect.
 *
 * @param expr     the expression to translate
 * @param inSelect `true` while translating an entry of a select list; in that mode a back
 *                 reference renders as `gPersistenceBehavior.fieldPrefixInSelect` instead
 *                 of a `back("alias")` step
 * @return the Gremlin text for `expr`
 * @throws GremlinTranslationException if the expression kind has no translation, or if the
 *                                     value compared against a date attribute cannot be
 *                                     parsed as an ISO date / date-time
 */
private def genQuery(expr: Expression, inSelect: Boolean): String = expr match {
  case ClassExpression(clsName) =>
    typeTestExpression(clsName)

  case TraitExpression(clsName) =>
    typeTestExpression(clsName)

  // Primitive and array attributes are read as a (quoted) vertex property name.
  case fe@FieldExpression(_, fInfo, child)
    if fe.dataType.getTypeCategory == TypeCategory.PRIMITIVE || fe.dataType.getTypeCategory == TypeCategory.ARRAY =>
    val fN = "\"" + gPersistenceBehavior.fieldNameInVertex(fInfo.dataType, fInfo.attrInfo) + "\""
    child match {
      case Some(e) => s"${genQuery(e, inSelect)}.$fN"
      case None => s"$fN"
    }

  // Class and struct attributes are edge traversals; reverse fields walk the edge inwards.
  case fe@FieldExpression(_, fInfo, child)
    if fe.dataType.getTypeCategory == TypeCategory.CLASS || fe.dataType.getTypeCategory == TypeCategory.STRUCT =>
    val direction = if (fInfo.isReverse) "in" else "out"
    val edgeLbl = gPersistenceBehavior.edgeLabel(fInfo)
    val step = s"""$direction("$edgeLbl")"""
    child match {
      case Some(e) => s"${genQuery(e, inSelect)}.$step"
      case None => step
    }

  // Fields that carry a trait name traverse the instance-to-trait edge.
  case FieldExpression(_, fInfo, child) if fInfo.traitName != null =>
    val direction = gPersistenceBehavior.instanceToTraitEdgeDirection
    val edgeLbl = gPersistenceBehavior.edgeLabel(fInfo)
    val step = s"""$direction("$edgeLbl")"""
    child match {
      case Some(e) => s"${genQuery(e, inSelect)}.$step"
      case None => step
    }

  case c@ComparisonExpression(_, FieldExpression(_, fInfo, ch), l) =>
    val fieldGremlinExpr = s"${gPersistenceBehavior.fieldNameInVertex(fInfo.dataType, fInfo.attrInfo)}"
    ch match {
      case Some(child) =>
        s"""${genQuery(child, inSelect)}.has("$fieldGremlinExpr", ${gPersistenceBehavior.gremlinCompOp(c)}, $l)"""
      case None =>
        if (fInfo.attrInfo.dataType == DataTypes.DATE_TYPE) {
          val QUOTE = "\""
          // Accepts both date and datetime formats.
          val dateStr = l.toString.stripPrefix(QUOTE).stripSuffix(QUOTE)
          val dateVal =
            try {
              ISODateTimeFormat.dateOptionalTimeParser().parseDateTime(dateStr).getMillis
            } catch {
              // Joda-Time reports unparseable text with IllegalArgumentException, not
              // java.text.ParseException; the latter is still listed so anything the
              // previous handler covered keeps being translated. Only the parse call is
              // guarded, so unrelated failures are not misreported as date-format errors.
              case _: IllegalArgumentException | _: java.text.ParseException =>
                throw new GremlinTranslationException(c,
                  "Date format " + l + " not supported. Should be of the format " + TypeSystem.getInstance().getDateFormat.toPattern)
            }
          s"""has("$fieldGremlinExpr", ${gPersistenceBehavior.gremlinCompOp(c)},${dateVal})"""
        }
        else
          s"""has("$fieldGremlinExpr", ${gPersistenceBehavior.gremlinCompOp(c)}, $l)"""
    }

  case FilterExpression(child, condExpr) =>
    s"${genQuery(child, inSelect)}.${genQuery(condExpr, inSelect)}"

  // Each operand becomes an anonymous pipeline `_().<operand>` inside `and(...)` / `or(...)`.
  case LogicalExpression(symb, children) =>
    val operands = children.map(operand => "_()." + genQuery(operand, inSelect))
    s"""$symb${operands.mkString("(", ",", ")")}"""

  case sel@SelectExpression(child, _) =>
    // One quoted source name and one "[expr,...]" list per source, in grouping order.
    // The select-list entries are translated before the select's input, as before.
    val (srcNames, srcExprLists) = groupSelectExpressionsBySrc(sel).toList.map {
      case (src, selExprs) =>
        ("\"" + src + "\"", selExprs.map(selExpr => genQuery(selExpr, true)).mkString("[", ",", "]"))
    }.unzip
    val srcNamesString = srcNames.mkString("[", ",", "]")
    val srcExprsString = srcExprLists.map(exprList => "{" + exprList + "}").mkString
    s"${genQuery(child, inSelect)}.select($srcNamesString)$srcExprsString"

  case LoopExpression(input, loopExpr, t) =>
    val inputQry = genQuery(input, inSelect)
    val loopingPathGExpr = genQuery(loopExpr, inSelect)
    // translate() aliases every loop input (addAliasToLoopInput) before generation.
    val loopGExpr = s"""loop("${input.asInstanceOf[AliasExpression].alias}")"""
    val untilCriteria = t.map(times => s"{it.loops < ${times.value}}").getOrElse("{true}")
    val loopObjectGExpr = gPersistenceBehavior.loopObjectExpression(input.dataType)
    s"""${inputQry}.${loopingPathGExpr}.${loopGExpr}${untilCriteria}${loopObjectGExpr}"""

  case BackReference(alias, _, _) =>
    if (inSelect) gPersistenceBehavior.fieldPrefixInSelect else s"""back("$alias")"""

  case AliasExpression(child, alias) => s"""${genQuery(child, inSelect)}.as("$alias")"""

  case isTraitLeafExpression(traitName, Some(clsExp)) =>
    s"""out("${gPersistenceBehavior.traitLabel(clsExp.dataType, traitName)}")"""

  case isTraitUnaryExpression(traitName, child) =>
    s"""out("${gPersistenceBehavior.traitLabel(child.dataType, traitName)}")"""

  // The field name is qualified with the class name only when the class is known.
  case hasFieldLeafExpression(fieldName, clsExp) => clsExp match {
    case Some(c: ClassExpression) => s"""has("${c.clsName}.$fieldName")"""
    case _ => s"""has("$fieldName")"""
  }

  case hasFieldUnaryExpression(fieldName, child) =>
    s"""${genQuery(child, inSelect)}.has("$fieldName")"""

  case ArithmeticExpression(symb, left, right) => s"${genQuery(left, inSelect)} $symb ${genQuery(right, inSelect)}"

  case l: Literal[_] => l.toString

  case list: ListLiteral[_] => list.toString

  case TraitInstanceExpression(child) =>
    val direction = gPersistenceBehavior.traitToInstanceEdgeDirection
    s"${genQuery(child, inSelect)}.$direction()"

  case InstanceExpression(child) =>
    s"${genQuery(child, inSelect)}"

  case PathExpression(child) =>
    s"${genQuery(child, inSelect)}.path"

  case OrderExpression(child, odr, asc) =>
    // Builds the comparison closure handed to the Gremlin order pipe from the DSL
    // order-by clause. Ordering is case insensitive.
    val orderby = asc match {
      case false => s"order{it.b.getProperty('$odr').toLowerCase() <=> it.a.getProperty('$odr').toLowerCase()}" // descending
      case _ => s"order{it.a.getProperty('$odr').toLowerCase() <=> it.b.getProperty('$odr').toLowerCase()}"
    }
    s"""${genQuery(child, inSelect)}.$orderby"""

  case LimitExpression(child, limit, offset) =>
    // Rendered as a half-open range starting at offset and ending before offset + limit.
    val totalResultRows = limit.value + offset.value
    s"""${genQuery(child, inSelect)} [$offset..<$totalResultRows]"""

  case x => throw new GremlinTranslationException(x, "expression not yet supported")
}
/**
 * Produces the complete Groovy script for `expr`: the generated pipeline (prefixed with
 * `g.V.` when the persistence strategy asks for it) terminated by `toList()`, placed
 * between `preStatements` and `postStatements` and joined with ';'.
 */
def genFullQuery(expr: Expression): String = {
  // Generate first: translation may add entries to preStatements.
  val pipeline = genQuery(expr, false)
  val rooted =
    if (gPersistenceBehavior.addGraphVertexPrefix(preStatements)) s"g.V.$pipeline"
    else pipeline
  val mainStatement = s"$rooted.toList()"
  val script = (preStatements ++ Seq(mainStatement) ++ postStatements).mkString("", ";", "")
  /*
   * the L:{} represents a groovy code block; the label is needed
   * to distinguish it from a groovy closure.
   */
  s"L:{$script}"
}
/**
 * Runs the rewrite and validation passes over the translator's expression, in order, and
 * generates the Gremlin query for the result. When a select expression can be extracted
 * from the rewritten tree its result mapping is attached; otherwise the mapping is `null`.
 */
def translate(): GremlinQuery = {
  val andWrapped = expr.transformUp(wrapAndRule)
  andWrapped.traverseUp(validateComparisonForm)

  val selectAliased = andWrapped.transformUp(new AddAliasToSelectInput)
  selectAliased.traverseUp(validateSelectExprHaveOneSrc)

  val loopAliased = selectAliased.transformUp(addAliasToLoopInput())
  val instanceOnTop = loopAliased.transformUp(instanceClauseToTop(loopAliased))
  val rewritten = instanceOnTop.transformUp(traitClauseWithInstanceForTop(instanceOnTop))

  // The result mapping (if any) is computed before the query text is generated.
  val resultMapping = SelectExpressionHelper
    .extractSelectExpression(rewritten)
    .map(sel => buildResultMapping(sel))
    .orNull
  GremlinQuery(rewritten, genFullQuery(rewritten), resultMapping)
}
}
object SelectExpressionHelper {
  /**
   * Finds the select expression of a query, looking through any limit, order and path
   * expressions wrapped around it.
   *
   * @param child the expression to inspect
   * @return the select expression, or `None` when anything other than a select is found
   *         beneath the limit/order/path wrappers
   */
  def extractSelectExpression(child: Expression): Option[SelectExpression] = child match {
    case select@SelectExpression(_, _) => Some(select)
    case LimitExpression(wrapped, _, _) => extractSelectExpression(wrapped)
    case OrderExpression(wrapped, _, _) => extractSelectExpression(wrapped)
    case PathExpression(wrapped) => extractSelectExpression(wrapped)
    case _ => None
  }
}
/*
* TODO
* Translation Issues:
* 1. back references in filters. For e.g. testBackreference: 'DB as db Table where (db.name = "Reporting")'
* this is translated to:
* g.V.has("typeName","DB").as("db").in("Table.db").and(_().back("db").has("name", T.eq, "Reporting")).map().toList()
* But the '_().back("db") within the and is ignored, the has condition is applied on the current element.
* The solution is to do predicate pushdown and apply the filter immediately on top of the referred Expression.
*/