| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * <p> |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * <p> |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.atlas.discovery; |
| |
| |
| import org.apache.atlas.AtlasConfiguration; |
| import org.apache.atlas.AtlasErrorCode; |
| import org.apache.atlas.annotation.GraphTransaction; |
| import org.apache.atlas.authorize.AtlasAuthorizationUtils; |
| import org.apache.atlas.authorize.AtlasEntityAccessRequest; |
| import org.apache.atlas.authorize.AtlasPrivilege; |
| import org.apache.atlas.exception.AtlasBaseException; |
| import org.apache.atlas.model.instance.AtlasEntity; |
| import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo; |
| import org.apache.atlas.model.instance.AtlasEntityHeader; |
| import org.apache.atlas.model.instance.AtlasObjectId; |
| import org.apache.atlas.model.lineage.AtlasLineageInfo; |
| import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageInfoOnDemand; |
| import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection; |
| import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageRelation; |
| import org.apache.atlas.model.lineage.LineageOnDemandConstraints; |
| import org.apache.atlas.repository.graphdb.AtlasEdge; |
| import org.apache.atlas.repository.graphdb.AtlasEdgeDirection; |
| import org.apache.atlas.repository.graphdb.AtlasGraph; |
| import org.apache.atlas.repository.graphdb.AtlasVertex; |
| import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2; |
| import org.apache.atlas.repository.store.graph.v2.EntityGraphRetriever; |
| import org.apache.atlas.type.AtlasEntityType; |
| import org.apache.atlas.type.AtlasTypeRegistry; |
| import org.apache.atlas.type.AtlasTypeUtil; |
| import org.apache.atlas.util.AtlasGremlinQueryProvider; |
| import org.apache.atlas.v1.model.lineage.SchemaResponse.SchemaDetails; |
| import org.apache.commons.collections.CollectionUtils; |
| import org.apache.commons.collections.MapUtils; |
| import org.apache.commons.lang.StringUtils; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| import org.springframework.stereotype.Service; |
| |
| import javax.inject.Inject; |
| import javax.script.ScriptEngine; |
| import javax.script.ScriptException; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.stream.Collectors; |
| |
| import static org.apache.atlas.AtlasClient.DATA_SET_SUPER_TYPE; |
| import static org.apache.atlas.AtlasClient.PROCESS_SUPER_TYPE; |
| import static org.apache.atlas.AtlasErrorCode.INSTANCE_LINEAGE_QUERY_FAILED; |
| import static org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection.BOTH; |
| import static org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection.INPUT; |
| import static org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection.OUTPUT; |
| import static org.apache.atlas.repository.Constants.RELATIONSHIP_GUID_PROPERTY_KEY; |
| import static org.apache.atlas.repository.graphdb.AtlasEdgeDirection.IN; |
| import static org.apache.atlas.repository.graphdb.AtlasEdgeDirection.OUT; |
| import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.FULL_LINEAGE_DATASET; |
| import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.FULL_LINEAGE_PROCESS; |
| import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.PARTIAL_LINEAGE_DATASET; |
| import static org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery.PARTIAL_LINEAGE_PROCESS; |
| |
| @Service |
| public class EntityLineageService implements AtlasLineageService { |
| private static final Logger LOG = LoggerFactory.getLogger(EntityLineageService.class); |
| |
| private static final String PROCESS_INPUTS_EDGE = "__Process.inputs"; |
| private static final String PROCESS_OUTPUTS_EDGE = "__Process.outputs"; |
| private static final String COLUMNS = "columns"; |
| private static final boolean LINEAGE_USING_GREMLIN = AtlasConfiguration.LINEAGE_USING_GREMLIN.getBoolean(); |
| private static final Integer DEFAULT_LINEAGE_MAX_NODE_COUNT = 9000; |
| private static final int LINEAGE_ON_DEMAND_DEFAULT_DEPTH = 3; |
| private static final int LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT = AtlasConfiguration.LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT.getInt(); |
| private static final String SEPARATOR = "->"; |
| |
| private final AtlasGraph graph; |
| private final AtlasGremlinQueryProvider gremlinQueryProvider; |
| private final EntityGraphRetriever entityRetriever; |
| private final AtlasTypeRegistry atlasTypeRegistry; |
| |
| @Inject |
| EntityLineageService(AtlasTypeRegistry typeRegistry, AtlasGraph atlasGraph) { |
| this.graph = atlasGraph; |
| this.gremlinQueryProvider = AtlasGremlinQueryProvider.INSTANCE; |
| this.entityRetriever = new EntityGraphRetriever(atlasGraph, typeRegistry); |
| this.atlasTypeRegistry = typeRegistry; |
| } |
| |
| @Override |
| @GraphTransaction |
| public AtlasLineageInfo getAtlasLineageInfo(String guid, LineageDirection direction, int depth) throws AtlasBaseException { |
| AtlasLineageInfo ret; |
| |
| boolean isDataSet = validateEntityTypeAndCheckIfDataSet(guid); |
| |
| if (LINEAGE_USING_GREMLIN) { |
| ret = getLineageInfoV1(guid, direction, depth, isDataSet); |
| } else { |
| ret = getLineageInfoV2(guid, direction, depth, isDataSet); |
| } |
| |
| return ret; |
| } |
| |
| @Override |
| @GraphTransaction |
| public AtlasLineageInfo getAtlasLineageInfo(String guid, Map<String, LineageOnDemandConstraints> lineageConstraintsMap) throws AtlasBaseException { |
| AtlasLineageInfo ret; |
| |
| if (MapUtils.isEmpty(lineageConstraintsMap)) { |
| lineageConstraintsMap = new HashMap<>(); |
| lineageConstraintsMap.put(guid, getDefaultLineageConstraints(guid)); |
| } |
| |
| boolean isDataSet = validateEntityTypeAndCheckIfDataSet(guid); |
| |
| ret = getLineageInfoOnDemand(guid, lineageConstraintsMap, isDataSet); |
| |
| appendLineageOnDemandPayload(ret, lineageConstraintsMap); |
| |
| // filtering out on-demand relations which has input & output nodes within the limit |
| cleanupRelationsOnDemand(ret); |
| |
| return ret; |
| } |
| |
| private boolean validateEntityTypeAndCheckIfDataSet(String guid) throws AtlasBaseException { |
| AtlasEntityHeader entity = entityRetriever.toAtlasEntityHeaderWithClassifications(guid); |
| |
| AtlasAuthorizationUtils.verifyAccess(new AtlasEntityAccessRequest(atlasTypeRegistry, AtlasPrivilege.ENTITY_READ, entity), "read entity lineage: guid=", guid); |
| |
| AtlasEntityType entityType = atlasTypeRegistry.getEntityTypeByName(entity.getTypeName()); |
| |
| if (entityType == null) { |
| throw new AtlasBaseException(AtlasErrorCode.TYPE_NAME_NOT_FOUND, entity.getTypeName()); |
| } |
| |
| boolean isDataSet = entityType.getTypeAndAllSuperTypes().contains(DATA_SET_SUPER_TYPE); |
| |
| if (!isDataSet) { |
| boolean isProcess = entityType.getTypeAndAllSuperTypes().contains(PROCESS_SUPER_TYPE); |
| |
| if (!isProcess) { |
| throw new AtlasBaseException(AtlasErrorCode.INVALID_LINEAGE_ENTITY_TYPE, guid, entity.getTypeName()); |
| } |
| } |
| |
| return isDataSet; |
| } |
| |
| private void appendLineageOnDemandPayload(AtlasLineageInfo lineageInfo, Map<String, LineageOnDemandConstraints> lineageConstraintsMap) { |
| if (lineageInfo == null || MapUtils.isEmpty(lineageConstraintsMap)) { |
| return; |
| } |
| lineageInfo.setLineageOnDemandPayload(lineageConstraintsMap); |
| } |
| |
| //Consider only relationsOnDemand which has either more inputs or more outputs than given limit |
| private void cleanupRelationsOnDemand(AtlasLineageInfo lineageInfo) { |
| if (lineageInfo != null && MapUtils.isNotEmpty(lineageInfo.getRelationsOnDemand())) { |
| lineageInfo.getRelationsOnDemand().entrySet().removeIf(x -> !(x.getValue().hasMoreInputs() || x.getValue().hasMoreOutputs())); |
| } |
| } |
| |
| @Override |
| @GraphTransaction |
| public SchemaDetails getSchemaForHiveTableByName(final String datasetName) throws AtlasBaseException { |
| if (StringUtils.isEmpty(datasetName)) { |
| // TODO: Complete error handling here |
| throw new AtlasBaseException(AtlasErrorCode.BAD_REQUEST); |
| } |
| |
| AtlasEntityType hive_table = atlasTypeRegistry.getEntityTypeByName("hive_table"); |
| |
| Map<String, Object> lookupAttributes = new HashMap<>(); |
| lookupAttributes.put("qualifiedName", datasetName); |
| String guid = AtlasGraphUtilsV2.getGuidByUniqueAttributes(hive_table, lookupAttributes); |
| |
| return getSchemaForHiveTableByGuid(guid); |
| } |
| |
| @Override |
| @GraphTransaction |
| public SchemaDetails getSchemaForHiveTableByGuid(final String guid) throws AtlasBaseException { |
| if (StringUtils.isEmpty(guid)) { |
| throw new AtlasBaseException(AtlasErrorCode.BAD_REQUEST); |
| } |
| SchemaDetails ret = new SchemaDetails(); |
| AtlasEntityType hive_column = atlasTypeRegistry.getEntityTypeByName("hive_column"); |
| |
| ret.setDataType(AtlasTypeUtil.toClassTypeDefinition(hive_column)); |
| |
| AtlasEntityWithExtInfo entityWithExtInfo = entityRetriever.toAtlasEntityWithExtInfo(guid); |
| AtlasEntity entity = entityWithExtInfo.getEntity(); |
| |
| AtlasAuthorizationUtils.verifyAccess(new AtlasEntityAccessRequest(atlasTypeRegistry, AtlasPrivilege.ENTITY_READ, new AtlasEntityHeader(entity)), |
| "read entity schema: guid=", guid); |
| |
| Map<String, AtlasEntity> referredEntities = entityWithExtInfo.getReferredEntities(); |
| List<String> columnIds = getColumnIds(entity); |
| |
| if (MapUtils.isNotEmpty(referredEntities)) { |
| List<Map<String, Object>> rows = referredEntities.entrySet() |
| .stream() |
| .filter(e -> isColumn(columnIds, e)) |
| .map(e -> AtlasTypeUtil.toMap(e.getValue())) |
| .collect(Collectors.toList()); |
| ret.setRows(rows); |
| } |
| |
| return ret; |
| } |
| |
| private List<String> getColumnIds(AtlasEntity entity) { |
| List<String> ret = new ArrayList<>(); |
| Object columnObjs = entity.getAttribute(COLUMNS); |
| |
| if (columnObjs instanceof List) { |
| for (Object pkObj : (List) columnObjs) { |
| if (pkObj instanceof AtlasObjectId) { |
| ret.add(((AtlasObjectId) pkObj).getGuid()); |
| } |
| } |
| } |
| |
| return ret; |
| } |
| |
| private boolean isColumn(List<String> columnIds, Map.Entry<String, AtlasEntity> e) { |
| return columnIds.contains(e.getValue().getGuid()); |
| } |
| |
| private AtlasLineageInfo getLineageInfoV1(String guid, LineageDirection direction, int depth, boolean isDataSet) throws AtlasBaseException { |
| AtlasLineageInfo ret; |
| |
| if (direction.equals(INPUT)) { |
| ret = getLineageInfo(guid, INPUT, depth, isDataSet); |
| } else if (direction.equals(OUTPUT)) { |
| ret = getLineageInfo(guid, OUTPUT, depth, isDataSet); |
| } else { |
| ret = getBothLineageInfoV1(guid, depth, isDataSet); |
| } |
| |
| return ret; |
| } |
| |
| private AtlasLineageInfo getLineageInfo(String guid, LineageDirection direction, int depth, boolean isDataSet) throws AtlasBaseException { |
| final Map<String, Object> bindings = new HashMap<>(); |
| String lineageQuery = getLineageQuery(guid, direction, depth, isDataSet, bindings); |
| List results = executeGremlinScript(bindings, lineageQuery); |
| Map<String, AtlasEntityHeader> entities = new HashMap<>(); |
| Set<LineageRelation> relations = new HashSet<>(); |
| |
| if (CollectionUtils.isNotEmpty(results)) { |
| for (Object result : results) { |
| if (result instanceof Map) { |
| for (final Object o : ((Map) result).entrySet()) { |
| final Map.Entry entry = (Map.Entry) o; |
| Object value = entry.getValue(); |
| |
| if (value instanceof List) { |
| for (Object elem : (List) value) { |
| if (elem instanceof AtlasEdge) { |
| processEdge((AtlasEdge) elem, entities, relations); |
| } else { |
| LOG.warn("Invalid value of type {} found, ignoring", (elem != null ? elem.getClass().getSimpleName() : "null")); |
| } |
| } |
| } else if (value instanceof AtlasEdge) { |
| processEdge((AtlasEdge) value, entities, relations); |
| } else { |
| LOG.warn("Invalid value of type {} found, ignoring", (value != null ? value.getClass().getSimpleName() : "null")); |
| } |
| } |
| } else if (result instanceof AtlasEdge) { |
| processEdge((AtlasEdge) result, entities, relations); |
| } |
| } |
| } |
| |
| return new AtlasLineageInfo(guid, entities, relations, direction, depth); |
| } |
| |
| private AtlasLineageInfo getLineageInfoV2(String guid, LineageDirection direction, int depth, boolean isDataSet) throws AtlasBaseException { |
| AtlasLineageInfo ret = initializeLineageInfo(guid, direction, depth); |
| |
| if (depth == 0) { |
| depth = -1; |
| } |
| |
| if (isDataSet) { |
| AtlasVertex datasetVertex = AtlasGraphUtilsV2.findByGuid(this.graph, guid); |
| |
| if (direction == INPUT || direction == BOTH) { |
| traverseEdges(datasetVertex, true, depth, ret); |
| } |
| |
| if (direction == OUTPUT || direction == BOTH) { |
| traverseEdges(datasetVertex, false, depth, ret); |
| } |
| } else { |
| AtlasVertex processVertex = AtlasGraphUtilsV2.findByGuid(this.graph, guid); |
| |
| // make one hop to the next dataset vertices from process vertex and traverse with 'depth = depth - 1' |
| if (direction == INPUT || direction == BOTH) { |
| Iterable<AtlasEdge> processEdges = processVertex.getEdges(AtlasEdgeDirection.OUT, PROCESS_INPUTS_EDGE); |
| |
| for (AtlasEdge processEdge : processEdges) { |
| addEdgeToResult(processEdge, ret); |
| |
| AtlasVertex datasetVertex = processEdge.getInVertex(); |
| |
| traverseEdges(datasetVertex, true, depth - 1, ret); |
| } |
| } |
| |
| if (direction == OUTPUT || direction == BOTH) { |
| Iterable<AtlasEdge> processEdges = processVertex.getEdges(AtlasEdgeDirection.OUT, PROCESS_OUTPUTS_EDGE); |
| |
| for (AtlasEdge processEdge : processEdges) { |
| addEdgeToResult(processEdge, ret); |
| |
| AtlasVertex datasetVertex = processEdge.getInVertex(); |
| |
| traverseEdges(datasetVertex, false, depth - 1, ret); |
| } |
| } |
| } |
| |
| return ret; |
| } |
| |
| private LineageOnDemandConstraints getDefaultLineageConstraints(String guid) { |
| if (LOG.isDebugEnabled()) { |
| LOG.debug("No lineage on-demand constraints provided for guid: {}, configuring with default values direction: {}, inputRelationsLimit: {}, outputRelationsLimit: {}, depth: {}", |
| guid, BOTH, LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT, LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT, LINEAGE_ON_DEMAND_DEFAULT_DEPTH); |
| } |
| |
| return new LineageOnDemandConstraints(); |
| } |
| |
| private LineageOnDemandConstraints getAndValidateLineageConstraintsByGuid(String guid, Map<String, LineageOnDemandConstraints> lineageConstraintsMap) { |
| |
| if (lineageConstraintsMap == null || !lineageConstraintsMap.containsKey(guid)) { |
| return getDefaultLineageConstraints(guid); |
| } |
| |
| LineageOnDemandConstraints lineageConstraintsByGuid = lineageConstraintsMap.get(guid);; |
| if (lineageConstraintsByGuid == null) { |
| return getDefaultLineageConstraints(guid); |
| } |
| |
| if (Objects.isNull(lineageConstraintsByGuid.getDirection())) { |
| LOG.info("No lineage on-demand direction provided for guid: {}, configuring with default value {}", guid, LineageDirection.BOTH); |
| lineageConstraintsByGuid.setDirection(BOTH); |
| } |
| |
| if (lineageConstraintsByGuid.getInputRelationsLimit() == 0) { |
| LOG.info("No lineage on-demand inputRelationsLimit provided for guid: {}, configuring with default value {}", guid, LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT); |
| lineageConstraintsByGuid.setInputRelationsLimit(LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT); |
| } |
| |
| if (lineageConstraintsByGuid.getOutputRelationsLimit() == 0) { |
| LOG.info("No lineage on-demand outputRelationsLimit provided for guid: {}, configuring with default value {}", guid, LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT); |
| lineageConstraintsByGuid.setOutputRelationsLimit(LINEAGE_ON_DEMAND_DEFAULT_NODE_COUNT); |
| } |
| |
| if (lineageConstraintsByGuid.getDepth() == 0) { |
| LOG.info("No lineage on-demand depth provided for guid: {}, configuring with default value {}", guid, LINEAGE_ON_DEMAND_DEFAULT_DEPTH); |
| lineageConstraintsByGuid.setDepth(LINEAGE_ON_DEMAND_DEFAULT_DEPTH); |
| } |
| |
| return lineageConstraintsByGuid; |
| |
| } |
| |
| private AtlasLineageInfo getLineageInfoOnDemand(String guid, Map<String, LineageOnDemandConstraints> lineageConstraintsMap, boolean isDataSet) throws AtlasBaseException { |
| |
| LineageOnDemandConstraints lineageConstraintsByGuid = getAndValidateLineageConstraintsByGuid(guid, lineageConstraintsMap); |
| |
| if (lineageConstraintsByGuid == null) { |
| throw new AtlasBaseException(AtlasErrorCode.NO_LINEAGE_CONSTRAINTS_FOR_GUID, guid); |
| } |
| |
| LineageDirection direction = lineageConstraintsByGuid.getDirection(); |
| int depth = lineageConstraintsByGuid.getDepth(); |
| |
| AtlasLineageInfo ret = initializeLineageInfo(guid, direction, depth); |
| |
| if (depth == 0) { |
| depth = -1; |
| } |
| |
| if (isDataSet) { |
| AtlasVertex datasetVertex = AtlasGraphUtilsV2.findByGuid(this.graph, guid); |
| |
| if (!ret.getRelationsOnDemand().containsKey(guid)) { |
| ret.getRelationsOnDemand().put(guid, new LineageInfoOnDemand(lineageConstraintsByGuid)); |
| } |
| |
| if (direction == INPUT || direction == BOTH) { |
| traverseEdgesOnDemand(datasetVertex, true, depth, new HashSet<>(), lineageConstraintsMap, ret); |
| } |
| |
| if (direction == OUTPUT || direction == BOTH) { |
| traverseEdgesOnDemand(datasetVertex, false, depth, new HashSet<>(), lineageConstraintsMap, ret); |
| } |
| } else { |
| AtlasVertex processVertex = AtlasGraphUtilsV2.findByGuid(this.graph, guid); |
| |
| // make one hop to the next dataset vertices from process vertex and traverse with 'depth = depth - 1' |
| if (direction == INPUT || direction == BOTH) { |
| Iterable<AtlasEdge> processEdges = processVertex.getEdges(AtlasEdgeDirection.OUT, PROCESS_INPUTS_EDGE); |
| |
| traverseEdgesOnDemand(processEdges, true, depth, lineageConstraintsMap, ret); |
| } |
| |
| if (direction == OUTPUT || direction == BOTH) { |
| Iterable<AtlasEdge> processEdges = processVertex.getEdges(AtlasEdgeDirection.OUT, PROCESS_OUTPUTS_EDGE); |
| |
| traverseEdgesOnDemand(processEdges, false, depth, lineageConstraintsMap, ret); |
| } |
| |
| } |
| |
| return ret; |
| } |
| |
| private void traverseEdges(AtlasVertex datasetVertex, boolean isInput, int depth, AtlasLineageInfo ret) throws AtlasBaseException { |
| traverseEdges(datasetVertex, isInput, depth, new HashSet<>(), ret); |
| } |
| |
| private void traverseEdges(AtlasVertex datasetVertex, boolean isInput, int depth, Set<String> visitedVertices, AtlasLineageInfo ret) throws AtlasBaseException { |
| if (depth != 0) { |
| // keep track of visited vertices to avoid circular loop |
| visitedVertices.add(getId(datasetVertex)); |
| |
| Iterable<AtlasEdge> incomingEdges = datasetVertex.getEdges(IN, isInput ? PROCESS_OUTPUTS_EDGE : PROCESS_INPUTS_EDGE); |
| |
| for (AtlasEdge incomingEdge : incomingEdges) { |
| AtlasVertex processVertex = incomingEdge.getOutVertex(); |
| Iterable<AtlasEdge> outgoingEdges = processVertex.getEdges(OUT, isInput ? PROCESS_INPUTS_EDGE : PROCESS_OUTPUTS_EDGE); |
| |
| for (AtlasEdge outgoingEdge : outgoingEdges) { |
| AtlasVertex entityVertex = outgoingEdge.getInVertex(); |
| |
| if (entityVertex != null) { |
| addEdgeToResult(incomingEdge, ret); |
| addEdgeToResult(outgoingEdge, ret); |
| |
| if (!visitedVertices.contains(getId(entityVertex))) { |
| traverseEdges(entityVertex, isInput, depth - 1, visitedVertices, ret); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| private void traverseEdgesOnDemand(Iterable<AtlasEdge> processEdges, boolean isInput, int depth, Map<String, LineageOnDemandConstraints> lineageConstraintsMap, AtlasLineageInfo ret) throws AtlasBaseException { |
| for (AtlasEdge processEdge : processEdges) { |
| boolean isInputEdge = processEdge.getLabel().equalsIgnoreCase(PROCESS_INPUTS_EDGE); |
| |
| if (incrementAndCheckIfRelationsLimitReached(processEdge, isInputEdge, lineageConstraintsMap, ret)) { |
| break; |
| } else { |
| addEdgeToResult(processEdge, ret); |
| } |
| |
| AtlasVertex datasetVertex = processEdge.getInVertex(); |
| |
| String inGuid = AtlasGraphUtilsV2.getIdFromVertex(datasetVertex); |
| LineageOnDemandConstraints inGuidLineageConstrains = getAndValidateLineageConstraintsByGuid(inGuid, lineageConstraintsMap); |
| |
| if (!ret.getRelationsOnDemand().containsKey(inGuid)) { |
| ret.getRelationsOnDemand().put(inGuid, new LineageInfoOnDemand(inGuidLineageConstrains)); |
| } |
| |
| traverseEdgesOnDemand(datasetVertex, isInput, depth - 1, new HashSet<>(), lineageConstraintsMap, ret); |
| } |
| } |
| |
| private void traverseEdgesOnDemand(AtlasVertex datasetVertex, boolean isInput, int depth, Set<String> visitedVertices, Map<String, LineageOnDemandConstraints> lineageConstraintsMap, AtlasLineageInfo ret) throws AtlasBaseException { |
| if (depth != 0) { |
| // keep track of visited vertices to avoid circular loop |
| visitedVertices.add(getId(datasetVertex)); |
| |
| Iterable<AtlasEdge> incomingEdges = datasetVertex.getEdges(IN, isInput ? PROCESS_OUTPUTS_EDGE : PROCESS_INPUTS_EDGE); |
| |
| for (AtlasEdge incomingEdge : incomingEdges) { |
| |
| if (incrementAndCheckIfRelationsLimitReached(incomingEdge, !isInput, lineageConstraintsMap, ret)) { |
| break; |
| } else { |
| addEdgeToResult(incomingEdge, ret); |
| } |
| |
| AtlasVertex processVertex = incomingEdge.getOutVertex(); |
| Iterable<AtlasEdge> outgoingEdges = processVertex.getEdges(OUT, isInput ? PROCESS_INPUTS_EDGE : PROCESS_OUTPUTS_EDGE); |
| |
| for (AtlasEdge outgoingEdge : outgoingEdges) { |
| |
| if (incrementAndCheckIfRelationsLimitReached(outgoingEdge, isInput, lineageConstraintsMap, ret)) { |
| break; |
| } else { |
| addEdgeToResult(outgoingEdge, ret); |
| } |
| |
| AtlasVertex entityVertex = outgoingEdge.getInVertex(); |
| |
| if (entityVertex != null && !visitedVertices.contains(getId(entityVertex))) { |
| traverseEdgesOnDemand(entityVertex, isInput, depth - 1, visitedVertices, lineageConstraintsMap, ret); |
| } |
| } |
| } |
| } |
| } |
| |
| private boolean incrementAndCheckIfRelationsLimitReached(AtlasEdge atlasEdge, boolean isInput, Map<String, LineageOnDemandConstraints> lineageConstraintsMap, AtlasLineageInfo ret) { |
| |
| if (lineageContainsEdge(ret, atlasEdge)) { |
| return false; |
| } |
| |
| boolean hasRelationsLimitReached = false; |
| |
| AtlasVertex inVertex = isInput ? atlasEdge.getOutVertex() : atlasEdge.getInVertex(); |
| String inGuid = AtlasGraphUtilsV2.getIdFromVertex(inVertex); |
| LineageOnDemandConstraints inGuidLineageConstraints = getAndValidateLineageConstraintsByGuid(inGuid, lineageConstraintsMap); |
| |
| AtlasVertex outVertex = isInput ? atlasEdge.getInVertex() : atlasEdge.getOutVertex(); |
| String outGuid = AtlasGraphUtilsV2.getIdFromVertex(outVertex); |
| LineageOnDemandConstraints outGuidLineageConstraints = getAndValidateLineageConstraintsByGuid(outGuid, lineageConstraintsMap); |
| |
| LineageInfoOnDemand inLineageInfo = ret.getRelationsOnDemand().containsKey(inGuid) ? ret.getRelationsOnDemand().get(inGuid) : new LineageInfoOnDemand(inGuidLineageConstraints); |
| LineageInfoOnDemand outLineageInfo = ret.getRelationsOnDemand().containsKey(outGuid) ? ret.getRelationsOnDemand().get(outGuid) : new LineageInfoOnDemand(outGuidLineageConstraints); |
| |
| if (inLineageInfo.isInputRelationsReachedLimit()) { |
| inLineageInfo.setHasMoreInputs(true); |
| hasRelationsLimitReached = true; |
| }else { |
| inLineageInfo.incrementInputRelationsCount(); |
| } |
| |
| if (outLineageInfo.isOutputRelationsReachedLimit()) { |
| outLineageInfo.setHasMoreOutputs(true); |
| hasRelationsLimitReached = true; |
| } else { |
| outLineageInfo.incrementOutputRelationsCount(); |
| } |
| |
| if (!hasRelationsLimitReached) { |
| ret.getRelationsOnDemand().put(inGuid, inLineageInfo); |
| ret.getRelationsOnDemand().put(outGuid, outLineageInfo); |
| } |
| |
| return hasRelationsLimitReached; |
| } |
| |
| private void addEdgeToResult(AtlasEdge edge, AtlasLineageInfo lineageInfo) throws AtlasBaseException { |
| if (!lineageContainsEdge(lineageInfo, edge) && !lineageMaxNodeCountReached(lineageInfo.getRelations())) { |
| processEdge(edge, lineageInfo); |
| } |
| } |
| |
| private int getLineageMaxNodeAllowedCount() { |
| return Math.min(DEFAULT_LINEAGE_MAX_NODE_COUNT, AtlasConfiguration.LINEAGE_MAX_NODE_COUNT.getInt()); |
| } |
| |
| private boolean lineageMaxNodeCountReached(Set<LineageRelation> relations) { |
| return CollectionUtils.isNotEmpty(relations) && relations.size() > getLineageMaxNodeAllowedCount(); |
| } |
| |
| private String getVisitedEdgeLabel(String inGuid, String outGuid, String relationGuid) { |
| if (isLineageOnDemandEnabled()) { |
| return inGuid + SEPARATOR + outGuid; |
| } |
| return relationGuid; |
| } |
| |
| private boolean lineageContainsEdge(AtlasLineageInfo lineageInfo, AtlasEdge edge) { |
| boolean ret = false; |
| |
| if (edge != null && lineageInfo != null && CollectionUtils.isNotEmpty(lineageInfo.getVisitedEdges())) { |
| String inGuid = AtlasGraphUtilsV2.getIdFromVertex(edge.getInVertex()); |
| String outGuid = AtlasGraphUtilsV2.getIdFromVertex(edge.getOutVertex()); |
| String relationGuid = AtlasGraphUtilsV2.getEncodedProperty(edge, RELATIONSHIP_GUID_PROPERTY_KEY, String.class); |
| boolean isInputEdge = edge.getLabel().equalsIgnoreCase(PROCESS_INPUTS_EDGE); |
| String visitedEdgeLabel = isInputEdge ? getVisitedEdgeLabel(inGuid, outGuid, relationGuid) : getVisitedEdgeLabel(outGuid, inGuid, relationGuid); |
| |
| if (lineageInfo.getVisitedEdges().contains(visitedEdgeLabel)) { |
| ret = true; |
| } |
| } |
| |
| return ret; |
| } |
| |
| private void processEdge(final AtlasEdge edge, final AtlasLineageInfo lineageInfo) throws AtlasBaseException { |
| processEdge(edge, lineageInfo.getGuidEntityMap(), lineageInfo.getRelations(), lineageInfo.getVisitedEdges()); |
| } |
| |
| private AtlasLineageInfo initializeLineageInfo(String guid, LineageDirection direction, int depth) { |
| return new AtlasLineageInfo(guid, new HashMap<>(), new HashSet<>(), new HashSet<>(), new HashMap<>(), direction, depth); |
| } |
| |
| private static String getId(AtlasVertex vertex) { |
| return vertex.getIdForDisplay(); |
| } |
| |
| private List executeGremlinScript(Map<String, Object> bindings, String lineageQuery) throws AtlasBaseException { |
| List ret; |
| ScriptEngine engine = graph.getGremlinScriptEngine(); |
| |
| try { |
| ret = (List) graph.executeGremlinScript(engine, bindings, lineageQuery, false); |
| } catch (ScriptException e) { |
| throw new AtlasBaseException(INSTANCE_LINEAGE_QUERY_FAILED, lineageQuery); |
| } finally { |
| graph.releaseGremlinScriptEngine(engine); |
| } |
| |
| return ret; |
| } |
| |
| private void processEdge(final AtlasEdge edge, final Map<String, AtlasEntityHeader> entities, final Set<LineageRelation> relations) throws AtlasBaseException { |
| processEdge(edge, entities, relations, null); |
| } |
| |
| private void processEdge(final AtlasEdge edge, final Map<String, AtlasEntityHeader> entities, final Set<LineageRelation> relations, final Set<String> visitedEdges) throws AtlasBaseException { |
| AtlasVertex inVertex = edge.getInVertex(); |
| AtlasVertex outVertex = edge.getOutVertex(); |
| String inGuid = AtlasGraphUtilsV2.getIdFromVertex(inVertex); |
| String outGuid = AtlasGraphUtilsV2.getIdFromVertex(outVertex); |
| String relationGuid = AtlasGraphUtilsV2.getEncodedProperty(edge, RELATIONSHIP_GUID_PROPERTY_KEY, String.class); |
| boolean isInputEdge = edge.getLabel().equalsIgnoreCase(PROCESS_INPUTS_EDGE); |
| |
| |
| if (!entities.containsKey(inGuid)) { |
| AtlasEntityHeader entityHeader = entityRetriever.toAtlasEntityHeader(inVertex); |
| entities.put(inGuid, entityHeader); |
| } |
| |
| if (!entities.containsKey(outGuid)) { |
| AtlasEntityHeader entityHeader = entityRetriever.toAtlasEntityHeader(outVertex); |
| entities.put(outGuid, entityHeader); |
| } |
| |
| if (isInputEdge) { |
| relations.add(new LineageRelation(inGuid, outGuid, relationGuid)); |
| } else { |
| relations.add(new LineageRelation(outGuid, inGuid, relationGuid)); |
| } |
| |
| if (visitedEdges != null) { |
| String visitedEdgeLabel = isInputEdge ? getVisitedEdgeLabel(inGuid, outGuid, relationGuid) : getVisitedEdgeLabel(outGuid, inGuid, relationGuid); |
| visitedEdges.add(visitedEdgeLabel); |
| } |
| } |
| |
| private AtlasLineageInfo getBothLineageInfoV1(String guid, int depth, boolean isDataSet) throws AtlasBaseException { |
| AtlasLineageInfo inputLineage = getLineageInfo(guid, INPUT, depth, isDataSet); |
| AtlasLineageInfo outputLineage = getLineageInfo(guid, OUTPUT, depth, isDataSet); |
| AtlasLineageInfo ret = inputLineage; |
| |
| ret.getRelations().addAll(outputLineage.getRelations()); |
| ret.getGuidEntityMap().putAll(outputLineage.getGuidEntityMap()); |
| ret.setLineageDirection(BOTH); |
| |
| return ret; |
| } |
| |
| private String getLineageQuery(String entityGuid, LineageDirection direction, int depth, boolean isDataSet, Map<String, Object> bindings) { |
| String incomingFrom = null; |
| String outgoingTo = null; |
| String ret; |
| |
| if (direction.equals(INPUT)) { |
| incomingFrom = PROCESS_OUTPUTS_EDGE; |
| outgoingTo = PROCESS_INPUTS_EDGE; |
| } else if (direction.equals(OUTPUT)) { |
| incomingFrom = PROCESS_INPUTS_EDGE; |
| outgoingTo = PROCESS_OUTPUTS_EDGE; |
| } |
| |
| bindings.put("guid", entityGuid); |
| bindings.put("incomingEdgeLabel", incomingFrom); |
| bindings.put("outgoingEdgeLabel", outgoingTo); |
| bindings.put("dataSetDepth", depth); |
| bindings.put("processDepth", depth - 1); |
| |
| if (depth < 1) { |
| ret = isDataSet ? gremlinQueryProvider.getQuery(FULL_LINEAGE_DATASET) : |
| gremlinQueryProvider.getQuery(FULL_LINEAGE_PROCESS); |
| } else { |
| ret = isDataSet ? gremlinQueryProvider.getQuery(PARTIAL_LINEAGE_DATASET) : |
| gremlinQueryProvider.getQuery(PARTIAL_LINEAGE_PROCESS); |
| } |
| |
| return ret; |
| } |
| |
| public boolean isLineageOnDemandEnabled() { |
| return AtlasConfiguration.LINEAGE_ON_DEMAND_ENABLED.getBoolean(); |
| } |
| } |