blob: 5d02ea541988ff2b45d09930c1c6d05f11d5b769 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.atlas.discovery;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasErrorCode;
import org.apache.atlas.annotation.GraphTransaction;
import org.apache.atlas.authorize.AtlasAuthorizationUtils;
import org.apache.atlas.authorize.AtlasEntityAccessRequest;
import org.apache.atlas.authorize.AtlasPrivilege;
import org.apache.atlas.exception.AtlasBaseException;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.lineage.AtlasLineageInfo;
import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageDirection;
import org.apache.atlas.model.lineage.AtlasLineageInfo.LineageRelation;
import org.apache.atlas.repository.Constants;
import org.apache.atlas.repository.graphdb.AtlasEdge;
import org.apache.atlas.repository.graphdb.AtlasGraph;
import org.apache.atlas.repository.graphdb.AtlasVertex;
import org.apache.atlas.repository.store.graph.v2.AtlasGraphUtilsV2;
import org.apache.atlas.repository.store.graph.v2.EntityGraphRetriever;
import org.apache.atlas.type.AtlasEntityType;
import org.apache.atlas.type.AtlasTypeRegistry;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.util.AtlasGremlinQueryProvider;
import org.apache.atlas.util.AtlasGremlinQueryProvider.AtlasGremlinQuery;
import org.apache.atlas.v1.model.lineage.SchemaResponse.SchemaDetails;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import static org.apache.atlas.repository.Constants.RELATIONSHIP_GUID_PROPERTY_KEY;
@Service
public class EntityLineageService implements AtlasLineageService {
private static final Logger LOG = LoggerFactory.getLogger(EntityLineageService.class);
private static final String PROCESS_INPUTS_EDGE = "__Process.inputs";
private static final String PROCESS_OUTPUTS_EDGE = "__Process.outputs";
private static final String COLUMNS = "columns";
private final AtlasGraph graph;
private final AtlasGremlinQueryProvider gremlinQueryProvider;
private final EntityGraphRetriever entityRetriever;
private final AtlasTypeRegistry atlasTypeRegistry;
@Inject
EntityLineageService(AtlasTypeRegistry typeRegistry, AtlasGraph atlasGraph) {
this.graph = atlasGraph;
this.gremlinQueryProvider = AtlasGremlinQueryProvider.INSTANCE;
this.entityRetriever = new EntityGraphRetriever(typeRegistry);
this.atlasTypeRegistry = typeRegistry;
}
@Override
@GraphTransaction
public AtlasLineageInfo getAtlasLineageInfo(String guid, LineageDirection direction, int depth) throws AtlasBaseException {
AtlasLineageInfo lineageInfo;
AtlasEntityHeader entity = entityRetriever.toAtlasEntityHeaderWithClassifications(guid);
AtlasAuthorizationUtils.verifyAccess(new AtlasEntityAccessRequest(atlasTypeRegistry, AtlasPrivilege.ENTITY_READ, entity), "read entity lineage: guid=", guid);
AtlasEntityType entityType = atlasTypeRegistry.getEntityTypeByName(entity.getTypeName());
if (entityType == null || !entityType.getTypeAndAllSuperTypes().contains(AtlasClient.DATA_SET_SUPER_TYPE)) {
throw new AtlasBaseException(AtlasErrorCode.INSTANCE_GUID_NOT_DATASET, guid);
}
if (direction != null) {
if (direction.equals(LineageDirection.INPUT)) {
lineageInfo = getLineageInfo(guid, LineageDirection.INPUT, depth);
} else if (direction.equals(LineageDirection.OUTPUT)) {
lineageInfo = getLineageInfo(guid, LineageDirection.OUTPUT, depth);
} else if (direction.equals(LineageDirection.BOTH)) {
lineageInfo = getBothLineageInfo(guid, depth);
} else {
throw new AtlasBaseException(AtlasErrorCode.INSTANCE_LINEAGE_INVALID_PARAMS, "direction", direction.toString());
}
} else {
throw new AtlasBaseException(AtlasErrorCode.INSTANCE_LINEAGE_INVALID_PARAMS, "direction", null);
}
return lineageInfo;
}
@Override
@GraphTransaction
public SchemaDetails getSchemaForHiveTableByName(final String datasetName) throws AtlasBaseException {
if (StringUtils.isEmpty(datasetName)) {
// TODO: Complete error handling here
throw new AtlasBaseException(AtlasErrorCode.BAD_REQUEST);
}
AtlasEntityType hive_table = atlasTypeRegistry.getEntityTypeByName("hive_table");
Map<String, Object> lookupAttributes = new HashMap<>();
lookupAttributes.put("qualifiedName", datasetName);
String guid = AtlasGraphUtilsV2.getGuidByUniqueAttributes(hive_table, lookupAttributes);
return getSchemaForHiveTableByGuid(guid);
}
@Override
@GraphTransaction
public SchemaDetails getSchemaForHiveTableByGuid(final String guid) throws AtlasBaseException {
if (StringUtils.isEmpty(guid)) {
throw new AtlasBaseException(AtlasErrorCode.BAD_REQUEST);
}
SchemaDetails ret = new SchemaDetails();
AtlasEntityType hive_column = atlasTypeRegistry.getEntityTypeByName("hive_column");
ret.setDataType(AtlasTypeUtil.toClassTypeDefinition(hive_column));
AtlasEntityWithExtInfo entityWithExtInfo = entityRetriever.toAtlasEntityWithExtInfo(guid);
AtlasEntity entity = entityWithExtInfo.getEntity();
AtlasAuthorizationUtils.verifyAccess(new AtlasEntityAccessRequest(atlasTypeRegistry, AtlasPrivilege.ENTITY_READ, new AtlasEntityHeader(entity)),
"read entity schema: guid=", guid);
Map<String, AtlasEntity> referredEntities = entityWithExtInfo.getReferredEntities();
List<String> columnIds = getColumnIds(entity);
if (MapUtils.isNotEmpty(referredEntities)) {
List<Map<String, Object>> rows = referredEntities.entrySet()
.stream()
.filter(e -> isColumn(columnIds, e))
.map(e -> AtlasTypeUtil.toMap(e.getValue()))
.collect(Collectors.toList());
ret.setRows(rows);
}
return ret;
}
private List<String> getColumnIds(AtlasEntity entity) {
List<String> ret = new ArrayList<>();
Object columnObjs = entity.getAttribute(COLUMNS);
if (columnObjs instanceof List) {
for (Object pkObj : (List) columnObjs) {
if (pkObj instanceof AtlasObjectId) {
ret.add(((AtlasObjectId) pkObj).getGuid());
}
}
}
return ret;
}
private boolean isColumn(List<String> columnIds, Map.Entry<String, AtlasEntity> e) {
return columnIds.contains(e.getValue().getGuid());
}
private AtlasLineageInfo getLineageInfo(String guid, LineageDirection direction, int depth) throws AtlasBaseException {
Map<String, AtlasEntityHeader> entities = new HashMap<>();
Set<LineageRelation> relations = new HashSet<>();
String lineageQuery = getLineageQuery(guid, direction, depth);
List edgeMapList = (List) graph.executeGremlinScript(lineageQuery, false);
if (CollectionUtils.isNotEmpty(edgeMapList)) {
for (Object edgeMap : edgeMapList) {
if (edgeMap instanceof Map) {
for (final Object o : ((Map) edgeMap).entrySet()) {
final Map.Entry entry = (Map.Entry) o;
Object value = entry.getValue();
if (value instanceof List) {
for (Object elem : (List) value) {
if (elem instanceof AtlasEdge) {
processEdge((AtlasEdge) elem, entities, relations);
} else {
LOG.warn("Invalid value of type {} found, ignoring", (elem != null ? elem.getClass().getSimpleName() : "null"));
}
}
} else if (value instanceof AtlasEdge) {
processEdge((AtlasEdge) value, entities, relations);
} else {
LOG.warn("Invalid value of type {} found, ignoring", (value != null ? value.getClass().getSimpleName() : "null"));
}
}
}
}
}
return new AtlasLineageInfo(guid, entities, relations, direction, depth);
}
private void processEdge(final AtlasEdge edge, final Map<String, AtlasEntityHeader> entities, final Set<LineageRelation> relations) throws AtlasBaseException {
AtlasVertex inVertex = edge.getInVertex();
AtlasVertex outVertex = edge.getOutVertex();
String inGuid = AtlasGraphUtilsV2.getIdFromVertex(inVertex);
String outGuid = AtlasGraphUtilsV2.getIdFromVertex(outVertex);
String relationGuid = AtlasGraphUtilsV2.getEncodedProperty(edge, RELATIONSHIP_GUID_PROPERTY_KEY, String.class);
boolean isInputEdge = edge.getLabel().equalsIgnoreCase(PROCESS_INPUTS_EDGE);
if (!entities.containsKey(inGuid)) {
AtlasEntityHeader entityHeader = entityRetriever.toAtlasEntityHeader(inVertex);
entities.put(inGuid, entityHeader);
}
if (!entities.containsKey(outGuid)) {
AtlasEntityHeader entityHeader = entityRetriever.toAtlasEntityHeader(outVertex);
entities.put(outGuid, entityHeader);
}
if (isInputEdge) {
relations.add(new LineageRelation(inGuid, outGuid, relationGuid));
} else {
relations.add(new LineageRelation(outGuid, inGuid, relationGuid));
}
}
private AtlasLineageInfo getBothLineageInfo(String guid, int depth) throws AtlasBaseException {
AtlasLineageInfo inputLineage = getLineageInfo(guid, LineageDirection.INPUT, depth);
AtlasLineageInfo outputLineage = getLineageInfo(guid, LineageDirection.OUTPUT, depth);
AtlasLineageInfo ret = inputLineage;
ret.getRelations().addAll(outputLineage.getRelations());
ret.getGuidEntityMap().putAll(outputLineage.getGuidEntityMap());
ret.setLineageDirection(LineageDirection.BOTH);
return ret;
}
private String getLineageQuery(String entityGuid, LineageDirection direction, int depth) {
String lineageQuery = null;
if (direction.equals(LineageDirection.INPUT)) {
lineageQuery = generateLineageQuery(entityGuid, depth, PROCESS_OUTPUTS_EDGE, PROCESS_INPUTS_EDGE);
} else if (direction.equals(LineageDirection.OUTPUT)) {
lineageQuery = generateLineageQuery(entityGuid, depth, PROCESS_INPUTS_EDGE, PROCESS_OUTPUTS_EDGE);
}
return lineageQuery;
}
private String generateLineageQuery(String entityGuid, int depth, String incomingFrom, String outgoingTo) {
String lineageQuery;
if (depth < 1) {
String query = gremlinQueryProvider.getQuery(AtlasGremlinQuery.FULL_LINEAGE);
lineageQuery = String.format(query, entityGuid, incomingFrom, outgoingTo);
} else {
String query = gremlinQueryProvider.getQuery(AtlasGremlinQuery.PARTIAL_LINEAGE);
lineageQuery = String.format(query, entityGuid, incomingFrom, outgoingTo, depth);
}
return lineageQuery;
}
}