blob: 081c50ce758595eb6d1ab5dfd4654e957333907c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.htmlextractor.impl;
import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.clerezza.commons.rdf.BlankNode;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utility functions for RDF graphs.
*
* @author <a href="mailto:kasper@dfki.de">Walter Kasper</a>
*
*/
public final class ClerezzaRDFUtils {

    private static final Logger LOG = LoggerFactory.getLogger(ClerezzaRDFUtils.class);

    /**
     * Restrict instantiation
     */
    private ClerezzaRDFUtils() {}

    /**
     * Replaces every blank node that occurs as subject or object of a triple
     * in the given graph with a freshly generated IRI (see
     * {@link #createRandomUri()}). Within one call the same blank node is
     * always replaced by the same IRI, so links between statements are kept.
     * Triples without blank nodes are left untouched.
     *
     * @param model the graph to modify in place
     */
    public static void urifyBlankNodes(Graph model) {
        Map<BlankNode,IRI> blankNodeMap = new HashMap<BlankNode,IRI>();
        // Changes are collected first and applied after the loop so the
        // graph is not modified while it is being iterated.
        Graph remove = new SimpleGraph();
        Graph add = new SimpleGraph();
        for (Triple t : model) {
            BlankNodeOrIRI subj = t.getSubject();
            RDFTerm obj = t.getObject();
            boolean blankSubject = subj instanceof BlankNode;
            boolean blankObject = obj instanceof BlankNode;
            if (!blankSubject && !blankObject) {
                continue;
            }
            if (blankSubject) {
                subj = iriFor((BlankNode) subj, blankNodeMap);
            }
            if (blankObject) {
                obj = iriFor((BlankNode) obj, blankNodeMap);
            }
            remove.add(t);
            add.add(new TripleImpl(subj, t.getPredicate(), obj));
        }
        model.removeAll(remove);
        model.addAll(add);
    }

    /**
     * Returns the IRI registered for the blank node, creating and
     * registering a new random IRI on first encounter.
     */
    private static IRI iriFor(BlankNode node, Map<BlankNode,IRI> blankNodeMap) {
        IRI iri = blankNodeMap.get(node);
        if (iri == null) {
            iri = createRandomUri();
            blankNodeMap.put(node, iri);
        }
        return iri;
    }

    /**
     * Creates a new IRI of the form {@code urn:rnd:<uuid>}.
     *
     * @return the newly created IRI
     */
    public static IRI createRandomUri() {
        return new IRI("urn:rnd:" + randomUUID());
    }

    /**
     * Links all root nodes of the graph (as determined by
     * {@link #findRoots(Graph)}) other than <code>root</code> to
     * <code>root</code> by adding a triple
     * <code>(root, property, otherRoot)</code> for each of them.
     *
     * @param model the graph to modify in place
     * @param root the node all other roots get attached to
     * @param property the predicate used for the added triples
     */
    public static void makeConnected(Graph model, BlankNodeOrIRI root, IRI property) {
        Set<BlankNodeOrIRI> roots = findRoots(model);
        LOG.debug("Roots: {}", roots.size());
        // the designated root must not be linked to itself
        roots.remove(root);
        // connect all hanging roots to root by property
        for (BlankNodeOrIRI n : roots) {
            model.add(new TripleImpl(root, property, n));
        }
    }

    /**
     * Collects the root nodes of the graph. A root is a node that occurs as
     * the subject of some statement but never as the object of any statement.
     * Nodes that are all the object of some statement (for example the
     * members of a cycle without an incoming link from outside) therefore
     * contribute no root.
     *
     * @param model the graph to inspect
     * @return the set of root nodes; empty if there are none
     */
    public static Set<BlankNodeOrIRI> findRoots(Graph model) {
        Set<BlankNodeOrIRI> roots = new HashSet<BlankNodeOrIRI>();
        Set<BlankNodeOrIRI> visited = new HashSet<BlankNodeOrIRI>();
        for (Triple t : model) {
            findRoot(model, t.getSubject(), roots, visited);
        }
        return roots;
    }

    /**
     * Walks backwards (object to subject) from <code>node</code> and adds
     * every reached node without incoming statements to <code>roots</code>.
     * An explicit worklist is used instead of recursion so that long chains
     * of statements cannot exhaust the call stack.
     */
    private static void findRoot(Graph model, BlankNodeOrIRI node,
            Set<BlankNodeOrIRI> roots, Set<BlankNodeOrIRI> visited) {
        Deque<BlankNodeOrIRI> pending = new ArrayDeque<BlankNodeOrIRI>();
        pending.push(node);
        while (!pending.isEmpty()) {
            BlankNodeOrIRI current = pending.pop();
            if (!visited.add(current)) {
                // already handled, possibly via another path or a cycle
                continue;
            }
            Iterator<Triple> incoming = model.filter(null, null, current);
            // something that is not the object of some statement is a root
            if (!incoming.hasNext()) {
                roots.add(current);
                LOG.debug("Root found: {}", current);
                continue;
            }
            // Drain the iterator before the next filter call so that no
            // iterator stays open across nested queries on the graph.
            while (incoming.hasNext()) {
                BlankNodeOrIRI parent = incoming.next().getSubject();
                if (!visited.contains(parent)) {
                    pending.push(parent);
                }
            }
        }
    }
}