blob: 05758961239b6a2b9f0df43601dd7a220a373a86 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.htmlextractor.impl;
import static org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.randomUUID;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Utilities functions for RDF Graphs
*
* @author <a href="mailto:kasper@dfki.de">Walter Kasper</a>
*
*/
public class ClerezzaRDFUtils {
private static final Logger LOG = LoggerFactory.getLogger(ClerezzaRDFUtils.class);
public static void urifyBlankNodes(MGraph model) {
HashMap<BNode,UriRef> blankNodeMap = new HashMap<BNode,UriRef>();
MGraph remove = new SimpleMGraph();
MGraph add = new SimpleMGraph();
for (Triple t: model) {
NonLiteral subj = t.getSubject();
Resource obj = t.getObject();
UriRef pred = t.getPredicate();
boolean match = false;
if (subj instanceof BNode) {
match = true;
UriRef ru = blankNodeMap.get(subj);
if (ru == null) {
ru = createRandomUri();
blankNodeMap.put((BNode)subj, ru);
}
subj = ru;
}
if (obj instanceof BNode) {
match = true;
UriRef ru = blankNodeMap.get(obj);
if (ru == null) {
ru = createRandomUri();
blankNodeMap.put((BNode)obj, ru);
}
obj = ru;
}
if (match) {
remove.add(t);
add.add(new TripleImpl(subj,pred,obj));
}
}
model.removeAll(remove);
model.addAll(add);
}
public static UriRef createRandomUri() {
return new UriRef("urn:rnd:"+randomUUID());
}
public static void makeConnected(MGraph model, NonLiteral root, UriRef property) {
Set<NonLiteral> roots = findRoots(model);
LOG.debug("Roots: {}",roots.size());
boolean found = roots.remove(root);
//connect all hanging roots to root by property
for (NonLiteral n: roots) {
model.add(new TripleImpl(root,property,n));
}
}
public static Set<NonLiteral> findRoots(MGraph model) {
Set<NonLiteral> roots = new HashSet<NonLiteral>();
Set<NonLiteral> visited = new HashSet<NonLiteral>();
for (Triple t: model) {
NonLiteral subj = t.getSubject();
findRoot(model, subj, roots, visited);
}
return roots;
}
private static void findRoot(MGraph model, NonLiteral node, Set<NonLiteral> roots, Set<NonLiteral> visited) {
if (visited.contains(node)) {
return;
}
visited.add(node);
Iterator<Triple> it = model.filter(null,null,node);
// something that is not the object of some stement is a root
if (!it.hasNext()) {
roots.add(node);
LOG.debug("Root found: {}",node);
return;
}
while (it.hasNext()) {
Triple t = it.next();
NonLiteral subj = t.getSubject();
findRoot(model, subj, roots, visited);
}
}
}