blob: 4bfe6f054010fa68aab5b139037c6c9487e73e71 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.opencalais.impl;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllEntityAnnotations;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.access.TcManager;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.apache.stanbol.enhancer.test.helper.RemoteServiceHelper;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
import org.osgi.service.cm.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * JUnit tests for {@link OpenCalaisEngine}.
 * <p>
 * {@link #testEntityExtraction()} works on an RDF result file loaded from the
 * classpath. {@link #testCalaisConnection()} calls
 * {@code getCalaisAnalysis(..)} on the engine and is skipped unless a license
 * key was supplied via the system property named by
 * {@link OpenCalaisEngine#LICENSE_KEY}.
 *
 * @author <a href="mailto:kasper@dfki.de">Walter Kasper</a>
 */
public class TestOpenCalaisEngine {

    /**
     * This contains the logger.
     */
    private static final Logger LOG = LoggerFactory.getLogger(TestOpenCalaisEngine.class);

    /** The engine under test; created once in {@link #oneTimeSetup()}. */
    private static OpenCalaisEngine calaisExtractor;

    /** Factory used to wrap plain text into {@link ContentItem}s. */
    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

    /**
     * License key read from the system property named by
     * {@link OpenCalaisEngine#LICENSE_KEY}; {@code null} if the property is not set.
     */
    private static final String TEST_LICENSE_KEY = System.getProperty(OpenCalaisEngine.LICENSE_KEY);

    /** Sample text used as content for the test {@link ContentItem}s. */
    private static final String TEST_TEXT = "Israeli PM Netanyahu pulls out of US nuclear summit\n"
            + "Israeli PM Benjamin Netanyahu has cancelled a visit to the US where he was to "
            + "attend a summit on nuclear security, Israeli officials say. Mr Netanyahu made "
            + "the decision after learning that Egypt and Turkey intended to raise the issue "
            + "of Israel's presumed nuclear arsenal, the officials said. Mr Obama is due to "
            + "host dozens of world leaders at the two-day conference, which begins in "
            + "Washington on Monday. Israel has never confirmed or denied that it possesses "
            + "atomic weapons. Israel's Intelligence and Atomic Energy Minister Dan Meridor "
            + "will take Netanyahu's place in the nuclear summit, Israeli radio said. More "
            + "than 40 countries are expected at the meeting, which will focus on preventing "
            + "the spread of nuclear weapons to militant groups.";

    /**
     * Creates the shared engine instance with an empty type map and the default
     * {@link TcManager}. The license key is only set when the system property
     * holds a non-empty value consisting solely of word characters.
     *
     * @throws ConfigurationException declared for the engine setup calls
     */
    @BeforeClass
    public static void oneTimeSetup() throws ConfigurationException {
        calaisExtractor = new OpenCalaisEngine();
        calaisExtractor.setCalaisTypeMap(new HashMap<UriRef,UriRef>());
        calaisExtractor.tcManager = TcManager.getInstance();
        if (TEST_LICENSE_KEY != null && TEST_LICENSE_KEY.matches("\\w+")) {
            calaisExtractor.setLicenseKey(TEST_LICENSE_KEY);
        }
    }

    /**
     * Wraps the given text into a new in-memory {@link ContentItem}.
     *
     * @param text the text content
     * @return the content item created by the {@link ContentItemFactory}
     * @throws IOException if the content item cannot be created
     */
    public static ContentItem wrapAsContentItem(final String text) throws IOException {
        return ciFactory.createContentItem(new StringSource(text));
    }

    /**
     * Reads the result file {@code calaisresult.owl} from the classpath, queries
     * it for entity occurrences, creates enhancements for them and validates the
     * resulting text and entity annotations.
     *
     * @throws IOException on failures reading the resource or creating the content item
     * @throws EngineException if the engine fails while processing the model
     */
    @Test
    public void testEntityExtraction() throws IOException, EngineException {
        String testFile = "calaisresult.owl";
        String format = "application/rdf+xml";
        InputStream in = this.getClass().getClassLoader().getResourceAsStream(testFile);
        Assert.assertNotNull("failed to load resource " + testFile, in);
        MGraph model;
        try {
            model = calaisExtractor.readModel(in, format);
        } finally {
            // always release the classpath resource, even if parsing fails
            in.close();
        }
        Assert.assertNotNull("model reader failed with format: " + format, model);
        Collection<CalaisEntityOccurrence> entities;
        try {
            entities = calaisExtractor.queryModel(model);
        } catch (EngineException e) {
            // NOTE(review): presumably rethrows unless the failure was caused by an
            // unavailable remote service - confirm against RemoteServiceHelper
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        LOG.info("Found entities: {}", entities.size());
        LOG.debug("Entities:\n{}", entities);
        Assert.assertFalse("No entities found!", entities.isEmpty());
        //test the generation of the Enhancements
        ContentItem ci = wrapAsContentItem(TEST_TEXT);
        calaisExtractor.createEnhancements(entities, ci);
        Map<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR,
            LiteralFactory.getInstance().createTypedLiteral(
                calaisExtractor.getClass().getName()));
        //adding null as expected for confidence makes it a required property
        expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
        validateAllTextAnnotations(ci.getMetadata(),
            TEST_TEXT, expectedValues);
        validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    }

    /**
     * Requests an analysis of {@link #TEST_TEXT} via
     * {@code getCalaisAnalysis(..)} and checks that entities can be extracted
     * from the returned model. The test is skipped when no license key is
     * configured on the engine.
     *
     * @throws IOException on failures creating the content item
     * @throws EngineException if the engine fails while processing the request
     */
    @Test
    public void testCalaisConnection() throws IOException, EngineException {
        Assume.assumeNotNull(calaisExtractor.getLicenseKey());
        // NOTE(review): this content item is not passed to the engine below;
        // the analysis is requested directly for TEST_TEXT.
        ContentItem ci = wrapAsContentItem(TEST_TEXT);
        ci.getMetadata().add(
            new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, LiteralFactory.getInstance()
                    .createTypedLiteral("en")));
        MGraph model;
        try {
            model = calaisExtractor.getCalaisAnalysis(TEST_TEXT, "text/plain");
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        Assert.assertNotNull("No model", model);
        Collection<CalaisEntityOccurrence> entities;
        try {
            entities = calaisExtractor.queryModel(model);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        LOG.info("Found entities: {}", entities.size());
        LOG.debug("Entities:\n{}", entities);
        Assert.assertFalse("No entities found!", entities.isEmpty());
    }

    // Open question: how should license keys be provided for testing?
    // Currently the user has to supply the key as a system property
    // (see TEST_LICENSE_KEY); otherwise testCalaisConnection is skipped.
}