blob: a96560790a8e17df268845963f501047304f0343 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.zemanta.impl;
import static org.apache.clerezza.rdf.core.serializedform.SupportedFormat.TURTLE;
import static org.apache.stanbol.enhancer.engines.zemanta.impl.ZemantaEnhancementEngine.API_KEY_PROPERTY;
import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.Map;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.jena.serializer.JenaSerializerProvider;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
import org.apache.stanbol.enhancer.test.helper.RemoteServiceHelper;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.osgi.service.cm.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Test for the {@link ZemantaEnhancementEngine}.
 * <p>
 * The engine is activated once for the whole class with a test API key,
 * is asked to enhance {@link #BIO_DOMAIN_TEXT}, and the text, entity and
 * topic annotations written to the content item's metadata are validated
 * with the {@link EnhancementStructureHelper}.
 */
public class ZemantaEnhancementEngineTest {

    /**
     * Sample text from the biology domain, found on this blog:
     * <a href="http://bcbio.wordpress.com/2009/01/04/extracting-keywords-from-biological-text-using-zemanta/">
     * http://bcbio.wordpress.com/2009/01/04/extracting-keywords-from-biological-text-using-zemanta/</a>
     */
    public static final String BIO_DOMAIN_TEXT = "glh-2 encodes a putative DEAD-box RNA " +
            "helicase that contains six CCHC zinc fingers and is homologous to Drosophila VASA, " +
            "a germ-line-specific, ATP-dependent, RNA helicase; GLH-2 activity may also be required " +
            "for the wild-type morphology of P granules and for localization of several protein " +
            "components, but not accumulation of P granule mRNA components; GLH-2 interacts in " +
            "vitro with itself and with KGB-1, a JNK-like MAP kinase; GLH-2 is a constitutive P " +
            "granule component and thus, with the exception of mature sperm, is expressed in germ " +
            "cells at all stages of development; GLH-2 is cytoplasmic in oocytes and the early " +
            "embryo, while perinuclear in all later developmental stages as well as in the distal " +
            "and medial regions of the hermaphrodite gonad; GLH-2 is expressed at barely detectable " +
            "levels in males";

    /** The engine under test; activated in {@link #setUpServices()}. */
    static ZemantaEnhancementEngine zemantaEngine = new ZemantaEnhancementEngine();

    /** Factory used by {@link #wrapAsContentItem(String)} to create in-memory content items. */
    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

    private static final Logger log = LoggerFactory.getLogger(ZemantaEnhancementEngineTest.class);

    /**
     * This key was generated to support testing only. Please use it only
     * for testing. For real usage of the engine you need to create your own
     * key!
     */
    private static final String ZEMANTA_TEST_APPLICATION_KEY = "2qsvcvkut8rhnqbhm35znn76";

    /**
     * Activates the shared engine with the test API key and the engine
     * name {@code "zemanta"}.
     *
     * @throws IOException declared by the engine activation
     * @throws ConfigurationException declared by the engine activation
     */
    @BeforeClass
    public static void setUpServices() throws IOException, ConfigurationException {
        Dictionary<String, Object> properties = new Hashtable<String, Object>();
        properties.put(API_KEY_PROPERTY, ZEMANTA_TEST_APPLICATION_KEY);
        properties.put(EnhancementEngine.PROPERTY_NAME, "zemanta");
        MockComponentContext context = new MockComponentContext(properties);
        zemantaEngine.activate(context);
    }

    /**
     * Deactivates the shared engine; no component context is passed.
     */
    @AfterClass
    public static void shutdownServices() {
        zemantaEngine.deactivate(null);
    }

    /**
     * Wraps the given text in a new content item whose URI is a fixed test
     * prefix followed by a random UUID.
     *
     * @param text the text to use as content
     * @return the newly created content item
     * @throws IOException if the content item cannot be created
     */
    public static ContentItem wrapAsContentItem(final String text) throws IOException {
        String id = "urn:org.apache.stanbol.enhancer:test:engines.zemanta:content-item-"
                + EnhancementEngineHelper.randomUUID().toString();
        return ciFactory.createContentItem(new UriRef(id), new StringSource(text));
    }

    /**
     * Enhances {@link #BIO_DOMAIN_TEXT} and validates all text, entity and
     * topic annotations found in the resulting metadata.
     * <p>
     * The misspelled method name is kept unchanged because the method is
     * public and is called by name from {@link #main(String[])}.
     *
     * @throws EngineException declared for engine failures
     * @throws IOException if the content item cannot be created
     */
    @Test
    public void tesetBioText() throws EngineException, IOException {
        ContentItem ci = wrapAsContentItem(BIO_DOMAIN_TEXT);
        try {
            zemantaEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            // NOTE(review): presumably returns normally only when the failure was
            // caused by the service being unreachable - confirm in RemoteServiceHelper.
            RemoteServiceHelper.checkServiceUnavailable(e);
            // nothing to validate without enhancement results
            return;
        }
        // dump the created metadata to stdout for manual inspection
        JenaSerializerProvider serializer = new JenaSerializerProvider();
        serializer.serialize(System.out, ci.getMetadata(), TURTLE);

        Map<UriRef,Resource> expectedValues = new HashMap<UriRef,Resource>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(
                zemantaEngine.getClass().getName()));
        // Do not require fise:confidence values for fise:TextAnnotations, because
        // the one used to group the TopicAnnotations does not have a confidence value.
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), BIO_DOMAIN_TEXT, expectedValues);
        log.info("{} TextAnnotations found ...", textAnnoNum);
        // adding null as expected for confidence makes it a required property
        expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        log.info("{} EntityAnnotations found ...", entityAnnoNum);
        int topicAnnoNum = EnhancementStructureHelper.validateAllTopicAnnotations(ci.getMetadata(), expectedValues);
        log.info("{} TopicAnnotations found ...", topicAnnoNum);
    }

    /**
     * Runs the test without a JUnit runner: set up, test, shut down.
     *
     * @param args ignored
     * @throws Exception if set-up or the test itself fails
     */
    public static void main(String[] args) throws Exception {
        ZemantaEnhancementEngineTest.setUpServices();
        try {
            ZemantaEnhancementEngineTest test = new ZemantaEnhancementEngineTest();
            test.tesetBioText();
        } finally {
            // deactivate the engine even when the test throws
            ZemantaEnhancementEngineTest.shutdownServices();
        }
    }
}