// blob: 25b562cfee0cbff8b140792676a369ab7cec8dbc
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.paoding.token;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Dictionary;
import java.util.Hashtable;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.commons.solr.extras.paoding.Activator;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextUtils;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.osgi.service.cm.ConfigurationException;
/**
 * Unit test for the {@link PaodingTokenizerEngine}.
 * <p>
 * The test prepares a Paoding dictionary directory once per class, activates a
 * fresh engine instance before every test with a {@link MockComponentContext},
 * runs the engine on a content item holding Chinese text and asserts that the
 * resulting {@link AnalysedText} contains tokens.
 */
public class TestPaodingTokenizerEngine {

    // NOTE(review): not referenced within this class; presumably used by
    // MockComponentContext or other tests in this package - verify before removing.
    public static final String FAKE_BUNDLE_SYMBOLIC_NAME = "FAKE_BUNDLE_SYMBOLIC_NAME";

    /** Factory used to create the content item for each test; set in {@link #initDataFileProvicer()}. */
    private static ContentItemFactory contentItemFactory;

    /** URI used both as content item id and as subject of the language annotation. */
    private static final UriRef CONTENT_ITEM_ID = new UriRef("http://www.example.org/contentItem1");

    /**
     * Test text taken from the <a href="http://zh.wikipedia.org/wiki/Barack_Obama">
     * Chinese Wikipedia page for Barack Obama</a>.
     */
    private static final String TEST_TEXT = "巴拉克·侯赛因·奥巴马二世,美国民主黨籍政治家,第44任美国总统,"
            + "為第一位非裔美国总统,同時擁有黑(盧歐族)白(英德爱混血)血统,於2008年初次當選美國總統,"
            + "並於2012年成功連任,。\n 奥巴马1961年出生於美国夏威夷州檀香山,童年和青少年時期分别在印尼和"
            + "夏威夷度过。1991年,奥巴马以优等生荣誉从哈佛法学院毕业。1996年,当选伊利诺州参议员。2000年,"
            + "競選美国众议院席位失败,后一直从事州参议员工作,且於2002年获得连任。2004年,"
            + "在美国民主党全国代表大会上发表主题演讲,因此成为全美知名的政界人物。同年11月,"
            + "以70%的选票当选代表伊利诺州的美国联邦参议员,是美國歷史上第五位有非裔血统的联邦参议员。";

    /** Engine under test; created in {@link #setUpServices()} and released in {@link #cleanUpServices()}. */
    private PaodingTokenizerEngine engine;

    /** Content item holding the test text and its language annotation; recreated for every test. */
    private ContentItem contentItem;

    /** Location of the Paoding dictionary directory, resolved against the base directory. */
    protected static final String TEST_PAODING_DIC_PATH = File.separatorChar + "target"
            + File.separatorChar + "paoding-dict";

    /** The resolved Paoding dictionary directory. */
    private static File paodingDict;

    /**
     * Prepares the Paoding dictionary and the content item factory once for all tests.
     * <p>
     * The dictionary directory is resolved against the {@code basedir} system
     * property (falling back to the current directory). If the directory does not
     * exist yet, it is initialised from the dictionary archive loaded via this
     * class' class loader.
     *
     * @throws IOException if initialising the dictionary or closing the archive stream fails
     */
    @BeforeClass
    public static void initDataFileProvicer() throws IOException {
        // "basedir" is presumably set by the build (e.g. Maven) - fall back to the working directory
        String baseDir = System.getProperty("basedir", ".");
        paodingDict = new File(baseDir, TEST_PAODING_DIC_PATH);
        if (!paodingDict.isDirectory()) {
            InputStream dictArchive = TestPaodingTokenizerEngine.class.getClassLoader()
                    .getResourceAsStream(Activator.DICT_ARCHIVE);
            // getResourceAsStream returns null for a missing resource: fail with a clear message
            Assert.assertNotNull("Unable to load the Paoding dictionary archive '"
                    + Activator.DICT_ARCHIVE + "' via the test class loader", dictArchive);
            try {
                Activator.initPaodingDictionary(paodingDict, dictArchive);
            } finally {
                // closing an already closed stream has no effect, so this is safe
                // even if the Activator closes the stream itself
                dictArchive.close();
            }
        }
        Activator.initPaodingDictHomeProperty(paodingDict);
        contentItemFactory = InMemoryContentItemFactory.getInstance();
    }

    /**
     * Creates and activates a new engine and a new content item before every test.
     *
     * @throws IOException if the content item can not be created
     * @throws ConfigurationException if the engine rejects the configuration
     */
    @Before
    public void setUpServices() throws IOException, ConfigurationException {
        engine = new PaodingTokenizerEngine();
        engine.analysedTextFactory = AnalysedTextFactory.getDefaultInstance();
        Dictionary<String,Object> config = new Hashtable<String,Object>();
        config.put(EnhancementEngine.PROPERTY_NAME, "paoding-token");
        engine.activate(new MockComponentContext(config));
        contentItem = contentItemFactory.createContentItem(CONTENT_ITEM_ID, new StringSource(TEST_TEXT));
        // annotate the content item with the language of the test text: "zh" (Chinese)
        contentItem.getMetadata().add(new TripleImpl(CONTENT_ITEM_ID, Properties.DC_LANGUAGE,
                new PlainLiteralImpl("zh")));
    }

    /**
     * Asserts that the engine accepts the Chinese content item for asynchronous
     * enhancement and that processing it produces an {@link AnalysedText} with
     * at least one token.
     *
     * @throws EngineException if the engine fails to process the content item
     */
    @Test
    public void testEngine() throws EngineException {
        Assert.assertEquals(EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(contentItem));
        engine.computeEnhancements(contentItem);
        // assert the results
        AnalysedText at = AnalysedTextUtils.getAnalysedText(contentItem);
        Assert.assertNotNull(at);
        Assert.assertTrue(at.getTokens().hasNext()); // assert that tokens are present
    }

    /**
     * Deactivates the engine (if one was created) and drops the reference after every test.
     */
    @After
    public void cleanUpServices() {
        if (engine != null) {
            engine.deactivate(null);
        }
        engine = null;
    }
}