blob: 87fc2cc2bc9e1dca5e0b2412e92f4c556a59223b [file] [log] [blame]
package opennlp.morfologik.lemmatizer;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.nio.charset.Charset;
import morfologik.stemming.EncoderType;
import opennlp.morfologik.builder.MorfologikDictionayBuilder;
import opennlp.morfologik.builder.POSDictionayBuilderTest;
import opennlp.tools.lemmatizer.DictionaryLemmatizer;
import org.junit.Test;
/**
 * Tests {@link MorfologikLemmatizer} against a dictionary compiled on the fly
 * from the {@code /dictionaryWithLemma.txt} test resource.
 */
public class MorfologikLemmatizerTest {

  /**
   * Checks the lemmas returned for the word form "casa" under different
   * part-of-speech tags, including a capitalised form.
   *
   * @throws Exception if the test dictionary cannot be built or loaded
   */
  @Test
  public void testLemmatizeInsensitive() throws Exception {
    DictionaryLemmatizer dict = createDictionary(false);

    // Same surface form, different tag -> different lemma.
    assertEquals("casar", dict.lemmatize("casa", "V"));
    assertEquals("casa", dict.lemmatize("casa", "NOUN"));
    // Capitalised input is expected to resolve to the lower-case lemma.
    assertEquals("casa", dict.lemmatize("Casa", "PROP"));
  }

  /**
   * Compiles the plain-text test dictionary into a temporary binary
   * dictionary file and opens a lemmatizer on it.
   *
   * @param caseSensitive currently ignored; neither the builder nor the
   *        lemmatizer is given this flag
   * @return a lemmatizer backed by the freshly built dictionary
   * @throws Exception if the resource cannot be located, or building or
   *         loading the dictionary fails
   */
  private MorfologikLemmatizer createDictionary(boolean caseSensitive)
      throws Exception {
    MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();

    // Go through URI rather than URL.getFile(): getFile() returns the
    // URL-encoded path, which points to a non-existent file whenever the
    // checkout directory contains spaces or other escaped characters.
    File dictInFile = new File(POSDictionayBuilderTest.class.getResource(
        "/dictionaryWithLemma.txt").toURI());

    File dictOutFile = File.createTempFile(
        POSDictionayBuilderTest.class.getName(), ".dict");
    // Do not leave a compiled dictionary behind in the temp directory after
    // every test run.
    dictOutFile.deleteOnExit();

    builder.build(dictInFile, dictOutFile, Charset.forName("UTF-8"), "+",
        EncoderType.PREFIX);

    return new MorfologikLemmatizer(dictOutFile.toURI().toURL());
  }
}