blob: 90193d9c6e8e91bad7de29356e210a13f2523f2e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.eval;
import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.util.Locale;

import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

import opennlp.tools.formats.Conll02NameSampleStream;
import opennlp.tools.formats.Conll02NameSampleStream.LANGUAGE;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.TokenNameFinderEvaluator;
import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.model.ModelUtil;
/**
* Evaluates the name finder against the Dutch and Spanish CONLL2002 corpus.
* <p>
* Download the tarball from the CONLL2002 shared task
* <a href="http://www.cnts.ua.ac.be/conll2002/ner/"> site </a>
 * and decompress it into this directory: $OPENNLP_DATA_DIR/conll02.
* Also decompress the training files.
*
* TODO:
 * - Files are provided gzipped. It would be better if the user did not have to unpack them.
* - Double check the encoding which is used to open the files. Currently that is UTF-8.
 * - Make the Conll02 reader compatible. Currently it doesn't work with Spanish data without POS tags.
*/
public class Conll02NameFinderEval extends AbstractEvalTest {

  /**
   * Bit mask requesting all four CoNLL-2002 entity types (person, organization,
   * location, misc) in a single stream. Used by the "Combined" tests below.
   */
  private static final int COMBINED_TYPES = Conll02NameSampleStream.GENERATE_PERSON_ENTITIES
      | Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES
      | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES
      | Conll02NameSampleStream.GENERATE_MISC_ENTITIES;

  private static File dutchTrainingFile;
  private static File dutchTestAFile;
  private static File dutchTestBFile;
  private static File spanishTrainingFile;
  private static File spanishTestAFile;
  private static File spanishTestBFile;

  /**
   * Trains a name finder model on a CoNLL-2002 formatted training file.
   *
   * @param trainFile the training data file
   * @param lang the corpus language, {@code NLD} or {@code SPA}
   * @param params the training algorithm parameters (GIS, QN, or perceptron)
   * @param types bit mask of entity types to generate, see
   *     {@link Conll02NameSampleStream}
   * @return the trained model
   * @throws IOException if the training data cannot be read
   */
  private TokenNameFinderModel train(File trainFile, LANGUAGE lang,
      TrainingParameters params, int types) throws IOException {

    // try-with-resources closes the underlying file stream; Locale.ROOT keeps
    // the language code locale-independent (e.g. safe under the Turkish locale).
    try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
        lang, new MarkableFileInputStreamFactory(trainFile), types)) {
      return NameFinderME.train(lang.toString().toLowerCase(Locale.ROOT), null, samples,
          params, new TokenNameFinderFactory());
    }
  }

  /**
   * Evaluates a model against a CoNLL-2002 formatted test file and asserts
   * that the resulting F-measure matches the expected reference value.
   *
   * @param model the model under evaluation
   * @param testData the test data file
   * @param lang the corpus language, {@code NLD} or {@code SPA}
   * @param types bit mask of entity types to generate, see
   *     {@link Conll02NameSampleStream}
   * @param expectedFMeasure the reference F-measure, compared with 1e-4 tolerance
   * @throws IOException if the test data cannot be read
   */
  private void eval(TokenNameFinderModel model, File testData, LANGUAGE lang,
      int types, double expectedFMeasure) throws IOException {

    try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
        lang, new MarkableFileInputStreamFactory(testData), types)) {
      TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(new NameFinderME(model));
      evaluator.evaluate(samples);
      Assert.assertEquals(expectedFMeasure, evaluator.getFMeasure().getFMeasure(), 0.0001);
    }
  }

  /**
   * Locates the six corpus files under $OPENNLP_DATA_DIR and verifies their
   * content hashes so the expected F-measures below are meaningful.
   */
  @BeforeClass
  public static void verifyTrainingData() throws Exception {

    dutchTrainingFile = new File(getOpennlpDataDir(), "conll02/ner/data/ned.train");
    dutchTestAFile = new File(getOpennlpDataDir(), "conll02/ner/data/ned.testa");
    dutchTestBFile = new File(getOpennlpDataDir(), "conll02/ner/data/ned.testb");
    spanishTrainingFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.train");
    spanishTestAFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.testa");
    spanishTestBFile = new File(getOpennlpDataDir(), "conll02/ner/data/esp.testb");

    verifyTrainingData(new Conll02NameSampleStream(
        LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTrainingFile),
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
        new BigInteger("109687424525847313767541246922170457976"));
    verifyTrainingData(new Conll02NameSampleStream(
        LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestAFile),
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
        new BigInteger("12942966701628852910737840182656846323"));
    verifyTrainingData(new Conll02NameSampleStream(
        LANGUAGE.NLD, new MarkableFileInputStreamFactory(dutchTestBFile),
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
        new BigInteger("223206987942490952427646331013509976957"));
    verifyTrainingData(new Conll02NameSampleStream(
        LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTrainingFile),
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
        new BigInteger("226089384066775461905386060946810714487"));
    verifyTrainingData(new Conll02NameSampleStream(
        LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestAFile),
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
        new BigInteger("313879596837181728494732341737647284762"));
    verifyTrainingData(new Conll02NameSampleStream(
        LANGUAGE.SPA, new MarkableFileInputStreamFactory(spanishTestBFile),
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES),
        new BigInteger("24037715705115461166858183817622459974"));
  }

  @Test
  public void evalDutchPersonPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.6238361266294227d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.744312026002167d);
  }

  @Test
  public void evalDutchPersonMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.5696539485359361d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7127771911298839d);
  }

  @Test
  public void evalDutchPersonMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.6363636363636364d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7482403898213319d);
  }

  @Test
  public void evalDutchOrganizationPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6081871345029239d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6502808988764045d);
  }

  @Test
  public void evalDutchOrganizationMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5197969543147207d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5753228120516498d);
  }

  @Test
  public void evalDutchOrganizationMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5412748171368861d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5764966740576497d);
  }

  @Test
  public void evalDutchLocationPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7978609625668449d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7880434782608695d);
  }

  @Test
  public void evalDutchLocationMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.5451977401129944d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.680952380952381d);
  }

  @Test
  public void evalDutchLocationMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6737683089214381d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7433903576982893d);
  }

  @Test
  public void evalDutchMiscPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.6651198762567672d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.6748166259168704d);
  }

  @Test
  public void evalDutchMiscMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5831157528285466d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5762897914379803d);
  }

  @Test
  public void evalDutchMiscMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES);

    eval(model, dutchTestAFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.4227642276422764d);

    eval(model, dutchTestBFile, LANGUAGE.NLD,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.455294863665187d);
  }

  @Test
  public void evalDutchCombinedPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        COMBINED_TYPES);

    eval(model, dutchTestAFile, LANGUAGE.NLD, COMBINED_TYPES, 0.727808326787117d);

    eval(model, dutchTestBFile, LANGUAGE.NLD, COMBINED_TYPES, 0.7388253638253639d);
  }

  @Test
  public void evalDutchCombinedMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        COMBINED_TYPES);

    eval(model, dutchTestAFile, LANGUAGE.NLD, COMBINED_TYPES, 0.6673209028459275d);

    eval(model, dutchTestBFile, LANGUAGE.NLD, COMBINED_TYPES, 0.6984085910208306d);
  }

  @Test
  public void evalDutchCombinedMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(dutchTrainingFile, LANGUAGE.NLD, params,
        COMBINED_TYPES);

    eval(model, dutchTestAFile, LANGUAGE.NLD, COMBINED_TYPES, 0.6999800915787379d);

    eval(model, dutchTestBFile, LANGUAGE.NLD, COMBINED_TYPES, 0.7101430258496261d);
  }

  @Test
  public void evalSpanishPersonPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8331210191082803d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8419705694177864d);
  }

  @Test
  public void evalSpanishPersonMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.686960933536276d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8132033008252063d);
  }

  @Test
  public void evalSpanishPersonMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7454634624816087d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8215339233038348d);
  }

  @Test
  public void evalSpanishOrganizationPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7478819748758399d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7715330894579315d);
  }

  @Test
  public void evalSpanishOrganizationMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6982288828337874d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7640449438202247d);
  }

  @Test
  public void evalSpanishOrganizationMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6904593639575972d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7843601895734598d);
  }

  @Test
  public void evalSpanishLocationPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7018867924528303d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6315158777711205d);
  }

  @Test
  public void evalSpanishLocationMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7386907929749867d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6772777167947311d);
  }

  @Test
  public void evalSpanishLocationMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7544565842438182d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7005019520356944d);
  }

  @Test
  public void evalSpanishMiscPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5102880658436214d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5842696629213483d);
  }

  @Test
  public void evalSpanishMiscMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.40971168437025796d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.45703124999999994d);
  }

  @Test
  public void evalSpanishMiscMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES);

    eval(model, spanishTestAFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.46467817896389324d);

    eval(model, spanishTestBFile, LANGUAGE.SPA,
        Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5020576131687243d);
  }

  @Test
  public void evalSpanishCombinedPerceptron() throws IOException {
    TrainingParameters params = createPerceptronParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        COMBINED_TYPES);

    eval(model, spanishTestAFile, LANGUAGE.SPA, COMBINED_TYPES, 0.7476700838769804d);

    eval(model, spanishTestBFile, LANGUAGE.SPA, COMBINED_TYPES, 0.7692307692307693d);
  }

  @Test
  public void evalSpanishCombinedMaxentGis() throws IOException {
    TrainingParameters params = ModelUtil.createDefaultTrainingParameters();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        COMBINED_TYPES);

    eval(model, spanishTestAFile, LANGUAGE.SPA, COMBINED_TYPES, 0.707400023454908d);

    eval(model, spanishTestBFile, LANGUAGE.SPA, COMBINED_TYPES, 0.7576868829337094d);
  }

  @Test
  public void evalSpanishCombinedMaxentQn() throws IOException {
    TrainingParameters params = createMaxentQnParams();

    TokenNameFinderModel model = train(spanishTrainingFile, LANGUAGE.SPA, params,
        COMBINED_TYPES);

    eval(model, spanishTestAFile, LANGUAGE.SPA, COMBINED_TYPES, 0.7455564833591795d);

    eval(model, spanishTestBFile, LANGUAGE.SPA, COMBINED_TYPES, 0.7856735159817352d);
  }
}