blob: 04f7eacb27701e6c42ca012daf89978761c43cc3 [file] [log] [blame]
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="analyzers-opennlp" default="default">
<description>
OpenNLP Library Integration
</description>
<path id="opennlpjars">
<fileset dir="lib"/>
</path>
<property name="test.model.data.dir" location="src/tools/test-model-data"/>
<property name="tests.userdir" location="src/test-files"/>
<property name="test.model.dir" location="${tests.userdir}/org/apache/lucene/analysis/opennlp"/>
<import file="../analysis-module-build.xml"/>
<property name="analysis-extras.conf.dir"
location="${common.dir}/../solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="opennlpjars"/>
<path refid="base.classpath"/>
</path>
<path id="test.classpath">
<path refid="test.base.classpath"/>
<pathelement path="${tests.userdir}"/>
</path>
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
<!--
This does not create real NLP models, just small unencumbered ones for the unit tests.
All text taken from reuters corpus.
Tags applied with online demos at CCG Urbana-Champaign.
-->
<target name="train-test-models" description="Train all small test models for unit tests" depends="resolve">
<mkdir dir="${test.model.dir}"/>
<!-- https://opennlp.apache.org/docs/1.9.0/manual/opennlp.html#tools.sentdetect.training -->
<trainModel command="SentenceDetectorTrainer" lang="en" data="sentences.txt" model="en-test-sent.bin"/>
<copy file="${test.model.dir}/en-test-sent.bin" todir="${analysis-extras.conf.dir}"/>
<!-- https://opennlp.apache.org/docs/1.9.0/manual/opennlp.html#tools.tokenizer.training -->
<trainModel command="TokenizerTrainer" lang="en" data="tokenizer.txt" model="en-test-tokenizer.bin"/>
<copy file="${test.model.dir}/en-test-tokenizer.bin" todir="${analysis-extras.conf.dir}"/>
<!-- https://opennlp.apache.org/docs/1.9.0/manual/opennlp.html#tools.postagger.training -->
<trainModel command="POSTaggerTrainer" lang="en" data="pos.txt" model="en-test-pos-maxent.bin"/>
<!-- https://opennlp.apache.org/docs/1.9.0/manual/opennlp.html#tools.chunker.training -->
<trainModel command="ChunkerTrainerME" lang="en" data="chunks.txt" model="en-test-chunker.bin"/>
<!-- https://opennlp.apache.org/docs/1.9.0/manual/opennlp.html#tools.namefind.training -->
<trainModel command="TokenNameFinderTrainer" lang="en" data="ner.txt" model="en-test-ner.bin">
<extra-args>
<arg value="-params"/>
<arg value="ner_TrainerParams.txt"/>
</extra-args>
</trainModel>
<copy file="${test.model.dir}/en-test-ner.bin" todir="${analysis-extras.conf.dir}"/>
<!-- https://opennlp.apache.org/docs/1.9.0/manual/opennlp.html#tools.lemmatizer.training -->
<trainModel command="LemmatizerTrainerME" lang="en" data="lemmas.txt" model="en-test-lemmatizer.bin"/>
</target>
<macrodef name="trainModel">
<attribute name="command"/>
<attribute name="lang"/>
<attribute name="data"/>
<attribute name="model"/>
<element name="extra-args" optional="true"/>
<sequential>
<java classname="opennlp.tools.cmdline.CLI"
dir="${test.model.data.dir}"
fork="true"
failonerror="true">
<classpath>
<path refid="opennlpjars"/>
</classpath>
<arg value="@{command}"/>
<arg value="-lang"/>
<arg value="@{lang}"/>
<arg value="-data"/>
<arg value="@{data}"/>
<arg value="-model"/>
<arg value="${test.model.dir}/@{model}"/>
<extra-args/>
</java>
</sequential>
</macrodef>
<target name="regenerate" depends="train-test-models"/>
</project>