| <?xml version="1.0"?> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <project name="analyzers-common" default="default"> |
| |
| <description> |
| Analyzers for indexing content in different languages and domains. |
| </description> |
| |
| <!-- some files for testing that do not have license headers --> |
| <!-- Both rat.* properties are set before the import below; Ant properties are immutable once set, so these values take precedence over any same-named definitions in the imported file. --> |
| <property name="rat.excludes" value="**/*.aff,**/*.dic,**/*.txt,**/charfilter/*.htm*,**/*LuceneResourcesWikiPage.html"/> |
| <!-- Extra include pattern covering src/tools; presumably consumed by the same license check as rat.excludes (TODO confirm in the imported build). --> |
| <property name="rat.additional-includes" value="src/tools/**"/> |
| |
| <import file="../analysis-module-build.xml"/> |
| |
| <!-- Directory whose *Stemmer.java files are rewritten in place by the patch-snowball target. --> |
| <property name="snowball.programs.dir" location="src/java/org/tartarus/snowball/ext"/> |
| |
| <!-- Java source whose line endings the unicode-data target normalizes after running its Groovy script (presumably the file that script generates; confirm in the script). --> |
| <property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/> |
| |
| <!-- Aggregate target with no body of its own: Ant runs the listed dependencies from left to right, i.e. -install-jflex and clean-jflex first, then each -jflex-* generator target. --> |
| <target name="jflex" |
|         depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,-jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/> |
| |
| <!-- |
|   Generates HTMLStripCharFilter.java from HTMLStripCharFilter.jflex and then removes the |
|   Reader-taking constructor that JFlex emits into the generated class. |
|   Dependencies: init, -install-jflex (declared here, as on the sibling -jflex-* targets, so the |
|   jflex task used below is presumably available even when this target is not reached through |
|   the aggregate jflex target) and generate-jflex-html-char-entities, which rewrites |
|   HTMLCharacterEntities.jflex in the same directory beforehand. |
| --> |
| <target name="-jflex-HTMLStripCharFilter" |
|         depends="init,-install-jflex,generate-jflex-html-char-entities"> |
|   <jflex file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex" |
|          outdir="src/java/org/apache/lucene/analysis/charfilter" |
|          nobak="on" inputstreamctor="false"/> |
|   <!-- Remove the inappropriate JFlex-generated constructor. The "s" flag lets the pattern span the line breaks inside the generated javadoc and method body. --> |
|   <replaceregexp file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java" |
|                  match="/\*\*\s*\*\s*Creates a new scanner\s*\*\s*\*\s*@param\s*in\s*the java.io.Reader to read input from\.\s*\*/\s*public HTMLStripCharFilter\(java\.io\.Reader in\)\s*\{\s*this.zzReader = in;\s*\}" |
|                  replace="" flags="s"/> |
| </target> |
| |
| <!-- |
|   Runs htmlentity.py with the charfilter source directory as working directory and captures |
|   its standard output in HTMLCharacterEntities.jflex; error output goes to the Ant log instead |
|   of the file (logerror). A failing script aborts the build. The resulting file then has its |
|   line endings normalized, read and written as UTF-8. |
| --> |
| <target name="generate-jflex-html-char-entities"> |
|   <exec executable="${python.exe}" |
|         dir="src/java/org/apache/lucene/analysis/charfilter" |
|         output="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" |
|         logerror="true" |
|         failonerror="true"> |
|     <!-- -B: keep Python from writing bytecode cache files next to the script. --> |
|     <arg value="-B"/> |
|     <arg value="htmlentity.py"/> |
|   </exec> |
|   <fixcrlf encoding="UTF-8" |
|            file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex"/> |
| </target> |
| |
| <!-- Invokes run-jflex (defined outside this file) for WikipediaTokenizerImpl in the wikipedia package directory. --> |
| <target name="-jflex-wiki-tokenizer" depends="init,-install-jflex"> |
|   <run-jflex name="WikipediaTokenizerImpl" |
|              dir="src/java/org/apache/lucene/analysis/wikipedia"/> |
| </target> |
| |
| <!-- Invokes run-jflex-and-disable-buffer-expansion (defined outside this file) for UAX29URLEmailTokenizerImpl in the standard package directory. --> |
| <target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex"> |
|   <run-jflex-and-disable-buffer-expansion name="UAX29URLEmailTokenizerImpl" |
|                                           dir="src/java/org/apache/lucene/analysis/standard"/> |
| </target> |
| |
| <!-- Invokes run-jflex (defined outside this file) for ClassicTokenizerImpl in the standard package directory. --> |
| <target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex"> |
|   <run-jflex name="ClassicTokenizerImpl" |
|              dir="src/java/org/apache/lucene/analysis/standard"/> |
| </target> |
| |
| <!-- |
|   Deletes Java sources whose content matches "generated.*by.*JFlex". |
|   The charfilter and wikipedia directories are scanned at top level only (*.java); |
|   the standard directory is scanned recursively (**/*.java). |
| --> |
| <target name="clean-jflex"> |
|   <delete> |
|     <fileset includes="*.java" dir="src/java/org/apache/lucene/analysis/charfilter"> |
|       <containsregexp expression="generated.*by.*JFlex"/> |
|     </fileset> |
|     <fileset includes="*.java" dir="src/java/org/apache/lucene/analysis/wikipedia"> |
|       <containsregexp expression="generated.*by.*JFlex"/> |
|     </fileset> |
|     <fileset includes="**/*.java" dir="src/java/org/apache/lucene/analysis/standard"> |
|       <containsregexp expression="generated.*by.*JFlex"/> |
|     </fileset> |
|   </delete> |
| </target> |
| |
| <!-- |
|   Builds the icu4j.classpath path through Ivy. The target body is skipped whenever |
|   icu4j.resolved is already set, which the last step of this target does. |
| --> |
| <target name="-resolve-icu4j" |
|         depends="ivy-availability-check,ivy-configure" |
|         unless="icu4j.resolved" |
|         xmlns:ivy="antlib:org.apache.ivy.ant"> |
|   <!-- Load ivy-versions.properties; every key is exposed with the "ivyversions." prefix. --> |
|   <loadproperties srcFile="${common.dir}/ivy-versions.properties" prefix="ivyversions"/> |
|   <!-- The revision comes from the "/com.ibm.icu/icu4j" key loaded above. --> |
|   <ivy:cachepath organisation="com.ibm.icu" |
|                  module="icu4j" |
|                  revision="${ivyversions./com.ibm.icu/icu4j}" |
|                  conf="default" |
|                  inline="true" |
|                  transitive="true" |
|                  pathid="icu4j.classpath"/> |
|   <property name="icu4j.resolved" value="true"/> |
| </target> |
| |
| <!-- Runs the generate-unicode-data Groovy script with icu4j.classpath on its classpath, then normalizes the line endings of ${unicode-props-file} (handled as UTF-8). --> |
| <target name="unicode-data" depends="-resolve-icu4j,resolve-groovy"> |
|   <groovy src="src/tools/groovy/generate-unicode-data.groovy" |
|           classpathref="icu4j.classpath"/> |
|   <fixcrlf encoding="UTF-8" file="${unicode-props-file}"/> |
| </target> |
| |
| <!-- First and second command-line arguments passed to GenerateJflexTLDMacros by the gen-tlds target: a root zone URL and the path of a JFlex macro file (presumably the tool's input and output; confirm in the tool source). --> |
| <property name="tld.zones" value="http://www.internic.net/zones/root.zone"/> |
| <property name="tld.output" location="src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro"/> |
| |
| <!-- |
|   Runs GenerateJflexTLDMacros in a forked JVM started in the project directory, passing |
|   ${tld.zones} and ${tld.output} as its two arguments. A non-zero exit fails the build. |
| --> |
| <target name="gen-tlds" depends="compile-tools"> |
|   <java classname="org.apache.lucene.analysis.standard.GenerateJflexTLDMacros" |
|         fork="true" |
|         dir="." |
|         failonerror="true"> |
|     <!-- Same directory that the compile-tools target in this file compiles into. --> |
|     <classpath> |
|       <pathelement location="${build.dir}/classes/tools"/> |
|     </classpath> |
|     <arg value="${tld.zones}"/> |
|     <arg value="${tld.output}"/> |
|   </java> |
| </target> |
| |
| <!-- Runs after common.compile-tools and compiles src/tools/java into ${build.dir}/classes/tools using the "classpath" path reference. --> |
| <target name="compile-tools" depends="common.compile-tools"> |
|   <compile destdir="${build.dir}/classes/tools" srcdir="src/tools/java"> |
|     <classpath refid="classpath"/> |
|   </compile> |
| </target> |
| |
| <!-- Adds no steps of its own; the target only triggers module-build.javadocs through its depends list. --> |
| <target name="javadocs" depends="module-build.javadocs"/> |
| |
| <!-- Aggregate target: runs jflex and then unicode-data; it has no body of its own. --> |
| <target name="regenerate" depends="jflex,unicode-data"/> |
| |
| <!-- |
|   Rewrites the *Stemmer.java files under ${snowball.programs.dir} in place: |
|   1. prefixes each line-initial "public class XxxStemmer" declaration with @SuppressWarnings("unused"); |
|   2. replaces the "private final static XxxStemmer methodObject" field line with a |
|      java.lang.invoke.MethodHandles.Lookup field of the same name; |
|   3. runs fixcrlf over the same files: tabs become spaces (tab length 2), line endings are |
|      normalized and the last line is terminated. |
|   As written, neither pattern matches the text it produces, so running the target again |
|   leaves already patched files alone. |
|   The double quotes inside the first replace attribute are written as &quot; because the |
|   attribute value itself is delimited by double quotes; raw quotes there make the file malformed. |
| --> |
| <target name="patch-snowball" description="Patches all snowball programs in '${snowball.programs.dir}' to make them work with MethodHandles"> |
|   <fileset id="snowball.programs" dir="${snowball.programs.dir}" includes="*Stemmer.java"/> |
|   <!-- \0 in the replacement re-inserts the whole matched class declaration. --> |
|   <replaceregexp match="^public class \w+Stemmer\b" replace="@SuppressWarnings(&quot;unused&quot;) \0" flags="m" encoding="UTF-8"> |
|     <fileset refid="snowball.programs"/> |
|   </replaceregexp> |
|   <replaceregexp match="private final static \w+Stemmer methodObject\b.*$" replace="/* patched */ private static final java.lang.invoke.MethodHandles.Lookup methodObject = java.lang.invoke.MethodHandles.lookup();" flags="m" encoding="UTF-8"> |
|     <fileset refid="snowball.programs"/> |
|   </replaceregexp> |
|   <fixcrlf srcdir="${snowball.programs.dir}" includes="*Stemmer.java" tab="remove" tablength="2" encoding="UTF-8" javafiles="yes" fixlast="yes"/> |
| </target> |
| </project> |