| <?xml version="1.0"?> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <project name="analyzers-common" default="default"> |
| |
| <description> |
| Analyzers for indexing content in different languages and domains. |
| </description> |
| |
| <!-- Test resource files that carry no license header; these patterns are published as rat.excludes. --> |
| <property name="rat.excludes" |
|           value="**/*.aff,**/*.dic,**/*.txt,**/charfilter/*.htm*,**/*LuceneResourcesWikiPage.html"/> |
| |
| <import file="../analysis-module-build.xml"/> |
| |
| <!-- Runs jflex-notice and then common.compile-core (both defined outside this file). --> |
| <target name="compile-core" |
|         depends="jflex-notice, common.compile-core"/> |
| |
| <!-- Aggregate target: runs jflex-check, clean-jflex and gen-uax29-supp-macros, then each per-scanner JFlex target. --> |
| <!-- It has no body of its own; all of the work is done by the targets it depends on, in the listed order. --> |
| <target name="jflex" |
|         depends="jflex-check,clean-jflex,gen-uax29-supp-macros,jflex-StandardAnalyzer,jflex-UAX29URLEmailTokenizer,jflex-wiki-tokenizer,jflex-HTMLStripCharFilter"/> |
| |
| <!-- Delegates to the target of the same name in ../icu/build.xml. --> |
| <target name="gen-uax29-supp-macros"> |
|   <subant target="gen-uax29-supp-macros"> |
|     <fileset includes="build.xml" dir="../icu"/> |
|   </subant> |
| </target> |
| |
| <!-- Regenerates HTMLStripCharFilter.java from HTMLStripCharFilter.jflex. --> |
| <!-- Skipped unless the jflex.present property is set; the character entity macros are regenerated first. --> |
| <target name="jflex-HTMLStripCharFilter" |
|         if="jflex.present" |
|         depends="init,jflex-check,generate-jflex-html-char-entities"> |
|   <!-- Register the jflex task, loading it from the jflex.classpath reference. --> |
|   <taskdef name="jflex" classname="jflex.anttask.JFlexTask"> |
|     <classpath refid="jflex.classpath"/> |
|   </taskdef> |
|   <!-- Write the generated scanner next to its grammar; nobak="on" is passed to the JFlex task unchanged. --> |
|   <jflex nobak="on" |
|          file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex" |
|          outdir="src/java/org/apache/lucene/analysis/charfilter"/> |
|   <!-- Remove the inappropriate JFlex-generated constructors. --> |
|   <!-- With flags "sg" the dot also matches newlines, so one match runs from the "Creates a new scanner" --> |
|   <!-- Javadoc comment through the closing brace that follows this(new java.io.InputStreamReader(in)); --> |
|   <replaceregexp flags="sg" |
|                  file="src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java" |
|                  match="/\*\*\s*\*\s*Creates a new scanner.*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}" |
|                  replace=""/> |
| </target> |
| |
| <!-- Runs htmlentity.py with the interpreter named by ${python.exe} and stores its output as HTMLCharacterEntities.jflex. --> |
| <target name="generate-jflex-html-char-entities"> |
|   <!-- The script is resolved relative to the charfilter source directory; a failing exit status fails the build. --> |
|   <exec executable="${python.exe}" |
|         dir="src/java/org/apache/lucene/analysis/charfilter" |
|         output="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" |
|         logerror="true" |
|         failonerror="true"> |
|     <!-- Tell Python not to write any bytecode cache into the filesystem: --> |
|     <arg value="-B"/> |
|     <arg value="htmlentity.py"/> |
|   </exec> |
|   <!-- Normalize the line endings of the freshly generated file, reading and writing it as UTF-8. --> |
|   <fixcrlf encoding="UTF-8" |
|            file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex"/> |
| </target> |
| |
| <!-- Regenerates WikipediaTokenizerImpl via the run-jflex macro; skipped unless jflex.present is set. --> |
| <target name="jflex-wiki-tokenizer" |
|         if="jflex.present" |
|         depends="init,jflex-check"> |
|   <!-- run-jflex relies on the jflex task, so register it first. --> |
|   <taskdef name="jflex" classname="jflex.anttask.JFlexTask"> |
|     <classpath refid="jflex.classpath"/> |
|   </taskdef> |
|   <run-jflex name="WikipediaTokenizerImpl" |
|              dir="src/java/org/apache/lucene/analysis/wikipedia"/> |
| </target> |
| |
| <!-- Regenerates StandardTokenizerImpl and ClassicTokenizerImpl via the run-jflex macro. --> |
| <!-- Skipped unless the jflex.present property is set. --> |
| <target name="jflex-StandardAnalyzer" |
|         if="jflex.present" |
|         depends="init,jflex-check"> |
|   <!-- run-jflex relies on the jflex task, so register it first. --> |
|   <taskdef name="jflex" classname="jflex.anttask.JFlexTask"> |
|     <classpath refid="jflex.classpath"/> |
|   </taskdef> |
|   <run-jflex name="StandardTokenizerImpl" |
|              dir="src/java/org/apache/lucene/analysis/standard"/> |
|   <run-jflex name="ClassicTokenizerImpl" |
|              dir="src/java/org/apache/lucene/analysis/standard"/> |
| </target> |
| |
| <!-- Regenerates UAX29URLEmailTokenizerImpl via the run-jflex macro; skipped unless jflex.present is set. --> |
| <!-- Depends on init as well as jflex-check, matching jflex-wiki-tokenizer and jflex-StandardAnalyzer, --> |
| <!-- so that invoking this target on its own performs the same initialization as its sibling targets. --> |
| <target name="jflex-UAX29URLEmailTokenizer" |
|         depends="init,jflex-check" |
|         if="jflex.present"> |
|   <!-- run-jflex relies on the jflex task, so register it first. --> |
|   <taskdef classname="jflex.anttask.JFlexTask" name="jflex"> |
|     <classpath refid="jflex.classpath"/> |
|   </taskdef> |
|   <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/> |
| </target> |
| |
| <!-- run-jflex: runs JFlex on one grammar and post-processes the generated Java source. --> |
| <!--   dir  = directory that holds the .jflex grammar and receives the generated .java file --> |
| <!--   name = base file name shared by the grammar and the generated class --> |
| <!-- The macro does not define the jflex task itself; callers declare it with taskdef beforehand. --> |
| <macrodef name="run-jflex"> |
|   <attribute name="dir"/> |
|   <attribute name="name"/> |
|   <sequential> |
|     <jflex nobak="on" |
|            outdir="@{dir}" |
|            file="@{dir}/@{name}.jflex"/> |
|     <!-- Remove the inappropriate JFlex-generated constructor: the match starts at the --> |
|     <!-- "Creates a new scanner." Javadoc comment and, because flags "sg" let the dot span newlines, --> |
|     <!-- extends through the closing brace after this(new java.io.InputStreamReader(in)); --> |
|     <replaceregexp flags="sg" |
|                    file="@{dir}/@{name}.java" |
|                    match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}" |
|                    replace=""/> |
|   </sequential> |
| </macrodef> |
| |
| <!-- Deletes Java sources whose content matches "generated.*by.*JFlex" from the charfilter, wikipedia and standard packages. --> |
| <!-- Only the standard package is searched recursively (**/*.java); the other two match *.java in the directory itself. --> |
| <target name="clean-jflex"> |
|   <delete> |
|     <fileset includes="*.java" |
|              dir="src/java/org/apache/lucene/analysis/charfilter"> |
|       <containsregexp expression="generated.*by.*JFlex"/> |
|     </fileset> |
|     <fileset includes="*.java" |
|              dir="src/java/org/apache/lucene/analysis/wikipedia"> |
|       <containsregexp expression="generated.*by.*JFlex"/> |
|     </fileset> |
|     <fileset includes="**/*.java" |
|              dir="src/java/org/apache/lucene/analysis/standard"> |
|       <containsregexp expression="generated.*by.*JFlex"/> |
|     </fileset> |
|   </delete> |
| </target> |
| |
| <!-- Arguments handed to the generator in gen-tlds: first the root zone URL, then the macro file to write. --> |
| <!-- NOTE(review): the URL uses plain HTTP; presumably the generator downloads it, so confirm whether HTTPS can be used. --> |
| <property name="tld.zones" |
|           value="http://www.internic.net/zones/root.zone"/> |
| <property name="tld.output" |
|           location="src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro"/> |
| |
| <!-- Runs GenerateJflexTLDMacros from the compiled tools classes in a forked JVM. --> |
| <!-- It receives ${tld.zones} and ${tld.output} as its two arguments; a non-zero exit fails the build. --> |
| <target name="gen-tlds" depends="compile-tools"> |
|   <java classname="org.apache.lucene.analysis.standard.GenerateJflexTLDMacros" |
|         failonerror="true" |
|         fork="true" |
|         dir="."> |
|     <classpath> |
|       <pathelement location="${build.dir}/classes/tools"/> |
|     </classpath> |
|     <!-- Argument order matters: zone source first, output file second. --> |
|     <arg value="${tld.zones}"/> |
|     <arg value="${tld.output}"/> |
|   </java> |
| </target> |
| |
| <!-- After common.compile-tools, compiles src/tools/java into ${build.dir}/classes/tools using the "classpath" reference. --> |
| <target name="compile-tools" |
|         depends="common.compile-tools"> |
|   <compile destdir="${build.dir}/classes/tools" |
|            srcdir="src/tools/java"> |
|     <classpath refid="classpath"/> |
|   </compile> |
| </target> |
| |
| <!-- Exposes module-build.javadocs (defined outside this file) under the plain name "javadocs". --> |
| <target name="javadocs" |
|         depends="module-build.javadocs"/> |
| </project> |