<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="Nutch" default="job">
<!-- Load the default properties, plus any overrides the user wants to -->
<!-- contribute (without having to type -D or edit this file).         -->
<!-- Ant keeps the first value assigned to a property, so the files    -->
<!-- listed earlier take precedence over the ones listed later.        -->
<property file="${user.home}/build.properties"/>
<property file="${basedir}/build.properties"/>
<property file="${basedir}/default.properties"/>

<!-- JUnit formatter type used by the test-core target; this value     -->
<!-- applies only when none of the files above has already set it.     -->
<property name="test.junit.output.format" value="plain"/>
<!-- Normal classpath: the compiled core classes plus every jar -->
<!-- located directly inside the library directory.             -->
<path id="classpath">
  <pathelement location="${build.classes}"/>
  <fileset dir="${lib.dir}" includes="*.jar"/>
</path>
<!-- Unit test classpath.                                            -->
<!-- plugins.classpath.dir is set to the directory that contains the -->
<!-- path named by ${build.plugins}, and is added to the path below. -->
<dirname file="${build.plugins}" property="plugins.classpath.dir"/>
<path id="test.classpath">
  <pathelement location="${test.build.classes}"/>
  <pathelement location="${conf.dir}"/>
  <pathelement location="${test.src.dir}"/>
  <pathelement location="${plugins.classpath.dir}"/>
  <!-- everything from the normal classpath comes last -->
  <path refid="classpath"/>
</path>
<!-- XML catalog handed to the xslt tasks: maps the XHTML 1.0 -->
<!-- Transitional public id to a DTD copy in ${xmlcatalog.dir}. -->
<xmlcatalog id="docDTDs">
  <dtd location="${xmlcatalog.dir}/xhtml1-transitional.dtd"
       publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"/>
</xmlcatalog>
<!-- ====================================================== -->
<!-- Stuff needed by all targets -->
<!-- ====================================================== -->
<target name="init">
  <!-- Output directories for the core build and the unit tests. -->
  <mkdir dir="${build.dir}"/>
  <mkdir dir="${build.classes}"/>
  <mkdir dir="${test.build.dir}"/>
  <mkdir dir="${test.build.classes}"/>

  <!-- Back-date every *.template file below the conf directory. -->
  <!-- NOTE(review): presumably this keeps the copy below from   -->
  <!-- replacing an existing, newer configuration file; confirm. -->
  <touch datetime="01/25/1971 2:00 pm">
    <fileset dir="${conf.dir}">
      <include name="**/*.template"/>
    </fileset>
  </touch>

  <!-- Copy each template to the same name minus the .template suffix. -->
  <copy todir="${conf.dir}" verbose="true">
    <fileset dir="${conf.dir}">
      <include name="**/*.template"/>
    </fileset>
    <mapper type="glob" from="*.template" to="*"/>
  </copy>

  <!-- Unpack the hadoop scripts: pull bin.tgz out of the hadoop jar, -->
  <!-- then expand that archive into the bin directory.               -->
  <mkdir dir="${build.dir}/hadoop"/>
  <unjar dest="${build.dir}/hadoop">
    <fileset dir="${lib.dir}">
      <include name="hadoop*.jar"/>
    </fileset>
    <patternset>
      <include name="bin.tgz"/>
    </patternset>
  </unjar>
  <untar src="${build.dir}/hadoop/bin.tgz" dest="bin" compression="gzip"/>

  <!-- Fix broken library paths with spaces: pipe the PlatformName -->
  <!-- output in bin/hadoop through sed, turning spaces into "_".  -->
  <replace file="bin/hadoop"
           token="PlatformName"
           value="PlatformName | sed -e 's/ /_/g'"/>
  <chmod dir="bin" perm="ugo+rx" includes="*.sh,hadoop"/>

  <!-- Unpack the hadoop webapp (webapps/**) from the hadoop jar -->
  <!-- into the build directory.                                 -->
  <mkdir dir="${build.dir}/webapps"/>
  <unjar dest="${build.dir}">
    <fileset dir="${lib.dir}">
      <include name="hadoop*.jar"/>
    </fileset>
    <patternset>
      <include name="webapps/**"/>
    </patternset>
  </unjar>
</target>
<!-- ====================================================== -->
<!-- Compile the Java files -->
<!-- ====================================================== -->
<!-- Aggregate target: compiles the core classes first, then the plugins. -->
<target depends="compile-core,compile-plugins" name="compile"/>
<!-- Compile the org.apache.nutch sources found under ${src.dir} into -->
<!-- ${build.classes}, using the normal classpath.                    -->
<target name="compile-core" depends="init">
  <javac srcdir="${src.dir}"
         destdir="${build.classes}"
         includes="org/apache/nutch/**/*.java"
         classpathref="classpath"
         encoding="${build.encoding}"
         source="${javac.version}"
         target="${javac.version}"
         debug="${javac.debug}"
         optimize="${javac.optimize}"
         deprecation="${javac.deprecation}"/>
</target>
<!-- Run the "deploy" target of the build file in src/plugin. -->
<!-- inheritAll="false": this project's properties are not    -->
<!-- passed down to the plugin build.                         -->
<target name="compile-plugins">
  <ant inheritAll="false" dir="src/plugin" target="deploy"/>
</target>
<!-- Run JavaCC on the NutchAnalysis.jj grammar, then normalise the -->
<!-- line endings of all Java sources under ${src.dir} to LF.       -->
<target name="generate-src" depends="init">
  <javacc javacchome="${javacc.home}"
          target="${src.dir}/org/apache/nutch/analysis/NutchAnalysis.jj"/>
  <fixcrlf srcdir="${src.dir}" includes="**/*.java" eol="lf"/>
</target>
<!-- Aggregate target: regenerate the parser sources, then compile. -->
<target name="dynamic" depends="generate-src,compile"/>
<!-- ================================================================== -->
<!-- Make nutch.jar -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="jar" depends="compile-core">
  <!-- Put both configuration files next to the compiled classes so -->
  <!-- that they end up inside the jar.                             -->
  <copy todir="${build.classes}" file="${conf.dir}/nutch-default.xml"/>
  <copy todir="${build.classes}" file="${conf.dir}/nutch-site.xml"/>

  <!-- Package everything below ${build.classes}; the manifest -->
  <!-- element is left empty.                                  -->
  <jar destfile="${build.dir}/${final.name}.jar" basedir="${build.classes}">
    <manifest/>
  </jar>
</target>
<!-- ================================================================== -->
<!-- Make job jar -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<!-- Build the job file: core classes and configuration at the root, -->
<!-- library jars under lib/ and the built plugins under plugins/.   -->
<target name="job" depends="compile">
  <jar jarfile="${build.dir}/${final.name}.job">
    <zipfileset dir="${build.classes}"/>
    <!-- Templates are excluded at every depth, matching the pattern -->
    <!-- used by the init, war and package targets.                  -->
    <zipfileset dir="${conf.dir}" excludes="**/*.template"/>
    <!-- The hadoop jars are left out of the job file. -->
    <zipfileset dir="${lib.dir}" prefix="lib"
                includes="**/*.jar" excludes="hadoop-*.jar"/>
    <zipfileset dir="${build.plugins}" prefix="plugins"/>
  </jar>
</target>
<!-- ================================================================== -->
<!-- Make nutch.war -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="war" depends="jar,compile,generate-docs">
  <!-- Generate the nutch.xml (servlet context) file. The input is   -->
  <!-- read from ${conf.dir}, the same directory whose contents are  -->
  <!-- packed into WEB-INF/classes below and that the jar target     -->
  <!-- copies nutch-default.xml from.                                -->
  <xslt in="${conf.dir}/nutch-default.xml"
        out="${build.dir}/nutch.xml"
        style="${basedir}/conf/context.xsl">
    <xmlcatalog refid="docDTDs"/>
    <outputproperty name="indent" value="yes"/>
  </xslt>

  <war destfile="${build.dir}/${final.name}.war"
       webxml="${web.src.dir}/web.xml">
    <!-- Web content: JSPs, include fragments and the generated docs. -->
    <fileset dir="${web.src.dir}/jsp"/>
    <zipfileset dir="${docs.src}" includes="include/*.html"/>
    <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
    <fileset dir="${docs.dir}"/>

    <!-- Third-party libraries bundled into WEB-INF/lib. -->
    <lib dir="${lib.dir}">
      <include name="lucene*.jar"/>
      <include name="taglibs-*.jar"/>
      <include name="hadoop-*.jar"/>
      <include name="dom4j-*.jar"/>
      <include name="xerces-*.jar"/>
      <include name="commons-cli-*.jar"/>
      <include name="commons-lang-*.jar"/>
      <include name="commons-logging-*.jar"/>
      <include name="log4j-*.jar"/>
    </lib>
    <!-- The jar produced by the jar target. -->
    <lib dir="${build.dir}">
      <include name="${final.name}.jar"/>
    </lib>

    <!-- Configuration (without templates), locale resources and the -->
    <!-- web log4j settings go into WEB-INF/classes.                 -->
    <classes dir="${conf.dir}" excludes="**/*.template"/>
    <classes dir="${web.src.dir}/locale"/>
    <classes file="${web.src.dir}/log4j.properties"/>
    <zipfileset prefix="WEB-INF/classes/plugins" dir="${build.plugins}"/>

    <!-- Tag library descriptors are placed directly under WEB-INF. -->
    <webinf dir="${lib.dir}">
      <include name="taglibs-*.tld"/>
    </webinf>
  </war>
</target>
<!-- ================================================================== -->
<!-- Compile test code -->
<!-- ================================================================== -->
<!-- Compile the org.apache.nutch test sources found under       -->
<!-- ${test.src.dir} into ${test.build.classes}, using the unit  -->
<!-- test classpath.                                             -->
<target name="compile-core-test" depends="compile-core">
  <javac srcdir="${test.src.dir}"
         destdir="${test.build.classes}"
         includes="org/apache/nutch/**/*.java"
         classpathref="test.classpath"
         encoding="${build.encoding}"
         source="${javac.version}"
         target="${javac.version}"
         debug="${javac.debug}"
         optimize="${javac.optimize}"
         deprecation="${javac.deprecation}"/>
</target>
<!-- ================================================================== -->
<!-- Run code checks (PMD) -->
<!-- ================================================================== -->
<target name="pmd" depends="compile">
  <!-- Absolute location of the HTML report. -->
  <property name="pmd.report" location="${build.dir}/pmd-report.html"/>

  <!-- The PMD task is loaded from the jars in ${lib.dir}/pmd-ext -->
  <!-- together with the xerces jars in ${lib.dir}.               -->
  <taskdef name="pmd" classname="net.sourceforge.pmd.ant.PMDTask">
    <classpath>
      <fileset dir="${lib.dir}" includes="pmd-ext/*.jar,xerces*.jar"/>
    </classpath>
  </taskdef>

  <!-- Rule violations do not fail the task itself; their count is -->
  <!-- stored in pmd.failures and checked below.                   -->
  <pmd encoding="${build.encoding}"
       shortFilenames="true"
       failonerror="true"
       failOnRuleViolation="false"
       failuresPropertyName="pmd.failures">
    <ruleset>unusedcode</ruleset>
    <!--ruleset>basic</ruleset-->
    <!--ruleset>optimizations</ruleset-->
    <formatter type="html" toFile="${pmd.report}"/>
    <!-- <formatter type="xml" toFile="${tempbuild}/$report_pmd.xml"/> -->
    <fileset dir="${basedir}/src">
      <include name="java/**/*.java"/>
      <include name="plugin/**/*.java"/>
      <!-- Exclude generated sources -->
      <exclude name="**/NutchAnalysis.java"/>
      <exclude name="**/NutchAnalysisTokenManager.java"/>
    </fileset>
  </pmd>

  <!-- pmd.stop is set only when pmd.failures exists and is not "0". -->
  <condition property="pmd.stop" value="true">
    <and>
      <isset property="pmd.failures"/>
      <not>
        <equals arg1="${pmd.failures}" arg2="0" trim="true"/>
      </not>
    </and>
  </condition>
  <fail if="pmd.stop">FAILURE: PMD shows ${pmd.failures} rule violations. See ${pmd.report} for details.</fail>
</target>
<!-- ================================================================== -->
<!-- Run unit tests -->
<!-- ================================================================== -->
<!-- Aggregate target: runs the core tests first, then the plugin tests. -->
<target depends="test-core,test-plugins" name="test"/>
<target name="test-core" depends="compile,compile-core-test">
  <!-- Start from an empty test data directory. -->
  <delete dir="${test.build.data}"/>
  <mkdir dir="${test.build.data}"/>

  <!-- Copy the resources needed by the JUnit tests. -->
  <copy todir="${test.build.data}">
    <fileset dir="src/testresources">
      <include name="**/*"/>
    </fileset>
  </copy>
  <copy todir="${test.build.classes}" file="${test.src.dir}/nutch-site.xml"/>
  <copy todir="${test.build.classes}" file="${test.src.dir}/log4j.properties"/>

  <!-- Tests run in a forked JVM started from ${basedir}. A failure or -->
  <!-- error does not halt the run; it sets tests.failed, which is     -->
  <!-- checked after the whole batch has finished.                     -->
  <junit fork="yes"
         dir="${basedir}"
         maxmemory="1000m"
         printsummary="yes"
         haltonfailure="no"
         errorProperty="tests.failed"
         failureProperty="tests.failed">
    <sysproperty key="test.build.data" value="${test.build.data}"/>
    <sysproperty key="test.src.dir" value="${test.src.dir}"/>
    <classpath refid="test.classpath"/>
    <formatter type="${test.junit.output.format}"/>

    <!-- Without a "testcase" property: every Test*.java except the -->
    <!-- one named by ${test.exclude}.                              -->
    <batchtest todir="${test.build.dir}" unless="testcase">
      <fileset dir="${test.src.dir}">
        <include name="**/Test*.java"/>
        <exclude name="**/${test.exclude}.java"/>
      </fileset>
    </batchtest>

    <!-- With a "testcase" property: only the class it names. -->
    <batchtest todir="${test.build.dir}" if="testcase">
      <fileset dir="${test.src.dir}">
        <include name="**/${testcase}.java"/>
      </fileset>
    </batchtest>
  </junit>

  <fail if="tests.failed">Tests failed!</fail>
</target>
<!-- Run the "test" target of the build file in src/plugin. -->
<!-- inheritAll="false": this project's properties are not  -->
<!-- passed down to the plugin build.                       -->
<target name="test-plugins" depends="compile">
  <ant inheritAll="false" dir="src/plugin" target="test"/>
</target>
<!-- Aggregate target: run all tests, then build the release tarball. -->
<target name="nightly" depends="test,tar"/>
<!-- ================================================================== -->
<!-- Documentation -->
<!-- ================================================================== -->
<target name="javadoc" depends="compile">
  <mkdir dir="${build.javadoc}"/>
  <javadoc
      overview="${src.dir}/overview.html"
      destdir="${build.javadoc}"
      author="true"
      version="true"
      use="true"
      windowtitle="${Name} ${version} API"
      doctitle="${Name} ${version} API"
      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation">
    <!-- Extra command-line arguments taken from the proxy properties. -->
    <arg value="${javadoc.proxy.host}"/>
    <arg value="${javadoc.proxy.port}"/>

    <!-- Source roots: the core sources, then one entry per plugin. -->
    <!-- Each plugin is listed exactly once.                        -->
    <packageset dir="${src.dir}"/>
    <packageset dir="${plugins.dir}/lib-http/src/java"/>
    <packageset dir="${plugins.dir}/lib-parsems/src/java"/>
    <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
    <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
    <packageset dir="${plugins.dir}/ontology/src/java"/>
    <packageset dir="${plugins.dir}/protocol-file/src/java"/>
    <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
    <packageset dir="${plugins.dir}/protocol-http/src/java"/>
    <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
    <packageset dir="${plugins.dir}/parse-ext/src/java"/>
    <packageset dir="${plugins.dir}/parse-html/src/java"/>
    <packageset dir="${plugins.dir}/parse-js/src/java"/>
    <packageset dir="${plugins.dir}/parse-text/src/java"/>
    <packageset dir="${plugins.dir}/parse-pdf/src/java"/>
    <!-- <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues-->
    <!-- <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues-->
    <packageset dir="${plugins.dir}/parse-msexcel/src/java"/>
    <packageset dir="${plugins.dir}/parse-mspowerpoint/src/java"/>
    <packageset dir="${plugins.dir}/parse-msword/src/java"/>
    <packageset dir="${plugins.dir}/parse-oo/src/java"/>
    <packageset dir="${plugins.dir}/parse-rss/src/java"/>
    <packageset dir="${plugins.dir}/parse-swf/src/java"/>
    <packageset dir="${plugins.dir}/parse-zip/src/java"/>
    <packageset dir="${plugins.dir}/index-basic/src/java"/>
    <packageset dir="${plugins.dir}/index-more/src/java"/>
    <packageset dir="${plugins.dir}/query-basic/src/java"/>
    <packageset dir="${plugins.dir}/query-more/src/java"/>
    <packageset dir="${plugins.dir}/query-site/src/java"/>
    <packageset dir="${plugins.dir}/query-url/src/java"/>
    <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
    <packageset dir="${plugins.dir}/summary-basic/src/java"/>
    <packageset dir="${plugins.dir}/summary-lucene/src/java"/>
    <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
    <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
    <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
    <packageset dir="${plugins.dir}/creativecommons/src/java"/>
    <packageset dir="${plugins.dir}/languageidentifier/src/java"/>
    <packageset dir="${plugins.dir}/clustering-carrot2/src/java"/>

    <!-- External API documentation to link against. -->
    <link href="${javadoc.link.java}"/>
    <link href="${javadoc.link.lucene}"/>
    <link href="${javadoc.link.hadoop}"/>

    <!-- The normal classpath plus every jar found below the plugins -->
    <!-- source directory.                                           -->
    <classpath refid="classpath"/>
    <classpath>
      <fileset dir="${plugins.dir}">
        <include name="**/*.jar"/>
      </fileset>
    </classpath>

    <!-- Package groups shown on the overview page; the plugin package -->
    <!-- lists come from the plugins.* properties.                     -->
    <group title="Core" packages="org.apache.nutch.*"/>
    <group title="Plugins API" packages="${plugins.api}"/>
    <group title="Protocol Plugins" packages="${plugins.protocol}"/>
    <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
    <group title="Scoring Plugins" packages="${plugins.scoring}"/>
    <group title="Parse Plugins" packages="${plugins.parse}"/>
    <group title="Analysis Plugins" packages="${plugins.analysis}"/>
    <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
    <group title="Query Filter Plugins" packages="${plugins.query}"/>
    <group title="Summary Plugins" packages="${plugins.summary}"/>
    <group title="Clustering Plugins" packages="${plugins.clustering}"/>
    <group title="Ontology Plugins" packages="${plugins.ontology}"/>
    <group title="Misc. Plugins" packages="${plugins.misc}"/>
  </javadoc>
  <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
  <copy file="${plugins.dir}/plugin.dtd"
        todir="${build.javadoc}/org/apache/nutch/plugin/doc-files"/>
</target>
<!-- Transform nutch-default.xml from ${conf.dir} with the         -->
<!-- nutch-conf.xsl stylesheet and write the result to ${docs.dir}. -->
<!-- Uses the xslt task name, like every other transform in this   -->
<!-- file; "style" is the deprecated name of the same task.        -->
<target name="default-doc">
  <xslt basedir="${conf.dir}" destdir="${docs.dir}"
        includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
</target>
<!-- Build the documentation for a single locale. The target is -->
<!-- skipped unless the doc.locale property is set.             -->
<target name="generate-locale" if="doc.locale">
  <echo message="Generating docs for locale=${doc.locale}"/>
  <mkdir dir="${build.docs}/${doc.locale}/include"/>

  <!-- Render this locale's header.xml into header.html. -->
  <xslt style="${docs.src}/style/nutch-header.xsl"
        in="${docs.src}/include/${doc.locale}/header.xml"
        out="${build.docs}/${doc.locale}/include/header.html">
    <xmlcatalog refid="docDTDs"/>
  </xslt>

  <!-- The locale's generated pages depend on its include XML files -->
  <!-- and on the stylesheets.                                      -->
  <dependset>
    <srcfileset dir="${docs.src}/include/${doc.locale}" includes="*.xml"/>
    <srcfileset dir="${docs.src}/style" includes="*.xsl"/>
    <targetfileset dir="${docs.dir}/${doc.locale}" includes="*.html"/>
  </dependset>

  <!-- A copy of the page stylesheet is placed in the locale's build -->
  <!-- directory and used for the page transform below.              -->
  <copy todir="${build.docs}/${doc.locale}"
        file="${docs.src}/style/nutch-page.xsl"
        preservelastmodified="true"/>
  <xslt style="${build.docs}/${doc.locale}/nutch-page.xsl"
        basedir="${docs.src}/pages/${doc.locale}"
        destdir="${docs.dir}/${doc.locale}"
        includes="*.xml">
    <xmlcatalog refid="docDTDs"/>
  </xslt>
</target>
<target name="generate-docs" depends="init">
  <!-- All generated HTML depends on the shared include fragments. -->
  <dependset>
    <srcfileset dir="${docs.src}/include" includes="*.html"/>
    <targetfileset dir="${docs.dir}" includes="**/*.html"/>
  </dependset>

  <!-- Stage the include directory in the docs build area. -->
  <mkdir dir="${build.docs}/include"/>
  <copy todir="${build.docs}/include">
    <fileset dir="${docs.src}/include"/>
  </copy>

  <!-- One generate-locale run per supported locale, in this order. -->
  <antcall target="generate-locale"><param name="doc.locale" value="ca"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="de"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="en"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="es"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="fi"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="fr"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="hu"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="it"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="jp"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="ms"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="nl"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="pl"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="pt"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="sh"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="sr"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="sv"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="th"/></antcall>
  <antcall target="generate-locale"><param name="doc.locale" value="zh"/></antcall>

  <!-- Normalise the line endings of the generated HTML to LF. -->
  <fixcrlf srcdir="${docs.dir}" includes="**/*.html"
           eol="lf" encoding="utf-8"/>
</target>
<!-- ================================================================== -->
<!-- D I S T R I B U T I O N -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="package" depends="jar, job, war, javadoc">
  <!-- Layout of the distribution directory. -->
  <mkdir dir="${dist.dir}"/>
  <mkdir dir="${dist.dir}/lib"/>
  <mkdir dir="${dist.dir}/bin"/>
  <mkdir dir="${dist.dir}/docs"/>
  <mkdir dir="${dist.dir}/docs/api"/>
  <mkdir dir="${dist.dir}/plugins"/>

  <!-- Libraries are taken from ${lib.dir}, the same directory every -->
  <!-- other target in this file reads its jars from.                -->
  <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
    <fileset dir="${lib.dir}"/>
  </copy>
  <copy todir="${dist.dir}/plugins">
    <fileset dir="${build.plugins}"/>
  </copy>
  <copy todir="${dist.dir}/webapps">
    <fileset dir="${build.webapps}"/>
  </copy>

  <!-- The three build artifacts: jar, job and war. -->
  <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
  <copy file="${build.dir}/${final.name}.job" todir="${dist.dir}"/>
  <copy file="${build.dir}/${final.name}.war" todir="${dist.dir}"/>

  <copy todir="${dist.dir}/bin">
    <fileset dir="bin"/>
  </copy>
  <!-- Configuration is shipped without the *.template files. -->
  <copy todir="${dist.dir}/conf">
    <fileset dir="${conf.dir}" excludes="**/*.template"/>
  </copy>
  <!-- Make every file in the distributed bin directory executable. -->
  <chmod perm="ugo+x" type="file">
    <fileset dir="${dist.dir}/bin"/>
  </chmod>

  <!-- Generated documentation and the API docs. -->
  <copy todir="${dist.dir}/docs">
    <fileset dir="${docs.dir}"/>
  </copy>
  <copy todir="${dist.dir}/docs/api">
    <fileset dir="${build.javadoc}"/>
  </copy>

  <!-- Top-level text files, the source tree and the build files. -->
  <copy todir="${dist.dir}">
    <fileset dir=".">
      <include name="*.txt"/>
    </fileset>
  </copy>
  <copy todir="${dist.dir}/src" includeEmptyDirs="true">
    <fileset dir="src"/>
  </copy>
  <copy todir="${dist.dir}/" file="build.xml"/>
  <copy todir="${dist.dir}/" file="default.properties"/>
</target>
<!-- ================================================================== -->
<!-- Make release tarball -->
<!-- ================================================================== -->
<!-- Pack ${build.dir}/${final.name} into a gzip-compressed tarball. -->
<target name="tar" depends="package">
  <tar destfile="${build.dir}/${final.name}.tar.gz"
       compression="gzip"
       longfile="gnu">
    <!-- Everything except the files directly in bin/: mode 664. -->
    <tarfileset dir="${build.dir}" mode="664"
                includes="${final.name}/**"
                excludes="${final.name}/bin/*"/>
    <!-- The files directly in bin/: mode 755. -->
    <tarfileset dir="${build.dir}" mode="755"
                includes="${final.name}/bin/*"/>
  </tar>
</target>
<!-- ================================================================== -->
<!-- Clean. Delete the build files, and their directories -->
<!-- ================================================================== -->
<!-- Remove the whole build directory. -->
<target name="clean">
  <delete dir="${build.dir}"></delete>
</target>
</project>