| <?xml version="1.0"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| <project name="${name}" default="runtime" |
| xmlns:if="ant:if" |
| xmlns:ivy="antlib:org.apache.ivy.ant" |
| xmlns:artifact="antlib:org.apache.maven.artifact.ant" |
| xmlns:rat="antlib:org.apache.rat.anttasks" |
| xmlns="antlib:org.apache.tools.ant"> |
| |
| <!-- Load all the default properties, and any the user wants --> |
| <!-- to contribute (without having to type -D or edit this file). --> |
| <!-- Ant properties are immutable once set, so the files loaded --> |
| <!-- first (user home, then the project's build.properties) take --> |
| <!-- precedence over default.properties. --> |
| <property file="${user.home}/build.properties" /> |
| <property file="${basedir}/build.properties" /> |
| <property file="${basedir}/default.properties" /> |
| <!-- directory receiving the release artifacts (only set here if not defined by a file above) --> |
| <property name="release.dir" value="${build.dir}/release"/> |
| |
| <!-- define Maven coordinates, repository url and artifacts name etc --> |
| <property name="groupId" value="org.apache.nutch" /> |
| <property name="artifactId" value="nutch" /> |
| <property name="maven-repository-url" value="https://repository.apache.org/service/local/staging/deploy/maven2" /> |
| <property name="maven-repository-id" value="apache.releases.https" /> |
| <!-- main, javadoc and sources jars are all written below ${release.dir} --> |
| <property name="maven-jar" value="${release.dir}/${artifactId}-${version}.jar" /> |
| <property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" /> |
| <property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" /> |
| |
| <!-- expose environment variables as env.* properties (env.MVN_HOME is read by the Maven calls in this file) --> |
| <property environment="env"/> |
| |
| <!-- SpotBugs: version, home directory below ${ivy.dir} and path of its Ant task jar --> |
| <property name="spotbugs.version" value="4.2.0" /> |
| <property name="spotbugs.home" value="${ivy.dir}/spotbugs-${spotbugs.version}" /> |
| <property name="spotbugs.jar" value="${spotbugs.home}/lib/spotbugs-ant.jar" /> |
| |
| <!-- Apache RAT: version, home directory below ${ivy.dir} and path of its jar --> |
| <property name="apache-rat.version" value="0.16.1" /> |
| <property name="apache-rat.home" value="${ivy.dir}/apache-rat-${apache-rat.version}" /> |
| <property name="apache-rat.jar" value="${apache-rat.home}/apache-rat-${apache-rat.version}.jar" /> |
| |
| <!-- Set using.jdk.11 only when the running JVM reports major version 11. --> |
| <!-- The pattern is anchored at both ends: Ant's matches condition accepts --> |
| <!-- a partial match, so an unanchored "11.+" would also hit version strings --> |
| <!-- such as "1.8.0_111", and it would miss the bare GA version string "11". --> |
| <!-- Accepted forms: "11", "11.0.2", "11-ea", "11+28". --> |
| <condition property="using.jdk.11"> |
| <matches string="${java.version}" pattern="^11([.+-].*)?$" casesensitive="false" /> |
| </condition> |
| |
| <!-- the normal classpath --> |
| <path id="classpath"> |
| <!-- compiled core classes first, then every jar directly inside the build lib directory --> |
| <pathelement location="${build.classes}"/> |
| <fileset dir="${build.lib.dir}" includes="*.jar"/> |
| </path> |
| |
| <!-- parent directory of ${build.plugins}; it is put on the test classpath below --> |
| <dirname file="${build.plugins}" property="plugins.classpath.dir"/> |
| |
| <!-- classpath for compiling and running the unit tests --> |
| <path id="test.classpath"> |
| <pathelement location="${test.build.classes}"/> |
| <pathelement location="${conf.dir}"/> |
| <pathelement location="${test.src.dir}"/> |
| <pathelement location="${plugins.classpath.dir}"/> |
| <path refid="classpath"/> |
| <pathelement location="${build.dir}/${final.name}.job"/> |
| <fileset dir="${build.lib.dir}" includes="*.jar"/> |
| <fileset dir="${test.build.lib.dir}" includes="*.jar"/> |
| </path> |
| |
| <!-- redefine javac for this build file so that every call defaults to includeantruntime="false" --> |
| <presetdef name="javac"> |
| <javac includeantruntime="false"/> |
| </presetdef> |
| |
| <target name="dependencytree" |
| description="Show dependency tree" |
| depends="resolve-default"> |
| <!-- print the tree of the dependencies resolved by resolve-default --> |
| <ivy:dependencytree/> |
| </target> |
| |
| <!-- ====================================================== --> |
| <!-- Stuff needed by all targets --> |
| <!-- ====================================================== --> |
| <target name="init" |
| description="--> stuff required by all targets" |
| depends="ivy-init"> |
| <!-- output directories of the main build and the release --> |
| <mkdir dir="${build.dir}"/> |
| <mkdir dir="${build.classes}"/> |
| <mkdir dir="${release.dir}"/> |
| |
| <!-- output directories of the test build --> |
| <mkdir dir="${test.build.dir}"/> |
| <mkdir dir="${test.build.classes}"/> |
| <mkdir dir="${test.build.lib.dir}"/> |
| |
| <!-- Back-date every template so that the copy below never replaces a |
| custom-configured configuration file, even if the template itself |
| was updated. The timestamp is given in millis rather than as |
| datetime="01/25/1971 2:00:00 pm" to avoid issues with |
| non-English locales. --> |
| <touch millis="33660000000"> |
| <fileset dir="${conf.dir}" includes="**/*.template"/> |
| </touch> |
| |
| <!-- create each configuration file from its template by dropping the .template suffix --> |
| <copy verbose="true" todir="${conf.dir}"> |
| <fileset dir="${conf.dir}" includes="**/*.template"/> |
| <globmapper from="*.template" to="*"/> |
| </copy> |
| </target> |
| |
| <!-- ====================================================== --> |
| <!-- Compile the Java files --> |
| <!-- ====================================================== --> |
| <!-- aggregate target: core classes first, then the plugins --> |
| <target name="compile" |
| description="--> compile all Java files" |
| depends="compile-core, compile-plugins"/> |
| |
| <target name="compile-core" |
| description="--> compile core Java files only" |
| depends="init, resolve-default"> |
| <!-- compile the org/apache/nutch sources into ${build.classes} --> |
| <javac srcdir="${src.dir}" |
| destdir="${build.classes}" |
| includes="org/apache/nutch/**/*.java" |
| encoding="${build.encoding}" |
| source="${javac.version}" |
| target="${javac.version}" |
| debug="${javac.debug}" |
| optimize="${javac.optimize}" |
| deprecation="${javac.deprecation}"> |
| <classpath refid="classpath"/> |
| <compilerarg value="-Xlint:-path"/> |
| </javac> |
| <!-- copy the non-Java resources (HTML, CSS, properties) next to the classes --> |
| <copy todir="${build.classes}"> |
| <fileset dir="${src.dir}"> |
| <include name="**/*.html"/> |
| <include name="**/*.css"/> |
| <include name="**/*.properties"/> |
| </fileset> |
| </copy> |
| </target> |
| |
| <target name="compile-plugins" |
| description="--> compile plugins only" |
| depends="init, resolve-default"> |
| <!-- delegate to the deploy target of the build file in src/plugin, without passing this project's properties --> |
| <ant target="deploy" dir="src/plugin" inheritAll="false"/> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make nutch.jar --> |
| <!-- ================================================================== --> |
| <!-- --> |
| <!-- ================================================================== --> |
| <target name="jar" |
| description="--> make nutch.jar" |
| depends="compile-core"> |
| <!-- put the default and the site configuration next to the classes so both end up in the jar --> |
| <copy todir="${build.classes}" file="${conf.dir}/nutch-default.xml"/> |
| <copy todir="${build.classes}" file="${conf.dir}/nutch-site.xml"/> |
| <!-- pack everything below ${build.classes}; the manifest element is left empty --> |
| <jar basedir="${build.classes}" |
| jarfile="${build.dir}/${final.name}.jar"> |
| <manifest/> |
| </jar> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make Maven Central Release --> |
| <!-- ================================================================== --> |
| <!-- --> |
| <!-- ================================================================== --> |
| <!-- Builds the three release artifacts below ${release.dir}: the main jar, --> |
| <!-- the javadoc jar and the sources jar (see the maven-*-jar properties). --> |
| <target name="release" depends="compile-core" description="--> generate the release distribution"> |
| <!-- add the default and the site configuration to the classes packed into the main jar --> |
| <copy file="${conf.dir}/nutch-default.xml" |
| todir="${build.classes}"/> |
| <copy file="${conf.dir}/nutch-site.xml" |
| todir="${build.classes}"/> |
| |
| <!-- build the main artifact --> |
| <jar jarfile="${maven-jar}" basedir="${build.classes}" /> |
| |
| <!-- build the javadoc artifact --> |
| <!-- The copyright sign in "bottom" is written as &amp;copy; because "copy" is |
| not a predefined XML entity: the escaped ampersand keeps this file |
| well-formed and hands the HTML entity through to javadoc. --> |
| <javadoc |
| destdir="${release.dir}/javadoc" |
| overview="${src.dir}/overview.html" |
| author="true" |
| version="true" |
| use="true" |
| windowtitle="${name} ${version} API" |
| doctitle="${name} ${version} API" |
| bottom="Copyright &amp;copy; ${year} The Apache Software Foundation" |
| failonerror="true" |
| failonwarning="true" |
| > |
| <arg value="${javadoc.proxy.host}"/> |
| <arg value="${javadoc.proxy.port}"/> |
| <arg value="--allow-script-in-comments"/> |
| <!-- |
| argument -no-module-directories required on JDK 11 |
| otherwise the Javascript search is broken, |
| see https://bugs.openjdk.org/browse/JDK-8215291 |
| --> |
| <arg value="--no-module-directories" if:set="using.jdk.11"/> |
| |
| <!-- document the core sources plus the Java sources of each plugin listed here --> |
| <packageset dir="${src.dir}"/> |
| <packageset dir="${plugins.dir}/creativecommons/src/java"/> |
| <packageset dir="${plugins.dir}/feed/src/java"/> |
| <packageset dir="${plugins.dir}/headings/src/java"/> |
| <packageset dir="${plugins.dir}/exchange-jexl/src/java"/> |
| <packageset dir="${plugins.dir}/index-anchor/src/java"/> |
| <packageset dir="${plugins.dir}/index-arbitrary/src/java"/> |
| <packageset dir="${plugins.dir}/index-basic/src/java"/> |
| <packageset dir="${plugins.dir}/index-geoip/src/java"/> |
| <packageset dir="${plugins.dir}/index-jexl-filter/src/java"/> |
| <packageset dir="${plugins.dir}/index-links/src/java"/> |
| <packageset dir="${plugins.dir}/index-metadata/src/java"/> |
| <packageset dir="${plugins.dir}/index-more/src/java"/> |
| <packageset dir="${plugins.dir}/index-replace/src/java"/> |
| <packageset dir="${plugins.dir}/index-static/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-cloudsearch/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-csv/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-dummy/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-elastic/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-kafka/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-opensearch-1x/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-rabbit/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-solr/src/java"/> |
| <packageset dir="${plugins.dir}/language-identifier/src/java"/> |
| <packageset dir="${plugins.dir}/lib-htmlunit/src/java"/> |
| <packageset dir="${plugins.dir}/lib-http/src/java"/> |
| <packageset dir="${plugins.dir}/lib-rabbitmq/src/java"/> |
| <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/> |
| <packageset dir="${plugins.dir}/lib-selenium/src/java"/> |
| <packageset dir="${plugins.dir}/microformats-reltag/src/java"/> |
| <packageset dir="${plugins.dir}/mimetype-filter/src/java"/> |
| <packageset dir="${plugins.dir}/parse-ext/src/java"/> |
| <packageset dir="${plugins.dir}/parse-html/src/java"/> |
| <packageset dir="${plugins.dir}/parse-js/src/java"/> |
| <packageset dir="${plugins.dir}/parse-metatags/src/java"/> |
| <packageset dir="${plugins.dir}/parse-tika/src/java"/> |
| <packageset dir="${plugins.dir}/parse-zip/src/java"/> |
| <packageset dir="${plugins.dir}/parsefilter-debug/src/java"/> |
| <packageset dir="${plugins.dir}/parsefilter-naivebayes/src/java"/> |
| <packageset dir="${plugins.dir}/parsefilter-regex/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-file/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-ftp/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-htmlunit/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-http/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-interactiveselenium/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-okhttp/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-selenium/src/java"/> |
| <packageset dir="${plugins.dir}/publish-rabbitmq/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-depth/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-link/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-opic/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-orphan/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-similarity/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-metadata/src/java"/> |
| <packageset dir="${plugins.dir}/subcollection/src/java"/> |
| <packageset dir="${plugins.dir}/tld/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/> |
| <packageset dir="${plugins.dir}/urlmeta/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-ajax/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-protocol/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-slash/src/java"/> |
| |
| <!-- external API documentation to link to; the Lucene and Solr links are disabled --> |
| <link href="${javadoc.link.java}"/> |
| <link href="${javadoc.link.hadoop}"/> |
| <!--link href="${javadoc.link.lucene.core}"/> |
| <link href="${javadoc.link.lucene.analyzers-common}"/> |
| <link href="${javadoc.link.solr-solrj}"/--> |
| |
| <!-- core classpath plus every jar found below the plugin build directory --> |
| <classpath refid="classpath"/> |
| <classpath> |
| <fileset dir="${build.plugins}" > |
| <include name="**/*.jar"/> |
| </fileset> |
| </classpath> |
| |
| <!-- package groups of the overview page; the plugin package lists come from properties --> |
| <group title="Core" packages="org.apache.nutch.*"/> |
| <group title="Plugins API" packages="${plugins.api}"/> |
| <group title="Protocol Plugins" packages="${plugins.protocol}"/> |
| <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/> |
| <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/> |
| <group title="Scoring Plugins" packages="${plugins.scoring}"/> |
| <group title="Parse Plugins" packages="${plugins.parse}"/> |
| <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/> |
| <group title="Publisher Plugins" packages="${plugins.publisher}"/> |
| <group title="Exchange Plugins" packages="${plugins.exchange}"/> |
| <group title="Indexing Filter Plugins" packages="${plugins.index}"/> |
| <group title="Indexer Plugins" packages="${plugins.indexer}"/> |
| <group title="Misc. Plugins" packages="${plugins.misc}"/> |
| </javadoc> |
| |
| <!-- pack the generated API documentation --> |
| <jar jarfile="${maven-javadoc-jar}"> |
| <fileset dir="${release.dir}/javadoc" /> |
| </jar> |
| |
| <!-- build the sources artifact --> |
| <jar jarfile="${maven-sources-jar}"> |
| <fileset dir="${src.dir}" /> |
| </jar> |
| </target> |
| |
| <!-- Generates ${basedir}/pom.xml from the Ivy file, using ivy/mvn.template. --> |
| <!-- Depends on ivy-init so that the Ivy antlib is loaded and configured even --> |
| <!-- when this target is reached without a prior init, e.g. through restdocs, --> |
| <!-- whose only dependency is makepom. --> |
| <target name="makepom" depends="ivy-init" description="--> generate pom file for deployment"> |
| <!-- generate a pom file; Ivy configurations are mapped to the Maven scopes of the same meaning --> |
| <ivy:makepom ivyfile="${ivy.file}" pomfile="${basedir}/pom.xml" templatefile="ivy/mvn.template"> |
| <mapping conf="default" scope="compile"/> |
| <mapping conf="runtime" scope="runtime"/> |
| </ivy:makepom> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Deploy to Apache Nexus --> |
| <!-- ================================================================== --> |
| <!-- --> |
| <!-- ================================================================== --> |
| <target name="deploy" |
| description="--> deploy to Apache Nexus" |
| depends="release, makepom"> |
| |
| <!-- Each artifact is signed and uploaded by its own forked Maven run of the |
| maven-gpg-plugin goal sign-and-deploy-file, using the generated pom.xml. |
| Maven is taken from the MVN_HOME environment variable. --> |
| |
| <!-- 1. main artifact --> |
| <artifact:mvn failonerror="true" fork="true" mavenHome="${env.MVN_HOME}"> |
| <jvmarg value="-Dmaven.multiModuleProjectDirectory=false"/> |
| <arg value="org.apache.maven.plugins:maven-gpg-plugin:3.2.2:sign-and-deploy-file"/> |
| <arg value="-Durl=${maven-repository-url}"/> |
| <arg value="-DrepositoryId=${maven-repository-id}"/> |
| <arg value="-DpomFile=pom.xml"/> |
| <arg value="-Dfile=${maven-jar}"/> |
| <arg value="-Papache-release"/> |
| </artifact:mvn> |
| |
| <!-- 2. sources artifact (classifier "sources") --> |
| <artifact:mvn failonerror="true" fork="true" mavenHome="${env.MVN_HOME}"> |
| <jvmarg value="-Dmaven.multiModuleProjectDirectory=false"/> |
| <arg value="org.apache.maven.plugins:maven-gpg-plugin:3.2.2:sign-and-deploy-file"/> |
| <arg value="-Durl=${maven-repository-url}"/> |
| <arg value="-DrepositoryId=${maven-repository-id}"/> |
| <arg value="-DpomFile=pom.xml"/> |
| <arg value="-Dfile=${maven-sources-jar}"/> |
| <arg value="-Dclassifier=sources"/> |
| <arg value="-Papache-release"/> |
| </artifact:mvn> |
| |
| <!-- 3. javadoc artifact (classifier "javadoc") --> |
| <artifact:mvn failonerror="true" fork="true" mavenHome="${env.MVN_HOME}"> |
| <jvmarg value="-Dmaven.multiModuleProjectDirectory=false"/> |
| <arg value="org.apache.maven.plugins:maven-gpg-plugin:3.2.2:sign-and-deploy-file"/> |
| <arg value="-Durl=${maven-repository-url}"/> |
| <arg value="-DrepositoryId=${maven-repository-id}"/> |
| <arg value="-DpomFile=pom.xml"/> |
| <arg value="-Dfile=${maven-javadoc-jar}"/> |
| <arg value="-Dclassifier=javadoc"/> |
| <arg value="-Papache-release"/> |
| </artifact:mvn> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Generate REST API Documentation with Miredot --> |
| <!-- ================================================================== --> |
| <target name="restdocs" |
| description="--> generate REST API Documentation with Miredot" |
| depends="makepom"> |
| |
| <!-- run "mvn package -DskipTests -e" against the generated pom.xml --> |
| <artifact:mvn failonerror="true" fork="true" mavenHome="${env.MVN_HOME}"> |
| <jvmarg value="-Dmaven.multiModuleProjectDirectory=false"/> |
| <arg value="package"/> |
| <arg value="-DskipTests"/> |
| <arg value="-e"/> |
| <!--arg value="-o"/--> |
| <!-- Offline mode (-o, currently disabled above): Maven must not download |
| dependencies here because the download would go to |
| http://repo1.maven.org/, which is hardwired in |
| maven-ant-tasks-2.1.3.jar, see NUTCH-2722. |
| |
| Dependencies and plugins therefore have to be resolved and cached |
| locally beforehand, once the pom.xml has been generated, by running |
| `mvn dependency:resolve` |
| and |
| `mvn dependency:resolve-plugins` |
| respectively. --> |
| </artifact:mvn> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make job jar --> |
| <!-- ================================================================== --> |
| <!-- --> |
| <!-- ================================================================== --> |
| <target name="job" |
| description="--> make nutch.job jar" |
| depends="compile"> |
| <jar jarfile="${build.dir}/${final.name}.job"> |
| <!-- ${build.classes} contains the nutch config files if the jar target |
| has run before; leave them out here because they are taken from |
| the conf directory instead. --> |
| <zipfileset excludes="nutch-default.xml,nutch-site.xml" |
| dir="${build.classes}"/> |
| <!-- configuration without the templates and the hadoop* files --> |
| <zipfileset excludes="*.template,hadoop*.*" dir="${conf.dir}"/> |
| <!-- libraries go below lib/, except the hadoop, slf4j and log4j jars --> |
| <zipfileset prefix="lib" dir="${build.lib.dir}" |
| excludes="hadoop-*.jar,slf4j*.jar,log4j*.jar" includes="**/*.jar"/> |
| <!-- built plugins go below classes/plugins --> |
| <zipfileset prefix="classes/plugins" dir="${build.plugins}"/> |
| </jar> |
| </target> |
| |
| <target name="runtime" |
| description="--> default target for running Nutch" |
| depends="jar, job"> |
| <mkdir dir="${runtime.dir}"/> |
| <mkdir dir="${runtime.local}"/> |
| <mkdir dir="${runtime.deploy}"/> |
| |
| <!-- deploy area: the job file plus the executable scripts --> |
| <copy todir="${runtime.deploy}" |
| file="${build.dir}/${final.name}.job"/> |
| <copy todir="${runtime.deploy}/bin"> |
| <fileset dir="src/bin"/> |
| </copy> |
| <chmod type="file" perm="ugo+x"> |
| <fileset dir="${runtime.deploy}/bin"/> |
| </chmod> |
| |
| <!-- local area: jar, native libraries, configuration, scripts, libraries, plugins and test output --> |
| <copy todir="${runtime.local}/lib" |
| file="${build.dir}/${final.name}.jar"/> |
| <copy todir="${runtime.local}/lib/native"> |
| <fileset dir="lib/native"/> |
| </copy> |
| <copy todir="${runtime.local}/conf"> |
| <fileset excludes="*.template" dir="${conf.dir}"/> |
| </copy> |
| <copy todir="${runtime.local}/bin"> |
| <fileset dir="src/bin"/> |
| </copy> |
| <chmod type="file" perm="ugo+x"> |
| <fileset dir="${runtime.local}/bin"/> |
| </chmod> |
| <copy todir="${runtime.local}/lib"> |
| <fileset dir="${build.dir}/lib"/> |
| </copy> |
| <copy todir="${runtime.local}/plugins"> |
| <fileset dir="${build.dir}/plugins"/> |
| </copy> |
| <copy todir="${runtime.local}/test"> |
| <fileset dir="${build.dir}/test"/> |
| </copy> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Compile test code --> |
| <!-- ================================================================== --> |
| <target name="compile-core-test" |
| description="--> compile test code" |
| depends="init, compile-core, resolve-test"> |
| <!-- compile the org/apache/nutch test sources against the test classpath --> |
| <javac srcdir="${test.src.dir}" |
| destdir="${test.build.classes}" |
| includes="org/apache/nutch/**/*.java" |
| encoding="${build.encoding}" |
| source="${javac.version}" |
| target="${javac.version}" |
| debug="${javac.debug}" |
| optimize="${javac.optimize}" |
| deprecation="${javac.deprecation}"> |
| <classpath refid="test.classpath"/> |
| <compilerarg value="-Xlint:-path"/> |
| </javac> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Run unit tests --> |
| <!-- ================================================================== --> |
| <!-- aggregate target: core tests first, then the plugin tests --> |
| <target name="test" |
| description="--> run JUnit tests" |
| depends="test-core, test-plugins"/> |
| |
| <target name="test-core" |
| description="--> run core JUnit tests only" |
| depends="compile-core-test, job"> |
| |
| <!-- recreate the test data directory and fill it with the resources needed in junit tests --> |
| <delete dir="${test.build.data}"/> |
| <mkdir dir="${test.build.data}"/> |
| <copy todir="${test.build.data}"> |
| <fileset dir="src/testresources" includes="**/*"/> |
| </copy> |
| |
| <!-- configuration files copied next to the compiled test classes --> |
| <copy todir="${test.build.classes}" |
| file="${test.src.dir}/log4j.properties"/> |
| <copy todir="${test.build.classes}" |
| file="${test.src.dir}/crawl-tests.xml"/> |
| <copy todir="${test.build.classes}" |
| file="${test.src.dir}/domain-urlfilter.txt"/> |
| <copy todir="${test.build.classes}" |
| file="${test.src.dir}/filter-all.txt"/> |
| |
| <!-- Forked JUnit run. Failures and errors do not halt the run; both only |
| set tests.failed, which is checked after the run. --> |
| <junit fork="yes" dir="${basedir}" maxmemory="1000m" |
| printsummary="yes" haltonfailure="no" |
| failureProperty="tests.failed" errorProperty="tests.failed"> |
| <classpath refid="test.classpath"/> |
| <sysproperty key="test.build.data" value="${test.build.data}"/> |
| <sysproperty key="test.src.dir" value="${test.src.dir}"/> |
| <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/> |
| <formatter type="${test.junit.output.format}"/> |
| <!-- only the class named by the testcase property when it is set ... --> |
| <batchtest if="testcase" todir="${test.build.dir}"> |
| <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/> |
| </batchtest> |
| <!-- ... otherwise all Test* classes except the excluded one --> |
| <batchtest unless="testcase" todir="${test.build.dir}"> |
| <fileset dir="${test.src.dir}" |
| excludes="**/${test.exclude}.java" includes="**/Test*.java"/> |
| </batchtest> |
| </junit> |
| |
| <fail if="tests.failed">Tests failed!</fail> |
| |
| </target> |
| |
| <target name="test-plugins" |
| description="--> run plugin JUnit tests only" |
| depends="resolve-test, compile"> |
| <!-- delegate to the test target of the build file in src/plugin --> |
| <ant target="test" dir="src/plugin" inheritAll="false"/> |
| </target> |
| |
| <target name="test-plugin" |
| description="--> run a single plugin's JUnit tests" |
| depends="resolve-test, compile"> |
| <!-- delegate to the test-single target of the build file in src/plugin --> |
| <ant target="test-single" dir="src/plugin" inheritAll="false"/> |
| </target> |
| |
| <!-- aggregate target without tasks of its own: runs the tests and builds the source archives --> |
| <target name="nightly" |
| description="--> run the nightly target build" |
| depends="test, tar-src, zip-src"/> |
| |
| <!-- ================================================================== --> |
| <!-- Ivy targets --> |
| <!-- ================================================================== --> |
| |
| <!-- target: resolve ================================================= --> |
| <target name="resolve-default" |
| description="--> resolve and retrieve dependencies with ivy" |
| depends="clean-default-lib, init"> |
| <!-- resolve the "default" configuration and retrieve its artifacts into the build lib directory --> |
| <ivy:resolve conf="default" file="${ivy.file}" log="download-only"/> |
| <ivy:retrieve symlink="false" log="quiet" |
| pattern="${build.lib.dir}/[artifact]-[revision](-[classifier]).[ext]"/> |
| <!-- add the jars that are not managed by Ivy --> |
| <antcall target="copy-libs"/> |
| </target> |
| |
| <target name="resolve-test" |
| description="--> resolve and retrieve dependencies with ivy" |
| depends="clean-test-lib, init"> |
| <!-- resolve the "test" configuration and retrieve its artifacts into the test lib directory --> |
| <ivy:resolve conf="test" file="${ivy.file}" log="download-only"/> |
| <ivy:retrieve symlink="false" log="quiet" |
| pattern="${test.build.lib.dir}/[artifact]-[revision](-[classifier]).[ext]"/> |
| <!-- add the jars that are not managed by Ivy --> |
| <antcall target="copy-libs"/> |
| </target> |
| |
| <target name="copy-libs" |
| description="--> copy the libs in lib, which are not ivy enabled"> |
| <!-- Jars below ${lib.dir} are not managed by Ivy and are copied as they are; |
| failonerror="false" keeps the build going if the copy fails. --> |
| <copy failonerror="false" todir="${build.lib.dir}/"> |
| <fileset includes="**/*.jar" dir="${lib.dir}"/> |
| </copy> |
| </target> |
| |
| <!-- target: publish-local =========================================== --> |
| <target name="publish-local" |
| description="--> publish this project in the local ivy repository" |
| depends="jar"> |
| <!-- publish the artifacts found in ${build.dir} through the "local" resolver, replacing an existing publication --> |
| <ivy:publish resolver="local" |
| artifactspattern="${build.dir}/[artifact]-${version}.[ext]" |
| pubrevision="${version}" |
| pubdate="${now}" |
| status="integration" |
| overwrite="true" |
| forcedeliver="true"/> |
| <echo message="project ${ant.project.name} published locally with version ${version}"/> |
| </target> |
| |
| <!-- target: report ================================================== --> |
| <target name="report" |
| description="--> generates a report of dependencies" |
| depends="resolve-test"> |
| <!-- write the Ivy dependency report, including the XML output, into the build directory --> |
| <ivy:report xml="true" todir="${build.dir}"/> |
| </target> |
| |
| <!-- target: 3rd-party licenses report =============================== --> |
| <target name="report-licenses" |
| description="--> generates a report of licenses of dependencies" |
| depends="resolve-default"> |
| <!-- run the Ivy report through ivy/ivy-report-license.xsl to get a .tsv file; XML and graph output are switched off --> |
| <ivy:report todir="${build.dir}" |
| xslfile="ivy/ivy-report-license.xsl" |
| outputpattern="[organisation]-[module]-[conf]-3rd-party-licenses.tsv" |
| graph="false" |
| xml="false"/> |
| </target> |
| |
| <!-- target: ivy-init ================================================ --> |
| <target name="ivy-init" |
| description="--> initialise Ivy settings" |
| depends="ivy-probe-antlib, ivy-init-antlib"> |
| <!-- configure Ivy from the settings file kept in the Ivy directory --> |
| <ivy:settings file="${ivy.dir}/ivysettings.xml"/> |
| </target> |
| |
| <!-- target: ivy-probe-antlib ======================================== --> |
| <target name="ivy-probe-antlib" |
| description="--> probe the antlib library"> |
| <!-- ivy.found is set when the Ivy antlib already provides the cleancache type --> |
| <condition property="ivy.found"> |
| <typefound name="cleancache" uri="antlib:org.apache.ivy.ant"/> |
| </condition> |
| </target> |
| |
| <!-- target: ivy-download ============================================ --> |
| <target name="ivy-download" |
| description="--> download ivy"> |
| <!-- record whether the Ivy jar is already present, then call the download target, which is skipped in that case --> |
| <available property="ivy.jar.found" file="${ivy.jar}"/> |
| <antcall target="ivy-download-unchecked"/> |
| </target> |
| |
| <!-- target: ivy-download-unchecked ================================== --> |
| <target name="ivy-download-unchecked" |
| description="--> fetch any ivy file" |
| unless="ivy.jar.found"> |
| <!-- fetch the Ivy jar from ${ivy.repo.url}; skipped when ivy.jar.found is set --> |
| <get dest="${ivy.jar}" src="${ivy.repo.url}" usetimestamp="true"/> |
| </target> |
| |
| <!-- target: ivy-init-antlib ========================================= --> |
| <target name="ivy-init-antlib" |
| description="--> attempt to use Ivy with Antlib" |
| depends="ivy-download" |
| unless="ivy.found"> |
| <!-- define the Ivy tasks and types from the Ivy jar --> |
| <typedef loaderRef="ivyLoader" onerror="fail" uri="antlib:org.apache.ivy.ant"> |
| <classpath> |
| <pathelement location="${ivy.jar}"/> |
| </classpath> |
| </typedef> |
| <!-- stop the build if the cleancache type is still not available afterwards --> |
| <fail> |
| <condition> |
| <not> |
| <typefound name="cleancache" uri="antlib:org.apache.ivy.ant"/> |
| </not> |
| </condition> |
| You need Apache Ivy 2.5.0 or later from https://ant.apache.org/ |
| It could not be loaded from ${ivy.repo.url} |
| </fail> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Documentation --> |
| <!-- ================================================================== --> |
| <target name="javadoc" depends="compile" description="--> generate Javadoc"> |
| <mkdir dir="${build.javadoc}"/> |
| <mkdir dir="${build.javadoc}/resources"/> |
| <javadoc |
| overview="${src.dir}/overview.html" |
| destdir="${build.javadoc}" |
| author="true" |
| version="true" |
| use="true" |
| windowtitle="${name} ${version} API" |
| doctitle="${name} ${version} API" |
| bottom="Copyright &copy; ${year} The Apache Software Foundation" |
| failonerror="true" |
| failonwarning="true" |
| additionalparam="-Xdoclint:all" |
| > |
| <arg value="${javadoc.proxy.host}"/> |
| <arg value="${javadoc.proxy.port}"/> |
| <arg value="--allow-script-in-comments"/> |
| <!-- |
| argument -no-module-directories required on JDK 11 |
| otherwise the Javascript search is broken, |
| see https://bugs.openjdk.org/browse/JDK-8215291 |
| --> |
| <arg value="--no-module-directories" if:set="using.jdk.11"/> |
| |
| <packageset dir="${src.dir}"/> |
| <packageset dir="${plugins.dir}/creativecommons/src/java"/> |
| <packageset dir="${plugins.dir}/feed/src/java"/> |
| <packageset dir="${plugins.dir}/headings/src/java"/> |
| <packageset dir="${plugins.dir}/exchange-jexl/src/java"/> |
| <packageset dir="${plugins.dir}/index-anchor/src/java"/> |
| <packageset dir="${plugins.dir}/index-arbitrary/src/java"/> |
| <packageset dir="${plugins.dir}/index-basic/src/java"/> |
| <packageset dir="${plugins.dir}/index-geoip/src/java"/> |
| <packageset dir="${plugins.dir}/index-jexl-filter/src/java"/> |
| <packageset dir="${plugins.dir}/index-links/src/java"/> |
| <packageset dir="${plugins.dir}/index-metadata/src/java"/> |
| <packageset dir="${plugins.dir}/index-more/src/java"/> |
| <packageset dir="${plugins.dir}/index-replace/src/java"/> |
| <packageset dir="${plugins.dir}/index-static/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-cloudsearch/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-csv/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-dummy/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-elastic/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-kafka/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-opensearch-1x/src/java/" /> |
| <packageset dir="${plugins.dir}/indexer-rabbit/src/java"/> |
| <packageset dir="${plugins.dir}/indexer-solr/src/java"/> |
| <packageset dir="${plugins.dir}/language-identifier/src/java"/> |
| <packageset dir="${plugins.dir}/lib-htmlunit/src/java"/> |
| <packageset dir="${plugins.dir}/lib-http/src/java"/> |
| <packageset dir="${plugins.dir}/lib-rabbitmq/src/java"/> |
| <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/> |
| <packageset dir="${plugins.dir}/lib-selenium/src/java"/> |
| <packageset dir="${plugins.dir}/microformats-reltag/src/java"/> |
| <packageset dir="${plugins.dir}/mimetype-filter/src/java"/> |
| <packageset dir="${plugins.dir}/parse-ext/src/java"/> |
| <packageset dir="${plugins.dir}/parse-html/src/java"/> |
| <packageset dir="${plugins.dir}/parse-js/src/java"/> |
| <packageset dir="${plugins.dir}/parse-metatags/src/java"/> |
| <packageset dir="${plugins.dir}/parse-tika/src/java"/> |
| <packageset dir="${plugins.dir}/parse-zip/src/java"/> |
| <packageset dir="${plugins.dir}/parsefilter-debug/src/java"/> |
| <packageset dir="${plugins.dir}/parsefilter-naivebayes/src/java"/> |
| <packageset dir="${plugins.dir}/parsefilter-regex/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-file/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-ftp/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-htmlunit/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-http/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-interactiveselenium/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-okhttp/src/java"/> |
| <packageset dir="${plugins.dir}/protocol-selenium/src/java"/> |
| <packageset dir="${plugins.dir}/publish-rabbitmq/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-depth/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-link/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-opic/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-orphan/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-similarity/src/java"/> |
| <packageset dir="${plugins.dir}/scoring-metadata/src/java"/> |
| <packageset dir="${plugins.dir}/subcollection/src/java"/> |
| <packageset dir="${plugins.dir}/tld/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-domain/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-domaindenylist/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-fast/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-ignoreexempt/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-suffix/src/java"/> |
| <packageset dir="${plugins.dir}/urlfilter-validator/src/java"/> |
| <packageset dir="${plugins.dir}/urlmeta/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-ajax/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-basic/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-host/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-pass/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-protocol/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-querystring/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-regex/src/java"/> |
| <packageset dir="${plugins.dir}/urlnormalizer-slash/src/java"/> |
| |
| <link href="${javadoc.link.java}"/> |
| <link href="${javadoc.link.hadoop}"/> |
| <!--link href="${javadoc.link.lucene.core}"/> |
| <link href="${javadoc.link.lucene.analyzers-common}"/> |
| <link href="${javadoc.link.solr-solrj}"/--> |
| |
| <classpath refid="classpath"/> |
| <classpath> |
| <fileset dir="${build.plugins}" > |
| <include name="**/*.jar"/> |
| </fileset> |
| </classpath> |
| |
| <group title="Core" packages="org.apache.nutch.*"/> |
| <group title="Plugins API" packages="${plugins.api}"/> |
| <group title="Protocol Plugins" packages="${plugins.protocol}"/> |
| <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/> |
| <group title="URL Normalizer Plugins" packages="${plugins.urlnormalizer}"/> |
| <group title="Scoring Plugins" packages="${plugins.scoring}"/> |
| <group title="Parse Plugins" packages="${plugins.parse}"/> |
| <group title="Parse Filter Plugins" packages="${plugins.parsefilter}"/> |
| <group title="Publisher Plugins" packages="${plugins.publisher}"/> |
| <group title="Exchange Plugins" packages="${plugins.exchange}"/> |
| <group title="Indexing Filter Plugins" packages="${plugins.index}"/> |
| <group title="Indexer Plugins" packages="${plugins.indexer}"/> |
| <group title="Misc. Plugins" packages="${plugins.misc}"/> |
| </javadoc> |
| <!-- Copy the plugin.dtd file to the plugin doc-files dir --> |
| <copy file="${plugins.dir}/plugin.dtd" |
| todir="${build.javadoc}/org/apache/nutch/plugin/doc-files"/> |
| |
| <!-- Copy the definition of Nutch properties --> |
| <copy file="${conf.dir}/nutch-default.xml" todir="${build.javadoc}/resources/"/> |
| <copy file="${conf.dir}/configuration.xsl" todir="${build.javadoc}/resources/"/> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- D I S T R I B U T I O N --> |
| <!-- ================================================================== --> |
| <!-- --> |
| <!-- ================================================================== --> |
| <target name="package-src" |
| description="--> generate source distribution package" |
| depends="runtime, javadoc"> |
| <!-- Assembles the unpacked source distribution tree under ${src.dist.version.dir}. --> |
| |
| <!-- Directory skeleton. --> |
| <mkdir dir="${dist.dir}"/> |
| <mkdir dir="${src.dist.version.dir}"/> |
| <mkdir dir="${src.dist.version.dir}/lib"/> |
| <mkdir dir="${src.dist.version.dir}/docs"/> |
| <mkdir dir="${src.dist.version.dir}/docs/api"/> |
| <mkdir dir="${src.dist.version.dir}/ivy"/> |
| |
| <!-- Libraries shipped in the source tree; empty directories are not copied. --> |
| <copy includeEmptyDirs="false" todir="${src.dist.version.dir}/lib"> |
| <fileset dir="lib"/> |
| </copy> |
| |
| <!-- Configuration files, leaving out the *.template files. --> |
| <copy todir="${src.dist.version.dir}/conf"> |
| <fileset dir="${conf.dir}"> |
| <exclude name="**/*.template"/> |
| </fileset> |
| </copy> |
| |
| <!-- Generated API documentation. --> |
| <copy todir="${src.dist.version.dir}/docs/api"> |
| <fileset dir="${build.javadoc}"/> |
| </copy> |
| |
| <!-- Top-level *.txt files of the project directory. --> |
| <copy todir="${src.dist.version.dir}"> |
| <fileset dir="."> |
| <include name="*.txt" /> |
| <!--<include name="KEYS" />--> |
| </fileset> |
| </copy> |
| |
| <!-- Complete src and ivy trees, empty directories included. --> |
| <copy includeEmptyDirs="true" todir="${src.dist.version.dir}/src"> |
| <fileset dir="src"/> |
| </copy> |
| |
| <copy includeEmptyDirs="true" todir="${src.dist.version.dir}/ivy"> |
| <fileset dir="ivy"/> |
| </copy> |
| |
| <!-- Build script and its default properties. --> |
| <copy file="build.xml" todir="${src.dist.version.dir}/"/> |
| <copy file="default.properties" todir="${src.dist.version.dir}/"/> |
| |
| <!-- License and notice files of the binary package plus the licenses-binary tree. --> |
| <copy file="LICENSE-binary" todir="${src.dist.version.dir}/"/> |
| <copy file="NOTICE-binary" todir="${src.dist.version.dir}/"/> |
| <copy includeEmptyDirs="true" todir="${src.dist.version.dir}/licenses-binary"> |
| <fileset dir="licenses-binary"/> |
| </copy> |
| |
| </target> |
| |
| <target name="package-bin" |
| description="--> generate binary distribution package" |
| depends="runtime, javadoc"> |
| <!-- Assembles the unpacked binary distribution tree under ${bin.dist.version.dir} from runtime/local. --> |
| |
| <!-- Directory skeleton. --> |
| <mkdir dir="${dist.dir}"/> |
| <mkdir dir="${bin.dist.version.dir}"/> |
| <mkdir dir="${bin.dist.version.dir}/lib"/> |
| <mkdir dir="${bin.dist.version.dir}/bin"/> |
| <mkdir dir="${bin.dist.version.dir}/conf"/> |
| <mkdir dir="${bin.dist.version.dir}/docs"/> |
| <mkdir dir="${bin.dist.version.dir}/docs/api"/> |
| <mkdir dir="${bin.dist.version.dir}/plugins"/> |
| |
| <!-- Runtime libraries; empty directories are not copied. --> |
| <copy includeEmptyDirs="false" todir="${bin.dist.version.dir}/lib"> |
| <fileset dir="runtime/local/lib"/> |
| </copy> |
| |
| <!-- Launcher scripts; the chmod has to run after this copy. --> |
| <copy todir="${bin.dist.version.dir}/bin"> |
| <fileset dir="runtime/local/bin"/> |
| </copy> |
| |
| <chmod type="file" perm="ugo+x"> |
| <fileset dir="${bin.dist.version.dir}/bin"/> |
| </chmod> |
| |
| <!-- Configuration files, leaving out the *.template files. --> |
| <copy todir="${bin.dist.version.dir}/conf"> |
| <fileset dir="runtime/local/conf"> |
| <exclude name="**/*.template"/> |
| </fileset> |
| </copy> |
| |
| <!-- Generated API documentation. --> |
| <copy todir="${bin.dist.version.dir}/docs/api"> |
| <fileset dir="${build.javadoc}"/> |
| </copy> |
| |
| <!-- Top-level *.txt files of the project directory. --> |
| <copy todir="${bin.dist.version.dir}"> |
| <fileset dir="." includes="*.txt"/> |
| </copy> |
| |
| <!-- License and notice files of the binary package plus the licenses-binary tree. --> |
| <copy file="LICENSE-binary" todir="${bin.dist.version.dir}/"/> |
| <copy file="NOTICE-binary" todir="${bin.dist.version.dir}/"/> |
| <copy includeEmptyDirs="true" todir="${bin.dist.version.dir}/licenses-binary"> |
| <fileset dir="licenses-binary"/> |
| </copy> |
| |
| <!-- Built plugins first, then the README.* files found under ${plugins.dir}. --> |
| <copy includeEmptyDirs="true" todir="${bin.dist.version.dir}/plugins"> |
| <fileset dir="runtime/local/plugins"/> |
| </copy> |
| |
| <copy todir="${bin.dist.version.dir}/plugins"> |
| <fileset dir="${plugins.dir}" includes="**/README.*"/> |
| </copy> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make src release tarball --> |
| <!-- ================================================================== --> |
| <target name="tar-src" |
| description="--> generate src.tar.gz distribution package" |
| depends="package-src"> |
| <!-- Packs the tree built by package-src into ${src.dist.version.dir}.tar.gz, all entries below ${final.name}. --> |
| <tar destfile="${src.dist.version.dir}.tar.gz" |
| longfile="gnu" compression="gzip"> |
| <!-- Regular content with mode 664; src/bin and the ivy jars are left out of this set. --> |
| <tarfileset prefix="${final.name}" mode="664" dir="${src.dist.version.dir}" |
| excludes="src/bin/*,ivy/ivy*.jar" includes="**"/> |
| <!-- Files directly under src/bin are added separately with mode 755. --> |
| <tarfileset prefix="${final.name}" mode="755" dir="${src.dist.version.dir}" |
| includes="src/bin/*"/> |
| </tar> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make bin release tarball --> |
| <!-- ================================================================== --> |
| <target name="tar-bin" |
| description="--> generate bin.tar.gz distribution package" |
| depends="package-bin"> |
| <!-- Packs the tree built by package-bin into ${bin.dist.version.dir}.tar.gz, all entries below ${final.name}. --> |
| <tar destfile="${bin.dist.version.dir}.tar.gz" |
| longfile="gnu" compression="gzip"> |
| <!-- Regular content with mode 664; files directly under bin are left out of this set. --> |
| <tarfileset prefix="${final.name}" mode="664" dir="${bin.dist.version.dir}" |
| excludes="bin/*" includes="**"/> |
| <!-- Files directly under bin are added separately with mode 755. --> |
| <tarfileset prefix="${final.name}" mode="755" dir="${bin.dist.version.dir}" |
| includes="bin/*"/> |
| </tar> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make src release zip --> |
| <!-- ================================================================== --> |
| <target name="zip-src" |
| description="--> generate src.zip distribution package" |
| depends="package-src"> |
| <!-- Packs the tree built by package-src into ${src.dist.version.dir}.zip, all entries below ${final.name}. --> |
| <zip destfile="${src.dist.version.dir}.zip" |
| casesensitive="yes" compress="true"> |
| <!-- Regular content with file mode 664; src/bin and the ivy jars are left out of this set. --> |
| <zipfileset prefix="${final.name}" filemode="664" dir="${src.dist.version.dir}" |
| excludes="src/bin/*,ivy/ivy*.jar" includes="**"/> |
| <!-- Files directly under src/bin are added separately with file mode 755. --> |
| <zipfileset prefix="${final.name}" filemode="755" dir="${src.dist.version.dir}" |
| includes="src/bin/*"/> |
| </zip> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Make bin release zip --> |
| <!-- ================================================================== --> |
| <target name="zip-bin" |
| description="--> generate bin.zip distribution package" |
| depends="package-bin"> |
| <!-- Packs the tree built by package-bin into ${bin.dist.version.dir}.zip, all entries below ${final.name}. --> |
| <zip destfile="${bin.dist.version.dir}.zip" |
| casesensitive="yes" compress="true"> |
| <!-- Regular content with file mode 664; files directly under bin are left out of this set. --> |
| <zipfileset prefix="${final.name}" filemode="664" dir="${bin.dist.version.dir}" |
| excludes="bin/*" includes="**"/> |
| <!-- Files directly under bin are added separately with file mode 755. --> |
| <zipfileset prefix="${final.name}" filemode="755" dir="${bin.dist.version.dir}" |
| includes="bin/*"/> |
| </zip> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Clean. Delete the build files, and their directories --> |
| <!-- ================================================================== --> |
| |
| <!-- target: clean =================================================== --> |
| <target name="clean" |
| description="--> clean the project" |
| depends="clean-build, clean-lib, clean-dist, clean-runtime"> |
| <!-- Aggregate target: it has no tasks of its own, the work is done by the targets listed in depends. --> |
| </target> |
| |
| <!-- target: clean-eclipse ============================================ --> |
| <target name="clean-eclipse" |
| description="--> cleans the eclipse project" |
| depends="clean-build, clean-lib, clean-dist"> |
| <!-- Aggregate target: same dependencies as clean, except that clean-runtime is not included. --> |
| </target> |
| |
| <!-- target: clean-local ============================================= --> |
| <target name="clean-local" |
| description="--> cleans the local repository for the current module" |
| depends=""> |
| <!-- Removes this module's directory below the Ivy local repository root. --> |
| <delete dir="${ivy.local.default.root}/${ivy.organisation}/${ivy.module}"/> |
| </target> |
| |
| <!-- target: clean-lib =============================================== --> |
| <target name="clean-lib" |
| description="--> clean the project libraries directories (dependencies: default + test)" |
| depends="clean-default-lib, clean-test-lib"> |
| <!-- Aggregate target: the deletions happen in clean-default-lib and clean-test-lib. --> |
| </target> |
| <!-- target: clean-default-lib =============================================== --> |
| <target description="--> clean the project libraries directory (dependencies)" name="clean-default-lib"> |
| <!-- Deletes ${build.lib.dir} together with everything below it. --> |
| <delete dir="${build.lib.dir}" includeemptydirs="true"/> |
| </target> |
| <!-- target: clean-test-lib =============================================== --> |
| <target description="--> clean the project test libraries directory (dependencies)" name="clean-test-lib"> |
| <!-- Deletes ${test.build.lib.dir} together with everything below it. --> |
| <delete dir="${test.build.lib.dir}" includeemptydirs="true"/> |
| </target> |
| |
| <!-- target: clean-build ============================================= --> |
| <target description="--> clean the project built files" name="clean-build"> |
| <!-- Deletes ${build.dir} together with everything below it. --> |
| <delete dir="${build.dir}" includeemptydirs="true"/> |
| </target> |
| |
| <!-- target: clean-dist ============================================= --> |
| <target description="--> clean the project dist files" name="clean-dist"> |
| <!-- Deletes ${dist.dir} together with everything below it. --> |
| <delete dir="${dist.dir}" includeemptydirs="true"/> |
| </target> |
| |
| <!-- target: clean-cache ============================================= --> |
| <target name="clean-cache" |
| description="--> delete ivy cache" |
| depends="ivy-init"> |
| <!-- Depends on ivy-init and then runs the Ivy cleancache task. --> |
| <ivy:cleancache/> |
| </target> |
| |
| <target description="--> clean the project runtime area" name="clean-runtime"> |
| <!-- Deletes ${runtime.dir} together with everything below it. --> |
| <delete dir="${runtime.dir}" includeemptydirs="true"/> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- RAT targets --> |
| <!-- ================================================================== --> |
| <target description="--> download Apache Rat jar" name="apache-rat-download"> |
| <!-- apache-rat.jar.found is set only if ${apache-rat.jar} already exists. --> |
| <available property="apache-rat.jar.found" file="${apache-rat.jar}"/> |
| <!-- The called target carries unless="apache-rat.jar.found", so it does nothing when the jar is present. --> |
| <antcall target="apache-rat-download-unchecked"/> |
| </target> |
| |
| <target name="apache-rat-download-unchecked" |
| description="--> downloads the Apache Rat jar" |
| unless="apache-rat.jar.found"> |
| <!-- Skipped when apache-rat.jar.found is set. Otherwise: download the archive into ${ivy.dir}, unpack it there, remove the archive. --> |
| <get dest="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" |
| src="https://archive.apache.org/dist/creadur/apache-rat-${apache-rat.version}/apache-rat-${apache-rat.version}-bin.tar.gz" |
| usetimestamp="false"/> |
| |
| <untar compression="gzip" |
| src="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" |
| dest="${ivy.dir}/"/> |
| |
| <delete file="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz"/> |
| </target> |
| |
| <target name="run-rat" |
| description="--> runs Apache Rat on codebase" |
| depends="init, apache-rat-download"> |
| <!-- Registers the Rat Ant tasks from ${apache-rat.jar} under the rat namespace URI. --> |
| <taskdef resource="org/apache/rat/anttasks/antlib.xml" |
| uri="antlib:org.apache.rat.anttasks"> |
| <classpath> |
| <pathelement location="${apache-rat.jar}"/> |
| </classpath> |
| </taskdef> |
| <!-- Checks everything below src except the files excluded below; the report goes to ${build.dir}/apache-rat-report.txt. --> |
| <rat:report reportFile="${build.dir}/apache-rat-report.txt"> |
| <fileset dir="src" includes="**"> |
| <!-- language-identifier: mappings and test data --> |
| <exclude name="plugin/language-identifier/src/java/org/apache/nutch/analysis/lang/langmappings.properties"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/da.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/de.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/en.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/es.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/fi.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/fr.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/it.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/nl.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/pt.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/sv.test"/> |
| <exclude name="plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/test-referencial.txt"/> |
| <!-- parse-tika: sample documents --> |
| <exclude name="plugin/parse-tika/sample/ootest.txt"/> |
| <exclude name="plugin/parse-tika/sample/test.rtf"/> |
| <!-- urlfilter-ignoreexempt: placeholder file --> |
| <exclude name="plugin/urlfilter-ignoreexempt/data/.donotdelete"/> |
| <!-- urlfilter-automaton: sample rules and URLs --> |
| <exclude name="plugin/urlfilter-automaton/sample/Benchmarks.rules"/> |
| <exclude name="plugin/urlfilter-automaton/sample/Benchmarks.urls"/> |
| <exclude name="plugin/urlfilter-automaton/sample/IntranetCrawling.rules"/> |
| <exclude name="plugin/urlfilter-automaton/sample/IntranetCrawling.urls"/> |
| <exclude name="plugin/urlfilter-automaton/sample/WholeWebCrawling.rules"/> |
| <exclude name="plugin/urlfilter-automaton/sample/WholeWebCrawling.urls"/> |
| <!-- urlfilter-fast: sample rules and URLs --> |
| <exclude name="plugin/urlfilter-fast/sample/Benchmarks.urls"/> |
| <exclude name="plugin/urlfilter-fast/sample/fast-urlfilter-benchmark.txt"/> |
| <exclude name="plugin/urlfilter-fast/sample/fast-urlfilter-test.txt"/> |
| <exclude name="plugin/urlfilter-fast/sample/test.urls"/> |
| <!-- urlfilter-regex: sample rules and URLs --> |
| <exclude name="plugin/urlfilter-regex/sample/Benchmarks.rules"/> |
| <exclude name="plugin/urlfilter-regex/sample/Benchmarks.urls"/> |
| <exclude name="plugin/urlfilter-regex/sample/IntranetCrawling.rules"/> |
| <exclude name="plugin/urlfilter-regex/sample/IntranetCrawling.urls"/> |
| <exclude name="plugin/urlfilter-regex/sample/WholeWebCrawling.rules"/> |
| <exclude name="plugin/urlfilter-regex/sample/WholeWebCrawling.urls"/> |
| <exclude name="plugin/urlfilter-regex/sample/nutch1838.rules"/> |
| <exclude name="plugin/urlfilter-regex/sample/nutch1838.urls"/> |
| </fileset> |
| </rat:report> |
| </target> |
| |
| <!-- ================================================================== --> |
| <!-- Spotbugs --> |
| <!-- ================================================================== --> |
| <target description="--> download spotbugs jar" name="spotbugs-download"> |
| <!-- spotbugs.jar.found is set only if ${spotbugs.jar} already exists. --> |
| <available property="spotbugs.jar.found" file="${spotbugs.jar}"/> |
| <!-- The called target carries unless="spotbugs.jar.found", so it does nothing when the jar is present. --> |
| <antcall target="spotbugs-download-unchecked"/> |
| </target> |
| |
| <target name="spotbugs-download-unchecked" unless="spotbugs.jar.found" |
| description="--> downloads the spotbugs binary (spotbugs-*.tgz)."> |
| <!-- Skipped when spotbugs.jar.found is set. Otherwise: download the release archive into ${ivy.dir}, unpack it there, remove the archive. --> |
| <!-- The src URL must not carry surrounding whitespace inside the attribute value. --> |
| <get src="https://github.com/spotbugs/spotbugs/releases/download/${spotbugs.version}/spotbugs-${spotbugs.version}.tgz" |
| dest="${ivy.dir}/spotbugs-${spotbugs.version}.tgz" usetimestamp="false" /> |
| |
| <untar src="${ivy.dir}/spotbugs-${spotbugs.version}.tgz" |
| dest="${ivy.dir}" compression="gzip"> |
| </untar> |
| |
| <delete file="${ivy.dir}/spotbugs-${spotbugs.version}.tgz" /> |
| </target> |
| |
| <target name="spotbugs" depends="jar, compile-plugins, spotbugs-download" description="--> runs spotbugs source code analysis."> |
| <!-- Runs SpotBugs on the core jar and the plugin jars found in ${build.dir}; the HTML report is written to ${build.dir}/nutch-spotbugs.html. --> |
| <taskdef |
| resource="edu/umd/cs/findbugs/anttask/tasks.properties" |
| classpath="${spotbugs.jar}" /> |
| <spotbugs home="${spotbugs.home}" |
| output="html" |
| outputFile="${build.dir}/nutch-spotbugs.html" |
| projectName="Apache Nutch Spotbugs Analysis" |
| stylesheet="fancy-hist.xsl" > |
| <auxClasspath> |
| <!-- dependency jars required for analysis but not analyzed (not our bugs) --> |
| <!-- The jars are listed individually: naming only the lib directory would not put the jars inside it on the classpath. --> |
| <!-- Locations are given relative to ${build.dir}, like the filesets of analyzed jars below; Ant resolves relative locations against the project basedir, and an absolute build.dir keeps working. --> |
| <fileset dir="${build.dir}/lib"> |
| <include name="*.jar"/> |
| </fileset> |
| <fileset dir="${build.dir}/plugins"> |
| <include name="**/*.jar"/> |
| </fileset> |
| </auxClasspath> |
| <sourcePath> |
| <fileset dir="${basedir}/src/java" /> |
| <fileset dir="${basedir}/src/plugin"> |
| <include name="*/src/java/**/*.java" /> |
| </fileset> |
| </sourcePath> |
| <!-- jars to be analyzed: --> |
| <!-- 1. apache-nutch-*.jar --> |
| <fileset dir="${build.dir}"> |
| <include name="${final.name}.jar" /> |
| </fileset> |
| <!-- 2. plugin jars, eg. build/feed/feed.jar --> |
| <fileset dir="${build.dir}/"> |
| <include name="*/*.jar" /> |
| <exclude name="lib/*.jar"/> |
| </fileset> |
| </spotbugs> |
| </target> |
| |
| |
| <!-- ================================================================== --> |
| <!-- Eclipse targets --> |
| <!-- ================================================================== --> |
| |
| <!-- classpath for generating eclipse project --> |
| <!-- Defined exactly once: a second path with the same id would only override this reference. --> |
| <!-- Contents: jars directly in ${build.lib.dir} (the ant-eclipse task jar itself is excluded), all jars below ${build.plugins}, jars directly in ${test.build.lib.dir}. --> |
| <path id="eclipse.classpath"> |
| <fileset dir="${build.lib.dir}"> |
| <include name="*.jar" /> |
| <exclude name="ant-eclipse-1.0-jvm1.2.jar" /> |
| </fileset> |
| <fileset dir="${build.plugins}"> |
| <include name="**/*.jar" /> |
| </fileset> |
| <fileset dir="${test.build.lib.dir}"> |
| <include name="*.jar" /> |
| </fileset> |
| </path> |
| |
| <!-- target: ant-eclipse-download =================================== --> |
| <target description="--> downloads the ant-eclipse binary." name="ant-eclipse-download"> |
| <!-- Downloads the ant-eclipse 1.0 archive into ${build.dir}, extracts only the task jar (it lands in ${build.dir}/lib), then removes the archive. --> |
| <get dest="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" |
| src="https://downloads.sourceforge.net/project/ant-eclipse/ant-eclipse/1.0/ant-eclipse-1.0.bin.tar.bz2" |
| usetimestamp="false"/> |
| |
| <untar compression="bzip2" |
| src="${build.dir}/ant-eclipse-1.0.bin.tar.bz2" |
| dest="${build.dir}"> |
| <patternset includes="lib/ant-eclipse-1.0-jvm1.2.jar"/> |
| </untar> |
| |
| <delete file="${build.dir}/ant-eclipse-1.0.bin.tar.bz2"/> |
| </target> |
| |
| <!-- target: eclipse ================================================ --> |
| <target name="eclipse" |
| depends="clean-eclipse,init,resolve-test,job,ant-eclipse-download" |
| description="--> create eclipse project files"> |
| <!-- Generates Eclipse project files for this checkout using the ant-eclipse task. --> |
| <!-- The depends list cleans first, then resolves and builds, then downloads the task jar used by the taskdef below. --> |
| |
| <!-- eclipse.project is set to the last path segment of ${basedir}; it is used as the Eclipse project name. --> |
| <pathconvert property="eclipse.project"> |
| <path path="${basedir}"/> |
| <regexpmapper from="^.*/([^/]+)$$" to="\1" handledirsep="yes"/> |
| </pathconvert> |
| |
| <!-- The task class is loaded from the jar that ant-eclipse-download extracts into ${build.dir}/lib. --> |
| <taskdef name="eclipse" |
| classname="prantl.ant.eclipse.EclipseTask" |
| classpath="${build.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" /> |
| <eclipse updatealways="true"> |
| <project name="${eclipse.project}" /> |
| <classpath> |
| <!-- Library entries: the conf directory, src/bin and every jar referenced by eclipse.classpath. --> |
| <library path="${conf.dir}" exported="false" /> |
| <library path="${basedir}/src/bin" exported="false" /> |
| <library pathref="eclipse.classpath" exported="false" /> |
| |
| <!-- Core sources; the core tests get their own output directory. --> |
| <source path="${basedir}/src/java/" /> |
| <source path="${basedir}/src/test/" output="build/test/classes" /> |
| |
| <!-- Plugin source folders, one entry per src/java or src/test directory. --> |
| <!-- NOTE(review): this list is maintained by hand; presumably a new plugin has to be added here as well, confirm against the plugin build. --> |
| <source path="${plugins.dir}/creativecommons/src/java/" /> |
| <source path="${plugins.dir}/creativecommons/src/test/" /> |
| <source path="${plugins.dir}/feed/src/java/" /> |
| <source path="${plugins.dir}/feed/src/test/" /> |
| <source path="${plugins.dir}/headings/src/java/" /> |
| <source path="${plugins.dir}/headings/src/test/" /> |
| <source path="${plugins.dir}/exchange-jexl/src/java/" /> |
| <source path="${plugins.dir}/index-anchor/src/java/" /> |
| <source path="${plugins.dir}/index-anchor/src/test/" /> |
| <source path="${plugins.dir}/index-arbitrary/src/java/" /> |
| <source path="${plugins.dir}/index-arbitrary/src/test/" /> |
| <source path="${plugins.dir}/index-basic/src/java/" /> |
| <source path="${plugins.dir}/index-basic/src/test/" /> |
| <source path="${plugins.dir}/index-geoip/src/java/" /> |
| <source path="${plugins.dir}/index-jexl-filter/src/java/" /> |
| <source path="${plugins.dir}/index-jexl-filter/src/test/" /> |
| <source path="${plugins.dir}/index-links/src/java/" /> |
| <source path="${plugins.dir}/index-links/src/test/" /> |
| <source path="${plugins.dir}/index-metadata/src/java/" /> |
| <source path="${plugins.dir}/index-more/src/java/" /> |
| <source path="${plugins.dir}/index-more/src/test/" /> |
| <source path="${plugins.dir}/index-replace/src/java/" /> |
| <source path="${plugins.dir}/index-replace/src/test/" /> |
| <source path="${plugins.dir}/index-static/src/java/" /> |
| <source path="${plugins.dir}/index-static/src/test/" /> |
| <source path="${plugins.dir}/indexer-cloudsearch/src/java/" /> |
| <source path="${plugins.dir}/indexer-csv/src/java"/> |
| <source path="${plugins.dir}/indexer-csv/src/test"/> |
| <source path="${plugins.dir}/indexer-dummy/src/java/" /> |
| <source path="${plugins.dir}/indexer-elastic/src/java/" /> |
| <source path="${plugins.dir}/indexer-kafka/src/java/" /> |
| <source path="${plugins.dir}/indexer-opensearch-1x/src/java/" /> |
| <source path="${plugins.dir}/indexer-rabbit/src/java/" /> |
| <source path="${plugins.dir}/indexer-solr/src/java/" /> |
| <source path="${plugins.dir}/language-identifier/src/java/" /> |
| <source path="${plugins.dir}/language-identifier/src/test/" /> |
| <source path="${plugins.dir}/lib-htmlunit/src/java/" /> |
| <source path="${plugins.dir}/lib-http/src/java/" /> |
| <source path="${plugins.dir}/lib-http/src/test/" /> |
| <source path="${plugins.dir}/lib-rabbitmq/src/java/" /> |
| <source path="${plugins.dir}/lib-regex-filter/src/java/" /> |
| <source path="${plugins.dir}/lib-regex-filter/src/test/" /> |
| <source path="${plugins.dir}/lib-selenium/src/java/" /> |
| <source path="${plugins.dir}/microformats-reltag/src/java/" /> |
| <source path="${plugins.dir}/mimetype-filter/src/java/" /> |
| <source path="${plugins.dir}/mimetype-filter/src/test/" /> |
| <source path="${plugins.dir}/parse-ext/src/java/" /> |
| <source path="${plugins.dir}/parse-ext/src/test/" /> |
| <source path="${plugins.dir}/parse-html/src/java/" /> |
| <source path="${plugins.dir}/parse-html/src/test/" /> |
| <source path="${plugins.dir}/parse-js/src/java/" /> |
| <source path="${plugins.dir}/parse-js/src/test/" /> |
| <source path="${plugins.dir}/parse-metatags/src/java/" /> |
| <source path="${plugins.dir}/parse-metatags/src/test/" /> |
| <source path="${plugins.dir}/parse-tika/src/java/" /> |
| <source path="${plugins.dir}/parse-tika/src/test/" /> |
| <source path="${plugins.dir}/parse-zip/src/java/" /> |
| <source path="${plugins.dir}/parse-zip/src/test/" /> |
| <source path="${plugins.dir}/parsefilter-debug/src/java/" /> |
| <source path="${plugins.dir}/parsefilter-naivebayes/src/java/" /> |
| <source path="${plugins.dir}/parsefilter-regex/src/java/" /> |
| <source path="${plugins.dir}/parsefilter-regex/src/test/" /> |
| <source path="${plugins.dir}/protocol-file/src/java/" /> |
| <source path="${plugins.dir}/protocol-file/src/test/" /> |
| <source path="${plugins.dir}/protocol-foo/src/java/" /> |
| <source path="${plugins.dir}/protocol-ftp/src/java/" /> |
| <source path="${plugins.dir}/protocol-htmlunit/src/java/" /> |
| <source path="${plugins.dir}/protocol-http/src/java/" /> |
| <source path="${plugins.dir}/protocol-http/src/test/" /> |
| <source path="${plugins.dir}/protocol-httpclient/src/java/" /> |
| <source path="${plugins.dir}/protocol-httpclient/src/test/" /> |
| <source path="${plugins.dir}/protocol-interactiveselenium/src/java/" /> |
| <source path="${plugins.dir}/protocol-okhttp/src/java/" /> |
| <source path="${plugins.dir}/protocol-okhttp/src/test/" /> |
| <source path="${plugins.dir}/protocol-selenium/src/java"/> |
| <source path="${plugins.dir}/publish-rabbitmq/src/java"/> |
| <source path="${plugins.dir}/scoring-depth/src/java/" /> |
| <source path="${plugins.dir}/scoring-link/src/java/" /> |
| <source path="${plugins.dir}/scoring-opic/src/java/" /> |
| <source path="${plugins.dir}/scoring-orphan/src/java"/> |
| <source path="${plugins.dir}/scoring-orphan/src/test"/> |
| <source path="${plugins.dir}/scoring-similarity/src/java/" /> |
| <source path="${plugins.dir}/scoring-metadata/src/java/" /> |
| <source path="${plugins.dir}/scoring-metadata/src/test" /> |
| <source path="${plugins.dir}/subcollection/src/java/" /> |
| <source path="${plugins.dir}/subcollection/src/test/" /> |
| <source path="${plugins.dir}/tld/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-automaton/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-automaton/src/test/" /> |
| <source path="${plugins.dir}/urlfilter-domain/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-domain/src/test/" /> |
| <source path="${plugins.dir}/urlfilter-domaindenylist/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-domaindenylist/src/test/" /> |
| <source path="${plugins.dir}/urlfilter-fast/src/java/"/> |
| <source path="${plugins.dir}/urlfilter-fast/src/test/"/> |
| <source path="${plugins.dir}/urlfilter-ignoreexempt/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-prefix/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-prefix/src/test/" /> |
| <source path="${plugins.dir}/urlfilter-regex/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-regex/src/test/" /> |
| <source path="${plugins.dir}/urlfilter-suffix/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-suffix/src/test/" /> |
| <source path="${plugins.dir}/urlfilter-validator/src/java/" /> |
| <source path="${plugins.dir}/urlfilter-validator/src/test/" /> |
| <source path="${plugins.dir}/urlmeta/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-ajax/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-ajax/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-basic/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-basic/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-host/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-host/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-pass/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-pass/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-protocol/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-protocol/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-querystring/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-querystring/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-regex/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-regex/src/test/" /> |
| <source path="${plugins.dir}/urlnormalizer-slash/src/java/" /> |
| <source path="${plugins.dir}/urlnormalizer-slash/src/test/" /> |
| |
| <!-- Output path for the source entries above that do not name their own output. --> |
| <output path="${build.classes}" /> |
| </classpath> |
| </eclipse> |
| </target> |
| |
| </project> |