# blob: 47041f465c863045620759ccae6b4e1f92d9cb86 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Project identity. final.name is composed from name and version.
name=apache-nutch
version=1.21-SNAPSHOT
final.name=${name}-${version}
# NOTE(review): presumably the year shown in generated documentation/copyright
# notices - confirm against the build file that reads it.
year=2024
# Source tree layout (all values are relative paths).
basedir=./
src.dir=./src/java
lib.dir=./lib
conf.dir=./conf
plugins.dir=./src/plugin
# Build outputs: the directories below are all derived from build.dir.
build.dir=./build
build.classes=${build.dir}/classes
build.plugins=${build.dir}/plugins
build.javadoc=${build.dir}/docs/api
build.encoding=UTF-8
build.ivy.dir=${build.dir}/ivy
build.lib.dir=${build.dir}/lib
# Test sources and test build outputs; the outputs all live under
# test.build.dir, which itself sits under build.dir.
test.src.dir=./src/test
test.build.dir=${build.dir}/test
test.build.lib.dir=${test.build.dir}/lib
test.build.data=${test.build.dir}/data
test.build.classes=${test.build.dir}/classes
test.build.javadoc=${test.build.dir}/docs/api
test.junit.output.format=plain
# Proxy host and port to use for building JavaDoc.
# Both values are option prefixes that end at "=", i.e. no host or port is
# configured in this file.
javadoc.proxy.host=-J-DproxyHost=
javadoc.proxy.port=-J-DproxyPort=
# Base URLs of the Java SE 11 and Hadoop r3.3.6 API documentation.
# NOTE(review): presumably used as external link targets for the generated
# JavaDoc - confirm against the build file.
javadoc.link.java=https://docs.oracle.com/en/java/javase/11/docs/api/
javadoc.link.hadoop=https://hadoop.apache.org/docs/r3.3.6/api/
# Package pattern covering everything under org.apache.nutch.
javadoc.packages=org.apache.nutch.*
# Distribution output: the source and binary trees are placed under dist.dir
# and named after final.name with a "-src" / "-bin" suffix.
dist.dir=./dist
src.dist.version.dir=${dist.dir}/${final.name}-src
bin.dist.version.dir=${dist.dir}/${final.name}-bin
# Java compiler switches.
# NOTE(review): javac.version is presumably the source/target release level -
# confirm against the build file that reads it.
javac.debug=on
javac.optimize=on
javac.deprecation=on
javac.version=11
# Runtime layout: the deploy and local trees both live under runtime.dir.
runtime.dir=./runtime
runtime.deploy=${runtime.dir}/deploy
runtime.local=${runtime.dir}/local
# Ivy settings. The jar file name and the download URL are both derived from
# ivy.version, so changing the version updates them together.
ivy.version=2.5.2
ivy.dir=${basedir}/ivy
ivy.file=${ivy.dir}/ivy.xml
ivy.jar=${ivy.dir}/ivy-${ivy.version}.jar
ivy.repo.url=https://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.version}/ivy-${ivy.version}.jar
# Local and shared repository roots and layout patterns. All four patterns are
# identical.
# NOTE(review): ivy.default.ivy.user.dir is not defined in the visible part of
# this file; presumably it is supplied by Ivy itself - confirm.
ivy.local.default.root=${ivy.default.ivy.user.dir}/local
ivy.local.default.ivy.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
ivy.local.default.artifact.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
ivy.shared.default.root=${ivy.default.ivy.user.dir}/shared
ivy.shared.default.ivy.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
ivy.shared.default.artifact.pattern=[organisation]/[module]/[revision]/[type]s/[artifact].[ext]
#
# Plugins API
# (colon-separated list of package patterns; continuation lines are indented
# for readability only)
#
plugins.api=\
    org.apache.nutch.protocol.http.api*:\
    org.apache.nutch.urlfilter.api*
#
# Protocol Plugins
# (colon-separated list of package patterns; the last entry must not end with
# a separator or a line continuation, otherwise the following line is joined
# into this value)
#
plugins.protocol=\
    org.apache.nutch.protocol.file*:\
    org.apache.nutch.protocol.ftp*:\
    org.apache.nutch.protocol.http*:\
    org.apache.nutch.protocol.httpclient*:\
    org.apache.nutch.protocol.interactiveselenium*:\
    org.apache.nutch.protocol.okhttp*:\
    org.apache.nutch.protocol.selenium*:\
    org.apache.nutch.protocol.htmlunit*
#
# URL Filter Plugins
# (colon-separated list of package patterns)
#
plugins.urlfilter=\
    org.apache.nutch.urlfilter.automaton*:\
    org.apache.nutch.urlfilter.domain*:\
    org.apache.nutch.urlfilter.domaindenylist*:\
    org.apache.nutch.urlfilter.fast*:\
    org.apache.nutch.urlfilter.ignoreexempt*:\
    org.apache.nutch.urlfilter.prefix*:\
    org.apache.nutch.urlfilter.regex*:\
    org.apache.nutch.urlfilter.suffix*:\
    org.apache.nutch.urlfilter.validator*
#
# URL Normalizer Plugins
# (colon-separated list of package patterns)
#
plugins.urlnormalizer=\
    org.apache.nutch.net.urlnormalizer.ajax*:\
    org.apache.nutch.net.urlnormalizer.basic*:\
    org.apache.nutch.net.urlnormalizer.host*:\
    org.apache.nutch.net.urlnormalizer.pass*:\
    org.apache.nutch.net.urlnormalizer.protocol*:\
    org.apache.nutch.net.urlnormalizer.querystring*:\
    org.apache.nutch.net.urlnormalizer.regex*:\
    org.apache.nutch.net.urlnormalizer.slash*
#
# Scoring Plugins
# (colon-separated list of package patterns; every entry except the last must
# end with ":" before the line continuation, otherwise two patterns are fused
# into one)
#
plugins.scoring=\
    org.apache.nutch.scoring.depth*:\
    org.apache.nutch.scoring.link*:\
    org.apache.nutch.scoring.opic*:\
    org.apache.nutch.scoring.orphan*:\
    org.apache.nutch.scoring.similarity*:\
    org.apache.nutch.scoring.tld*:\
    org.apache.nutch.scoring.urlmeta*:\
    org.apache.nutch.scoring.metadata*
#
# Parse Plugins
# (colon-separated list of package patterns)
# NOTE(review): the js, tika and zip entries have no trailing "*", unlike the
# other entries - confirm this is intentional.
#
plugins.parse=\
    org.apache.nutch.parse.ext*:\
    org.apache.nutch.parse.feed*:\
    org.apache.nutch.parse.html*:\
    org.apache.nutch.parse.js:\
    org.apache.nutch.parse.replace*:\
    org.apache.nutch.parse.tika:\
    org.apache.nutch.parse.zip
#
# Parse Filter Plugins
# (colon-separated list of package patterns; note that the headings and
# metatags entries use the org.apache.nutch.parse prefix, not parsefilter)
#
plugins.parsefilter=\
    org.apache.nutch.parsefilter.debug*:\
    org.apache.nutch.parse.headings*:\
    org.apache.nutch.parsefilter.naivebayes*:\
    org.apache.nutch.parsefilter.regex*:\
    org.apache.nutch.parse.metatags*
#
# Publisher Plugins
# (colon-separated list of package patterns; currently a single entry)
#
plugins.publisher=\
    org.apache.nutch.publisher.rabbitmq*
#
# Exchange Plugins
# (colon-separated list of package patterns; currently a single entry)
#
plugins.exchange=\
    org.apache.nutch.exchange.jexl*
#
# Indexing Filter Plugins
# (colon-separated list of package patterns)
#
plugins.index=\
    org.apache.nutch.indexer.anchor*:\
    org.apache.nutch.indexer.basic*:\
    org.apache.nutch.indexer.feed*:\
    org.apache.nutch.indexer.geoip*:\
    org.apache.nutch.indexer.jexl*:\
    org.apache.nutch.indexer.filter*:\
    org.apache.nutch.indexer.links*:\
    org.apache.nutch.indexer.metadata*:\
    org.apache.nutch.indexer.more*:\
    org.apache.nutch.indexer.replace*:\
    org.apache.nutch.indexer.staticfield*:\
    org.apache.nutch.indexer.subcollection*:\
    org.apache.nutch.indexer.tld*:\
    org.apache.nutch.indexer.urlmeta*
#
# Indexing Backend Plugins
# (colon-separated list of package patterns)
#
plugins.indexer=\
    org.apache.nutch.indexwriter.cloudsearch*:\
    org.apache.nutch.indexwriter.csv*:\
    org.apache.nutch.indexwriter.dummy*:\
    org.apache.nutch.indexwriter.elastic*:\
    org.apache.nutch.indexwriter.rabbit*:\
    org.apache.nutch.indexwriter.kafka*:\
    org.apache.nutch.indexwriter.solr*
#
# Misc. Plugins
#
# (gathers plugins that cannot be dispatched
# in any category, mainly because they contain
# many extension points)
#
# Colon-separated list of package patterns; the last entry carries no trailing
# ":" so the list does not end with an empty element.
#
plugins.misc=\
    org.apache.nutch.collection*:\
    org.apache.nutch.analysis.lang*:\
    org.creativecommons.nutch*:\
    org.apache.nutch.microformats.reltag*