| <?xml version='1.0' encoding='utf-8'?> |
| |
| <!-- Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <ivy-module xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
| xsi:noNamespaceSchemaLocation="http://ant.apache.org/ivy/schemas/ivy.xsd" |
| xmlns:ns0="http://ant.apache.org/ivy/maven" version="2.0"> |
| <!-- Module identity plus human-readable metadata: license, author and description. --> |
| <info module="nutch" organisation="org.apache.nutch"> |
|   <license url="https://www.apache.org/licenses/LICENSE-2.0.txt" name="Apache 2.0"/> |
|   <ivyauthor url="https://nutch.apache.org/" name="Apache Nutch Team"/> |
|   <!-- The description lines are left unindented: whitespace inside the element is part of its text. --> |
| <description homepage="https://nutch.apache.org/">Nutch is an open source web-search |
| software. It builds on Hadoop, Tika and Solr, adding web-specifics, |
| such as a crawler, a link-graph database etc. |
| </description> |
| </info> |
| |
| <!-- Configurations are not declared inline; they are included from a separate file under ${basedir}/ivy. --> |
| <configurations> |
|   <include file="${basedir}/ivy/ivy-configurations.xml"/> |
| </configurations> |
| |
| <publications> |
|   <!-- No name attribute is given, so the artifact takes its name from this module; it is published in the "master" configuration. --> |
|   <artifact conf="master"/> |
| </publications> |
| |
| <dependencies> |
| <!-- Logging: Log4j 2 (API, core and its SLF4J 2 binding) plus the SLF4J API; only the master artifacts are pulled in. --> |
| <dependency org="org.apache.logging.log4j" name="log4j-api" rev="2.21.1" conf="*->master"/> |
| <dependency org="org.apache.logging.log4j" name="log4j-core" rev="2.21.1" conf="*->master"/> |
| <dependency org="org.apache.logging.log4j" name="log4j-slf4j2-impl" rev="2.21.1" conf="*->master"/> |
| <dependency org="org.slf4j" name="slf4j-api" rev="2.0.9" conf="*->master"/> |
| |
| <!-- Apache Commons, HttpClient and other general-purpose libraries --> |
| <dependency org="org.apache.commons" name="commons-lang3" rev="3.13.0" conf="*->default"/> |
| <dependency org="org.apache.commons" name="commons-collections4" rev="4.4" conf="*->master"/> |
| <dependency org="org.apache.httpcomponents" name="httpclient" rev="4.5.13" conf="*->master"/> |
| <dependency org="commons-codec" name="commons-codec" rev="1.16.0" conf="*->default"/> |
| <!-- commons-io stays at 2.11.0: Hadoop 3.4.0 is expected to ship 2.11.0, and until then |
|      Tika is broken in distributed mode. In the meantime we rely on the hadoop-safe-tika |
|      shim, which shades commons-io, in order to upgrade Tika. |
|      See https://github.com/apache/nutch/pull/776 --> |
| <dependency org="commons-io" name="commons-io" rev="2.11.0" conf="*->default"/> |
| <dependency org="org.apache.commons" name="commons-compress" rev="1.24.0" conf="*->default"/> |
| <dependency org="org.apache.commons" name="commons-jexl3" rev="3.2.1" conf="*->default"/> |
| <dependency org="com.tdunning" name="t-digest" rev="3.3"/> |
| |
| <!-- Hadoop dependencies. Each one excludes reload4j and all org.slf4j artifacts from what it |
|      pulls in; slf4j-api and a Log4j 2 binding are declared directly in this file. --> |
| <dependency org="org.apache.hadoop" name="hadoop-common" rev="3.3.6" conf="*->default"> |
|   <exclude org="ch.qos.reload4j" name="*"/> |
|   <exclude org="org.slf4j" name="*"/> |
| </dependency> |
| <dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="3.3.6" conf="*->default"> |
|   <exclude org="ch.qos.reload4j" name="*"/> |
|   <exclude org="org.slf4j" name="*"/> |
| </dependency> |
| <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" rev="3.3.6" conf="*->default"> |
|   <exclude org="ch.qos.reload4j" name="*"/> |
|   <exclude org="org.slf4j" name="*"/> |
| </dependency> |
| <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="3.3.6" conf="*->default"> |
|   <exclude org="ch.qos.reload4j" name="*"/> |
|   <exclude org="org.slf4j" name="*"/> |
| </dependency> |
| <!-- End of Hadoop dependencies --> |
| |
| <!-- Shaded Tika core; transitive resolution is switched off for this dependency. --> |
| <dependency org="org.tallison.tika" name="tika-core-shaded" rev="2.9.1.0" conf="*->default" transitive="false"/> |
| |
| <!-- XML parsing: this xml-apis version is forced because Tika requires it. --> |
| <dependency org="xml-apis" name="xml-apis" rev="1.4.01"/> |
| <dependency org="xerces" name="xercesImpl" rev="2.12.2"/> |
| |
| <dependency org="com.ibm.icu" name="icu4j" rev="71.1"/> |
| |
| <dependency org="com.google.guava" name="guava" rev="31.1-jre"/> |
| |
| <dependency org="com.github.crawler-commons" name="crawler-commons" rev="1.4"/> |
| |
| <dependency org="com.google.code.gson" name="gson" rev="2.9.1"/> |
| <!-- warc-hadoop is resolved without its hadoop-client module. --> |
| <dependency org="com.martinkl.warc" name="warc-hadoop" rev="0.1.0"> |
|   <exclude module="hadoop-client"/> |
| </dependency> |
| |
| <!-- Apache CXF: JAX-WS and JAX-RS frontends plus HTTP transports (including the Jetty one). --> |
| <dependency org="org.apache.cxf" name="cxf-rt-frontend-jaxws" rev="3.5.3" conf="*->default"/> |
| <dependency org="org.apache.cxf" name="cxf-rt-frontend-jaxrs" rev="3.5.3" conf="*->default"/> |
| <dependency org="org.apache.cxf" name="cxf-rt-transports-http" rev="3.5.3" conf="*->default"/> |
| <dependency org="org.apache.cxf" name="cxf-rt-transports-http-jetty" rev="3.5.3" conf="*->default"/> |
| <!-- The CXF RS client is mapped from the "test" configuration only. --> |
| <dependency org="org.apache.cxf" name="cxf-rt-rs-client" rev="3.5.3" conf="test->default"/> |
| <!-- Jackson: all modules are kept at the same revision. --> |
| <dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.15.2" conf="*->default"/> |
| <dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="2.15.2" conf="*->default"/> |
| <dependency org="com.fasterxml.jackson.dataformat" name="jackson-dataformat-cbor" rev="2.15.2" conf="*->default"/> |
| <dependency org="com.fasterxml.jackson.jaxrs" name="jackson-jaxrs-json-provider" rev="2.15.2" conf="*->default"/> |
| |
| <!-- Artifacts needed for WARC support --> |
| <dependency org="org.netpreserve.commons" name="webarchive-commons" rev="1.1.9" conf="*->default"> |
|   <exclude module="hadoop-core"/> |
|   <exclude org="com.google.guava"/> |
|   <exclude org="junit"/> |
|   <!-- Dependencies with an incompatible license are excluded |
|        (see https://www.apache.org/legal/resolved.html#category-x). --> |
|   <exclude org="org.json"/><!-- JSON License --> |
|   <!-- |
|   Excluding the following two dependencies disables WARC generation by |
|   "bin/nutch commoncrawldump -warc ..." |
|   To generate WARC files with the tool "commoncrawldump", remove these exclusions and recompile Nutch. |
|   --> |
|   <exclude org="it.unimi.dsi" module="dsiutils"/><!-- LGPL 2.1 --> |
|   <exclude org="org.gnu.inet" module="libidn"/><!-- LGPL 2.1 --> |
| </dependency> |
| |
| <!-- Artifacts needed for testing; both are mapped from the "test" configuration only. --> |
| <dependency org="junit" name="junit" rev="4.13.2" conf="test->default"/> |
| <dependency org="org.apache.mrunit" name="mrunit" rev="1.1.0" conf="test->default"> |
|   <!-- Select the mrunit artifact carrying the "hadoop2" classifier; log4j 1.x is left out. --> |
|   <artifact name="mrunit" ns0:classifier="hadoop2"/> |
|   <exclude org="log4j" module="log4j"/> |
| </dependency> |
| |
| <!-- Jetty serves test pages for the unit tests; it is also provided as a dependency of Hadoop. --> |
| <dependency org="org.eclipse.jetty" name="jetty-server" rev="9.4.50.v20221201" conf="test->default"> |
|   <exclude org="ch.qos.reload4j" module="*" /> |
|   <!-- The SLF4J binding for reload4j is published as "slf4j-reload4j"; the module name must be |
|        written in full for this exclusion to match it. --> |
|   <exclude org="org.slf4j" module="slf4j-reload4j" /> |
| </dependency> |
| |
| <!-- Added because of the Elasticsearch JEST client. --> |
| <!-- TODO: move these into the indexer-elastic-rest plugin; at present they do not resolve correctly there. --> |
| <dependency org="org.apache.httpcomponents" name="httpcore-nio" rev="4.4.14"/> |
| <dependency org="org.apache.httpcomponents" name="httpcore" rev="4.4.14"/> |
| |
| <dependency org="de.vandermeer" name="asciitable" rev="0.3.2"/> |
| |
| <!-- Global exclusions: these rules are declared at the level of the whole dependency list, not per dependency. --> |
| <exclude module="jmxtools"/> |
| <exclude module="jms"/> |
| <exclude module="jmxri"/> |
| <exclude module="slf4j-log4j12"/> |
| <!-- log4j 1.x must not end up among the resolved artifacts --> |
| <exclude module="log4j"/> |
| <exclude org="com.thoughtworks.xstream"/> |
| </dependencies> |
| |
| </ivy-module> |