Merge pull request #511 from sebastian-nagel/NUTCH-2779-tika-1.24.1

 NUTCH-2779 Upgrade to Tika 1.24.1
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 48db09a..4686c78 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -63,7 +63,7 @@
 		<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="3.1.3" conf="*->default"/>
 		<!-- End of Hadoop Dependencies -->
 
-		<dependency org="org.apache.tika" name="tika-core" rev="1.22" />
+		<dependency org="org.apache.tika" name="tika-core" rev="1.24.1" />
 
 		<dependency org="xml-apis" name="xml-apis" rev="1.4.01"/><!-- force this version as it is required by Tika -->
 		<dependency org="xerces" name="xercesImpl" rev="2.12.0" />
diff --git a/src/plugin/parse-tika/build-ivy.xml b/src/plugin/parse-tika/build-ivy.xml
index 285bfcd..c67ea7a 100644
--- a/src/plugin/parse-tika/build-ivy.xml
+++ b/src/plugin/parse-tika/build-ivy.xml
@@ -25,6 +25,7 @@
     <property name="ivy.checksums" value="" />
     <property name="ivy.jar.dir" value="${ivy.home}/lib" />
     <property name="ivy.jar.file" value="${ivy.jar.dir}/ivy-${ivy.install.version}.jar" />
+    <ivy:settings id="ivy.instance" file="../../../ivy/ivysettings.xml" />
 
     <target name="download-ivy" unless="offline">
 
diff --git a/src/plugin/parse-tika/howto_upgrade_tika.txt b/src/plugin/parse-tika/howto_upgrade_tika.txt
index aa4147c..ca3cdae 100644
--- a/src/plugin/parse-tika/howto_upgrade_tika.txt
+++ b/src/plugin/parse-tika/howto_upgrade_tika.txt
@@ -23,7 +23,7 @@
      (eventually with different versions)
    - duplicated libs can be added to the exclusions of transitive dependencies in
        build/plugins/parse-tika/ivy.xml
-   - but it should be made sure that the library versions in ivy/ivy.xml correspend to
+   - but it should be made sure that the library versions in ivy/ivy.xml correspond to
      those required by Tika
 
 5. Remove the locally "installed" dependencies in src/plugin/parse-tika/lib/:
diff --git a/src/plugin/parse-tika/ivy.xml b/src/plugin/parse-tika/ivy.xml
index f03dbef..574af75 100644
--- a/src/plugin/parse-tika/ivy.xml
+++ b/src/plugin/parse-tika/ivy.xml
@@ -36,7 +36,7 @@
   </publications>
 
   <dependencies>
-    <dependency org="org.apache.tika" name="tika-parsers" rev="1.22" conf="*->default">
+    <dependency org="org.apache.tika" name="tika-parsers" rev="1.24.1" conf="*->default">
       <!-- exclusions of dependencies provided in Nutch core (ivy/ivy.xml) -->
       <exclude org="org.apache.tika" name="tika-core" />
       <exclude org="org.apache.httpcomponents" name="httpclient" />
diff --git a/src/plugin/parse-tika/plugin.xml b/src/plugin/parse-tika/plugin.xml
index 18dad6c..8b87ac9 100644
--- a/src/plugin/parse-tika/plugin.xml
+++ b/src/plugin/parse-tika/plugin.xml
@@ -26,45 +26,40 @@
          <export name="*"/>
       </library>
       <!-- dependencies of Tika (tika-parsers) -->
-      <library name="animal-sniffer-annotations-1.17.jar"/>
-      <library name="ant-1.10.5.jar"/>
-      <library name="ant-launcher-1.10.5.jar"/>
       <library name="apache-mime4j-core-0.8.3.jar"/>
       <library name="apache-mime4j-dom-0.8.3.jar"/>
-      <library name="asm-7.2-beta.jar"/>
-      <library name="bcmail-jdk15on-1.62.jar"/>
-      <library name="bcpkix-jdk15on-1.62.jar"/>
-      <library name="bcprov-jdk15on-1.62.jar"/>
+      <library name="asm-8.0.1.jar"/>
+      <library name="bcmail-jdk15on-1.65.jar"/>
+      <library name="bcpkix-jdk15on-1.65.jar"/>
+      <library name="bcprov-jdk15on-1.65.jar"/>
       <library name="boilerpipe-1.1.0.jar"/>
       <library name="bzip2-0.9.1.jar"/>
-      <library name="c3p0-0.9.5.4.jar"/>
+      <library name="c3p0-0.9.5.5.jar"/>
       <library name="cdm-4.5.5.jar"/>
-      <library name="checker-qual-2.8.1.jar"/>
-      <library name="codemodel-2.3.2.jar"/>
-      <library name="commons-csv-1.7.jar"/>
+      <library name="checker-qual-2.10.0.jar"/>
+      <library name="commons-csv-1.8.jar"/>
       <library name="commons-exec-1.3.jar"/>
       <library name="commons-io-2.6.jar"/>
       <library name="commons-logging-1.2.jar"/>
       <library name="commons-math3-3.6.1.jar"/>
-      <library name="curvesapi-1.05.jar"/>
-      <library name="cxf-rt-rs-client-3.3.2.jar"/>
-      <library name="cxf-rt-security-3.3.2.jar"/>
+      <library name="curvesapi-1.06.jar"/>
+      <library name="cxf-rt-rs-client-3.3.6.jar"/>
+      <library name="cxf-rt-security-3.3.6.jar"/>
       <library name="dec-0.1.2.jar"/>
-      <library name="dtd-parser-1.4.1.jar"/>
       <library name="ehcache-core-2.6.2.jar"/>
-      <library name="error_prone_annotations-2.3.2.jar"/>
+      <library name="error_prone_annotations-2.3.4.jar"/>
       <library name="failureaccess-1.0.1.jar"/>
       <library name="FastInfoset-1.2.16.jar"/>
-      <library name="fontbox-2.0.16.jar"/>
+      <library name="fontbox-2.0.19.jar"/>
       <library name="geoapi-3.0.1.jar"/>
       <library name="grib-4.5.5.jar"/>
-      <library name="gson-2.8.5.jar"/>
-      <library name="guava-28.0-jre.jar"/>
-      <library name="httpmime-4.5.9.jar"/>
+      <library name="gson-2.8.6.jar"/>
+      <library name="guava-28.2-jre.jar"/>
+      <library name="HikariCP-java7-2.4.13.jar"/>
+      <library name="httpmime-4.5.12.jar"/>
       <library name="httpservices-4.5.5.jar"/>
-      <library name="isoparser-1.1.22.jar"/>
+      <library name="isoparser-1.9.41.2.jar"/>
       <library name="istack-commons-runtime-3.0.8.jar"/>
-      <library name="istack-commons-tools-3.0.8.jar"/>
       <library name="j2objc-annotations-1.3.jar"/>
       <library name="jackcess-3.0.1.jar"/>
       <library name="jackcess-encrypt-3.0.0.jar"/>
@@ -73,55 +68,54 @@
       <library name="jakarta.activation-api-1.2.1.jar"/>
       <library name="jakarta.ws.rs-api-2.1.5.jar"/>
       <library name="jakarta.xml.bind-api-2.3.2.jar"/>
-      <library name="java-libpst-0.8.1.jar"/>
+      <library name="java-libpst-0.9.3.jar"/>
       <library name="javax.annotation-api-1.3.2.jar"/>
       <library name="jaxb-runtime-2.3.2.jar"/>
-      <library name="jaxb-xjc-2.3.2.jar"/>
-      <library name="jbig2-imageio-3.0.2.jar"/>
+      <library name="jbig2-imageio-3.0.3.jar"/>
       <library name="jcip-annotations-1.0.jar"/>
-      <library name="jcl-over-slf4j-1.7.26.jar"/>
-      <library name="jcommander-1.35.jar"/>
+      <library name="jcl-over-slf4j-1.7.28.jar"/>
+      <library name="jcommander-1.78.jar"/>
       <library name="jdom2-2.0.6.jar"/>
       <library name="jempbox-1.8.16.jar"/>
       <library name="jhighlight-1.0.3.jar"/>
       <library name="jmatio-1.5.jar"/>
-      <library name="jna-5.3.1.jar"/>
+      <library name="jna-5.5.0.jar"/>
       <library name="joda-time-2.2.jar"/>
       <library name="json-simple-1.1.1.jar"/>
-      <library name="jsoup-1.12.1.jar"/>
+      <library name="jsoup-1.13.1.jar"/>
       <library name="jsr305-3.0.2.jar"/>
-      <library name="jul-to-slf4j-1.7.26.jar"/>
+      <library name="jul-to-slf4j-1.7.28.jar"/>
       <library name="juniversalchardet-1.0.3.jar"/>
       <library name="junrar-4.0.0.jar"/>
       <library name="listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar"/>
-      <library name="mchange-commons-java-0.2.15.jar"/>
-      <library name="metadata-extractor-2.11.0.jar"/>
+      <library name="mchange-commons-java-0.2.19.jar"/>
+      <library name="metadata-extractor-2.13.0.jar"/>
       <library name="netcdf4-4.5.5.jar"/>
-      <library name="openjson-1.0.11.jar"/>
-      <library name="opennlp-tools-1.9.1.jar"/>
+      <library name="openjson-1.0.12.jar"/>
+      <library name="opennlp-tools-1.9.2.jar"/>
       <library name="parso-2.0.11.jar"/>
-      <library name="pdfbox-2.0.16.jar"/>
-      <library name="pdfbox-tools-2.0.16.jar"/>
-      <library name="poi-4.0.1.jar"/>
-      <library name="poi-ooxml-4.0.1.jar"/>
-      <library name="poi-ooxml-schemas-4.0.1.jar"/>
-      <library name="poi-scratchpad-4.0.1.jar"/>
-      <library name="quartz-2.2.0.jar"/>
-      <library name="relaxng-datatype-2.3.2.jar"/>
-      <library name="rngom-2.3.2.jar"/>
-      <library name="rome-1.12.1.jar"/>
-      <library name="rome-utils-1.12.1.jar"/>
+      <library name="pdfbox-2.0.19.jar"/>
+      <library name="pdfbox-tools-2.0.19.jar"/>
+      <library name="poi-4.1.2.jar"/>
+      <library name="poi-ooxml-4.1.2.jar"/>
+      <library name="poi-ooxml-schemas-4.1.2.jar"/>
+      <library name="poi-scratchpad-4.1.2.jar"/>
+      <library name="preflight-2.0.19.jar"/>
+      <library name="quartz-2.3.2.jar"/>
+      <library name="rome-1.12.2.jar"/>
+      <library name="rome-utils-1.12.2.jar"/>
       <library name="sentiment-analysis-parser-0.1.jar"/>
-      <library name="sis-feature-0.8.jar"/>
-      <library name="sis-metadata-0.8.jar"/>
-      <library name="sis-netcdf-0.8.jar"/>
-      <library name="sis-referencing-0.8.jar"/>
-      <library name="sis-storage-0.8.jar"/>
-      <library name="sis-utility-0.8.jar"/>
+      <library name="sis-feature-1.0.jar"/>
+      <library name="sis-metadata-1.0.jar"/>
+      <library name="sis-netcdf-1.0.jar"/>
+      <library name="sis-referencing-1.0.jar"/>
+      <library name="sis-storage-1.0.jar"/>
+      <library name="sis-utility-1.0.jar"/>
+      <library name="SparseBitSet-1.2.jar"/>
       <library name="stax2-api-3.1.4.jar"/>
-      <library name="stax-ex-1.8.1.jar"/>
+      <library name="stax-ex-1.8.2.jar"/>
       <library name="tagsoup-1.2.1.jar"/>
-      <library name="tika-parsers-1.22.jar"/>
+      <library name="tika-parsers-1.24.1.jar"/>
       <library name="txw2-2.3.2.jar"/>
       <library name="udunits-4.5.5.jar"/>
       <library name="unit-api-1.0.jar"/>
@@ -129,10 +123,11 @@
       <library name="vorbis-java-tika-0.8.jar"/>
       <library name="woodstox-core-5.0.3.jar"/>
       <library name="xercesImpl-2.12.0.jar"/>
-      <library name="xmlbeans-3.0.2.jar"/>
-      <library name="xmlschema-core-2.2.4.jar"/>
-      <library name="xmpcore-5.1.3.jar"/>
-      <library name="xsom-2.3.2.jar"/>
+      <library name="xmlbeans-3.1.0.jar"/>
+      <library name="xmlschema-core-2.2.5.jar"/>
+      <library name="xmpbox-2.0.19.jar"/>
+      <library name="xmpcore-6.1.10.jar"/>
+      <library name="xmpcore-shaded-6.1.10.jar"/>
       <library name="xz-1.8.jar"/>
       <!-- end of dependencies of Tika (tika-parsers) -->
    </runtime>