NUTCH-2809 Upgrade any23 plugin dependency to 2.4 (#553)

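* NUTCH-2809 Upgrade any23 plugin dependency to 2.4
  - ivy.xml: bump apache-any23-core from rev 2.2 to rev 2.4
  - plugin.xml: update the runtime library list for the 2.4 dependency set
  - TestAny23ParseFilter: update expected triple counts (68 -> 79, 38 -> 40)
  - .gitignore: ignore csvindexwriter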
diff --git a/.gitignore b/.gitignore
index 02a74cf..249ca77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,3 +23,4 @@
 .idea/
 *.iml
 *.swp
+csvindexwriter
diff --git a/src/plugin/any23/ivy.xml b/src/plugin/any23/ivy.xml
index 9a0aa34..d821b32 100644
--- a/src/plugin/any23/ivy.xml
+++ b/src/plugin/any23/ivy.xml
@@ -36,7 +36,7 @@
   </publications>
 
   <dependencies>
-    <dependency org="org.apache.any23" name="apache-any23-core" rev="2.2" conf="*->default">
+    <dependency org="org.apache.any23" name="apache-any23-core" rev="2.4" conf="*->default">
       <exclude org="org.apache.commons" name="commons-lang" />
       <exclude org="org.apache.commons" name="commons-compress" />
       <exclude org="org.slf4j" name="slf4j-log4j12" />
diff --git a/src/plugin/any23/plugin.xml b/src/plugin/any23/plugin.xml
index 71c5522..934709d 100644
--- a/src/plugin/any23/plugin.xml
+++ b/src/plugin/any23/plugin.xml
@@ -25,162 +25,185 @@
     <library name="any23.jar">
       <export name="*"/>
     </library>
-      <library name="aopalliance-1.0.jar"/>
-      <library name="apache-any23-api-2.2.jar"/>
-      <library name="apache-any23-core-2.2.jar"/>
-      <library name="apache-any23-csvutils-2.2.jar"/>
-      <library name="apache-any23-encoding-2.2.jar"/>
-      <library name="apache-any23-mime-2.2.jar"/>
-      <library name="apache-mime4j-core-0.8.1.jar"/>
-      <library name="apache-mime4j-dom-0.8.1.jar"/>
-      <library name="asm-5.0.4.jar"/>
-      <library name="bcmail-jdk15on-1.54.jar"/>
-      <library name="bcpkix-jdk15on-1.54.jar"/>
-      <library name="bcprov-jdk15on-1.54.jar"/>
+      <library name="FastInfoset-1.2.16.jar"/>
+      <library name="HikariCP-java7-2.4.13.jar"/>
+      <library name="SparseBitSet-1.2.jar"/>
+      <library name="apache-any23-api-2.4.jar"/>
+      <library name="apache-any23-core-2.4.jar"/>
+      <library name="apache-any23-csvutils-2.4.jar"/>
+      <library name="apache-any23-encoding-2.4.jar"/>
+      <library name="apache-any23-mime-2.4.jar"/>
+      <library name="apache-mime4j-core-0.8.3.jar"/>
+      <library name="apache-mime4j-dom-0.8.3.jar"/>
+      <library name="asm-7.3.1.jar"/>
+      <library name="bcmail-jdk15on-1.64.jar"/>
+      <library name="bcpkix-jdk15on-1.64.jar"/>
+      <library name="bcprov-jdk15on-1.64.jar"/>
+      <library name="biweekly-0.6.3.jar"/>
       <library name="boilerpipe-1.1.0.jar"/>
       <library name="bzip2-0.9.1.jar"/>
-      <library name="c3p0-0.9.1.1.jar"/>
-      <library name="caffeine-2.5.6.jar"/>
+      <library name="c3p0-0.9.5.5.jar"/>
+      <library name="caffeine-2.6.1.jar"/>
       <library name="cdm-4.5.5.jar"/>
-      <library name="commons-codec-1.10.jar"/>
-      <library name="commons-collections4-4.1.jar"/>
-      <library name="commons-csv-1.5.jar"/>
+      <library name="checker-qual-2.10.0.jar"/>
+      <library name="commons-codec-1.14.jar"/>
+      <library name="commons-collections4-4.4.jar"/>
+      <library name="commons-csv-1.8.jar"/>
       <library name="commons-exec-1.3.jar"/>
-      <library name="commons-httpclient-3.1.jar"/>
-      <library name="commons-io-2.4.jar"/>
-      <library name="commons-lang-2.6.jar"/>
+      <library name="commons-io-2.6.jar"/>
+      <library name="commons-lang3-3.10.jar"/>
       <library name="commons-logging-1.2.jar"/>
+      <library name="commons-math3-3.6.1.jar"/>
       <library name="commons-rdf-api-0.5.0.jar"/>
-      <library name="curvesapi-1.04.jar"/>
-      <library name="cxf-core-3.0.16.jar"/>
-      <library name="cxf-rt-frontend-jaxrs-3.0.16.jar"/>
-      <library name="cxf-rt-rs-client-3.0.16.jar"/>
-      <library name="cxf-rt-transports-http-3.0.16.jar"/>
+      <library name="curvesapi-1.06.jar"/>
+      <library name="cxf-core-3.3.5.jar"/>
+      <library name="cxf-rt-frontend-jaxrs-3.3.5.jar"/>
+      <library name="cxf-rt-rs-client-3.3.5.jar"/>
+      <library name="cxf-rt-security-3.3.5.jar"/>
+      <library name="cxf-rt-transports-http-3.3.5.jar"/>
+      <library name="dec-0.1.2.jar"/>
       <library name="ehcache-core-2.6.2.jar"/>
-      <library name="fluent-hc-4.5.3.jar"/>
-      <library name="fontbox-2.0.8.jar"/>
-      <library name="geoapi-3.0.0.jar"/>
+      <library name="error_prone_annotations-2.3.4.jar"/>
+      <library name="f8-1.1.jar"/>
+      <library name="failureaccess-1.0.1.jar"/>
+      <library name="fluent-hc-4.5.10.jar"/>
+      <library name="fontbox-2.0.19.jar"/>
+      <library name="geoapi-3.0.1.jar"/>
       <library name="grib-4.5.5.jar"/>
-      <library name="gson-2.8.1.jar"/>
-      <library name="guava-20.0.jar"/>
-      <library name="guice-4.1.0.jar"/>
-      <library name="guice-assistedinject-4.1.0.jar"/>
-      <library name="guice-multibindings-4.1.0.jar"/>
-      <library name="httpclient-4.5.3.jar"/>
-      <library name="httpclient-cache-4.5.3.jar"/>
-      <library name="httpclient-osgi-4.5.3.jar"/>
-      <library name="httpcore-4.4.6.jar"/>
-      <library name="httpcore-nio-4.4.6.jar"/>
-      <library name="httpcore-osgi-4.4.6.jar"/>
-      <library name="httpmime-4.5.3.jar"/>
+      <library name="gson-2.8.6.jar"/>
+      <library name="guava-28.2-jre.jar"/>
+      <library name="hppcrt-0.7.5.jar"/>
+      <library name="httpclient-4.5.12.jar"/>
+      <library name="httpclient-cache-4.5.12.jar"/>
+      <library name="httpclient-osgi-4.5.12.jar"/>
+      <library name="httpcore-4.4.13.jar"/>
+      <library name="httpcore-nio-4.4.12.jar"/>
+      <library name="httpcore-osgi-4.4.13.jar"/>
+      <library name="httpmime-4.5.12.jar"/>
       <library name="httpservices-4.5.5.jar"/>
-      <library name="isoparser-1.1.18.jar"/>
-      <library name="jackcess-2.1.8.jar"/>
-      <library name="jackcess-encrypt-2.1.2.jar"/>
-      <library name="jackson-annotations-2.9.0.jar"/>
-      <library name="jackson-core-2.9.2.jar"/>
-      <library name="jackson-databind-2.9.0.jar"/>
-      <library name="java-libpst-0.8.1.jar"/>
-      <library name="javax.annotation-api-1.2.jar"/>
+      <library name="isoparser-1.1.22.jar"/>
+      <library name="istack-commons-runtime-3.0.8.jar"/>
+      <library name="j2objc-annotations-1.3.jar"/>
+      <library name="jackcess-3.0.1.jar"/>
+      <library name="jackcess-encrypt-3.0.0.jar"/>
+      <library name="jackson-annotations-2.10.3.jar"/>
+      <library name="jackson-core-2.10.3.jar"/>
+      <library name="jackson-databind-2.10.3.jar"/>
+      <library name="jai-imageio-core-1.4.0.jar"/>
+      <library name="jakarta.activation-1.2.1.jar"/>
+      <library name="jakarta.activation-api-1.2.1.jar"/>
+      <library name="jakarta.ws.rs-api-2.1.5.jar"/>
+      <library name="jakarta.xml.bind-api-2.3.2.jar"/>
+      <library name="java-libpst-0.9.3.jar"/>
+      <library name="javax.activation-api-1.2.0.jar"/>
+      <library name="javax.annotation-api-1.3.2.jar"/>
       <library name="javax.inject-1.jar"/>
-      <library name="javax.ws.rs-api-2.0.1.jar"/>
+      <library name="jaxb-api-2.3.1.jar"/>
+      <library name="jaxb-runtime-2.3.2.jar"/>
+      <library name="jbig2-imageio-3.0.3.jar"/>
       <library name="jcip-annotations-1.0.jar"/>
-      <library name="jcl-over-slf4j-1.7.25.jar"/>
-      <library name="jcommander-1.30.jar"/>
-      <library name="jdom2-2.0.4.jar"/>
-      <library name="jempbox-1.8.13.jar"/>
-      <library name="jhighlight-1.0.2.jar"/>
-      <library name="jmatio-1.2.jar"/>
-      <library name="jna-4.1.0.jar"/>
+      <library name="jcl-over-slf4j-1.7.30.jar"/>
+      <library name="jcommander-1.78.jar"/>
+      <library name="jdom2-2.0.6.jar"/>
+      <library name="jempbox-1.8.16.jar"/>
+      <library name="jhighlight-1.0.3.jar"/>
+      <library name="jmatio-1.5.jar"/>
+      <library name="jna-5.5.0.jar"/>
       <library name="joda-time-2.2.jar"/>
-      <library name="json-1.8.jar"/>
       <library name="json-simple-1.1.1.jar"/>
-      <library name="jsonld-java-0.11.1.jar"/>
-      <library name="jsoup-1.11.2.jar"/>
-      <library name="jsr-275-0.9.3.jar"/>
-      <library name="jsr305-3.0.1.jar"/>
-      <library name="jul-to-slf4j-1.7.24.jar"/>
+      <library name="jsonld-java-0.13.1.jar"/>
+      <library name="jsoup-1.13.1.jar"/>
+      <library name="jsr305-3.0.2.jar"/>
+      <library name="jul-to-slf4j-1.7.30.jar"/>
       <library name="juniversalchardet-1.0.3.jar"/>
-      <library name="junrar-0.7.jar"/>
+      <library name="junrar-4.0.0.jar"/>
+      <library name="listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar"/>
       <library name="mapdb-1.0.8.jar"/>
-      <library name="metadata-extractor-2.10.1.jar"/>
-      <library name="nekohtml-1.9.20.jar"/>
+      <library name="mchange-commons-java-0.2.19.jar"/>
+      <library name="metadata-extractor-2.13.0.jar"/>
       <library name="netcdf4-4.5.5.jar"/>
-      <library name="opennlp-tools-1.8.3.jar"/>
-      <library name="owlapi-api-5.1.3.jar"/>
-      <library name="owlapi-apibinding-5.1.3.jar"/>
-      <library name="owlapi-impl-5.1.3.jar"/>
-      <library name="owlapi-oboformat-5.1.3.jar"/>
-      <library name="owlapi-parsers-5.1.3.jar"/>
-      <library name="owlapi-rio-5.1.3.jar"/>
-      <library name="owlapi-tools-5.1.3.jar"/>
-      <library name="pdfbox-2.0.8.jar"/>
-      <library name="pdfbox-tools-2.0.8.jar"/>
-      <library name="poi-3.16.jar"/>
-      <library name="poi-ooxml-3.16.jar"/>
-      <library name="poi-ooxml-schemas-3.16.jar"/>
-      <library name="poi-scratchpad-3.16.jar"/>
-      <library name="protobuf-java-2.5.0.jar"/>
-      <library name="quartz-2.2.0.jar"/>
-      <library name="rdf4j-http-client-2.2.4.jar"/>
-      <library name="rdf4j-http-protocol-2.2.4.jar"/>
-      <library name="rdf4j-model-2.2.4.jar"/>
-      <library name="rdf4j-query-2.2.4.jar"/>
-      <library name="rdf4j-queryalgebra-evaluation-2.2.4.jar"/>
-      <library name="rdf4j-queryalgebra-model-2.2.4.jar"/>
-      <library name="rdf4j-queryparser-api-2.2.4.jar"/>
-      <library name="rdf4j-queryparser-serql-2.2.4.jar"/>
-      <library name="rdf4j-queryparser-sparql-2.2.4.jar"/>
-      <library name="rdf4j-queryresultio-api-2.2.4.jar"/>
-      <library name="rdf4j-queryresultio-sparqlxml-2.2.4.jar"/>
-      <library name="rdf4j-repository-api-2.2.4.jar"/>
-      <library name="rdf4j-repository-sail-2.2.4.jar"/>
-      <library name="rdf4j-repository-sparql-2.2.4.jar"/>
-      <library name="rdf4j-rio-api-2.2.4.jar"/>
-      <library name="rdf4j-rio-binary-2.2.2.jar"/>
-      <library name="rdf4j-rio-datatypes-2.2.4.jar"/>
-      <library name="rdf4j-rio-jsonld-2.2.4.jar"/>
-      <library name="rdf4j-rio-languages-2.2.4.jar"/>
-      <library name="rdf4j-rio-n3-2.2.4.jar"/>
-      <library name="rdf4j-rio-nquads-2.2.4.jar"/>
-      <library name="rdf4j-rio-ntriples-2.2.4.jar"/>
-      <library name="rdf4j-rio-rdfjson-2.2.4.jar"/>
-      <library name="rdf4j-rio-rdfxml-2.2.4.jar"/>
-      <library name="rdf4j-rio-trig-2.2.4.jar"/>
-      <library name="rdf4j-rio-trix-2.2.4.jar"/>
-      <library name="rdf4j-rio-turtle-2.2.4.jar"/>
-      <library name="rdf4j-sail-api-2.2.4.jar"/>
-      <library name="rdf4j-sail-base-2.2.4.jar"/>
-      <library name="rdf4j-sail-inferencer-2.2.4.jar"/>
-      <library name="rdf4j-sail-memory-2.2.4.jar"/>
-      <library name="rdf4j-sail-model-2.2.4.jar"/>
-      <library name="rdf4j-util-2.2.4.jar"/>
-      <library name="rome-1.5.1.jar"/>
-      <library name="rome-utils-1.5.1.jar"/>
+      <library name="openjson-1.0.11.jar"/>
+      <library name="opennlp-tools-1.9.2.jar"/>
+      <library name="owlapi-api-5.1.13.jar"/>
+      <library name="owlapi-apibinding-5.1.13.jar"/>
+      <library name="owlapi-impl-5.1.13.jar"/>
+      <library name="owlapi-oboformat-5.1.13.jar"/>
+      <library name="owlapi-parsers-5.1.13.jar"/>
+      <library name="owlapi-rio-5.1.13.jar"/>
+      <library name="owlapi-tools-5.1.13.jar"/>
+      <library name="parso-2.0.11.jar"/>
+      <library name="pdfbox-2.0.19.jar"/>
+      <library name="pdfbox-tools-2.0.19.jar"/>
+      <library name="poi-4.1.2.jar"/>
+      <library name="poi-ooxml-4.1.2.jar"/>
+      <library name="poi-ooxml-schemas-4.1.2.jar"/>
+      <library name="poi-scratchpad-4.1.2.jar"/>
+      <library name="preflight-2.0.19.jar"/>
+      <library name="protobuf-java-3.11.4.jar"/>
+      <library name="quartz-2.3.2.jar"/>
+      <library name="rdf4j-http-client-3.1.2.jar"/>
+      <library name="rdf4j-http-protocol-3.1.2.jar"/>
+      <library name="rdf4j-model-3.1.2.jar"/>
+      <library name="rdf4j-query-3.1.2.jar"/>
+      <library name="rdf4j-queryalgebra-evaluation-3.1.2.jar"/>
+      <library name="rdf4j-queryalgebra-model-3.1.2.jar"/>
+      <library name="rdf4j-queryparser-api-3.1.2.jar"/>
+      <library name="rdf4j-queryparser-sparql-3.1.2.jar"/>
+      <library name="rdf4j-queryresultio-api-3.1.2.jar"/>
+      <library name="rdf4j-queryresultio-binary-3.1.2.jar"/>
+      <library name="rdf4j-queryresultio-sparqlxml-3.1.2.jar"/>
+      <library name="rdf4j-repository-api-3.1.2.jar"/>
+      <library name="rdf4j-repository-sail-3.1.2.jar"/>
+      <library name="rdf4j-repository-sparql-3.1.2.jar"/>
+      <library name="rdf4j-rio-api-3.1.2.jar"/>
+      <library name="rdf4j-rio-binary-3.1.2.jar"/>
+      <library name="rdf4j-rio-datatypes-3.1.2.jar"/>
+      <library name="rdf4j-rio-jsonld-3.1.2.jar"/>
+      <library name="rdf4j-rio-languages-3.1.2.jar"/>
+      <library name="rdf4j-rio-n3-3.1.2.jar"/>
+      <library name="rdf4j-rio-nquads-3.1.2.jar"/>
+      <library name="rdf4j-rio-ntriples-3.1.2.jar"/>
+      <library name="rdf4j-rio-rdfjson-3.1.2.jar"/>
+      <library name="rdf4j-rio-rdfxml-3.1.2.jar"/>
+      <library name="rdf4j-rio-trig-3.1.2.jar"/>
+      <library name="rdf4j-rio-trix-3.1.2.jar"/>
+      <library name="rdf4j-rio-turtle-3.1.2.jar"/>
+      <library name="rdf4j-sail-api-3.1.2.jar"/>
+      <library name="rdf4j-sail-base-3.1.2.jar"/>
+      <library name="rdf4j-sail-memory-3.1.2.jar"/>
+      <library name="rdf4j-util-3.1.2.jar"/>
+      <library name="rome-1.12.2.jar"/>
+      <library name="rome-utils-1.12.2.jar"/>
       <library name="semargl-core-0.7.jar"/>
       <library name="semargl-rdf-0.7.jar"/>
       <library name="semargl-rdf4j-0.7.jar"/>
       <library name="semargl-rdfa-0.7.jar"/>
       <library name="sentiment-analysis-parser-0.1.jar"/>
-      <library name="sis-metadata-0.6.jar"/>
-      <library name="sis-netcdf-0.6.jar"/>
-      <library name="sis-referencing-0.6.jar"/>
-      <library name="sis-storage-0.6.jar"/>
-      <library name="sis-utility-0.6.jar"/>
-      <library name="snakeyaml-1.17.jar"/>
+      <library name="sis-feature-1.0.jar"/>
+      <library name="sis-metadata-1.0.jar"/>
+      <library name="sis-netcdf-1.0.jar"/>
+      <library name="sis-referencing-1.0.jar"/>
+      <library name="sis-storage-1.0.jar"/>
+      <library name="sis-utility-1.0.jar"/>
+      <library name="snakeyaml-1.26.jar"/>
+      <library name="stax-ex-1.8.2.jar"/>
       <library name="stax2-api-3.1.4.jar"/>
       <library name="tagsoup-1.2.1.jar"/>
-      <library name="tika-core-1.17.jar"/>
-      <library name="tika-parsers-1.17.jar"/>
-      <library name="trove4j-3.0.3.jar"/>
+      <library name="tika-core-1.24.jar"/>
+      <library name="tika-parsers-1.24.jar"/>
+      <library name="txw2-2.3.2.jar"/>
       <library name="udunits-4.5.5.jar"/>
+      <library name="unit-api-1.0.jar"/>
+      <library name="vinnie-2.0.2.jar"/>
       <library name="vorbis-java-core-0.8.jar"/>
       <library name="vorbis-java-tika-0.8.jar"/>
-      <library name="woodstox-core-asl-4.4.1.jar"/>
-      <library name="xmlbeans-2.6.0.jar"/>
-      <library name="xmlschema-core-2.2.2.jar"/>
-      <library name="xmpcore-5.1.3.jar"/>
-      <library name="xz-1.6.jar"/>
+      <library name="woodstox-core-5.0.3.jar"/>
+      <library name="xmlbeans-3.1.0.jar"/>
+      <library name="xmlschema-core-2.2.5.jar"/>
+      <library name="xmpbox-2.0.19.jar"/>
+      <library name="xmpcore-6.1.10.jar"/>
+      <library name="xmpcore-shaded-6.1.10.jar"/>
+      <library name="xz-1.8.jar"/>
   </runtime>
 
   <requires>
diff --git a/src/plugin/any23/src/test/org/apache/nutch/any23/TestAny23ParseFilter.java b/src/plugin/any23/src/test/org/apache/nutch/any23/TestAny23ParseFilter.java
index 3f0ace3..09c253f 100644
--- a/src/plugin/any23/src/test/org/apache/nutch/any23/TestAny23ParseFilter.java
+++ b/src/plugin/any23/src/test/org/apache/nutch/any23/TestAny23ParseFilter.java
@@ -49,9 +49,9 @@
   
   private String file2 = "microdata_basic.html";
 
-  private static final int EXPECTED_TRIPLES_1 = 68;
+  private static final int EXPECTED_TRIPLES_1 = 79;
   
-  private static final int EXPECTED_TRIPLES_2 = 38;
+  private static final int EXPECTED_TRIPLES_2 = 40;
   
   @Before
   public void setUp() {