| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| <plugin |
| id="any23" |
| name="Apache Any23 Parser/Indexer" |
| version="1.0.0" |
| provider-name="nutch.org"> |
| |
| <runtime> <!-- Libraries declared for this plugin: any23.jar first, then the third-party jars listed below. Entry order is left untouched. --> |
| <library name="any23.jar"> <!-- The only library entry that carries an export child. --> |
| <export name="*"/> <!-- NOTE(review): the "*" presumably exports everything in any23.jar to other plugins; confirm against the Nutch plugin documentation. --> |
| </library> |
| <!-- Begin Any23 dependencies (each jar file name includes its exact version) --> |
| <library name="apache-any23-api-2.7.jar"/> |
| <library name="apache-any23-core-2.7.jar"/> |
| <library name="apache-any23-csvutils-2.7.jar"/> |
| <library name="apache-any23-encoding-2.7.jar"/> |
| <library name="apache-any23-mime-2.7.jar"/> |
| <library name="apache-mime4j-core-0.8.4.jar"/> |
| <library name="apache-mime4j-dom-0.8.4.jar"/> |
| <library name="asm-9.2.jar"/> |
| <library name="bcmail-jdk15on-1.70.jar"/> |
| <library name="bcpkix-jdk15on-1.70.jar"/> |
| <library name="bcutil-jdk15on-1.70.jar"/> |
| <library name="biweekly-0.6.6.jar"/> |
| <library name="boilerpipe-1.1.0.jar"/> |
| <library name="caffeine-2.8.1.jar"/> |
| <library name="checker-qual-3.1.0.jar"/> |
| <library name="commons-codec-1.15.jar"/> |
| <library name="commons-collections4-4.4.jar"/> |
| <library name="commons-csv-1.9.0.jar"/> |
| <library name="commons-exec-1.3.jar"/> |
| <library name="commons-io-2.11.0.jar"/> |
| <library name="commons-lang3-3.12.0.jar"/> |
| <library name="commons-logging-1.2.jar"/> |
| <library name="commons-math3-3.6.1.jar"/> |
| <library name="commons-rdf-api-0.5.0.jar"/> |
| <library name="commons-text-1.9.jar"/> |
| <library name="curvesapi-1.06.jar"/> |
| <library name="dd-plist-1.23.jar"/> |
| <library name="dec-0.1.2.jar"/> |
| <library name="error_prone_annotations-2.3.4.jar"/> |
| <library name="f8-1.1.jar"/> |
| <library name="failureaccess-1.0.1.jar"/> |
| <library name="fluent-hc-4.5.13.jar"/> |
| <library name="fontbox-2.0.25.jar"/> |
| <library name="guava-30.1.1-jre.jar"/> |
| <library name="hppcrt-0.7.5.jar"/> |
| <library name="httpclient-4.5.13.jar"/> |
| <library name="httpclient-cache-4.5.13.jar"/> |
| <library name="httpclient-osgi-4.5.13.jar"/> |
| <library name="httpcore-4.4.15.jar"/> <!-- NOTE(review): 4.4.15 here, while httpcore-nio and httpcore-osgi below are 4.4.14; confirm the mix is intended. --> |
| <library name="httpcore-nio-4.4.14.jar"/> |
| <library name="httpcore-osgi-4.4.14.jar"/> |
| <library name="httpmime-4.5.13.jar"/> |
| <library name="istack-commons-runtime-3.0.12.jar"/> |
| <library name="jackcess-4.0.1.jar"/> |
| <library name="jackcess-encrypt-4.0.1.jar"/> |
| <library name="jackson-annotations-2.11.4.jar"/> |
| <library name="jackson-core-2.12.1.jar"/> <!-- NOTE(review): 2.12.1 here, while jackson-annotations and jackson-databind are 2.11.4; confirm the mix is intended. --> |
| <library name="jackson-databind-2.11.4.jar"/> |
| <library name="jai-imageio-core-1.4.0.jar"/> |
| <library name="jakarta.activation-1.2.2.jar"/> |
| <library name="jakarta.xml.bind-api-2.3.3.jar"/> |
| <library name="java-libpst-0.9.3.jar"/> |
| <library name="javax.activation-api-1.2.0.jar"/> |
| <library name="javax.inject-1.jar"/> |
| <library name="jaxb-api-2.3.1.jar"/> <!-- NOTE(review): listed alongside jakarta.xml.bind-api-2.3.3.jar; presumably both provide the JAXB API, verify that both are needed. --> |
| <library name="jaxb-runtime-2.3.5.jar"/> |
| <library name="jbig2-imageio-3.0.3.jar"/> |
| <library name="jcl-over-slf4j-1.7.35.jar"/> |
| <library name="jcommander-1.82.jar"/> |
| <library name="jdom2-2.0.6.1.jar"/> |
| <library name="jempbox-1.8.16.jar"/> |
| <library name="jhighlight-1.0.3.jar"/> |
| <library name="jmatio-1.5.jar"/> |
| <library name="jsonld-java-0.13.4.jar"/> |
| <library name="jsoup-1.14.3.jar"/> |
| <library name="jsr305-3.0.2.jar"/> |
| <library name="juniversalchardet-1.0.3.jar"/> |
| <library name="junrar-7.4.1.jar"/> |
| <library name="listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar"/> |
| <library name="log4j-api-2.17.1.jar"/> |
| <library name="log4j-core-2.17.1.jar"/> |
| <library name="log4j-slf4j-impl-2.17.1.jar"/> |
| <library name="mapdb-1.0.8.jar"/> |
| <library name="metadata-extractor-2.16.0.jar"/> |
| <library name="owlapi-api-5.1.20.jar"/> |
| <library name="owlapi-apibinding-5.1.20.jar"/> |
| <library name="owlapi-impl-5.1.20.jar"/> |
| <library name="owlapi-oboformat-5.1.20.jar"/> |
| <library name="owlapi-parsers-5.1.20.jar"/> |
| <library name="owlapi-rio-5.1.20.jar"/> |
| <library name="owlapi-tools-5.1.20.jar"/> |
| <library name="parso-2.0.14.jar"/> |
| <library name="pdfbox-2.0.25.jar"/> |
| <library name="pdfbox-debugger-2.0.25.jar"/> |
| <library name="pdfbox-tools-2.0.25.jar"/> |
| <library name="poi-5.2.0.jar"/> |
| <library name="poi-ooxml-5.2.0.jar"/> |
| <library name="poi-ooxml-lite-5.2.0.jar"/> |
| <library name="poi-scratchpad-5.2.0.jar"/> |
| <library name="rdf4j-http-client-3.7.4.jar"/> |
| <library name="rdf4j-http-protocol-3.7.4.jar"/> |
| <library name="rdf4j-model-3.7.4.jar"/> |
| <library name="rdf4j-model-api-3.7.4.jar"/> |
| <library name="rdf4j-model-vocabulary-3.7.4.jar"/> |
| <library name="rdf4j-query-3.7.4.jar"/> |
| <library name="rdf4j-queryalgebra-evaluation-3.7.4.jar"/> |
| <library name="rdf4j-queryalgebra-model-3.7.4.jar"/> |
| <library name="rdf4j-queryparser-api-3.7.4.jar"/> |
| <library name="rdf4j-queryparser-sparql-3.7.4.jar"/> |
| <library name="rdf4j-queryresultio-api-3.7.4.jar"/> |
| <library name="rdf4j-queryresultio-binary-3.7.4.jar"/> |
| <library name="rdf4j-queryresultio-sparqlxml-3.7.4.jar"/> |
| <library name="rdf4j-repository-api-3.7.4.jar"/> |
| <library name="rdf4j-repository-sail-3.7.4.jar"/> |
| <library name="rdf4j-repository-sparql-3.7.4.jar"/> |
| <library name="rdf4j-rio-api-3.7.4.jar"/> |
| <library name="rdf4j-rio-binary-3.7.4.jar"/> |
| <library name="rdf4j-rio-datatypes-3.7.4.jar"/> |
| <library name="rdf4j-rio-hdt-3.7.4.jar"/> |
| <library name="rdf4j-rio-jsonld-3.7.4.jar"/> |
| <library name="rdf4j-rio-languages-3.7.4.jar"/> |
| <library name="rdf4j-rio-n3-3.7.4.jar"/> |
| <library name="rdf4j-rio-nquads-3.7.4.jar"/> |
| <library name="rdf4j-rio-ntriples-3.7.4.jar"/> |
| <library name="rdf4j-rio-rdfjson-3.7.4.jar"/> |
| <library name="rdf4j-rio-rdfxml-3.7.4.jar"/> |
| <library name="rdf4j-rio-trig-3.7.4.jar"/> |
| <library name="rdf4j-rio-trix-3.7.4.jar"/> |
| <library name="rdf4j-rio-turtle-3.7.4.jar"/> |
| <library name="rdf4j-sail-api-3.7.4.jar"/> |
| <library name="rdf4j-sail-base-3.7.4.jar"/> |
| <library name="rdf4j-sail-memory-3.7.4.jar"/> |
| <library name="rdf4j-util-3.7.4.jar"/> |
| <library name="rome-1.18.0.jar"/> |
| <library name="rome-utils-1.18.0.jar"/> |
| <library name="semargl-core-0.7.jar"/> |
| <library name="semargl-rdf-0.7.jar"/> |
| <library name="semargl-rdf4j-0.7.jar"/> |
| <library name="semargl-rdfa-0.7.jar"/> |
| <library name="snakeyaml-1.30.jar"/> |
| <library name="SparseBitSet-1.2.jar"/> |
| <library name="tagsoup-1.2.1.jar"/> |
| <library name="tika-core-2.3.0.jar"/> |
| <library name="tika-parser-apple-module-2.3.0.jar"/> |
| <library name="tika-parser-audiovideo-module-2.3.0.jar"/> |
| <library name="tika-parser-cad-module-2.3.0.jar"/> |
| <library name="tika-parser-code-module-2.3.0.jar"/> |
| <library name="tika-parser-crypto-module-2.3.0.jar"/> |
| <library name="tika-parser-digest-commons-2.3.0.jar"/> |
| <library name="tika-parser-font-module-2.3.0.jar"/> |
| <library name="tika-parser-html-commons-2.3.0.jar"/> |
| <library name="tika-parser-html-module-2.3.0.jar"/> |
| <library name="tika-parser-image-module-2.3.0.jar"/> |
| <library name="tika-parser-mail-commons-2.3.0.jar"/> |
| <library name="tika-parser-mail-module-2.3.0.jar"/> |
| <library name="tika-parser-microsoft-module-2.3.0.jar"/> |
| <library name="tika-parser-miscoffice-module-2.3.0.jar"/> |
| <library name="tika-parser-news-module-2.3.0.jar"/> |
| <library name="tika-parser-ocr-module-2.3.0.jar"/> |
| <library name="tika-parser-pdf-module-2.3.0.jar"/> |
| <library name="tika-parser-pkg-module-2.3.0.jar"/> |
| <library name="tika-parsers-standard-package-2.3.0.jar"/> |
| <library name="tika-parser-text-module-2.3.0.jar"/> |
| <library name="tika-parser-xml-module-2.3.0.jar"/> |
| <library name="tika-parser-xmp-commons-2.3.0.jar"/> |
| <library name="tika-parser-zip-commons-2.3.0.jar"/> |
| <library name="txw2-2.3.5.jar"/> |
| <library name="vinnie-2.0.2.jar"/> |
| <library name="vorbis-java-core-0.8.jar"/> |
| <library name="vorbis-java-tika-0.8.jar"/> |
| <library name="xmlbeans-5.0.3.jar"/> |
| <library name="xmpbox-2.0.25.jar"/> |
| <library name="xmpcore-6.1.11.jar"/> |
| <library name="xz-1.9.jar"/> |
| <!-- End Any23 dependencies --> |
| </runtime> |
| |
| <requires> <!-- Declares the single plugin this one imports: nutch-extensionpoints. --> |
| <import plugin="nutch-extensionpoints"/> <!-- NOTE(review): presumably the plugin that defines the extension points referenced by the extension elements in this file; confirm. --> |
| </requires> |
| |
| <!-- Registers org.apache.nutch.any23.Any23ParseFilter as an implementation |
| at the org.apache.nutch.parse.HtmlParseFilter extension point. --> |
| <extension |
| id="org.apache.nutch.any23.Any23ParseFilter" |
| name="Any23 parser" |
| point="org.apache.nutch.parse.HtmlParseFilter"> |
| <implementation |
| id="Any23Parser" |
| class="org.apache.nutch.any23.Any23ParseFilter"/> |
| </extension> |
| |
| <!-- Registers org.apache.nutch.any23.Any23IndexingFilter as an implementation |
| at the org.apache.nutch.indexer.IndexingFilter extension point. --> |
| <extension |
| id="org.apache.nutch.any23.Any23IndexingFilter" |
| name="Any23 indexing filter" |
| point="org.apache.nutch.indexer.IndexingFilter"> |
| <implementation |
| id="Any23IndexingFilter" |
| class="org.apache.nutch.any23.Any23IndexingFilter"/> |
| </extension> |
| |
| </plugin> |