<?xml version="1.0" encoding="UTF-8"?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<!--
 Descriptor for the "parse-tika" plugin. It declares the libraries the
 plugin loads at runtime, the plugins it imports, and the single Parser
 extension it contributes.
-->
<plugin
    id="parse-tika"
    name="Tika Parser Plug-in"
    version="1.0.0"
    provider-name="nutch.org">

  <runtime>
    <!-- The plugin's own jar; the only library that carries an export
         element (name="*"). -->
    <library name="parse-tika.jar">
      <export name="*"/>
    </library>

    <!-- Additional libraries, declared without an export element and
         listed alphabetically. NOTE(review): each name is presumably
         expected to match a jar file shipped with the plugin; confirm
         against the build output whenever a version changes. -->
    <library name="apache-mime4j-core-0.7.2.jar"/>
    <library name="apache-mime4j-dom-0.7.2.jar"/>
    <library name="asm-5.0.4.jar"/>
    <library name="aspectjrt-1.8.0.jar"/>
    <library name="bcmail-jdk15on-1.52.jar"/>
    <library name="bcpkix-jdk15on-1.52.jar"/>
    <library name="bcprov-jdk15on-1.52.jar"/>
    <library name="boilerpipe-1.1.0.jar"/>
    <library name="bzip2-0.9.1.jar"/>
    <library name="c3p0-0.9.1.1.jar"/>
    <library name="cdm-4.5.5.jar"/>
    <library name="commons-codec-1.6.jar"/>
    <library name="commons-compress-1.10.jar"/>
    <library name="commons-csv-1.0.jar"/>
    <library name="commons-exec-1.3.jar"/>
    <library name="commons-io-2.4.jar"/>
    <library name="commons-lang-2.6.jar"/>
    <library name="commons-logging-1.1.3.jar"/>
    <library name="commons-logging-api-1.1.jar"/>
    <library name="commons-vfs2-2.0.jar"/>
    <library name="cxf-core-3.0.3.jar"/>
    <library name="cxf-rt-frontend-jaxrs-3.0.3.jar"/>
    <library name="cxf-rt-rs-client-3.0.3.jar"/>
    <library name="cxf-rt-transports-http-3.0.3.jar"/>
    <library name="ehcache-core-2.6.2.jar"/>
    <library name="fontbox-1.8.10.jar"/>
    <library name="geoapi-3.0.0.jar"/>
    <library name="grib-4.5.5.jar"/>
    <library name="gson-2.2.4.jar"/>
    <library name="guava-17.0.jar"/>
    <library name="httpmime-4.2.6.jar"/>
    <library name="httpservices-4.5.5.jar"/>
    <library name="isoparser-1.0.2.jar"/>
    <library name="jackcess-2.1.2.jar"/>
    <library name="jackcess-encrypt-2.1.1.jar"/>
    <library name="java-libpst-0.8.1.jar"/>
    <library name="javax.annotation-api-1.2.jar"/>
    <library name="javax.ws.rs-api-2.0.1.jar"/>
    <library name="jcip-annotations-1.0.jar"/>
    <library name="jcommander-1.35.jar"/>
    <library name="jdom-2.0.2.jar"/>
    <library name="jdom2-2.0.4.jar"/>
    <library name="jempbox-1.8.10.jar"/>
    <library name="jhighlight-1.0.2.jar"/>
    <library name="jj2000-5.2.jar"/>
    <library name="jmatio-1.0.jar"/>
    <library name="jna-4.1.0.jar"/>
    <library name="joda-time-2.2.jar"/>
    <library name="json-20140107.jar"/>
    <library name="json-simple-1.1.1.jar"/>
    <library name="jsoup-1.7.2.jar"/>
    <library name="jsr-275-0.9.3.jar"/>
    <library name="juniversalchardet-1.0.3.jar"/>
    <library name="junrar-0.7.jar"/>
    <library name="jwnl-1.3.3.jar"/>
    <library name="maven-scm-api-1.4.jar"/>
    <library name="maven-scm-provider-svn-commons-1.4.jar"/>
    <library name="maven-scm-provider-svnexe-1.4.jar"/>
    <library name="metadata-extractor-2.8.0.jar"/>
    <library name="netcdf4-4.5.5.jar"/>
    <library name="opennlp-maxent-3.0.3.jar"/>
    <library name="opennlp-tools-1.5.3.jar"/>
    <library name="pdfbox-1.8.10.jar"/>
    <library name="plexus-utils-1.5.6.jar"/>
    <library name="poi-3.13.jar"/>
    <library name="poi-ooxml-3.13.jar"/>
    <library name="poi-ooxml-schemas-3.13.jar"/>
    <library name="poi-scratchpad-3.13.jar"/>
    <library name="protobuf-java-2.5.0.jar"/>
    <library name="quartz-2.2.0.jar"/>
    <library name="regexp-1.3.jar"/>
    <library name="rome-1.5.1.jar"/>
    <library name="rome-utils-1.5.1.jar"/>
    <library name="sis-metadata-0.5.jar"/>
    <library name="sis-netcdf-0.5.jar"/>
    <library name="sis-referencing-0.5.jar"/>
    <library name="sis-storage-0.5.jar"/>
    <library name="sis-utility-0.5.jar"/>
    <library name="slf4j-api-1.7.12.jar"/>
    <library name="stax2-api-3.1.4.jar"/>
    <library name="tagsoup-1.2.1.jar"/>
    <library name="tika-parsers-1.12.jar"/>
    <library name="udunits-4.5.5.jar"/>
    <library name="vorbis-java-core-0.6.jar"/>
    <library name="vorbis-java-tika-0.6.jar"/>
    <library name="woodstox-core-asl-4.4.1.jar"/>
    <library name="xmlbeans-2.6.0.jar"/>
    <library name="xmlschema-core-2.1.0.jar"/>
    <library name="xmpcore-5.1.2.jar"/>
    <library name="xz-1.5.jar"/>
  </runtime>

  <!-- Plugins imported by this plugin. -->
  <requires>
    <import plugin="nutch-extensionpoints"/>
    <import plugin="lib-nekohtml"/>
  </requires>

  <!-- Contributes TikaParser to the org.apache.nutch.parse.Parser
       extension point. -->
  <extension point="org.apache.nutch.parse.Parser"
             id="org.apache.nutch.parse.tika"
             name="TikaParser">

    <implementation id="org.apache.nutch.parse.tika.TikaParser"
                    class="org.apache.nutch.parse.tika.TikaParser">
      <!-- NOTE(review): "*" presumably acts as a wildcard matching any
           content type; confirm against the parser selection logic. -->
      <parameter name="contentType" value="*"/>
    </implementation>

  </extension>

</plugin>