| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| <plugin id="parse-tika" name="Tika Parser Plug-in" version="1.0.0" provider-name="nutch.org"> |
| |
|   <!-- Descriptor for the "parse-tika" plug-in: runtime jars, required plug-ins, |
|        and the Parser extension this plug-in contributes. --> |
| |
|   <!-- Runtime libraries. parse-tika.jar is the only entry that declares an |
|        export (name="*"); every other jar is listed without one. --> |
|   <runtime> |
|     <library name="parse-tika.jar"> |
|       <export name="*"/> |
|     </library> |
| |
|     <!-- Version-pinned jars. NOTE(review): presumably these file names must match |
|          the jars shipped next to this descriptor; confirm against the build's |
|          dependency list before changing any version. --> |
|     <library name="apache-mime4j-core-0.7.jar"/> |
|     <library name="apache-mime4j-dom-0.7.jar"/> |
|     <library name="asm-3.1.jar"/> |
|     <library name="bcmail-jdk15-1.45.jar"/> |
|     <library name="bcprov-jdk15-1.45.jar"/> |
|     <library name="boilerpipe-1.1.0.jar"/> |
|     <library name="commons-codec-1.4.jar"/> |
|     <library name="commons-compress-1.1.jar"/> |
|     <library name="commons-httpclient-3.1.jar"/> |
|     <library name="commons-logging-1.1.1.jar"/> |
|     <library name="dom4j-1.6.1.jar"/> |
|     <library name="fontbox-1.6.0.jar"/> |
|     <library name="geronimo-stax-api_1.0_spec-1.0.1.jar"/> |
|     <library name="jdom-1.0.jar"/> |
|     <library name="jempbox-1.6.0.jar"/> |
|     <library name="metadata-extractor-2.4.0-beta-1.jar"/> |
|     <library name="netcdf-4.2-min.jar"/> |
|     <library name="pdfbox-1.6.0.jar"/> |
|     <library name="poi-3.8-beta4.jar"/> |
|     <library name="poi-ooxml-3.8-beta4.jar"/> |
|     <library name="poi-ooxml-schemas-3.8-beta4.jar"/> |
|     <library name="poi-scratchpad-3.8-beta4.jar"/> |
|     <library name="rome-0.9.jar"/> |
|     <library name="slf4j-api-1.5.6.jar"/> |
|     <library name="tagsoup-1.2.1.jar"/> |
|     <library name="tika-parsers-0.10.jar"/> |
|     <library name="xmlbeans-2.3.0.jar"/> |
|   </runtime> |
| |
|   <!-- Plug-ins this descriptor imports. --> |
|   <requires> |
|     <import plugin="nutch-extensionpoints"/> |
|   </requires> |
| |
|   <!-- Contributes org.apache.nutch.parse.tika.TikaParser as an implementation of |
|        the org.apache.nutch.parse.Parser extension point. --> |
|   <extension point="org.apache.nutch.parse.Parser" |
|              id="org.apache.nutch.parse.tika" |
|              name="TikaParser"> |
| |
|     <implementation id="org.apache.nutch.parse.tika.TikaParser" |
|                     class="org.apache.nutch.parse.tika.TikaParser"> |
|       <!-- NOTE(review): "*" presumably acts as a wildcard over content types; |
|            confirm how the host resolves this parameter. --> |
|       <parameter name="contentType" value="*"/> |
|     </implementation> |
| |
|   </extension> |
| |
| </plugin> |