| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| |
| Author : mattmann |
| Description: This XML file defines, for each mimeType, the preferred |
| order in which parsing plugins should be called. |
| --> |
| |
| <parse-plugins> |
| |
| <!-- Default mapping: if the mimeType is set to *, or if it cannot |
| be determined, parse-tika is used. --> |
| <mimeType name="*"> |
| <plugin id="parse-tika" /> |
| </mimeType> |
| |
| <mimeType name="text/html"> <!-- HTML is mapped to parse-html rather than parse-tika --> |
| <plugin id="parse-html" /> |
| </mimeType> |
| |
| <mimeType name="application/xhtml+xml"> <!-- XHTML uses the same plugin as text/html --> |
| <plugin id="parse-html" /> |
| </mimeType> |
| |
| <mimeType name="application/rss+xml"> <!-- Two candidate plugins, listed in call order |
| (see the file header): parse-tika first, then feed. NOTE(review): presumably feed is |
| only tried after parse-tika; confirm against the code that reads this file. --> |
| <plugin id="parse-tika" /> |
| <plugin id="feed" /> |
| </mimeType> |
| |
| <mimeType name="application/x-bzip2"> |
| <!-- No bzip2-specific plugin is aliased in this file, so try the zip parser. |
| NOTE(review): bzip2 is not the ZIP format; confirm parse-zip can read it. --> |
| <plugin id="parse-zip" /> |
| </mimeType> |
| |
| <mimeType name="application/x-gzip"> |
| <!-- No gzip-specific plugin is aliased in this file, so try the zip parser. |
| NOTE(review): gzip is not the ZIP format; confirm parse-zip can read it. --> |
| <plugin id="parse-zip" /> |
| </mimeType> |
| |
| <mimeType name="application/x-javascript"> |
| <plugin id="parse-js" /> |
| </mimeType> |
| |
| <mimeType name="application/x-shockwave-flash"> |
| <plugin id="parse-swf" /> |
| </mimeType> |
| |
| <mimeType name="application/zip"> |
| <plugin id="parse-zip" /> |
| </mimeType> |
| |
| <mimeType name="text/xml"> <!-- same plugin list and order as application/rss+xml --> |
| <plugin id="parse-tika" /> |
| <plugin id="feed" /> |
| </mimeType> |
| |
| <!-- Example types mapped to the parse-ext plugin; these entries are required for the unit tests to pass. --> |
| |
| <mimeType name="application/vnd.nutch.example.cat"> |
| <plugin id="parse-ext" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.nutch.example.md5sum"> |
| <plugin id="parse-ext" /> |
| </mimeType> |
| |
| <!-- Aliases: map each plugin id used in the mimeType entries above to the |
| extension implementation id described in that plugin's plugin.xml file. --> |
| <aliases> |
| <alias name="parse-html" |
| extension-id="org.apache.nutch.parse.html.HtmlParser" /> |
| <alias name="parse-tika" |
| extension-id="org.apache.nutch.parse.tika.TikaParser" /> |
| <alias name="parse-ext" extension-id="ExtParser" /> <!-- NOTE(review): unlike the other |
| aliases, ExtParser and JSParser are not fully qualified names; confirm they match the |
| extension ids in the corresponding plugin.xml files. --> |
| <alias name="parse-js" extension-id="JSParser" /> |
| <alias name="feed" |
| extension-id="org.apache.nutch.parse.feed.FeedParser" /> |
| <alias name="parse-swf" |
| extension-id="org.apache.nutch.parse.swf.SWFParser" /> |
| <alias name="parse-zip" |
| extension-id="org.apache.nutch.parse.zip.ZipParser" /> |
| </aliases> |
| |
| </parse-plugins> |