| <?xml version="1.0" encoding="UTF-8"?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| |
| Author : mattmann |
| Description: This xml file represents a natural ordering for which parsing |
| plugin should get called for a particular mimeType. |
| --> |
| |
| <parse-plugins> |
| |
| <!-- |
| The following mimetypes are now handled by the default parser (parse-tika). |
| You can uncomment the associations below to override parse-tika |
| and choose which plugin should be used for a given content type. |
| |
| <mimeType name="application/msword"> |
| <plugin id="parse-msword" /> |
| </mimeType> |
| |
| <mimeType name="application/pdf"> |
| <plugin id="parse-pdf" /> |
| </mimeType> |
| |
| <mimeType name="application/postscript"> |
| <plugin id="parse-pdf" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.ms-excel"> |
| <plugin id="parse-msexcel" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.ms-powerpoint"> |
| <plugin id="parse-mspowerpoint" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.text"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.text-template"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.text-master"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.text-web"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.presentation"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.presentation-template"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.spreadsheet"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.oasis.opendocument.spreadsheet-template"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.sun.xml.calc"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.sun.xml.calc.template"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.sun.xml.impress"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.sun.xml.impress.template"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.sun.xml.writer"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/vnd.sun.xml.writer.template"> |
| <plugin id="parse-oo" /> |
| </mimeType> |
| |
| <mimeType name="application/x-csh"> |
| <plugin id="parse-text" /> |
| </mimeType> |
| |
| <mimeType name="application/x-kword"> |
| <plugin id="parse-msword" /> |
| </mimeType> |
| |
| <mimeType name="application/x-kspread"> |
| <plugin id="parse-msexcel" /> |
| </mimeType> |
| |
| <mimeType name="text/html"> |
| <plugin id="parse-html" /> |
| </mimeType> |
| |
| <mimeType name="text/plain"> |
| <plugin id="parse-text" /> |
| </mimeType> |
| |
| <mimeType name="text/richtext"> |
| <plugin id="parse-tika" /> |
| </mimeType> |
| |
| <mimeType name="text/rtf"> |
| <plugin id="parse-tika" /> |
| </mimeType> |
| |
| <mimeType name="text/sgml"> |
| <plugin id="parse-html" /> |
| </mimeType> |
| |
| <mimeType name="text/tab-separated-values"> |
| <plugin id="parse-msexcel" /> |
| </mimeType> |
| --> |
| |
| <!-- XHTML documents are routed to the HTML parser (parse-html). --> |
| <mimeType name="application/xhtml+xml"> |
|   <plugin id="parse-html"/> |
| </mimeType> |
| |
| <!-- RSS feeds: parse-rss is listed ahead of the feed plugin. --> |
| <mimeType name="application/rss+xml"> |
|   <plugin id="parse-rss"/> |
|   <plugin id="feed"/> |
| </mimeType> |
| |
| <!-- No bzip2-specific plugin is mapped in this file; the zip parser is attempted instead. --> |
| <mimeType name="application/x-bzip2"> |
|   <plugin id="parse-zip"/> |
| </mimeType> |
| |
| <!-- No gzip-specific plugin is mapped in this file; the zip parser is attempted instead. --> |
| <mimeType name="application/x-gzip"> |
|   <plugin id="parse-zip"/> |
| </mimeType> |
| |
| <!-- JavaScript content is handled by parse-js. --> |
| <mimeType name="application/x-javascript"> |
|   <plugin id="parse-js"/> |
| </mimeType> |
| |
| <!-- Shockwave Flash content is handled by parse-swf. --> |
| <mimeType name="application/x-shockwave-flash"> |
|   <plugin id="parse-swf"/> |
| </mimeType> |
| |
| <!-- Zip archives are handled by parse-zip. --> |
| <mimeType name="application/zip"> |
|   <plugin id="parse-zip"/> |
| </mimeType> |
| |
| <!-- Generic XML: parse-html is listed first, followed by parse-rss and then feed. --> |
| <mimeType name="text/xml"> |
|   <plugin id="parse-html"/> |
|   <plugin id="parse-rss"/> |
|   <plugin id="feed"/> |
| </mimeType> |
| |
| <!-- Example types served by the parse-ext plugin; the unit tests depend on these mappings. --> |
| <mimeType name="application/vnd.nutch.example.cat"> |
|   <plugin id="parse-ext"/> |
| </mimeType> |
| |
| <mimeType name="application/vnd.nutch.example.md5sum"> |
|   <plugin id="parse-ext"/> |
| </mimeType> |
| |
| <!-- Alias table: maps each short parse-xxx name used in the plugin id attributes |
| of this file to the extension implementation id declared in that plugin's |
| plugin.xml file. --> |
| <aliases> |
|   <alias name="parse-tika" extension-id="org.apache.nutch.parse.tika.Parser"/> |
|   <alias name="parse-ext" extension-id="ExtParser"/> |
|   <alias name="parse-html" extension-id="org.apache.nutch.parse.html.HtmlParser"/> |
|   <alias name="parse-js" extension-id="JSParser"/> |
|   <alias name="parse-msexcel" extension-id="org.apache.nutch.parse.msexcel.MSExcelParser"/> |
|   <alias name="parse-mspowerpoint" extension-id="org.apache.nutch.parse.mspowerpoint.MSPowerPointParser"/> |
|   <alias name="parse-msword" extension-id="org.apache.nutch.parse.msword.MSWordParser"/> |
|   <alias name="parse-oo" extension-id="org.apache.nutch.parse.oo.OpenDocument.Text"/> |
|   <alias name="parse-pdf" extension-id="org.apache.nutch.parse.pdf.PdfParser"/> |
|   <alias name="parse-rss" extension-id="org.apache.nutch.parse.rss.RSSParser"/> |
|   <alias name="feed" extension-id="org.apache.nutch.parse.feed.FeedParser"/> |
|   <alias name="parse-swf" extension-id="org.apache.nutch.parse.swf.SWFParser"/> |
|   <alias name="parse-text" extension-id="org.apache.nutch.parse.text.TextParser"/> |
|   <alias name="parse-zip" extension-id="org.apache.nutch.parse.zip.ZipParser"/> |
| </aliases> |
| |
| </parse-plugins> |