<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<!--
  Tika configuration that registers a single parser, TesseractOCRParser,
  and overrides ten of its parameters. Each <param> carries the parameter
  name, a declared type, and the value as element text.
-->
<properties>
  <parsers>
    <parser class="org.apache.tika.parser.ocr.TesseractOCRParser">
      <params>
        <!--
          Execution limit and image rendering settings.
          NOTE(review): units are not stated in this file; presumably
          timeout is in seconds, density in DPI, depth in bits and resize
          a percentage. Confirm against the TesseractOCRConfig
          documentation for the Tika version in use.
        -->
        <param name="timeout" type="int">240</param>
        <param name="density" type="int">200</param>
        <param name="depth" type="int">8</param>
        <param name="resize" type="int">300</param>

        <!--
          Input size threshold, output format, and image preprocessing
          switches. Both boolean switches are turned off here.
          NOTE(review): "box" is presumably an image resampling filter
          name, and the density/depth/resize/filter values above may only
          take effect when preprocessing is enabled. Confirm.
        -->
        <param name="minFileSizeToOcr" type="long">1</param>
        <param name="outputType" type="string">hocr</param>
        <param name="filter" type="string">box</param>
        <param name="applyRotation" type="bool">false</param>
        <param name="enableImagePreprocessing" type="bool">false</param>

        <!--
          OCR language selection.
          NOTE(review): "fra+deu" looks like Tesseract's plus-separated
          multi-language syntax (French and German); the matching language
          data must presumably be installed. Confirm.
        -->
        <param name="language" type="string">fra+deu</param>

      </params>
    </parser>
  </parsers>
</properties>