<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Apache Tika configuration.

  Declares the encoding detector, translator, MIME detector and parser
  classes to use. The PDF and Tesseract OCR parsers are excluded from the
  DefaultParser composite and declared explicitly below it, so that the PDF
  parser can be given its own parameters.
-->
<properties>
  <!-- NOTE(review): presumably makes service-loading failures non-fatal;
       confirm against the Tika configuration documentation. -->
  <service-loader dynamic="true" loadErrorHandler="IGNORE"/>

  <encodingDetectors>
    <encodingDetector class="org.apache.tika.detect.DefaultEncodingDetector"/>
  </encodingDetectors>

  <translator class="org.apache.tika.language.translate.DefaultTranslator"/>

  <detectors>
    <detector class="org.apache.tika.detect.DefaultDetector"/>
  </detectors>

  <parsers>
    <!-- Default parser set, minus the two parsers that are configured
         individually further down. -->
    <parser class="org.apache.tika.parser.DefaultParser">
      <parser-exclude class="org.apache.tika.parser.pdf.PDFParser"/>
      <parser-exclude class="org.apache.tika.parser.ocr.TesseractOCRParser"/>
    </parser>

    <!-- PDF parser with inline image extraction switched on and the OCR
         strategy set to "auto".
         NOTE(review): the exact meaning of "auto" is defined by the Tika
         PDF parser, not by this file; verify for the Tika version in use. -->
    <parser class="org.apache.tika.parser.pdf.PDFParser">
      <params>
        <param name="extractInlineImages" type="bool">true</param>
        <param name="ocrStrategy" type="string">auto</param>
      </params>
    </parser>

    <!-- Tesseract OCR parser, declared without any parameters. -->
    <parser class="org.apache.tika.parser.ocr.TesseractOCRParser"/>
  </parsers>
</properties>