<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Apache Tika configuration.

  Declares the encoding detector, translator, detector and parser classes to
  use. The PDF and Tesseract OCR parsers are excluded from the default
  composite parser and declared on their own further down; the PDF parser
  carries explicit parameters there.
-->
<properties>
  <!--
    Service loader settings: dynamic="true", load errors set to IGNORE.
    NOTE(review): IGNORE presumably suppresses failures when an optional
    service cannot be loaded; confirm against the Tika version in use.
  -->
  <service-loader dynamic="true" loadErrorHandler="IGNORE"/>

  <!-- Character-encoding detection: the stock default detector only. -->
  <encodingDetectors>
    <encodingDetector class="org.apache.tika.detect.DefaultEncodingDetector"/>
  </encodingDetectors>

  <!-- Translator implementation: the stock default. -->
  <translator class="org.apache.tika.language.translate.DefaultTranslator"/>

  <!-- Detection: the stock default detector only. -->
  <detectors>
    <detector class="org.apache.tika.detect.DefaultDetector"/>
  </detectors>

  <parsers>

    <!--
      Default composite parser minus the two parsers that are declared
      separately below.
      NOTE(review): the exclusions presumably prevent the same parser from
      being registered twice (once by default, once explicitly); verify.
    -->
    <parser class="org.apache.tika.parser.DefaultParser">
      <parser-exclude class="org.apache.tika.parser.pdf.PDFParser"/>
      <parser-exclude class="org.apache.tika.parser.ocr.TesseractOCRParser"/>
    </parser>

    <!--
      PDF parser with explicit parameters:
        extractInlineImages = true
        ocrStrategy         = auto
      NOTE(review): accepted ocrStrategy values depend on the Tika release;
      confirm that "auto" is supported by the deployed version.
    -->
    <parser class="org.apache.tika.parser.pdf.PDFParser">
      <params>
        <param name="extractInlineImages" type="bool">true</param>
        <param name="ocrStrategy" type="string">auto</param>
      </params>
    </parser>

    <!-- Tesseract OCR parser, declared without any parameters. -->
    <parser class="org.apache.tika.parser.ocr.TesseractOCRParser"/>

  </parsers>
</properties>