Configure Tika parsers and application logging.
With the new configuration, Tika can now extract text from PDFs and
XML documents.
Also configure logging for the application: add JBoss Logging and bind
SLF4J to java.util.logging via slf4j-jdk14.
Change-Id: I7a89c2b232ed4e220665dd335a5f5a0cc3ef2994
diff --git a/pom.xml b/pom.xml
index 2bc4582..3546067 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,6 +1,8 @@
<?xml version="1.0"?>
-<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
@@ -30,13 +32,19 @@
<basic-annotations.version>0.2.0</basic-annotations.version>
<findbugs-jsr305.version>3.0.2</findbugs-jsr305.version>
<google.java.format.version>1.8</google.java.format.version>
- <itext7.version>7.1.13</itext7.version>
<kotlin-annotations.version>1.4.10</kotlin-annotations.version>
<lanterna.version>3.0.4</lanterna.version>
<picocli.version>4.3.2</picocli.version>
<pdfocr.version>1.0.2</pdfocr.version>
<term4j.version>0.4.0</term4j.version>
- <tika.version>1.24.1</tika.version>
+ <tess4j.version>4.5.1</tess4j.version>
+ <tika.version>1.25</tika.version>
+ <jbig2-imageio.version>3.0.3</jbig2-imageio.version>
+ <jai-imageio-core.version>1.4.0</jai-imageio-core.version>
+ <jai-imageio-jpeg2000.version>1.3.0</jai-imageio-jpeg2000.version>
+ <sqlite-jdbc.version>3.30.1</sqlite-jdbc.version>
+ <jboss-logging.version>3.4.1.Final</jboss-logging.version>
+ <slf4j.version>1.7.30</slf4j.version>
</properties>
<dependencies>
@@ -68,6 +76,13 @@
<version>${picocli.version}</version>
</dependency>
+ <!-- Logging -->
+ <dependency>
+ <groupId>org.jboss.logging</groupId>
+ <artifactId>jboss-logging</artifactId>
+ <version>${jboss-logging.version}</version>
+ </dependency>
+
<!-- Document reading -->
<dependency>
<groupId>org.apache.tika</groupId>
@@ -80,9 +95,54 @@
<version>${tika.version}</version>
</dependency>
<dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>${tika.version}</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.pdfbox</groupId>
+ <artifactId>jbig2-imageio</artifactId>
+ <version>${jbig2-imageio.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.github.jai-imageio</groupId>
+ <artifactId>jai-imageio-core</artifactId>
+ <version>${jai-imageio-core.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>com.github.jai-imageio</groupId>
+ <artifactId>jai-imageio-jpeg2000</artifactId>
+ <version>${jai-imageio-jpeg2000.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.xerial</groupId>
+ <artifactId>sqlite-jdbc</artifactId>
+ <version>${sqlite-jdbc.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ <version>${slf4j.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>log4j-over-slf4j</artifactId>
+ <version>${slf4j.version}</version>
+ </dependency>
+ <dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
- <version>4.5.1</version>
+ <version>${tess4j.version}</version>
<exclusions>
<exclusion>
<groupId>org.slf4j</groupId>
@@ -92,6 +152,14 @@
<groupId>org.slf4j</groupId>
<artifactId>log4j-over-slf4j</artifactId>
</exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jul-to-slf4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>ch.qos.logback</groupId>
+ <artifactId>logback-classic</artifactId>
+ </exclusion>
</exclusions>
</dependency>
@@ -111,6 +179,12 @@
<build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ </resource>
+ </resources>
+
<pluginManagement>
<plugins>