Add Apache Tika, remove iText7.
Change-Id: I9736779cf57050a1dfd43d18625eb464a7179a9f
diff --git a/pom.xml b/pom.xml
index e58f095..2bc4582 100644
--- a/pom.xml
+++ b/pom.xml
@@ -36,6 +36,7 @@
<picocli.version>4.3.2</picocli.version>
<pdfocr.version>1.0.2</pdfocr.version>
<term4j.version>0.4.0</term4j.version>
+ <tika.version>1.24.1</tika.version>
</properties>
<dependencies>
@@ -67,28 +68,31 @@
<version>${picocli.version}</version>
</dependency>
- <!-- PDF manipulation -->
+ <!-- Document reading -->
<dependency>
- <groupId>com.itextpdf</groupId>
- <artifactId>itext7-core</artifactId>
- <version>${itext7.version}</version>
- <type>pom</type>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>${tika.version}</version>
</dependency>
<dependency>
- <groupId>com.itextpdf</groupId>
- <artifactId>pdfocr-root</artifactId>
- <version>${pdfocr.version}</version>
- <type>pom</type>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-bundle</artifactId>
+ <version>${tika.version}</version>
</dependency>
<dependency>
- <groupId>com.itextpdf</groupId>
- <artifactId>pdfocr-api</artifactId>
- <version>${pdfocr.version}</version>
- </dependency>
- <dependency>
- <groupId>com.itextpdf</groupId>
- <artifactId>pdfocr-tesseract4</artifactId>
- <version>${pdfocr.version}</version>
+ <groupId>net.sourceforge.tess4j</groupId>
+ <artifactId>tess4j</artifactId>
+ <version>4.5.1</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>jcl-over-slf4j</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>log4j-over-slf4j</artifactId>
+ </exclusion>
+ </exclusions>
</dependency>
<!-- Terminal output -->
diff --git a/src/main/java/eu/mulk/aendggner/AendGgner.java b/src/main/java/eu/mulk/aendggner/AendGgner.java
index 618f36f..e510efb 100644
--- a/src/main/java/eu/mulk/aendggner/AendGgner.java
+++ b/src/main/java/eu/mulk/aendggner/AendGgner.java
@@ -1,7 +1,13 @@
package eu.mulk.aendggner;
-import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.List;
import java.util.concurrent.Callable;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Parameters;
@@ -14,10 +20,10 @@
public class AendGgner implements Callable<Integer> {
@Parameters(index = "0", description = "The base text to modify.")
- private File baseFile;
+ private Path baseFile;
- @Parameters(index = "1", description = "The diff relative to the base text.")
- private File diffFile;
+ @Parameters(index = "1..*", arity = "*", description = "The diff relative to the base text.")
+ private List<Path> patches = List.of();
public static void main(String... args) {
int exitCode = new CommandLine(new AendGgner()).execute(args);
@@ -25,8 +31,27 @@
}
+ /**
+ * Detects the MIME type of each file in {@code patches} and prints it to standard output.
+ *
+ * @return the exit code, always 0
+ * @throws TikaException declared because building the default {@code TikaConfig} may fail
+ * @throws IOException if a file cannot be opened or read during detection
+ */
@Override
- public final Integer call() {
- System.out.println("Hi.");
+ public final Integer call() throws TikaException, IOException {
+ var tika = new TikaConfig();
+
+ for (var file : patches) {
+ var metadata = new Metadata();
+ metadata.set(Metadata.RESOURCE_NAME_KEY, file.toString());
+ try (var is = TikaInputStream.get(file)) {
+ // Detect on the stream owned by try-with-resources; a second
+ // TikaInputStream.get(file) here would never be closed.
+ var mimetype = tika.getDetector().detect(is, metadata);
+ System.out.printf("File %s is %s.\n", file, mimetype);
+ }
+ }
+
return 0;
}
}
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
index e7d9c77..dbe7d62 100644
--- a/src/main/java/module-info.java
+++ b/src/main/java/module-info.java
@@ -1,3 +1,6 @@
module aendggner {
requires info.picocli;
+ requires org.apache.tika.core;
+ requires java.sql;
+ opens eu.mulk.aendggner;
}