blob: c4efd458ec1fdab87fcc01c7b35712b1ea5526c1 [file] [log] [blame]
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +01001package eu.mulk.aendggner;
2
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +01003import java.io.BufferedReader;
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +01004import java.io.IOException;
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +01005import java.nio.file.Files;
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +01006import java.nio.file.Path;
7import java.util.List;
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +01008import java.util.concurrent.Callable;
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +01009import java.util.logging.LogManager;
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +010010import org.apache.tika.config.TikaConfig;
11import org.apache.tika.exception.TikaException;
12import org.apache.tika.io.TikaInputStream;
13import org.apache.tika.metadata.Metadata;
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +010014import org.apache.tika.parser.AutoDetectParser;
15import org.apache.tika.parser.ParseContext;
16import org.apache.tika.parser.Parser;
17import org.apache.tika.parser.ParsingReader;
18import org.jboss.logging.Logger;
19import org.xml.sax.SAXException;
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +010020import picocli.CommandLine;
21import picocli.CommandLine.Command;
Matthias Andreas Benkardd9f32a82020-11-22 14:36:59 +010022import picocli.CommandLine.Parameters;
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +010023
Matthias Andreas Benkardd9f32a82020-11-22 14:36:59 +010024@Command(
25 name = "ÄndGgner",
26 mixinStandardHelpOptions = true,
27 version = "ÄndGgner 0.1",
28 description = "Displays German amendment acts in a user-friendly, consolidated way.")
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +010029public class AendGgner implements Callable<Integer> {
30
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +010031 private static final Logger log = Logger.getLogger(AendGgner.class);
32
Matthias Andreas Benkardd9f32a82020-11-22 14:36:59 +010033 @Parameters(index = "0", description = "The base text to modify.")
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +010034 private Path baseFile;
Matthias Andreas Benkardd9f32a82020-11-22 14:36:59 +010035
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +010036 @Parameters(arity = "*", description = "The diff relative to the base text.")
37 private List<Path> patches;
Matthias Andreas Benkardd9f32a82020-11-22 14:36:59 +010038
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +010039 public static void main(String... args) {
40 int exitCode = new CommandLine(new AendGgner()).execute(args);
41 System.exit(exitCode);
42 }
43
44 @Override
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +010045 public final Integer call() throws TikaException, IOException, SAXException {
46 setupLogging();
47
48 log.debugf("Logging configured.");
49
50 TikaConfig tika;
51 try (var configResource =
52 this.getClass().getResourceAsStream("/eu/mulk/aendggner/tika-config.xml")) {
53 tika = new TikaConfig(configResource);
54 }
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +010055
56 for (var file : patches) {
57 var metadata = new Metadata();
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +010058 metadata.set(Metadata.RESOURCE_NAME_KEY, file.getFileName().toString());
59
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +010060 try (var is = TikaInputStream.get(file)) {
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +010061 var mimetype = tika.getDetector().detect(TikaInputStream.get(file), metadata);
62 log.infof("File %s is %s.", file, mimetype);
63 }
64
65 var parser = new AutoDetectParser(tika);
66 try (var in = Files.newInputStream(file);
67 var reader =
68 new BufferedReader(
69 new ParsingReader(parser, in, metadata, makeParseContext(parser)))) {
70 log.infof("%s: %d lines of text.", file, reader.lines().count());
71 // reader.lines().forEachOrdered(x -> log.infof("%s: %s", file, x));
Matthias Andreas Benkard1d92ac82020-11-23 06:31:43 +010072 }
73 }
74
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +010075 return 0;
76 }
Matthias Andreas Benkard545aeb62020-12-06 15:16:22 +010077
78 private static ParseContext makeParseContext(Parser parser) {
79 var parseContext = new ParseContext();
80 parseContext.set(Parser.class, parser);
81 return parseContext;
82 }
83
84 private static void setupLogging() throws IOException {
85 try (var loggingProperties =
86 AendGgner.class.getResourceAsStream("/eu/mulk/aendggner/logging.properties")) {
87 LogManager.getLogManager().readConfiguration(loggingProperties);
88 }
89 }
Matthias Andreas Benkardb291c362020-11-22 10:22:27 +010090}