package eu.mulk.aendggner;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.logging.LogManager;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.ParsingReader;
import org.jboss.logging.Logger;
import org.xml.sax.SAXException;
import picocli.CommandLine;
import picocli.CommandLine.Command;
import picocli.CommandLine.Parameters;

Matthias Andreas Benkard | d9f32a8 | 2020-11-22 14:36:59 +0100 | [diff] [blame] | 24 | @Command( |
| 25 | name = "ÄndGgner", |
| 26 | mixinStandardHelpOptions = true, |
| 27 | version = "ÄndGgner 0.1", |
| 28 | description = "Displays German amendment acts in a user-friendly, consolidated way.") |
Matthias Andreas Benkard | b291c36 | 2020-11-22 10:22:27 +0100 | [diff] [blame] | 29 | public class AendGgner implements Callable<Integer> { |
| 30 | |
Matthias Andreas Benkard | 545aeb6 | 2020-12-06 15:16:22 +0100 | [diff] [blame^] | 31 | private static final Logger log = Logger.getLogger(AendGgner.class); |
| 32 | |
Matthias Andreas Benkard | d9f32a8 | 2020-11-22 14:36:59 +0100 | [diff] [blame] | 33 | @Parameters(index = "0", description = "The base text to modify.") |
Matthias Andreas Benkard | 1d92ac8 | 2020-11-23 06:31:43 +0100 | [diff] [blame] | 34 | private Path baseFile; |
Matthias Andreas Benkard | d9f32a8 | 2020-11-22 14:36:59 +0100 | [diff] [blame] | 35 | |
Matthias Andreas Benkard | 1d92ac8 | 2020-11-23 06:31:43 +0100 | [diff] [blame] | 36 | @Parameters(arity = "*", description = "The diff relative to the base text.") |
| 37 | private List<Path> patches; |
Matthias Andreas Benkard | d9f32a8 | 2020-11-22 14:36:59 +0100 | [diff] [blame] | 38 | |
Matthias Andreas Benkard | b291c36 | 2020-11-22 10:22:27 +0100 | [diff] [blame] | 39 | public static void main(String... args) { |
| 40 | int exitCode = new CommandLine(new AendGgner()).execute(args); |
| 41 | System.exit(exitCode); |
| 42 | } |
| 43 | |
| 44 | @Override |
Matthias Andreas Benkard | 545aeb6 | 2020-12-06 15:16:22 +0100 | [diff] [blame^] | 45 | public final Integer call() throws TikaException, IOException, SAXException { |
| 46 | setupLogging(); |
| 47 | |
| 48 | log.debugf("Logging configured."); |
| 49 | |
| 50 | TikaConfig tika; |
| 51 | try (var configResource = |
| 52 | this.getClass().getResourceAsStream("/eu/mulk/aendggner/tika-config.xml")) { |
| 53 | tika = new TikaConfig(configResource); |
| 54 | } |
Matthias Andreas Benkard | 1d92ac8 | 2020-11-23 06:31:43 +0100 | [diff] [blame] | 55 | |
| 56 | for (var file : patches) { |
| 57 | var metadata = new Metadata(); |
Matthias Andreas Benkard | 545aeb6 | 2020-12-06 15:16:22 +0100 | [diff] [blame^] | 58 | metadata.set(Metadata.RESOURCE_NAME_KEY, file.getFileName().toString()); |
| 59 | |
Matthias Andreas Benkard | 1d92ac8 | 2020-11-23 06:31:43 +0100 | [diff] [blame] | 60 | try (var is = TikaInputStream.get(file)) { |
Matthias Andreas Benkard | 545aeb6 | 2020-12-06 15:16:22 +0100 | [diff] [blame^] | 61 | var mimetype = tika.getDetector().detect(TikaInputStream.get(file), metadata); |
| 62 | log.infof("File %s is %s.", file, mimetype); |
| 63 | } |
| 64 | |
| 65 | var parser = new AutoDetectParser(tika); |
| 66 | try (var in = Files.newInputStream(file); |
| 67 | var reader = |
| 68 | new BufferedReader( |
| 69 | new ParsingReader(parser, in, metadata, makeParseContext(parser)))) { |
| 70 | log.infof("%s: %d lines of text.", file, reader.lines().count()); |
| 71 | // reader.lines().forEachOrdered(x -> log.infof("%s: %s", file, x)); |
Matthias Andreas Benkard | 1d92ac8 | 2020-11-23 06:31:43 +0100 | [diff] [blame] | 72 | } |
| 73 | } |
| 74 | |
Matthias Andreas Benkard | b291c36 | 2020-11-22 10:22:27 +0100 | [diff] [blame] | 75 | return 0; |
| 76 | } |
Matthias Andreas Benkard | 545aeb6 | 2020-12-06 15:16:22 +0100 | [diff] [blame^] | 77 | |
| 78 | private static ParseContext makeParseContext(Parser parser) { |
| 79 | var parseContext = new ParseContext(); |
| 80 | parseContext.set(Parser.class, parser); |
| 81 | return parseContext; |
| 82 | } |
| 83 | |
| 84 | private static void setupLogging() throws IOException { |
| 85 | try (var loggingProperties = |
| 86 | AendGgner.class.getResourceAsStream("/eu/mulk/aendggner/logging.properties")) { |
| 87 | LogManager.getLogManager().readConfiguration(loggingProperties); |
| 88 | } |
| 89 | } |
Matthias Andreas Benkard | b291c36 | 2020-11-22 10:22:27 +0100 | [diff] [blame] | 90 | } |