blob: c27fa45b72527c1dfb00ca9cb53891ba9f7d7d30 [file] [log] [blame]
Matthias Andreas Benkardd9b95882020-01-24 11:42:49 +01001package eu.mulk.mulkcms2.benki.wiki;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +01002
Matthias Andreas Benkardd9b95882020-01-24 11:42:49 +01003import eu.mulk.mulkcms2.benki.users.User;
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +01004import io.quarkus.hibernate.orm.panache.PanacheEntityBase;
Matthias Andreas Benkarde3bc3ee2023-08-06 16:21:11 +02005import jakarta.persistence.Column;
6import jakarta.persistence.Entity;
7import jakarta.persistence.FetchType;
8import jakarta.persistence.GeneratedValue;
9import jakarta.persistence.GenerationType;
10import jakarta.persistence.Id;
11import jakarta.persistence.JoinColumn;
12import jakarta.persistence.ManyToOne;
13import jakarta.persistence.Table;
Matthias Andreas Benkardd9b95882020-01-24 11:42:49 +010014import java.time.OffsetDateTime;
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010015import java.util.function.Function;
16import java.util.regex.Pattern;
17import java.util.stream.Collectors;
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020018import javax.annotation.CheckForNull;
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010019import org.jsoup.Jsoup;
20import org.jsoup.nodes.Document;
21import org.jsoup.nodes.Element;
22import org.jsoup.nodes.TextNode;
23import org.jsoup.parser.Tag;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010024
25@Entity
Matthias Andreas Benkard57c9a8a2020-01-24 19:09:38 +010026@Table(name = "wiki_page_revisions", schema = "benki")
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +010027public class WikiPageRevision extends PanacheEntityBase {
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010028
29 @Id
Matthias Andreas Benkard0246c3e2020-01-27 05:39:08 +010030 @GeneratedValue(strategy = GenerationType.IDENTITY)
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010031 @Column(name = "id", nullable = false)
Matthias Andreas Benkard0246c3e2020-01-27 05:39:08 +010032 public Integer id;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010033
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010034 @Column(name = "date", nullable = true)
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020035 @CheckForNull
Matthias Andreas Benkardd9b95882020-01-24 11:42:49 +010036 public OffsetDateTime date;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010037
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010038 @Column(name = "title", nullable = true, length = -1)
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020039 @CheckForNull
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +010040 public String title;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010041
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010042 @Column(name = "content", nullable = true, length = -1)
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020043 @CheckForNull
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +010044 public String content;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010045
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010046 @Column(name = "format", nullable = true, length = -1)
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020047 @CheckForNull
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +010048 public String format;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010049
Matthias Andreas Benkardaa754802020-01-24 11:55:26 +010050 @ManyToOne(fetch = FetchType.LAZY)
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010051 @JoinColumn(name = "page", referencedColumnName = "id", nullable = false)
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +010052 public WikiPage page;
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010053
Matthias Andreas Benkardaa754802020-01-24 11:55:26 +010054 @ManyToOne(fetch = FetchType.LAZY)
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +010055 @JoinColumn(name = "author", referencedColumnName = "id")
Matthias Andreas Benkard35cb1592020-01-24 11:05:20 +010056 public User author;
Matthias Andreas Benkard5d075272020-01-26 18:05:10 +010057
58 public WikiPageRevision() {}
59
60 public WikiPageRevision(
61 OffsetDateTime date,
62 String title,
63 String content,
64 String format,
65 WikiPage page,
66 User author) {
67 this.date = date;
68 this.title = title;
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010069 this.content = unhrefify(unwikilinkify(Jsoup.parse(content))).select("body").html();
Matthias Andreas Benkard5d075272020-01-26 18:05:10 +010070 this.format = format;
71 this.page = page;
72 this.author = author;
73 }
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010074
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020075 @CheckForNull
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010076 public String enrichedContent() {
Matthias Andreas Benkard1e7674c2020-04-18 20:28:51 +020077 if (content == null) {
78 return null;
79 }
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010080 return wikilinkify(hrefify(Jsoup.parse(content))).select("body").html();
81 }
82
83 private static Document tagsoupMapText(Document soup, Function<String, String> fn) {
84 for (var subnode :
85 soup.select(":not(a):not(a *)").stream()
86 .flatMap(node -> node.childNodes().stream())
87 .collect(Collectors.toUnmodifiableList())) {
88 if (subnode instanceof TextNode) {
89 var newNode = new Element(Tag.valueOf("span"), "");
90 newNode.html(fn.apply(((TextNode) subnode).text()));
91 subnode.replaceWith(newNode);
92 newNode.unwrap();
93 }
94 }
95 return soup;
96 }
97
Matthias Andreas Benkard593765d2020-04-18 20:44:07 +020098 private static final Pattern WIKIWORD_REGEX =
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +010099 Pattern.compile(
100 "\\p{javaUpperCase}+\\p{javaLowerCase}+\\p{javaUpperCase}+\\p{javaLowerCase}+\\w+");
Matthias Andreas Benkard593765d2020-04-18 20:44:07 +0200101 private static final Pattern URL_REGEX =
Matthias Andreas Benkard97130f92020-01-27 21:03:39 +0100102 Pattern.compile("\\(?\\bhttps?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]");
103
104 private static Document hrefify(Document soup) {
105 return tagsoupMapText(
106 soup,
107 x ->
108 URL_REGEX
109 .matcher(x)
110 .replaceAll(
111 match -> {
112 var s = match.group();
113 var leftParen = s.startsWith("(");
114 var rightParen = s.endsWith(")");
115 var url =
116 s.substring(leftParen ? 1 : 0, rightParen ? s.length() - 1 : s.length());
117 return String.format(
118 "%s<a href=\"%s\" class=\"benkiautohref\">%s</a>%s",
119 leftParen ? "(" : "", url, url, rightParen ? ")" : "");
120 }));
121 }
122
123 private static Document unhrefify(Document soup) {
124 soup.select(".benkiautohref").unwrap();
125 return soup;
126 }
127
128 private static Document wikilinkify(Document soup) {
129 return tagsoupMapText(
130 soup,
131 x ->
132 WIKIWORD_REGEX
133 .matcher(x)
134 .replaceAll(
135 match ->
136 String.format(
137 "<a href=\"/wiki/%s\" class=\"benkilink\">%s</a>",
138 match.group(), match.group())));
139 }
140
141 private static Document unwikilinkify(Document soup) {
142 soup.select(".benkilink").unwrap();
143 return soup;
144 }
Matthias Andreas Benkard734879e2020-01-24 10:47:37 +0100145}