Matthias Andreas Benkard | d9b9588 | 2020-01-24 11:42:49 +0100 | [diff] [blame] | 1 | package eu.mulk.mulkcms2.benki.wiki; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 2 | |
Matthias Andreas Benkard | d9b9588 | 2020-01-24 11:42:49 +0100 | [diff] [blame] | 3 | import eu.mulk.mulkcms2.benki.users.User; |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 4 | import io.quarkus.hibernate.orm.panache.PanacheEntityBase; |
Matthias Andreas Benkard | e3bc3ee | 2023-08-06 16:21:11 +0200 | [diff] [blame^] | 5 | import jakarta.persistence.Column; |
| 6 | import jakarta.persistence.Entity; |
| 7 | import jakarta.persistence.FetchType; |
| 8 | import jakarta.persistence.GeneratedValue; |
| 9 | import jakarta.persistence.GenerationType; |
| 10 | import jakarta.persistence.Id; |
| 11 | import jakarta.persistence.JoinColumn; |
| 12 | import jakarta.persistence.ManyToOne; |
| 13 | import jakarta.persistence.Table; |
Matthias Andreas Benkard | d9b9588 | 2020-01-24 11:42:49 +0100 | [diff] [blame] | 14 | import java.time.OffsetDateTime; |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 15 | import java.util.function.Function; |
| 16 | import java.util.regex.Pattern; |
| 17 | import java.util.stream.Collectors; |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 18 | import javax.annotation.CheckForNull; |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 19 | import org.jsoup.Jsoup; |
| 20 | import org.jsoup.nodes.Document; |
| 21 | import org.jsoup.nodes.Element; |
| 22 | import org.jsoup.nodes.TextNode; |
| 23 | import org.jsoup.parser.Tag; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 24 | |
| 25 | @Entity |
Matthias Andreas Benkard | 57c9a8a | 2020-01-24 19:09:38 +0100 | [diff] [blame] | 26 | @Table(name = "wiki_page_revisions", schema = "benki") |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 27 | public class WikiPageRevision extends PanacheEntityBase { |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 28 | |
| 29 | @Id |
Matthias Andreas Benkard | 0246c3e | 2020-01-27 05:39:08 +0100 | [diff] [blame] | 30 | @GeneratedValue(strategy = GenerationType.IDENTITY) |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 31 | @Column(name = "id", nullable = false) |
Matthias Andreas Benkard | 0246c3e | 2020-01-27 05:39:08 +0100 | [diff] [blame] | 32 | public Integer id; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 33 | |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 34 | @Column(name = "date", nullable = true) |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 35 | @CheckForNull |
Matthias Andreas Benkard | d9b9588 | 2020-01-24 11:42:49 +0100 | [diff] [blame] | 36 | public OffsetDateTime date; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 37 | |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 38 | @Column(name = "title", nullable = true, length = -1) |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 39 | @CheckForNull |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 40 | public String title; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 41 | |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 42 | @Column(name = "content", nullable = true, length = -1) |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 43 | @CheckForNull |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 44 | public String content; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 45 | |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 46 | @Column(name = "format", nullable = true, length = -1) |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 47 | @CheckForNull |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 48 | public String format; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 49 | |
Matthias Andreas Benkard | aa75480 | 2020-01-24 11:55:26 +0100 | [diff] [blame] | 50 | @ManyToOne(fetch = FetchType.LAZY) |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 51 | @JoinColumn(name = "page", referencedColumnName = "id", nullable = false) |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 52 | public WikiPage page; |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 53 | |
Matthias Andreas Benkard | aa75480 | 2020-01-24 11:55:26 +0100 | [diff] [blame] | 54 | @ManyToOne(fetch = FetchType.LAZY) |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 55 | @JoinColumn(name = "author", referencedColumnName = "id") |
Matthias Andreas Benkard | 35cb159 | 2020-01-24 11:05:20 +0100 | [diff] [blame] | 56 | public User author; |
Matthias Andreas Benkard | 5d07527 | 2020-01-26 18:05:10 +0100 | [diff] [blame] | 57 | |
| 58 | public WikiPageRevision() {} |
| 59 | |
| 60 | public WikiPageRevision( |
| 61 | OffsetDateTime date, |
| 62 | String title, |
| 63 | String content, |
| 64 | String format, |
| 65 | WikiPage page, |
| 66 | User author) { |
| 67 | this.date = date; |
| 68 | this.title = title; |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 69 | this.content = unhrefify(unwikilinkify(Jsoup.parse(content))).select("body").html(); |
Matthias Andreas Benkard | 5d07527 | 2020-01-26 18:05:10 +0100 | [diff] [blame] | 70 | this.format = format; |
| 71 | this.page = page; |
| 72 | this.author = author; |
| 73 | } |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 74 | |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 75 | @CheckForNull |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 76 | public String enrichedContent() { |
Matthias Andreas Benkard | 1e7674c | 2020-04-18 20:28:51 +0200 | [diff] [blame] | 77 | if (content == null) { |
| 78 | return null; |
| 79 | } |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 80 | return wikilinkify(hrefify(Jsoup.parse(content))).select("body").html(); |
| 81 | } |
| 82 | |
| 83 | private static Document tagsoupMapText(Document soup, Function<String, String> fn) { |
| 84 | for (var subnode : |
| 85 | soup.select(":not(a):not(a *)").stream() |
| 86 | .flatMap(node -> node.childNodes().stream()) |
| 87 | .collect(Collectors.toUnmodifiableList())) { |
| 88 | if (subnode instanceof TextNode) { |
| 89 | var newNode = new Element(Tag.valueOf("span"), ""); |
| 90 | newNode.html(fn.apply(((TextNode) subnode).text())); |
| 91 | subnode.replaceWith(newNode); |
| 92 | newNode.unwrap(); |
| 93 | } |
| 94 | } |
| 95 | return soup; |
| 96 | } |
| 97 | |
Matthias Andreas Benkard | 593765d | 2020-04-18 20:44:07 +0200 | [diff] [blame] | 98 | private static final Pattern WIKIWORD_REGEX = |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 99 | Pattern.compile( |
| 100 | "\\p{javaUpperCase}+\\p{javaLowerCase}+\\p{javaUpperCase}+\\p{javaLowerCase}+\\w+"); |
Matthias Andreas Benkard | 593765d | 2020-04-18 20:44:07 +0200 | [diff] [blame] | 101 | private static final Pattern URL_REGEX = |
Matthias Andreas Benkard | 97130f9 | 2020-01-27 21:03:39 +0100 | [diff] [blame] | 102 | Pattern.compile("\\(?\\bhttps?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]"); |
| 103 | |
| 104 | private static Document hrefify(Document soup) { |
| 105 | return tagsoupMapText( |
| 106 | soup, |
| 107 | x -> |
| 108 | URL_REGEX |
| 109 | .matcher(x) |
| 110 | .replaceAll( |
| 111 | match -> { |
| 112 | var s = match.group(); |
| 113 | var leftParen = s.startsWith("("); |
| 114 | var rightParen = s.endsWith(")"); |
| 115 | var url = |
| 116 | s.substring(leftParen ? 1 : 0, rightParen ? s.length() - 1 : s.length()); |
| 117 | return String.format( |
| 118 | "%s<a href=\"%s\" class=\"benkiautohref\">%s</a>%s", |
| 119 | leftParen ? "(" : "", url, url, rightParen ? ")" : ""); |
| 120 | })); |
| 121 | } |
| 122 | |
| 123 | private static Document unhrefify(Document soup) { |
| 124 | soup.select(".benkiautohref").unwrap(); |
| 125 | return soup; |
| 126 | } |
| 127 | |
| 128 | private static Document wikilinkify(Document soup) { |
| 129 | return tagsoupMapText( |
| 130 | soup, |
| 131 | x -> |
| 132 | WIKIWORD_REGEX |
| 133 | .matcher(x) |
| 134 | .replaceAll( |
| 135 | match -> |
| 136 | String.format( |
| 137 | "<a href=\"/wiki/%s\" class=\"benkilink\">%s</a>", |
| 138 | match.group(), match.group()))); |
| 139 | } |
| 140 | |
| 141 | private static Document unwikilinkify(Document soup) { |
| 142 | soup.select(".benkilink").unwrap(); |
| 143 | return soup; |
| 144 | } |
Matthias Andreas Benkard | 734879e | 2020-01-24 10:47:37 +0100 | [diff] [blame] | 145 | } |