blob: c27fa45b72527c1dfb00ca9cb53891ba9f7d7d30 [file] [log] [blame]
package eu.mulk.mulkcms2.benki.wiki;
import eu.mulk.mulkcms2.benki.users.User;
import io.quarkus.hibernate.orm.panache.PanacheEntityBase;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.FetchType;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.ManyToOne;
import jakarta.persistence.Table;
import java.time.OffsetDateTime;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.CheckForNull;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Entities;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
/**
 * One revision of a wiki page, mapped to the {@code benki.wiki_page_revisions} table.
 *
 * <p>The constructor stores {@link #content} as HTML with all automatically generated links
 * (elements carrying the {@code benkiautohref} or {@code benkilink} class) unwrapped again.
 * {@link #enrichedContent()} re-creates those links on demand from the plain text of the stored
 * markup.
 */
@Entity
@Table(name = "wiki_page_revisions", schema = "benki")
public class WikiPageRevision extends PanacheEntityBase {

  /** Primary key; generated by the database ({@link GenerationType#IDENTITY}). */
  @Id
  @GeneratedValue(strategy = GenerationType.IDENTITY)
  @Column(name = "id", nullable = false)
  public Integer id;

  /** Value of the nullable {@code date} column. */
  @Column(name = "date", nullable = true)
  @CheckForNull
  public OffsetDateTime date;

  /** Value of the nullable {@code title} column. */
  @Column(name = "title", nullable = true, length = -1)
  @CheckForNull
  public String title;

  /** HTML body of the revision with auto-generated links stripped; may be {@code null}. */
  @Column(name = "content", nullable = true, length = -1)
  @CheckForNull
  public String content;

  /** Value of the nullable {@code format} column. */
  @Column(name = "format", nullable = true, length = -1)
  @CheckForNull
  public String format;

  /** The page this revision belongs to; loaded lazily. */
  @ManyToOne(fetch = FetchType.LAZY)
  @JoinColumn(name = "page", referencedColumnName = "id", nullable = false)
  public WikiPage page;

  /** The user referenced by the {@code author} column; loaded lazily. */
  @ManyToOne(fetch = FetchType.LAZY)
  @JoinColumn(name = "author", referencedColumnName = "id")
  public User author;

  /** No-argument constructor (required for JPA entities). */
  public WikiPageRevision() {}

  /**
   * Creates a revision from the given values.
   *
   * <p>The content is parsed as HTML, every element with class {@code benkilink} or {@code
   * benkiautohref} is unwrapped, and the resulting body markup is stored. A {@code null} content
   * is stored as {@code null} instead of failing in the HTML parser, matching the nullable column
   * and the null handling in {@link #enrichedContent()}.
   *
   * @param date value for {@link #date}
   * @param title value for {@link #title}
   * @param content HTML content, possibly containing auto-generated links; may be {@code null}
   * @param format value for {@link #format}
   * @param page the page the revision belongs to
   * @param author the author of the revision
   */
  public WikiPageRevision(
      OffsetDateTime date,
      String title,
      String content,
      String format,
      WikiPage page,
      User author) {
    this.date = date;
    this.title = title;
    this.content =
        content == null
            ? null
            : unhrefify(unwikilinkify(Jsoup.parse(content))).select("body").html();
    this.format = format;
    this.page = page;
    this.author = author;
  }

  /**
   * Returns the stored content with URLs and WikiWords turned into links.
   *
   * <p>URLs are linked first, so that WikiWords occurring inside a URL end up inside an anchor
   * and are skipped by the subsequent WikiWord pass.
   *
   * @return the enriched body markup, or {@code null} if {@link #content} is {@code null}
   */
  @CheckForNull
  public String enrichedContent() {
    if (content == null) {
      return null;
    }
    return wikilinkify(hrefify(Jsoup.parse(content))).select("body").html();
  }

  /**
   * Replaces every match of {@code pattern} in the text nodes of {@code soup} that are not inside
   * an {@code <a>} element with the markup produced by {@code linkHtml}.
   *
   * <p>Text nodes hold <em>decoded</em> text, so everything that is spliced back in through the
   * HTML parser must be escaped again; otherwise text such as {@code &lt;b&gt;} in the source
   * would be turned into a live element. The text surrounding the matches is escaped here; the
   * callback is responsible for escaping whatever it embeds from the match.
   *
   * <p>The whole text of a node is used (not the whitespace-normalised form) so that preformatted
   * content keeps its line breaks, and nodes without any match are left untouched.
   *
   * @param soup the document to modify in place
   * @param pattern the pattern to search for in each text node
   * @param linkHtml maps the matched text to the HTML fragment that replaces it
   * @return {@code soup}, for chaining
   */
  private static Document tagsoupMapText(
      Document soup, Pattern pattern, Function<String, String> linkHtml) {
    // Snapshot the text nodes first: the loop below mutates the tree being traversed.
    var textNodes =
        soup.select(":not(a):not(a *)").stream()
            .flatMap(node -> node.childNodes().stream())
            .filter(TextNode.class::isInstance)
            .map(TextNode.class::cast)
            .collect(Collectors.toUnmodifiableList());

    for (var textNode : textNodes) {
      var text = textNode.getWholeText();
      var matcher = pattern.matcher(text);
      if (!matcher.find()) {
        continue;
      }

      var html = new StringBuilder();
      var last = 0;
      do {
        html.append(Entities.escape(text.substring(last, matcher.start())));
        html.append(linkHtml.apply(matcher.group()));
        last = matcher.end();
      } while (matcher.find());
      html.append(Entities.escape(text.substring(last)));

      // Parse the fragment inside a throwaway <span>, swap it in, then dissolve the <span> so
      // that only its children remain in place of the original text node.
      var holder = new Element(Tag.valueOf("span"), "");
      holder.html(html.toString());
      textNode.replaceWith(holder);
      holder.unwrap();
    }
    return soup;
  }

  /** Matches WikiWords: two capitalised humps followed by at least one more word character. */
  private static final Pattern WIKIWORD_REGEX =
      Pattern.compile(
          "\\p{javaUpperCase}+\\p{javaLowerCase}+\\p{javaUpperCase}+\\p{javaLowerCase}+\\w+");

  /** Matches {@code http}/{@code https} URLs, optionally preceded by an opening parenthesis. */
  private static final Pattern URL_REGEX =
      Pattern.compile("\\(?\\bhttps?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_()|]");

  /**
   * Wraps plain-text URLs in {@code <a class="benkiautohref">} elements.
   *
   * <p>A leading {@code (} and a trailing {@code )} of the match are kept outside the link.
   *
   * @param soup the document to modify in place
   * @return {@code soup}, for chaining
   */
  private static Document hrefify(Document soup) {
    return tagsoupMapText(
        soup,
        URL_REGEX,
        match -> {
          var leftParen = match.startsWith("(");
          var rightParen = match.endsWith(")");
          var url =
              match.substring(
                  leftParen ? 1 : 0, rightParen ? match.length() - 1 : match.length());
          // URLs may contain '&', which must not be read as the start of a character reference.
          var escapedUrl = Entities.escape(url);
          return String.format(
              "%s<a href=\"%s\" class=\"benkiautohref\">%s</a>%s",
              leftParen ? "(" : "", escapedUrl, escapedUrl, rightParen ? ")" : "");
        });
  }

  /**
   * Unwraps every element with class {@code benkiautohref}, keeping its children.
   *
   * @param soup the document to modify in place
   * @return {@code soup}, for chaining
   */
  private static Document unhrefify(Document soup) {
    soup.select(".benkiautohref").unwrap();
    return soup;
  }

  /**
   * Wraps WikiWords in {@code <a class="benkilink">} elements pointing at {@code /wiki/<word>}.
   *
   * @param soup the document to modify in place
   * @return {@code soup}, for chaining
   */
  private static Document wikilinkify(Document soup) {
    return tagsoupMapText(
        soup,
        WIKIWORD_REGEX,
        match -> {
          var escapedWord = Entities.escape(match);
          return String.format(
              "<a href=\"/wiki/%s\" class=\"benkilink\">%s</a>", escapedWord, escapedWord);
        });
  }

  /**
   * Unwraps every element with class {@code benkilink}, keeping its children.
   *
   * @param soup the document to modify in place
   * @return {@code soup}, for chaining
   */
  private static Document unwikilinkify(Document soup) {
    soup.select(".benkilink").unwrap();
    return soup;
  }
}