From d5c0a4348f67576315f9c6833aa42bca8d185bc3 Mon Sep 17 00:00:00 2001 From: MarkusRost <2701034-MarkusRost@users.noreply.gitlab.com> Date: Sat, 25 Jan 2025 23:07:45 +0100 Subject: [PATCH] update matrix hook, add zulip hook --- extensions/hooks/matrix.py | 216 ++++++++++++++++++++++++++++--------- extensions/hooks/zulip.py | 203 ++++++++++++++++++++++++++++++++++ 2 files changed, 366 insertions(+), 53 deletions(-) create mode 100644 extensions/hooks/zulip.py diff --git a/extensions/hooks/matrix.py b/extensions/hooks/matrix.py index f8945df..e299e68 100644 --- a/extensions/hooks/matrix.py +++ b/extensions/hooks/matrix.py @@ -18,11 +18,14 @@ from typing import Optional import requests import markdown import json +import re -from src.configloader import settings from src.api.context import Context from src.discord.message import DiscordMessage, DiscordMessageMetadata from src.api.hook import post_hook +from src.api.util import sanitize_to_markdown +from src.misc import class_searcher +from html.parser import HTMLParser # { # "hooks": { @@ -39,25 +42,87 @@ matrix_hook: Optional[dict] = None start_time = int(time.time()) +class StrikethroughProcessor(markdown.inlinepatterns.AsteriskProcessor): + PATTERNS = [ + markdown.inlinepatterns.EmStrongItem(re.compile(r'(~{2})(.+?)\1', re.DOTALL | re.UNICODE), 'single', 'del') + ] + +class StrikethroughExtension(markdown.extensions.Extension): + def extendMarkdown(self, md): + """ Modify inline patterns. 
""" + md.inlinePatterns.register(StrikethroughProcessor(r'~'), 'strikethrough', 50) + + def check_if_exists(settings: dict): global matrix_hook matrix_hook = settings.get("hooks", {}).get("matrix", {}) return bool(matrix_hook) - @post_hook(priority=50, register=check_if_exists) def matrix_posthook(message: DiscordMessage, metadata: DiscordMessageMetadata, context: Context, change: dict): if not matrix_hook.get("access_token"): raise KeyError("Matrix hook requires an access token to run!") - - if settings["appearance"]["mode"] == "embed": - matrix_json_formatted_body = discord_embed_converter(message).replace("\\:", ":").replace("\\|", "|") - else: - matrix_json_formatted_body = discord_compact_converter(message) + content = (message.webhook_object.get("content", "") or "") + if message.message_type == "embed": + embed = "" + images = [] + if message.embed.get("author", {}).get("name", None): + author = sanitize_to_markdown(message["author"]["name"]) + if message["author"].get("url", None): + author = "[{name}]({url})".format(name=author, url=message["author"]["url"]) + embed += "{author}:\n".format(author=author) + if message.embed.get("title", None): + title = message["title"] + if message.embed.get("url", None): + title = "[{title}]({url})".format(title=title, url=message["url"]) + thumbnail = "" + if message.embed.get("thumbnail", {}).get("url", None): + thumbnail = " [thumbnail]({url})".format(url=message["thumbnail"]["url"]) + images.append( "[^]({url})".format(url=message["thumbnail"]["url"]) ) + embed += "**{title}**{thumbnail}\n".format(title=title, thumbnail=thumbnail) + elif message.embed.get("thumbnail", {}).get("url", None): + embed += "[thumbnail]({url})\n".format(url=message["thumbnail"]["url"]) + images.append( "[^]({url})".format(url=message["thumbnail"]["url"]) ) + if message.embed.get("description", None): + embed += message["description"] + "\n" + if context.changed_content: + edit_diff = ContentParser(context._) + 
edit_diff.feed(context.changed_content) + diff_text = edit_diff.text + diff_text = re.sub(r'(~{2})(.+?)\1', '\\2', diff_text) + diff_text = re.sub(r'(\*{2})(.+?)\1', '\\2', diff_text) + embed += "\n" + diff_text + "\n" + if message.embed.get("fields", []): + for field in message["fields"]: + if context.changed_content and field["name"] in (context._("Removed"), context._("Added")): + continue + embed += "**{name}**\n> {value}\n".format(name=field["name"],value=field["value"].replace("\n", "\n> ")) + if message.embed.get("image", {}).get("url", None): + embed += "**[image]({url})**\n".format(url=message["image"]["url"]) + images.append( "**[^]({url})**".format(url=message["image"]["url"]) ) + if message.embed.get("footer", {}).get("text", None): + timestamp = "" + if message.embed.get("timestamp", None): + timestamp = " • {timestamp}".format(timestamp=message["timestamp"]) + embed += "{footer}{timestamp}\n".format(footer=sanitize_to_markdown(message["footer"]["text"]), timestamp=timestamp) + elif message.embed.get("timestamp", None): + embed += message["timestamp"] + "\n" + if content: + content += "\n" + content += "> " + embed[:-1].replace("\n", "\n> ") + if len(images): + content += "\n{images}".format(images=" ".join(images)) if matrix_hook.get("matrix_only"): context.event = None - - data = json.dumps({"msgtype": "m.text", "body": "test", "format": "org.matrix.custom.html", "formatted_body": matrix_json_formatted_body}) + if not content: + return + html_content = markdown.markdown(content, extensions=[StrikethroughExtension(),'tables','nl2br']) + data = json.dumps({ + "msgtype": "m.text", + "body": content, + "format": "org.matrix.custom.html", + "formatted_body": re.sub(r'\\(\\?)', '\\1', html_content) + }) for room in matrix_hook.get("rooms", []): response = requests.put("{homeserver}/_matrix/client/v3/rooms/{roomId}/send/{eventType}/{txnId}".format(homeserver=matrix_hook.get("homeserver"), roomId=room, @@ -66,51 +131,96 @@ def matrix_posthook(message: 
DiscordMessage, metadata: DiscordMessageMetadata, c data=data, headers={"Authorization": "Bearer "+matrix_hook.get("access_token"), "Content-Type": "application/json"}) -def discord_embed_converter(embed: DiscordMessage) -> str: - discord_embed = embed.embed - matrix_soup = [] +class ContentParser(HTMLParser): + """ContentPerser is an implementation of HTMLParser that parses output of action=compare&prop=diff API request + for two MediaWiki revisions.""" + max_length = 3000 + current_tag = "" + last_del = None + last_ins = None + done = False - matrix_soup.append("{author}
".format(author=discord_embed.get("author", {}).get("name", "unknown"), url=discord_embed.get("author", {}).get("url", ""))) - matrix_soup.append("{title}
".format(title=discord_embed.get("title", "Unknown title"), url=discord_embed.get("url", ""))) - if discord_embed.get("description"): - matrix_soup.append(markdown.markdown(discord_embed.get("description", ""), extensions=['pymdownx.tilde'])) - for field in discord_embed.get("fields", []): - if field.get("inline"): - matrix_soup.append((markdown.markdown(field.get("value", ""), extensions=['pymdownx.tilde']), True)) - else: - matrix_soup.append(("{name}
{data}".format(name=field.get("name", ""), data=removep(markdown.markdown(field.get("value", ""), extensions=['pymdownx.tilde']))), False)) - final_output = "" - in_table_mode = False - while matrix_soup: - element = matrix_soup.pop(0) - if isinstance(element, str): - element = removep(element) - if in_table_mode: - final_output += "" - in_table_mode = False - final_output += element - else: - if in_table_mode is False: - final_output+="" - in_table_mode = True - if element[1] is False: - final_output+="" - else: - if matrix_soup and matrix_soup[0][1] is True: - another_element = matrix_soup.pop(0) - final_output+="" - else: - final_output += "" - if in_table_mode: - final_output += "
"+removep(element[0])+"
"+removep(element[0])+""+removep(another_element[0])+"
" + removep(element[0]) + "
" - return final_output.replace("\n", "") + def __init__(self, lang): + super().__init__() + self.text = "| {removed} | {added}\n|---|---\n".format(removed=lang("Removed"), added=lang("Added")) + def handle_starttag(self, tagname, attribs): + if self.done: + return + if tagname == "ins" or tagname == "del": + self.current_tag = tagname + if tagname == "td": + classes = class_searcher(attribs).split(' ') + if "diff-deletedline" in classes: + self.current_tag = "tdd" + self.last_del = "\u200b" + if "diff-addedline" in classes: + self.current_tag = "tda" + self.last_ins = "\u200b" + if "diff-empty" in classes: + if self.last_del is None: + self.last_del = "" + if self.last_ins is None: + self.last_ins = "" -def removep(element: str) -> str: - if element.startswith("

") and element.endswith("

"): - return element[3:-4] - return element + def handle_data(self, data): + def escape_formatting(data: str) -> str: + """Escape Discord formatting""" + return re.sub(r"([`_*~:<>{}@/|#\-\.\\\[\]\(\)])", "\\\\\\1", data) + if not self.current_tag or self.done: + return + data = escape_formatting(data) + if self.current_tag == "del": + self.last_del += "~~" + data + "~~" + if self.current_tag == "ins": + self.last_ins += "**" + data + "**" + if self.current_tag == "tdd": + self.last_del += data + if self.current_tag == "tda": + self.last_ins += data - -def discord_compact_converter(embed: DiscordMessage) -> str: - return markdown.markdown(embed.webhook_object["content"], extensions=['pymdownx.tilde']) \ No newline at end of file + def handle_endtag(self, tagname): + if self.done: + return + if tagname == "ins": + self.current_tag = "tda" + elif tagname == "del": + self.current_tag = "tdd" + elif tagname == "td": + self.current_tag = "" + elif tagname == "tr": + if self.last_ins is None: + return + # if self.last_ins == "" and self.last_del != "\u200b": + # if "~~" in self.last_del: + # self.last_del = self.last_del.replace("~~", "__") + # self.last_del = "~~" + self.last_del + "~~" + # if self.last_del == "" and self.last_ins != "\u200b": + # if "**" in self.last_ins: + # self.last_ins = self.last_ins.replace("**", "__") + # self.last_ins = "**" + self.last_ins + "**" + if len(self.text) + len(self.last_del) + len(self.last_ins) > self.max_length: + remaining = self.max_length - len(self.text) + if remaining < 0: + remaining = 0 + if remaining % 2 == 1: + remaining = remaining + 1 + part = int(remaining / 2) + if len(self.last_del) < part: + part += part - len(self.last_del) + if len(self.last_ins) < part: + part += part - len(self.last_ins) + if len(self.last_del) > part: + self.last_del = self.last_del[:part] + if self.last_del.count("~~") % 2 == 1: + self.last_del += "~~" + self.last_del += " **And more**" + if len(self.last_ins) > part: + self.last_ins = 
self.last_ins[:part] + if self.last_ins.count("**") % 2 == 1: + self.last_ins += "**" + self.last_ins += " ~~And more~~" + self.done = True + self.text += "| {removed} | {added}\n".format(removed=self.last_del, added=self.last_ins) + self.last_del = None + self.last_ins = None \ No newline at end of file diff --git a/extensions/hooks/zulip.py b/extensions/hooks/zulip.py new file mode 100644 index 0000000..f028fa2 --- /dev/null +++ b/extensions/hooks/zulip.py @@ -0,0 +1,203 @@ +# This file is part of Recent changes Goat compatible Discord webhook (RcGcDw). +# +# RcGcDw is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# RcGcDw is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with RcGcDw. If not, see . 
"""Post-hook that mirrors outgoing RcGcDw messages to a Zulip realm."""
import base64
import logging
import re
from html.parser import HTMLParser

import requests

from src.api.context import Context
from src.discord.message import DiscordMessage, DiscordMessageMetadata
from src.api.hook import post_hook
from src.api.util import sanitize_to_markdown
from src.configloader import settings
from src.misc import class_searcher

# Bridge messages to Zulip
# {
#     "hooks": {
#         "zulip": {
#             "realm": "https://mc-wiki.zulipchat.com",
#             "email": "bot@zulip.com",
#             "api_key": "12345",
#             "target": {
#                 "type": "stream",
#                 "to": "Channel",
#                 "topic": "RcGcdw"
#             }
#         }
#     }
# }

logger = logging.getLogger(__name__)

zulip = settings.get("hooks", {}).get("zulip", {})

# HTTP Basic credentials ("email:api_key", base64-encoded bytes); stays "" when the hook is not configured.
auth = ""
if zulip:
    auth = base64.b64encode("{email}:{api_key}".format(email=zulip["email"], api_key=zulip["api_key"]).encode('utf-8'))

# Wildcard mentions that would notify a whole channel/topic; "stream" is the legacy alias of "channel".
_WILDCARD_MENTION = re.compile(r"@\*\*(all|everyone|channel|stream|topic)\*\*")
# Characters that carry markdown meaning and must be backslash-escaped inside the diff table.
_FORMATTING_CHARS = re.compile(r"([`_*~:<>{}@/|#\-\.\\\[\]\(\)])")
# Seconds to wait for the Zulip API before giving up on a message.
_REQUEST_TIMEOUT = 10


def _escape_formatting(data: str) -> str:
    """Backslash-escape every markdown-significant character in *data*."""
    return _FORMATTING_CHARS.sub(r"\\\1", data)


def _render_embed(message: DiscordMessage, context: Context) -> str:
    """Render the embed of *message* as a Zulip quote block followed by its image links."""
    embed = message.embed
    parts = ["`````quote\n"]
    images = []
    if embed.get("author", {}).get("name"):
        author = sanitize_to_markdown(message["author"]["name"])
        if message["author"].get("url"):
            author = "[{name}]({url})".format(name=author, url=message["author"]["url"])
        parts.append("{author}:\n".format(author=author))
    if embed.get("title"):
        title = message["title"]
        if embed.get("url"):
            title = "[{title}]({url})".format(title=title, url=message["url"])
        thumbnail = ""
        if embed.get("thumbnail", {}).get("url"):
            thumbnail = " [thumbnail]({url})".format(url=message["thumbnail"]["url"])
            images.append("[^]({url})".format(url=message["thumbnail"]["url"]))
        parts.append("**{title}**{thumbnail}\n".format(title=title, thumbnail=thumbnail))
    elif embed.get("thumbnail", {}).get("url"):
        parts.append("[thumbnail]({url})\n".format(url=message["thumbnail"]["url"]))
        images.append("[^]({url})".format(url=message["thumbnail"]["url"]))
    if embed.get("description"):
        parts.append(message["description"] + "\n")
    if context.changed_content:
        edit_diff = ContentParser(context._)
        edit_diff.feed(context.changed_content)
        parts.append("\n" + edit_diff.text + "\n")
    if embed.get("fields", []):
        for field in message["fields"]:
            # The diff table above already shows these two fields.
            if context.changed_content and field["name"] in (context._("Removed"), context._("Added")):
                continue
            parts.append("- **{name}**\n````quote\n{value}\n````\n".format(name=field["name"], value=field["value"]))
    if embed.get("image", {}).get("url"):
        parts.append("**[image]({url})**\n".format(url=message["image"]["url"]))
        images.append("**[^]({url})**".format(url=message["image"]["url"]))
    if embed.get("footer", {}).get("text"):
        timestamp = ""
        if embed.get("timestamp"):
            # <time:...> is Zulip's global-time syntax; the timestamp value is passed through unchanged.
            timestamp = " • <time:{timestamp}>".format(timestamp=message["timestamp"])
        parts.append("{footer}{timestamp}\n".format(footer=sanitize_to_markdown(message["footer"]["text"]), timestamp=timestamp))
    elif embed.get("timestamp"):
        parts.append("<time:{timestamp}>\n".format(timestamp=message["timestamp"]))
    parts.append("`````")
    if images:
        parts.append("\n{images}".format(images=" ".join(images)))
    return "".join(parts)


@post_hook
def zulip_hook(message: DiscordMessage, metadata: DiscordMessageMetadata, context: Context, change: dict):
    """Forward *message* to the configured Zulip target.

    Does nothing when the hook is not configured, when the event comes from the
    "discussion" feed, or when there is no text to send. Delivery is best-effort:
    a failed request is logged and otherwise ignored.
    """
    if not zulip or context.feed_type == "discussion":
        return
    content = (message.webhook_object.get("content", "") or "")
    if message.message_type == "embed":
        if content:
            content += "\n"
        content += _render_embed(message, context)
    # A zero-width space after "@" keeps relayed text from triggering wildcard mentions.
    content = _WILDCARD_MENTION.sub("@\u200b**\\1**", content)
    if not content:
        return
    data = {**zulip["target"], 'content': content}
    header = {'Authorization': "Basic {auth}".format(auth=auth.decode('utf-8'))}
    req = requests.Request("POST", "{realm}/api/v1/messages".format(realm=zulip["realm"]), data=data, headers=header)
    try:
        # The context manager closes the session's connections once the request is done.
        with requests.Session() as session:
            session.send(req.prepare(), timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        logger.warning("Could not deliver message to Zulip: %s", exc)


class ContentParser(HTMLParser):
    """ContentParser is an implementation of HTMLParser that parses output of action=compare&prop=diff API request
    for two MediaWiki revisions.

    The result is collected in ``self.text`` as a two-column markdown table
    (removed | added), one table row per parsed ``<tr>``.
    """
    # Soft limit on len(self.text); the row that crosses it is shortened and parsing stops.
    max_length = 3000
    # Parser state: "" outside diff cells, "tdd"/"tda" inside a removed/added cell,
    # "del"/"ins" inside the corresponding inline tag.
    current_tag = ""
    # Text gathered for the current row; None until a diff cell of that side was seen.
    last_del = None
    last_ins = None
    # Set once the length limit was hit; all later input is ignored.
    done = False

    def __init__(self, lang):
        """Start the table with a header row; *lang* is called to translate the column titles."""
        super().__init__()
        self.text = "| {removed} | {added}\n|---|---\n".format(removed=lang("Removed"), added=lang("Added"))

    def handle_starttag(self, tagname, attribs):
        """Track which side of the diff the following text belongs to."""
        if self.done:
            return
        if tagname == "ins" or tagname == "del":
            self.current_tag = tagname
        if tagname == "td":
            classes = class_searcher(attribs).split(' ')
            if "diff-deletedline" in classes:
                self.current_tag = "tdd"
                self.last_del = "\u200b"
            if "diff-addedline" in classes:
                self.current_tag = "tda"
                self.last_ins = "\u200b"
            if "diff-empty" in classes:
                if self.last_del is None:
                    self.last_del = ""
                if self.last_ins is None:
                    self.last_ins = ""

    def handle_data(self, data):
        """Append escaped text to the current side, marking inline changes as ~~removed~~ / **added**."""
        if not self.current_tag or self.done:
            return
        data = _escape_formatting(data)
        if self.current_tag in ("del", "tdd"):
            # Text outside of a diff cell has no accumulator yet; skip it instead of raising TypeError.
            if self.last_del is None:
                return
            if self.current_tag == "del":
                data = "~~" + data + "~~"
            self.last_del += data
        elif self.current_tag in ("ins", "tda"):
            if self.last_ins is None:
                return
            if self.current_tag == "ins":
                data = "**" + data + "**"
            self.last_ins += data

    def handle_endtag(self, tagname):
        """Leave inline tags and cells; on ``</tr>`` flush the collected row into the table."""
        if self.done:
            return
        if tagname == "ins":
            self.current_tag = "tda"
        elif tagname == "del":
            self.current_tag = "tdd"
        elif tagname == "td":
            self.current_tag = ""
        elif tagname == "tr":
            if self.last_ins is None:
                return
            if self.last_del is None:
                # Row without a removed-side cell: render that column as empty.
                self.last_del = ""
            if len(self.text) + len(self.last_del) + len(self.last_ins) > self.max_length:
                remaining = max(0, self.max_length - len(self.text))
                if remaining % 2 == 1:
                    remaining = remaining + 1
                # Split the remaining budget between both columns; a column that needs
                # less than its half hands the unused share to the other one.
                part = remaining // 2
                if len(self.last_del) < part:
                    part += part - len(self.last_del)
                if len(self.last_ins) < part:
                    part += part - len(self.last_ins)
                if len(self.last_del) > part:
                    self.last_del = self.last_del[:part]
                    # Close a strikethrough marker that the cut left open.
                    if self.last_del.count("~~") % 2 == 1:
                        self.last_del += "~~"
                    self.last_del += " **And more**"
                if len(self.last_ins) > part:
                    self.last_ins = self.last_ins[:part]
                    # Close a bold marker that the cut left open.
                    if self.last_ins.count("**") % 2 == 1:
                        self.last_ins += "**"
                    self.last_ins += " ~~And more~~"
                self.done = True
            self.text += "| {removed} | {added}\n".format(removed=self.last_del, added=self.last_ins)
            self.last_del = None
            self.last_ins = None