From 2aa8387b7288c11880af0e6956ef3b04052d3203 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sun, 25 Apr 2021 13:20:58 +0200 Subject: [PATCH] Changes in structure, updated API docs --- docs/API spec.md | 26 +++++++---- src/api/client.py | 4 +- src/api/context.py | 5 ++- src/discord/message.py | 3 ++ src/discord/queue.py | 20 ++++----- src/discussion_formatters.py | 2 +- src/misc.py | 21 ++------- src/rc_formatters.py | 3 +- src/rcgcdw.py | 59 +++++++++++++++++++++++-- src/{rc.py => wiki.py} | 83 ++++++++---------------------------- 10 files changed, 117 insertions(+), 109 deletions(-) rename src/{rc.py => wiki.py} (86%) diff --git a/docs/API spec.md b/docs/API spec.md index 0c7367b..977f406 100644 --- a/docs/API spec.md +++ b/docs/API spec.md @@ -6,6 +6,7 @@ A class allowing to change the message content and/or execute additional actions ### Formatters Formatters allow to specify how does a Discord message look like depending on message mode (embed, compact) and type of the event that had happened on the wiki (new, edit etc). +If a formatter for the given event is not registered, the script will look for a formatter for the event "generic", and if that is also not found it will log a warning. ### Post-processing hook A class allowing to change the message content and/or execute additional actions after message has been processed by the formatter. This type of hook executes after a formatter. 
@@ -35,23 +36,30 @@ api object exposes various data which allows to extend the usefulness of what ca import logging from src.discord.message import DiscordMessage from src.api import formatter +from src.api.context import Context +from src.api.util import create_article_path, link_formatter from src.i18n import rc_formatters _ = rc_formatters.gettext logger = logging.getLogger("extensions.abusefilter") -class abusefilter(Formatter): - def __init__(self, api): - super().__init__(api) +class abusefilter(): + def __init__(self, api): + super().__init__(api) - @formatter.embed(event="abuselog/modify", mode="embed") - def embed_modify(self, change: dict) -> DiscordMessage: - return DiscordMessage + @formatter.embed(event="abuselog/modify", mode="embed") + def embed_modify(self, ctx: Context, change: dict) -> DiscordMessage: + embed = DiscordMessage(ctx.message_type, ctx.event, webhook_url=ctx.webhook_url) + embed.set_link(create_article_path("Special:AbuseFilter/history/{number}/diff/prev/{historyid}".format(number=change["logparams"]['newId'], historyid=change["logparams"]["historyId"]))) + embed["title"] = _("Edited abuse filter number {number}").format(number=change["logparams"]['newId']) + return embed - @formatter.compact(event="abuselog/modify") - def compact_modify(self, change: dict) -> DiscordMessage: - return DiscordMessage + @formatter.compact(event="abuselog/modify") + def embed_modify(self, ctx: Context, change: dict) -> DiscordMessage: + link = link_formatter(create_article_path("Special:AbuseFilter/history/{number}/diff/prev/{historyid}".format(number=change["logparams"]['newId'], historyid=change["logparams"]["historyId"]))) + content = _("[{author}]({author_url}) edited abuse filter [number {number}]({filter_url})").format(author=author, author_url=author_url, number=change["logparams"]['newId'], filter_url=link) + return DiscordMessage ``` diff --git a/src/api/client.py b/src/api/client.py index aa81346..f311029 100644 --- a/src/api/client.py +++ 
b/src/api/client.py @@ -14,7 +14,6 @@ # along with RcGcDw. If not, see . import src.rcgcdw -import src.rc import src.misc from typing import Union from collections import OrderedDict @@ -25,7 +24,7 @@ class Client: """ def __init__(self): self._formatters = src.rcgcdw.formatter_hooks - self.__recent_changes = src.rc.wiki + self.__recent_changes = src.rcgcdw.wiki self.WIKI_API_PATH = src.misc.WIKI_API_PATH self.WIKI_ARTICLE_PATH = src.misc.WIKI_ARTICLE_PATH self.WIKI_SCRIPT_PATH = src.misc.WIKI_SCRIPT_PATH @@ -63,4 +62,5 @@ class Client: def get_formatters(self): return self._formatters + client = Client() diff --git a/src/api/context.py b/src/api/context.py index f6e575d..365d0f7 100644 --- a/src/api/context.py +++ b/src/api/context.py @@ -20,4 +20,7 @@ class Context: self.client = client self.webhook_url = webhook_url self.message_type = message_type - self.event = None \ No newline at end of file + self.event = None + + def set_categories(self, cats): + self.categories = cats \ No newline at end of file diff --git a/src/discord/message.py b/src/discord/message.py index 8ac1f72..fab107c 100644 --- a/src/discord/message.py +++ b/src/discord/message.py @@ -89,6 +89,9 @@ class DiscordMessage: def set_name(self, name): self.webhook_object["username"] = name + def set_link(self, link): + self.embed["link"] = link + class DiscordMessageRaw(DiscordMessage): def __init__(self, content: dict, webhook_url: str): diff --git a/src/discord/queue.py b/src/discord/queue.py index 9777287..d8659d0 100644 --- a/src/discord/queue.py +++ b/src/discord/queue.py @@ -174,14 +174,14 @@ def send_to_discord(data: Optional[DiscordMessage], meta: DiscordMessageMetadata logger.info("Message \"{}\" has been rejected due to matching filter ({}).".format( to_check, regex)) return # discard the message without anything - if messagequeue: - messagequeue.add_message((data, meta)) - else: - code = send_to_discord_webhook(data, metadata=meta) - if code == 3: + if messagequeue: 
messagequeue.add_message((data, meta)) - elif code == 2: - time.sleep(5.0) - messagequeue.add_message((data, meta)) - elif code < 2: - pass \ No newline at end of file + else: + code = send_to_discord_webhook(data, metadata=meta) + if code == 3: + messagequeue.add_message((data, meta)) + elif code == 2: + time.sleep(5.0) + messagequeue.add_message((data, meta)) + elif code < 2: + pass \ No newline at end of file diff --git a/src/discussion_formatters.py b/src/discussion_formatters.py index ac015a0..af292d2 100644 --- a/src/discussion_formatters.py +++ b/src/discussion_formatters.py @@ -19,7 +19,7 @@ import gettext from urllib.parse import quote_plus from src.configloader import settings -from src.misc import link_formatter, create_article_path, escape_formatting +from src.api.util import link_formatter, escape_formatting, create_article_path from src.discord.queue import send_to_discord from src.discord.message import DiscordMessage, DiscordMessageMetadata from src.i18n import discussion_formatters diff --git a/src/misc.py b/src/misc.py index 6acbb19..67a7eb3 100644 --- a/src/misc.py +++ b/src/misc.py @@ -15,10 +15,12 @@ # You should have received a copy of the GNU General Public License # along with RcGcDw. If not, see . 
import base64 -import json, logging, sys, re +import json, logging, sys from html.parser import HTMLParser -from urllib.parse import urlparse, urlunparse, quote +from urllib.parse import urlparse, urlunparse import requests + +from src.api.util import escape_formatting from src.configloader import settings from src.discord.message import DiscordMessage, DiscordMessageMetadata from src.discord.queue import messagequeue, send_to_discord @@ -105,16 +107,6 @@ def weighted_average(value, weight, new_value): return round(((value * weight) + new_value) / (weight + 1), 2) -def link_formatter(link): - """Formats a link to not embed it""" - return "<" + quote(link.replace(" ", "_"), "/:?=&") + ">" - - -def escape_formatting(data): - """Escape Discord formatting""" - return re.sub(r"([`_*~<>{}@/|\\])", "\\\\\\1", data, 0) - - class ContentParser(HTMLParser): """ContentPerser is an implementation of HTMLParser that parses output of action=compare&prop=diff API request for two MediaWiki revisions. It extracts the following: @@ -276,11 +268,6 @@ def prepare_paths(path, dry=False): prepare_paths(settings["wiki_url"]) -def create_article_path(article: str) -> str: - """Takes the string and creates an URL with it as the article name""" - return WIKI_ARTICLE_PATH.replace("$1", article) - - def send_simple(msgtype, message, name, avatar): discord_msg = DiscordMessage("compact", msgtype, settings["webhookURL"], content=message) discord_msg.set_avatar(avatar) diff --git a/src/rc_formatters.py b/src/rc_formatters.py index d0354e7..a1a1474 100644 --- a/src/rc_formatters.py +++ b/src/rc_formatters.py @@ -27,8 +27,9 @@ from urllib.parse import quote_plus, quote from bs4 import BeautifulSoup from src.configloader import settings -from src.misc import link_formatter, create_article_path, WIKI_SCRIPT_PATH, safe_read, \ +from src.misc import WIKI_SCRIPT_PATH, safe_read, \ WIKI_API_PATH, ContentParser, profile_field_name, LinkParser, AUTO_SUPPRESSION_ENABLED +from src.api.util import 
link_formatter, create_article_path from src.discord.queue import send_to_discord from src.discord.message import DiscordMessage, DiscordMessageMetadata diff --git a/src/rcgcdw.py b/src/rcgcdw.py index 0da7fdc..95e0acb 100644 --- a/src/rcgcdw.py +++ b/src/rcgcdw.py @@ -19,18 +19,23 @@ # WARNING! SHITTY CODE AHEAD. ENTER ONLY IF YOU ARE SURE YOU CAN TAKE IT # You have been warned -import time, logging.config, requests, datetime, gettext, math, os.path, schedule, sys +import time, logging.config, requests, datetime, gettext, math, os.path, schedule, sys, re import src.misc from collections import defaultdict, Counter + +import src.api.client +from typing import Optional +from src.api.context import Context from src.configloader import settings from src.misc import add_to_dict, datafile, \ - WIKI_API_PATH, create_article_path + WIKI_API_PATH, LinkParser +from src.api.util import create_article_path, default_message from src.discord.queue import send_to_discord from src.discord.message import DiscordMessage, DiscordMessageMetadata -from src.rc import wiki from src.exceptions import MWError from src.i18n import rcgcdw +from src.wiki import Wiki _ = rcgcdw.gettext ngettext = rcgcdw.ngettext @@ -67,6 +72,12 @@ if settings["limitrefetch"] != -1 and os.path.exists("lastchange.txt") is True: os.remove("lastchange.txt") +def no_formatter(ctx: Context, change: dict) -> None: + logger.warning(f"There is no formatter specified for {ctx.event}! Ignoring event.") + return + +formatter_hooks["no_formatter"] = no_formatter + def day_overview_request(): logger.info("Fetching daily overview... 
This may take up to 30 seconds!") timestamp = (datetime.datetime.utcnow() - datetime.timedelta(hours=24)).isoformat(timespec='milliseconds') @@ -216,7 +227,48 @@ def day_overview(): logger.debug("function requesting changes for day overview returned with error code") +def rc_processor(change, changed_categories): + """Prepares essential information for both embed and compact message format.""" + LinkParser = LinkParser() + metadata = DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None), + page_id=change.get("pageid", None)) + logger.debug(change) + context = Context(settings["appearance"]["mode"], settings["webhookURL"], src.api.client.client) + if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]: # if event is hidden using suppression + context.event = "suppressed" + discord_message: Optional[DiscordMessage] = default_message("suppressed", formatter_hooks)(context, change) + else: + if "commenthidden" not in change: + LinkParser.feed(change.get("parsedcomment", "")) + parsed_comment = LinkParser.new_string + parsed_comment = re.sub(r"(`|_|\*|~|{|}|\|\|)", "\\\\\\1", parsed_comment) + else: + parsed_comment = _("~~hidden~~") + if "userhidden" in change: + change["user"] = _("hidden") + if change.get("ns", -1) in settings.get("ignored_namespaces", ()): + return + if change["type"] in ["edit", "new"]: + logger.debug("List of categories in essential_info: {}".format(changed_categories)) + identification_string = change["type"] + context.set_categories(changed_categories) + elif change["type"] == "categorize": + return + elif change["type"] == "log": + identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"]) + else: + identification_string = change.get("type", "unknown") # If event doesn't have a type + if identification_string in settings["ignored"]: + return + discord_message: Optional[DiscordMessage] = 
default_message(identification_string, formatter_hooks)(context, change) + send_to_discord(discord_message, metadata) + +def abuselog_processing(entry, recent_changes): + abuselog_appearance_mode(entry, recent_changes) + + # Log in and download wiki information +wiki = Wiki(rc_processor, abuselog_processing) try: if settings["wiki_bot_login"] and settings["wiki_bot_password"]: wiki.log_in() @@ -254,6 +306,7 @@ if 1 == 2: # additional translation strings in unreachable code load_extensions() + if TESTING: logger.debug("DEBUGGING ") storage["rcid"] = 1 diff --git a/src/rc.py b/src/wiki.py similarity index 86% rename from src/rc.py rename to src/wiki.py index f0c5a2f..1d9f386 100644 --- a/src/rc.py +++ b/src/wiki.py @@ -20,17 +20,15 @@ import sys import time import logging import requests -import src.api.client from bs4 import BeautifulSoup from src.configloader import settings -from src.misc import WIKI_SCRIPT_PATH, WIKI_API_PATH, datafile, send_simple, safe_read, LinkParser, \ +from src.misc import WIKI_SCRIPT_PATH, WIKI_API_PATH, datafile, send_simple, safe_read, \ AUTO_SUPPRESSION_ENABLED, parse_mw_request_info from src.discord.queue import messagequeue from src.exceptions import MWError, BadRequest, ClientError, ServerError, MediaWikiError from src.session import session -from src.api.context import Context -from typing import Union +from typing import Union, Callable # from src.rc_formatters import compact_formatter, embed_formatter, compact_abuselog_formatter, embed_abuselog_formatter from src.i18n import rc from collections import OrderedDict @@ -41,11 +39,11 @@ storage = datafile logger = logging.getLogger("rcgcdw.rc") -LinkParser = LinkParser() - class Wiki(object): """Store verious data and functions related to wiki and fetching of Recent Changes""" - def __init__(self): + def __init__(self, rc_processor: Callable, abuse_processor: Callable): + self.rc_processor = rc_processor + self.abuse_processor = abuse_processor self.map_ips = {} 
self.downtimecredibility = 0 self.last_downtime = 0 @@ -58,6 +56,7 @@ class Wiki(object): self.logged_in = False self.initial_run_complete = False + @staticmethod def handle_mw_errors(request): if "errors" in request: @@ -166,30 +165,30 @@ class Wiki(object): "There were too many new events, but the limit was high enough we don't care anymore about fetching them all.") if change["type"] == "categorize": if "commenthidden" not in change: - if len(wiki.mw_messages.keys()) > 0: + if len(self.mw_messages.keys()) > 0: cat_title = change["title"].split(':', 1)[1] # I so much hate this, blame Markus for making me do this if change["revid"] not in categorize_events: categorize_events[change["revid"]] = {"new": set(), "removed": set()} comment_to_match = re.sub(r'<.*?a>', '', change["parsedcomment"]) - if wiki.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or \ - wiki.mw_messages[ + if self.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or \ + self.mw_messages[ "recentchanges-page-added-to-category-bundled"] in comment_to_match: categorize_events[change["revid"]]["new"].add(cat_title) logger.debug("Matched {} to added category for {}".format(cat_title, change["revid"])) - elif wiki.mw_messages[ + elif self.mw_messages[ "recentchanges-page-removed-from-category"] in comment_to_match or \ - wiki.mw_messages[ + self.mw_messages[ "recentchanges-page-removed-from-category-bundled"] in comment_to_match: categorize_events[change["revid"]]["removed"].add(cat_title) logger.debug("Matched {} to removed category for {}".format(cat_title, change["revid"])) else: logger.debug( "Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format( - wiki.mw_messages["recentchanges-page-added-to-category"], - wiki.mw_messages["recentchanges-page-removed-from-category"], - wiki.mw_messages["recentchanges-page-removed-from-category-bundled"], - wiki.mw_messages["recentchanges-page-added-to-category-bundled"], + 
self.mw_messages["recentchanges-page-added-to-category"], + self.mw_messages["recentchanges-page-removed-from-category"], + self.mw_messages["recentchanges-page-removed-from-category-bundled"], + self.mw_messages["recentchanges-page-added-to-category-bundled"], comment_to_match)) else: logger.warning( @@ -204,7 +203,7 @@ class Wiki(object): logger.debug("Change ({}) is lower or equal to recent_id {}".format(change["rcid"], recent_id)) continue logger.debug(recent_id) - rc_processor(change, categorize_events.get(change.get("revid"), None)) + self.rc_processor(change, categorize_events.get(change.get("revid"), None)) return highest_id def prepare_abuse_log(self, abuse_log: list): @@ -218,7 +217,7 @@ class Wiki(object): continue if entry["id"] <= recent_id: continue - abuselog_processing(entry, self) + self.abuse_processor(entry, self) return entry["id"] def fetch_changes(self, amount): @@ -368,7 +367,7 @@ class Wiki(object): if not looped: while 1: # recursed loop, check for connection (every 10 seconds) as long as three services are down, don't do anything else if self.check_connection(looped=True): - wiki.fetch(amount=settings["limitrefetch"]) + self.fetch(amount=settings["limitrefetch"]) break time.sleep(10) return False @@ -451,49 +450,3 @@ class Wiki(object): comment = comment[0:1000] + "…" return comment return "" - - -wiki = Wiki() - - -def rc_processor(change, changed_categories): - """Prepares essential information for both embed and compact message format.""" - formatters = src.api.client.client.get_formatters() # TODO Make it better? 
Importing might be a hell - logger.debug(change) - context = Context(settings["appearance"]["mode"], settings["webhookURL"], src.api.client.client) - if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]: # if event is hidden using suppression - context.event = "suppressed" - if "commenthidden" not in change: - LinkParser.feed(change["parsedcomment"]) - parsed_comment = LinkParser.new_string - LinkParser.new_string = "" - parsed_comment = re.sub(r"(`|_|\*|~|{|}|\|\|)", "\\\\\\1", parsed_comment, 0) - else: - parsed_comment = _("~~hidden~~") - if not parsed_comment: - parsed_comment = None - if "userhidden" in change: - change["user"] = _("hidden") - if change.get("ns", -1) in settings.get("ignored_namespaces", ()): - return - if change["type"] in ["edit", "new"]: - logger.debug("List of categories in essential_info: {}".format(changed_categories)) - identification_string = change["type"] - elif change["type"] == "log": - identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"]) - if identification_string not in supported_logs: - logger.warning( - "This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format( - change)) - return - elif change["type"] == "categorize": - return - else: - logger.warning("This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(change)) - return - if identification_string in settings["ignored"]: - return - appearance_mode(identification_string, change, parsed_comment, changed_categories, wiki) - -def abuselog_processing(entry, recent_changes): - abuselog_appearance_mode(entry, recent_changes) \ No newline at end of file