diff --git a/extensions/base/curseprofile.py b/extensions/base/curseprofile.py index be9b9fb..117e41e 100644 --- a/extensions/base/curseprofile.py +++ b/extensions/base/curseprofile.py @@ -36,9 +36,9 @@ def embed_curseprofile_profile_edited(ctx: Context, change: dict) -> DiscordMess else: embed["title"] = ctx._("Edited their own profile") if ctx.parsedcomment is None: # If the field is empty - embed["description"] = ctx._("Cleared the {field} field").format(field=profile_field_name(change["logparams"]['4:section'], True)) + embed["description"] = ctx._("Cleared the {field} field").format(field=profile_field_name(change["logparams"]['4:section'], True, ctx._)) else: - embed["description"] = ctx._("{field} field changed to: {desc}").format(field=profile_field_name(change["logparams"]['4:section'], True), desc=ctx.parsedcomment) + embed["description"] = ctx._("{field} field changed to: {desc}").format(field=profile_field_name(change["logparams"]['4:section'], True, ctx._), desc=ctx.parsedcomment) embed["url"] = ctx.client.create_article_path("UserProfile:" + sanitize_to_url(target_user)) return embed diff --git a/extensions/base/discussions.py b/extensions/base/discussions.py index 4bb33f5..f75323e 100644 --- a/extensions/base/discussions.py +++ b/extensions/base/discussions.py @@ -20,7 +20,6 @@ import datetime, logging from urllib.parse import quote_plus from src.api.util import clean_link, sanitize_to_markdown from src.api.context import Context -from src.discord.queue import send_to_discord from src.discord.message import DiscordMessage, DiscordMessageMetadata from src.api import formatter diff --git a/extensions/base/rcgcdb.py b/extensions/base/rcgcdb.py index 9f68cd7..73869ce 100644 --- a/extensions/base/rcgcdb.py +++ b/extensions/base/rcgcdb.py @@ -2,7 +2,7 @@ import json from src.discord.message import DiscordMessage from src.api import formatter from src.api.context import Context -from src.api.util import embed_helper, compact_author, create_article_path, sanitize_to_markdown +from src.api.util import embed_helper, compact_author, sanitize_to_markdown @formatter.embed(event="generic") @@ -11,7 +11,7 @@ def embed_generic(ctx: Context, change: dict): embed_helper(ctx, embed, change) embed["title"] = ctx._("Unknown event `{event}`").format( event="{type}/{action}".format(type=change.get("type", ""), action=change.get("action", ""))) - embed["url"] = create_article_path("Special:RecentChanges") + embed["url"] = ctx.client.create_article_path("Special:RecentChanges") change_params = "[```json\n{params}\n```]({support})".format(params=json.dumps(change, indent=2), support=ctx.settings["support"]) if len(change_params) > 1000: diff --git a/src/api/client.py b/src/api/client.py index 4569043..aadea37 100644 --- a/src/api/client.py +++ b/src/api/client.py @@ -139,11 +139,13 @@ class Client: BadRequest: When params argument is of wrong type MediaWikiError: When MediaWiki returns an error """ - return self.__recent_changes.api_request(params, *json_path, timeout=timeout, allow_redirects=allow_redirects) + return self.__recent_changes.sync_api_request(params, *json_path, timeout=timeout, allow_redirects=allow_redirects) def get_formatters(self): return self._formatters def get_ipmapper(self) -> dict: - """Returns a dict mapping IPs with amount of their edits""" - return self.__recent_changes.map_ips + """Returns a dict mapping IPs with amount of their edits + + (for RcGcDw its empty dict since we don't support this)""" + return {} diff --git a/src/api/context.py b/src/api/context.py index 9d98c8e..ca9f2b6 100644 --- a/src/api/context.py +++ b/src/api/context.py @@ -35,7 +35,7 @@ class Context: self.comment_page = None self._ = language.gettext # Singular translations (ex. ctx._("Large goat")) self.gettext = language.gettext # In case you dislike _ or using "protected field" of ctx - self.ngettext = language.npgettext # Plural translations depending on amount (ex. ctx.ngettext("{} action", "{} actions", action_amount)) + self.ngettext = language.ngettext # Plural translations depending on amount (ex. ctx.ngettext("{} action", "{} actions", action_amount)) self.pgettext = language.pgettext # Translation with context (ex. ctx.pgettext("From mediawiki module", "Blocked {} user")) self.npgettext = language.npgettext # Plural translation with context (ex. ctx.npgettext("From mediawiki module", "Edited {} time", "Edited {} times", edit_amoint) self.settings = settings diff --git a/src/api/util.py b/src/api/util.py index 731087b..9422bf3 100644 --- a/src/api/util.py +++ b/src/api/util.py @@ -31,7 +31,7 @@ logger = logging.getLogger("src.api.util") def default_message(event: str, display: str, formatter_hooks: dict) -> Callable: """Returns a method of a formatter responsible for the event or None if such does not exist.""" - return formatter_hooks.get(display, {}).get(event, formatter_hooks.get("generic", formatter_hooks["no_formatter"])) + return formatter_hooks.get(display, {}).get(event, formatter_hooks.get("generic", formatter_hooks.get("no_formatter"))) def clean_link(link: str) -> str: @@ -76,6 +76,7 @@ def compact_summary(ctx: Context) -> str: return " *({})*".format(ctx.parsedcomment) return "" + def compact_author(ctx: Context, change: dict) -> (Optional[str], Optional[str]): """Returns link to the author and the author itself respecting the settings""" author, author_url = None, None @@ -104,40 +105,17 @@ def embed_helper(ctx: Context, message: DiscordMessage, change: dict, set_user=T author = None if "anon" in change: author_url = ctx.client.create_article_path("Special:Contributions/{user}".format(user=sanitize_to_url(change["user"]))) - ip_mapper = ctx.client.get_ipmapper() - logger.debug("current user: {} with cache of IPs: {}".format(change["user"], ip_mapper.keys())) - if change["user"] not in list(ip_mapper.keys()): - try: - contibs = ctx.client.make_api_request( - "?action=query&format=json&list=usercontribs&uclimit=max&ucuser={user}&ucstart={timestamp}&ucprop=".format( - user=sanitize_to_url(change["user"]), timestamp=change["timestamp"]), "query", - "usercontribs") - except (ServerError, MediaWikiError): - logger.warning("WARNING: Something went wrong when checking amount of contributions for given IP address") - if settings.get("hide_ips", False): - author = ctx._("Unregistered user") - else: - author = change["user"] + "(?)" - else: - ip_mapper[change["user"]] = len(contibs) - logger.debug("Current params user {} and state of map_ips {}".format(change["user"], ip_mapper)) - if settings.get("hide_ips", False): - author = ctx._("Unregistered user") - else: - author = "{author} ({contribs})".format(author=change["user"], contribs=len(contibs)) + # logger.debug("current user: {} with cache of IPs: {}".format(change["user"], ip_mapper.keys())) + if ctx.settings.get("hide_ips", False): + author = ctx._("Unregistered user") else: - logger.debug("Current params user {} and state of map_ips {}".format(change["user"], ip_mapper)) - if ctx.event in ("edit", "new"): - ip_mapper[change["user"]] += 1 - author = "{author} ({amount})".format( - author=change["user"] if settings.get("hide_ips", False) is False else ctx._("Unregistered user"), - amount=ip_mapper[change["user"]]) + author = change["user"] else: author_url = ctx.client.create_article_path("User:{}".format(sanitize_to_url(change["user"]))) author = change["user"] message.set_author(author, author_url) if set_edit_meta: - if settings["appearance"]["embed"]["show_footer"]: + if ctx.settings["appearance"]["embed"]["show_footer"]: message["timestamp"] = change["timestamp"] if "tags" in change and change["tags"]: tag_displayname = [] diff --git a/src/bot.py b/src/bot.py index b474949..0d288b9 100644 --- a/src/bot.py +++ b/src/bot.py @@ -2,12 +2,12 @@ import aiohttp import asyncio import logging.config import signal -import traceback +import sys import nest_asyncio import time from collections import defaultdict, namedtuple from typing import Generator - +import importlib from contextlib import asynccontextmanager from src.discord.queue import messagequeue from src.argparser import command_line_args @@ -37,6 +37,17 @@ main_tasks: dict = {} # Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests # 2. Easier to code + +def load_extensions(): + """Loads all of the extensions, can be a local import because all we need is them to register""" + try: + importlib.import_module(settings.get('extensions_dir', 'extensions'), 'extensions') + except ImportError: + logger.critical("No extensions module found. What's going on?") + logger.exception("Error:") + sys.exit(1) + + async def populate_wikis(): logger.info("Populating domain manager with wikis...") start = time.time() @@ -225,6 +236,7 @@ async def main_loop(): await db.setup_connection() await db.create_pubsub_interface(domains.webhook_update) logger.debug("Connection type: {}".format(db.connection_pool)) + load_extensions() await populate_wikis() # START LISTENER CONNECTION domains.run_all_domains() diff --git a/src/discord/message.py b/src/discord/message.py index 4283b3c..635c178 100644 --- a/src/discord/message.py +++ b/src/discord/message.py @@ -184,7 +184,7 @@ class StackedDiscordMessage(): if self.message_type == 0: message_structure["content"] = "\n".join([message.return_content() for message in self.message_list]) elif self.message_type == 1: - message_structure["embeds"] = [message["embeds"][0] for message in self.message_list] + message_structure["embeds"] = [message.embed for message in self.message_list] return json.dumps(message_structure) def filter(self, params: dict) -> list[tuple[int, DiscordMessage]]: diff --git a/src/discord/queue.py b/src/discord/queue.py index 2bd3012..bbaf930 100644 --- a/src/discord/queue.py +++ b/src/discord/queue.py @@ -34,7 +34,7 @@ if TYPE_CHECKING: rate_limit = 0 -logger = logging.getLogger("rcgcdw.discord.queue") +logger = logging.getLogger("rcgcdb.discord.queue") class QueueEntry: @@ -127,7 +127,7 @@ class MessageQueue: try: current_pack.add_message(message) except MessageTooBig: - yield current_pack, index-1 + yield current_pack, index-1, "POST" current_pack = StackedDiscordMessage(0 if message.message_type == "compact" else 1, message.wiki) # next messages current_pack.add_message(message) yield current_pack, index, "POST" @@ -152,7 +152,7 @@ class MessageQueue: self.global_rate_limit = True await asyncio.sleep(e.remaining / 1000) return - for queue_message in messages[max(index-len(msg.message_list), 0):index]: # mark messages as delivered + for queue_message in messages[max(index-len(msg.message_list), 0):max(index, 1)]: # mark messages as delivered queue_message.confirm_sent_status(webhook_url) if client_error is False: msg.webhook = webhook_url @@ -211,7 +211,7 @@ async def send_to_discord_webhook(message: [StackedDiscordMessage, DiscordMessag header = settings["header"] header['Content-Type'] = 'application/json' header['X-RateLimit-Precision'] = "millisecond" - async with aiohttp.ClientSession(headers=header, timeout=3.0) as session: + async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(total=6)) as session: if isinstance(message, StackedDiscordMessage): async with session.post(f"https://discord.com/api/webhooks/{webhook_path}?wait=true", data=repr(message)) as resp: try: diff --git a/src/domain.py b/src/domain.py index 3839bf2..0d4ce77 100644 --- a/src/domain.py +++ b/src/domain.py @@ -100,7 +100,8 @@ class Domain: logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!") continue await self.run_wiki_scan(wiki) - for wiki in self.wikis.values(): + while True: # Iterate until hitting return, we don't have to iterate using for since we are sending wiki to the end anyways + wiki: src.wiki.Wiki = next(iter(self.wikis.values())) if (wiki.statistics.last_checked_rc or 0) < settings.get("irc_overtime", 3600): await self.run_wiki_scan(wiki) else: diff --git a/src/misc.py b/src/misc.py index 7e208ce..bd14a4f 100644 --- a/src/misc.py +++ b/src/misc.py @@ -91,8 +91,7 @@ def create_article_path(article: str, WIKI_ARTICLE_PATH: str) -> str: return WIKI_ARTICLE_PATH.replace("$1", article) -def profile_field_name(name, embed, lang): - _ = langs[lang]["misc"].gettext +def profile_field_name(name, embed, _): profile_fields = {"profile-location": _("Location"), "profile-aboutme": _("About me"), "profile-link-google": _("Google link"), "profile-link-facebook": _("Facebook link"), "profile-link-twitter": _("Twitter link"), "profile-link-reddit": _("Reddit link"), diff --git a/src/statistics.py b/src/statistics.py index b86eb5f..731a1b8 100644 --- a/src/statistics.py +++ b/src/statistics.py @@ -8,11 +8,11 @@ from enum import Enum class LogType(Enum): - CONNECTION_ERROR: 1 - HTTP_ERROR: 2 - MEDIAWIKI_ERROR: 3 - VALUE_UPDATE: 4 - SCAN_REASON: 5 + CONNECTION_ERROR = 1 + HTTP_ERROR = 2 + MEDIAWIKI_ERROR = 3 + VALUE_UPDATE = 4 + SCAN_REASON = 5 queue_limit = settings.get("queue_limit", 30) diff --git a/src/wiki.py b/src/wiki.py index 0c4e27f..5a211fd 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -6,6 +6,7 @@ import time import re import logging, aiohttp import asyncio +import requests from functools import cache from api.util import default_message @@ -40,7 +41,7 @@ MESSAGE_LIMIT = settings.get("message_limit", 30) class Wiki: def __init__(self, script_url: str, rc_id: Optional[int], discussion_id: Optional[int]): self.script_url: str = script_url - self.session: aiohttp.ClientSession = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) + self.session: aiohttp.ClientSession = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(total=6)) self.statistics: Statistics = Statistics(rc_id, discussion_id) self.mw_messages: Optional[MWMessages] = None self.tags: dict[str, Optional[str]] = {} # Tag can be None if hidden @@ -51,6 +52,8 @@ class Wiki: self.message_history: list[StackedDiscordMessage] = list() self.namespaces: Optional[dict] = None self.recache_requested: bool = False + self.session_requests = requests.Session() + self.session_requests.headers.update(settings["header"]) @property def rc_id(self): @@ -193,8 +196,7 @@ class Wiki: """ # Making request try: - if isinstance(params, - str): # Todo Make it so there are some default arguments like warning/error format appended + if isinstance(params, str): request = await self.session.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=timeout, allow_redirects=allow_redirects) elif isinstance(params, OrderedDict): @@ -235,6 +237,47 @@ class Wiki: raise return request_json + def sync_api_request(self, params: Union[str, OrderedDict], *json_path: str, timeout: int = 10, + allow_redirects: bool = False) -> dict: + """Synchronous function based on api_request created for compatibility reasons with RcGcDw API""" + try: + if isinstance(params, str): + request = self.session_requests.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=10, allow_redirects=allow_redirects) + elif isinstance(params, OrderedDict): + request = self.session_requests.get(self.script_url + "api.php", params=params, timeout=10, allow_redirects=allow_redirects) + else: + raise BadRequest(params) + except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError) as exc: + logger.warning("Reached {error} error for request on link {url}".format(error=repr(exc), + url=self.client.WIKI_API_PATH + str(params))) + raise ServerError + if 499 < request.status_code < 600: + raise ServerError + elif request.status_code == 302: + logger.critical( + "Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or is giving us the false value. Please provide the real URL to the wiki, current URL redirects to {}".format( + request.url)) + elif 399 < request.status_code < 500: + logger.error("Request returned ClientError status code on {url}".format(url=request.url)) + self.statistics.update(Log(type=LogType.HTTP_ERROR, title="{} error".format(request.status_code), details=str(request.headers) + "\n" + str(request.url))) + raise ClientError(request) + else: + try: + request_json = self.parse_mw_request_info(request.json(), request.url) + for item in json_path: + request_json = request_json[item] + except ValueError: + logger.warning("ValueError when extracting JSON data on {url}".format(url=request.url)) + raise ServerError + except MediaWikiError: + logger.exception("MediaWiki error on request: {}".format(request.url)) + raise + except KeyError: + logger.exception("KeyError while iterating over json_path, full response: {}".format(request.json())) + raise + return request_json + + async def fetch_wiki(self, amount=10) -> dict: if self.mw_messages is None: params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges", @@ -308,11 +351,13 @@ class Wiki: if highest_id is None or change["rcid"] > highest_id: # make sure that the highest_rc is really highest rcid but do allow other entries with potentially lesser rcids come after without breaking the cycle highest_id = change["rcid"] for combination, webhooks in self.targets.items(): - message = await rc_processor(self, change, categorize_events, combination, webhooks) + message = await rc_processor(self, change, categorize_events.get(change.get("revid"), None), combination, webhooks) + if message is None: + break message.wiki = self - message_list.append(QueueEntry(message, webhooks)) + message_list.append(QueueEntry(message, webhooks, self)) messagequeue.add_messages(message_list) - return + return @cache @@ -345,7 +390,7 @@ def process_cachable(response: dict, wiki_object: Wiki) -> None: wiki_object.recache_requested = False -async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: namedtuple("Settings", ["lang", "display"]), webhooks: list) -> DiscordMessage: +async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: namedtuple("Settings", ["lang", "display"]), webhooks: list) -> Optional[DiscordMessage]: """This function takes more vital information, communicates with a formatter and constructs DiscordMessage with it. It creates DiscordMessageMetadata object, LinkParser and Context. Prepares a comment """ from src.misc import LinkParser @@ -357,7 +402,7 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ context.event = "suppressed" try: discord_message: Optional[DiscordMessage] = await asyncio.get_event_loop().run_in_executor( - None, functools.partial(default_message("suppressed", display_options.display, formatter_hooks), context, change)) + None, functools.partial(default_message("suppressed", context.message_type, formatter_hooks), context, change)) except: if settings.get("error_tolerance", 1) > 0: discord_message: Optional[DiscordMessage] = None # It's handled by send_to_discord, we still want other code to run @@ -368,13 +413,13 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ LinkParser.feed(change.get("parsedcomment", "")) parsed_comment = LinkParser.new_string else: - parsed_comment = _("~~hidden~~") + parsed_comment = context._("~~hidden~~") if not parsed_comment and context.message_type == "embed" and settings["appearance"].get("embed", {}).get( "show_no_description_provided", True): - parsed_comment = _("No description provided") + parsed_comment = context._("No description provided") context.set_parsedcomment(parsed_comment) if "userhidden" in change: - change["user"] = _("hidden") + change["user"] = context._("hidden") if change.get("ns", -1) in settings.get("ignored_namespaces", ()): return if change["type"] in ["edit", "new"]: @@ -393,7 +438,7 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ context.event = identification_string try: discord_message: Optional[DiscordMessage] = await asyncio.get_event_loop().run_in_executor(None, - functools.partial(default_message(identification_string, display_options.display, formatter_hooks), context, + functools.partial(default_message(identification_string, context.message_type, formatter_hooks), context, change)) except: if settings.get("error_tolerance", 1) > 0: @@ -416,8 +461,8 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ else: for revid in logparams.get("ids", []): wiki.delete_messages(dict(revid=revid)) - discord_message.finish_embed() - if discord_message: + if discord_message: # TODO How to react when none? (crash in formatter), probably bad handling atm + discord_message.finish_embed() discord_message.metadata = metadata return discord_message