diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2755daf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/src/__pycache__ \ No newline at end of file diff --git a/src/bot.py b/src/bot.py index 5db453d..1bd2e79 100644 --- a/src/bot.py +++ b/src/bot.py @@ -1,6 +1,8 @@ import logging.config from src.config import settings import sqlite3 +import sys +import signal from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info import asyncio, aiohttp from src.misc import get_paths @@ -49,58 +51,64 @@ def generate_targets(wiki_url: str) -> defaultdict: async def wiki_scanner(): - while True: - calc_delay = calculate_delay() - fetch_all = db_cursor.execute('SELECT * FROM rcgcdw GROUP BY wiki') - for db_wiki in fetch_all.fetchall(): - logger.debug("Wiki {}".format(db_wiki[3])) - extended = False - if db_wiki[3] not in all_wikis: - logger.debug("New wiki: {}".format(db_wiki[3])) - all_wikis[db_wiki[3]] = Wiki() - local_wiki = all_wikis[db_wiki[3]] # set a reference to a wiki object from memory - if local_wiki.mw_messages is None: - extended = True - logger.debug("test") - try: - wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[3]) - await local_wiki.check_status(db_wiki[3], wiki_response.status) - except (WikiServerError, WikiError): - logger.exception("Exeption when fetching the wiki") - continue # ignore this wiki if it throws errors - try: - recent_changes_resp = await wiki_response.json(encoding="UTF-8") - if "error" in recent_changes_resp or "errors" in recent_changes_resp: - # TODO Remove on some errors (example "code": "readapidenied") - raise WikiError - recent_changes = recent_changes_resp['query']['recentchanges'] - recent_changes.reverse() - except: - logger.exception("On loading json of response.") - continue - if extended: - await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs) - if db_wiki[6] is None: # new wiki, just get the last rc to not spam the channel - if len(recent_changes) > 0: - DBHandler.add(db_wiki[3], recent_changes[-1]["rcid"]) + try: + while True: + calc_delay = calculate_delay() + fetch_all = db_cursor.execute('SELECT * FROM rcgcdw GROUP BY wiki') + for db_wiki in fetch_all.fetchall(): + logger.debug("Wiki {}".format(db_wiki[3])) + extended = False + if db_wiki[3] not in all_wikis: + logger.debug("New wiki: {}".format(db_wiki[3])) + all_wikis[db_wiki[3]] = Wiki() + local_wiki = all_wikis[db_wiki[3]] # set a reference to a wiki object from memory + if local_wiki.mw_messages is None: + extended = True + logger.debug("test") + try: + wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[3]) + await local_wiki.check_status(db_wiki[3], wiki_response.status) + except (WikiServerError, WikiError): + logger.exception("Exeption when fetching the wiki") + continue # ignore this wiki if it throws errors + try: + recent_changes_resp = await wiki_response.json(encoding="UTF-8") + if "error" in recent_changes_resp or "errors" in recent_changes_resp: + # TODO Remove on some errors (example "code": "readapidenied") + raise WikiError + recent_changes = recent_changes_resp['query']['recentchanges'] + recent_changes.reverse() + except asyncio.exceptions.TimeoutError: + logger.debug("Timeout on fetching {}.".format(db_wiki[3])) continue - else: - DBHandler.add(db_wiki[3], 0) + except: + logger.exception("On loading json of response.") continue - categorize_events = {} - targets = generate_targets(db_wiki[3]) - paths = get_paths(db_wiki[3], recent_changes_resp) - for change in recent_changes: - await process_cats(change, local_wiki, mw_msgs, categorize_events) - for change in recent_changes: # Yeah, second loop since the categories require to be all loaded up - if change["rcid"] > db_wiki[6]: - for target in targets.items(): - await essential_info(change, categorize_events, local_wiki, db_wiki, target, paths, - recent_changes_resp) - if recent_changes: - DBHandler.add(db_wiki[3], change["rcid"]) - DBHandler.update_db() - await asyncio.sleep(delay=calc_delay) + if extended: + await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs) + if db_wiki[6] is None: # new wiki, just get the last rc to not spam the channel + if len(recent_changes) > 0: + DBHandler.add(db_wiki[3], recent_changes[-1]["rcid"]) + continue + else: + DBHandler.add(db_wiki[3], 0) + continue + categorize_events = {} + targets = generate_targets(db_wiki[3]) + paths = get_paths(db_wiki[3], recent_changes_resp) + for change in recent_changes: + await process_cats(change, local_wiki, mw_msgs, categorize_events) + for change in recent_changes: # Yeah, second loop since the categories require to be all loaded up + if change["rcid"] > db_wiki[6]: + for target in targets.items(): + await essential_info(change, categorize_events, local_wiki, db_wiki, target, paths, + recent_changes_resp) + if recent_changes: + DBHandler.add(db_wiki[3], change["rcid"]) + DBHandler.update_db() + await asyncio.sleep(delay=calc_delay) + except asyncio.CancelledError: + return async def message_sender(): @@ -108,17 +116,26 @@ async def message_sender(): await messagequeue.resend_msgs() +def shutdown(loop, signal=None): + DBHandler.update_db() + loop.stop() + logger.info("Script has shut down due to signal {}.".format(signal)) + for task in asyncio.all_tasks(loop): + task.cancel() + sys.exit(0) + def global_exception_handler(loop, context): """Global exception handler for asyncio, lets us know when something crashes""" msg = context.get("exception", context["message"]) - logger.error(msg) - #requests.post("https://discord.com/api/webhooks/" + settings["monitoring_webhook"], - # data=DiscordMessage("embed", "exception", None, content= - # "[RcGcDb] Exception detected, function might have shut down! Exception: {}".format(msg), wiki=None)) + logger.error("Global exception handler:" + msg) async def main_loop(): loop = asyncio.get_event_loop() + signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT) + for s in signals: + loop.add_signal_handler( + s, lambda s=s: shutdown(loop, signal=s)) loop.set_exception_handler(global_exception_handler) task1 = asyncio.create_task(wiki_scanner()) task2 = asyncio.create_task(message_sender()) diff --git a/src/discord.py b/src/discord.py index ed5fed9..27edf98 100644 --- a/src/discord.py +++ b/src/discord.py @@ -15,7 +15,7 @@ logger = logging.getLogger("rcgcdb.discord") # User facing webhook functions -def wiki_removal(wiki_id, status): +async def wiki_removal(wiki_id, status): for observer in db_cursor.execute('SELECT * FROM rcgcdw WHERE wiki = ?', (wiki_id,)): def _(string: str) -> str: """Our own translation string to make it compatible with async""" @@ -23,7 +23,13 @@ def wiki_removal(wiki_id, status): reasons = {410: _("wiki deletion"), 404: _("wiki deletion"), 401: _("wiki becoming inaccessible"), 402: _("wiki becoming inaccessible"), 403: _("wiki becoming inaccessible")} reason = reasons.get(status, _("unknown error")) - send_to_discord_webhook(DiscordMessage("compact", "webhook/remove", webhook_url=[observer[2]], content=_("The webhook for {} has been removed due to {}.".format(wiki_id, reason)), wiki=None)) + await send_to_discord_webhook(DiscordMessage("compact", "webhook/remove", webhook_url=[observer[2]], content=_("The webhook for {} has been removed due to {}.".format(wiki_id, reason)), wiki=None)) + header = settings["header"] + header['Content-Type'] = 'application/json' + header['X-Audit-Log-Reason'] = "Wiki becoming unavailable" + async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(5.0)) as session: + await session.delete("https://discord.com/api/webhooks/"+observer[2]) + async def webhook_removal_monitor(webhook_url: list, reason: int): await send_to_discord_webhook_monitoring(DiscordMessage("compact", "webhook/remove", None, content="The webhook {} has been removed due to {}.".format("https://discord.com/api/webhooks/" + webhook_url[0], reason), wiki=None), @@ -97,32 +103,32 @@ class DiscordMessage: # Monitoring webhook functions -def wiki_removal_monitor(wiki_id, status): - pass +async def wiki_removal_monitor(wiki_id, status): + await send_to_discord_webhook_monitoring(DiscordMessage("compact", "webhook/remove", content="Removing {} because {}.".format(wiki_id, status), webhook_url=[None], wiki=None)) -async def send_to_discord_webhook_monitoring(data: DiscordMessage, session: aiohttp.ClientSession): +async def send_to_discord_webhook_monitoring(data: DiscordMessage): header = settings["header"] header['Content-Type'] = 'application/json' - try: - result = await session.post("https://discord.com/api/webhooks/"+settings["monitoring_webhook"], data=repr(data), - headers=header) - except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError): - logger.exception("Could not send the message to Discord") - return 3 - - -async def send_to_discord_webhook(data: DiscordMessage, session: aiohttp.ClientSession): - header = settings["header"] - header['Content-Type'] = 'application/json' - for webhook in data.webhook_url: + async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(5.0)) as session: try: - result = await session.post("https://discord.com/api/webhooks/"+webhook, data=repr(data), - headers=header) + result = await session.post("https://discord.com/api/webhooks/"+settings["monitoring_webhook"], data=repr(data)) except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError): logger.exception("Could not send the message to Discord") return 3 - return await handle_discord_http(result.status, repr(data), await result.text(), data) + + +async def send_to_discord_webhook(data: DiscordMessage): + header = settings["header"] + header['Content-Type'] = 'application/json' + async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(5.0)) as session: + for webhook in data.webhook_url: + try: + result = await session.post("https://discord.com/api/webhooks/"+webhook, data=repr(data)) + except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError): + logger.exception("Could not send the message to Discord") + return 3 + return await handle_discord_http(result.status, repr(data), await result.text(), data) async def handle_discord_http(code, formatted_embed, result, dmsg): diff --git a/src/request_tracking.py b/src/request_tracking.py new file mode 100644 index 0000000..13843ad --- /dev/null +++ b/src/request_tracking.py @@ -0,0 +1,23 @@ +import aiohttp +import logging +from src.config import settings + +logger = logging.getLogger("rcgcdb.request_tracking") + +class WikiRequestTracking: + def __init__(self): + self.current_timeout = 0 + + async def add_timeout(self, time: float): + self.current_timeout += time + + def is_fandom(self, url): + if any(x in url for x in ("fandom.com", "gamepedia.com", "wikia.org")): + return True + return False + +async def on_request_start(session, trace_config_ctx, params): + if + +trace_config = aiohttp.TraceConfig() +trace_config.on_request_start.append(on_request_start) \ No newline at end of file diff --git a/src/wiki.py b/src/wiki.py index 0f16ca6..f222d3e 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -40,7 +40,7 @@ class Wiki: "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user", "rclimit": amount, "rctype": "edit|new|log|external", "siprop": "namespaces|general"} try: - async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(5.0)) as session: + async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) as session: response = await session.get(url_path, params=params) except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError): logger.exception("A connection error occurred while requesting {}".format(url_path)) @@ -73,8 +73,8 @@ class Wiki: raise WikiServerError async def remove(self, wiki_id, reason): - src.discord.wiki_removal(wiki_id, reason) - src.discord.wiki_removal_monitor(wiki_id, reason) + await src.discord.wiki_removal(wiki_id, reason) + await src.discord.wiki_removal_monitor(wiki_id, reason) db_cursor.execute("DELETE FROM rcgcdw WHERE wiki = ?", (wiki_id,)) logger.warning("{} rows affected by DELETE FROM rcgcdw WHERE wiki = {}".format(db_cursor.rowcount, wiki_id)) db_connection.commit()