RcGcDb/src/bot.py

import logging.config
from src.config import settings
import sqlite3
import sys
import signal
from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info
import asyncio, aiohttp
from src.misc import get_paths
from src.exceptions import *
from src.database import db_cursor
from collections import defaultdict
from src.queue_handler import DBHandler
from src.discord import DiscordMessage
from src.msgqueue import messagequeue
import requests

logging.config.dictConfig(settings["logging"])
logger = logging.getLogger("rcgcdb.bot")
logger.debug("Current settings: {settings}".format(settings=settings))

# Log fail states with the structure wiki_url: number of fail states
all_wikis: dict = {}
mw_msgs: dict = {}  # maps a message set id to a tuple of MediaWiki messages

# First populate the all_wikis dict with every wiki
# Reasons for this: 1. we need the number of wikis to calculate the cooldown between requests
# 2. it is easier to code

for wiki in db_cursor.execute('SELECT DISTINCT wiki FROM rcgcdw'):
	all_wikis[wiki["wiki"]] = Wiki()  # key by the wiki URL; keying by the whole row would never match the string lookups below


# Start queueing logic
def calculate_delay() -> float:
	"""Calculate the delay between fetching each wiki to avoid rate limits"""
	min_delay = 60 / settings["max_requests_per_minute"]
	if (len(all_wikis) * min_delay) < settings["minimal_cooldown_per_wiki_in_sec"]:
		return settings["minimal_cooldown_per_wiki_in_sec"] / len(all_wikis)
	else:
		return min_delay
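# Illustrative arithmetic (hypothetical settings, not the real config): with max_requests_per_minute = 30,
# min_delay is 60 / 30 = 2 s; with 10 wikis and minimal_cooldown_per_wiki_in_sec = 60, 10 * 2 = 20 < 60,
# so the function returns 60 / 10 = 6 s between wikis instead of the 2 s floor.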


def generate_targets(wiki_url: str) -> defaultdict:
	"""To minimize the number of requests, we generate a list of language/display mode combinations to create messages for.
	This way we can send the same message to multiple webhooks which have the same wiki and settings without doing another
	request to the wiki just to duplicate the message.
	"""
	combinations = defaultdict(list)
	for webhook in db_cursor.execute('SELECT webhook, lang, display FROM rcgcdw WHERE wiki = ?', (wiki_url,)):
		combination = (webhook["lang"], webhook["display"])
		combinations[combination].append(webhook["webhook"])
	return combinations
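# For illustration only (hypothetical webhook values), the returned mapping looks like:
# {("en", 0): ["<webhook_a>", "<webhook_b>"], ("de", 1): ["<webhook_c>"]}
# so each message is rendered once per (lang, display) pair and fanned out to all matching webhooks.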


async def wiki_scanner():
	"""Scan every wiki from the database in a loop, fetch its recent changes and queue Discord messages for them."""
	try:
		while True:
			calc_delay = calculate_delay()
			fetch_all = db_cursor.execute('SELECT webhook, wiki, lang, display, wikiid, rcid, postid FROM rcgcdw GROUP BY wiki')
			for db_wiki in fetch_all.fetchall():
				logger.debug("Wiki {}".format(db_wiki["wiki"]))
				extended = False
				if db_wiki["wiki"] not in all_wikis:
					logger.debug("New wiki: {}".format(db_wiki["wiki"]))
					all_wikis[db_wiki["wiki"]] = Wiki()
				local_wiki = all_wikis[db_wiki["wiki"]]  # set a reference to a wiki object from memory
				if local_wiki.mw_messages is None:
					extended = True  # also fetch MediaWiki messages on the first run for this wiki
				async with aiohttp.ClientSession(headers=settings["header"],
						timeout=aiohttp.ClientTimeout(2.0)) as session:
					try:
						wiki_response = await local_wiki.fetch_wiki(extended, db_wiki["wiki"], session)
						await local_wiki.check_status(db_wiki["wiki"], wiki_response.status)
					except (WikiServerError, WikiError):
						logger.exception("Exception when fetching the wiki")
						continue  # ignore this wiki if it throws errors
					try:
						recent_changes_resp = await wiki_response.json()
						if "error" in recent_changes_resp or "errors" in recent_changes_resp:
							error = recent_changes_resp.get("error", recent_changes_resp["errors"])
							if error["code"] == "readapidenied":
								await local_wiki.fail_add(db_wiki["wiki"], 410)
								continue
							raise WikiError
						recent_changes = recent_changes_resp['query']['recentchanges']
						recent_changes.reverse()
					except aiohttp.ContentTypeError:
						logger.exception("Wiki seems to be returning non-JSON content.")
						await local_wiki.fail_add(db_wiki["wiki"], 410)
						continue
					except Exception:
						logger.exception("Could not load the JSON of the response.")
						continue
					if extended:
						await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs)
					if db_wiki["rcid"] is None:  # new wiki, just store the latest rcid to not spam the channel with the backlog
						if len(recent_changes) > 0:
							DBHandler.add(db_wiki["wiki"], recent_changes[-1]["rcid"])
						else:
							DBHandler.add(db_wiki["wiki"], 0)
						DBHandler.update_db()
						continue
					categorize_events = {}
					targets = generate_targets(db_wiki["wiki"])
					paths = get_paths(db_wiki["wiki"], recent_changes_resp)
					for change in recent_changes:
						await process_cats(change, local_wiki, mw_msgs, categorize_events)
					for change in recent_changes:  # second loop, since all categories have to be loaded before processing
						if change["rcid"] > db_wiki["rcid"]:
							for target in targets.items():
								await essential_info(change, categorize_events, local_wiki, db_wiki, target, paths,
										recent_changes_resp)
					if recent_changes:
						DBHandler.add(db_wiki["wiki"], change["rcid"])
				DBHandler.update_db()
				await asyncio.sleep(delay=calc_delay)
	except asyncio.CancelledError:
		raise


async def message_sender():
	"""Keep processing the Discord message queue, including resending messages that previously failed."""
	while True:
		await messagequeue.resend_msgs()


def shutdown(loop, signal=None):
	DBHandler.update_db()  # flush any queued database updates before stopping
	loop.stop()
	logger.info("Script has shut down due to signal {}.".format(signal))
	for task in asyncio.all_tasks(loop):
		logger.debug("Killing task {}".format(task.get_name()))
		task.cancel()
	sys.exit(0)
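# Note: the tasks above are cancelled but never awaited; sys.exit(0) raises SystemExit right after,
# and the DBHandler.update_db() call at the top is what flushes queued updates before the process dies.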


def global_exception_handler(loop, context):
	"""Global exception handler for asyncio, lets us know when something crashes"""
	msg = context.get("exception", context["message"])
	logger.error("Global exception handler: {}".format(msg))  # msg may be an exception object, so format instead of concatenating
	requests.post("https://discord.com/api/webhooks/" + settings["monitoring_webhook"], data={"content": "test"})
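# settings["monitoring_webhook"] is assumed to hold the "<id>/<token>" part of a Discord webhook URL;
# the "test" payload looks like a placeholder, a real alert would presumably post the error message itself.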


async def main_loop():
	loop = asyncio.get_event_loop()
	try:
		signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
		for s in signals:
			loop.add_signal_handler(
				s, lambda s=s: shutdown(loop, signal=s))
	except AttributeError:
		# SIGHUP does not exist on Windows (and add_signal_handler is unsupported there),
		# so no signal handlers get registered on that platform
		logger.info("Running on Windows huh? This complicates things")
		signals = (signal.SIGBREAK, signal.SIGTERM, signal.SIGINT)
	loop.set_exception_handler(global_exception_handler)
	try:
		task1 = asyncio.create_task(wiki_scanner())
		task2 = asyncio.create_task(message_sender())
		await task1
		await task2
	except KeyboardInterrupt:
		shutdown(loop)


asyncio.run(main_loop())