RcGcDb/src/bot.py

159 lines
5.5 KiB
Python
Raw Normal View History

2020-07-09 22:24:23 +00:00
import logging.config
from src.config import settings
2020-07-09 23:58:25 +00:00
import sqlite3
2020-07-26 08:00:27 +00:00
import sys
import signal
2020-07-19 13:32:54 +00:00
from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info
2020-07-10 13:38:36 +00:00
import asyncio, aiohttp
from src.misc import get_paths
2020-07-10 20:07:33 +00:00
from src.exceptions import *
from src.database import db_cursor
2020-07-19 23:40:20 +00:00
from collections import defaultdict
2020-07-21 12:15:40 +00:00
from src.queue_handler import DBHandler
2020-07-23 19:12:07 +00:00
from src.discord import DiscordMessage
2020-07-22 11:43:18 +00:00
from src.msgqueue import messagequeue
2020-07-23 19:12:07 +00:00
import requests
2020-07-09 22:24:23 +00:00
# Bootstrap module state: configure logging from settings, then define the
# in-memory caches shared by the scanner loop below.
logging.config.dictConfig(settings["logging"])
logger = logging.getLogger("rcgcdb.bot")
logger.debug("Current settings: {settings}".format(settings=settings))

# Per-wiki runtime state kept between scan passes.
# NOTE(review): the old comment said "wiki_url: number of fail states", but the
# values stored here are Wiki objects — presumably fail counters live on the
# Wiki object itself; confirm against src.wiki.Wiki.
all_wikis: dict = {}  # wiki url -> Wiki
mw_msgs: dict = {}  # will have the type of id: tuple
2020-07-09 22:24:23 +00:00
2020-07-10 13:38:36 +00:00
# First populate the all_wikis list with every wiki
# Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
# 2. Easier to code
2020-07-09 22:24:23 +00:00
2020-07-19 23:40:20 +00:00
for wiki in db_cursor.execute('SELECT DISTINCT wiki FROM rcgcdw'):
	# Key by the wiki URL (column 0), not the whole fetched row: wiki_scanner()
	# looks entries up with the plain URL string (db_wiki[1] in all_wikis), so
	# row-keyed entries would never match and would also inflate
	# len(all_wikis), skewing the request cooldown in calculate_delay().
	all_wikis[wiki[0]] = Wiki()
2020-07-09 22:24:23 +00:00
2020-07-23 19:12:07 +00:00
2020-07-09 22:24:23 +00:00
# Start queueing logic
2020-07-19 23:40:20 +00:00
2020-07-11 15:54:08 +00:00
def calculate_delay() -> float:
	"""Return the delay in seconds to wait between wiki requests.

	Derived from the global request budget (``max_requests_per_minute``),
	but never drops the per-wiki cooldown below
	``minimal_cooldown_per_wiki_in_sec``.
	"""
	min_delay = 60 / settings["max_requests_per_minute"]
	if not all_wikis:
		# No wikis registered yet — avoid ZeroDivisionError below and just
		# fall back to the request-budget delay.
		return min_delay
	if (len(all_wikis) * min_delay) < settings["minimal_cooldown_per_wiki_in_sec"]:
		return settings["minimal_cooldown_per_wiki_in_sec"] / len(all_wikis)
	else:
		return min_delay
2020-07-19 23:40:20 +00:00
2020-07-21 12:15:40 +00:00
def generate_targets(wiki_url: str) -> defaultdict:
	"""Collect delivery targets for one wiki.

	Groups every webhook registered for *wiki_url* by its (lang, display)
	settings pair, so identical messages can be sent to all webhooks that
	share one formatting combination.
	"""
	targets = defaultdict(list)
	rows = db_cursor.execute('SELECT webhook, lang, display FROM rcgcdw WHERE wiki = ?', (wiki_url,))
	for webhook_url, lang, display in rows:
		targets[(lang, display)].append(webhook_url)
	return targets
2020-07-19 13:32:54 +00:00
async def wiki_scanner():
	"""Main scan loop: poll every wiki registered in the DB for recent
	changes and dispatch new events to the configured webhooks.

	Runs forever, sleeping ``calculate_delay()`` seconds between wikis.
	Cancellation (triggered by shutdown()) is re-raised so the task ends
	cleanly instead of being swallowed.
	"""
	try:
		while True:
			calc_delay = calculate_delay()
			fetch_all = db_cursor.execute('SELECT webhook, wiki, lang, display, wikiid, rcid, postid FROM rcgcdw GROUP BY wiki')
			for db_wiki in fetch_all.fetchall():
				# Row columns: 0 webhook, 1 wiki, 2 lang, 3 display,
				# 4 wikiid, 5 rcid, 6 postid
				logger.debug("Wiki {}".format(db_wiki[1]))
				extended = False
				if db_wiki[1] not in all_wikis:
					logger.debug("New wiki: {}".format(db_wiki[1]))
					# Index by position for consistency with the membership
					# test above — db_wiki["wiki"] only worked when the
					# cursor used a Row factory.
					all_wikis[db_wiki[1]] = Wiki()
				local_wiki = all_wikis[db_wiki[1]]  # set a reference to a wiki object from memory
				if local_wiki.mw_messages is None:
					extended = True  # first contact: also fetch MediaWiki messages
				async with aiohttp.ClientSession(headers=settings["header"],
						timeout=aiohttp.ClientTimeout(2.0)) as session:
					try:
						wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[1], session)
						await local_wiki.check_status(db_wiki[1], wiki_response.status)
					except (WikiServerError, WikiError):
						logger.exception("Exception when fetching the wiki")
						continue  # ignore this wiki if it throws errors
					try:
						recent_changes_resp = await wiki_response.json()
						if "error" in recent_changes_resp or "errors" in recent_changes_resp:
							error = recent_changes_resp.get("error", recent_changes_resp["errors"])
							if error["code"] == "readapidenied":
								await local_wiki.fail_add(db_wiki[1], 410)
								continue
							raise WikiError
						recent_changes = recent_changes_resp['query']['recentchanges']
						# Oldest first, so the rcid checkpoint advances in order.
						recent_changes.reverse()
					except aiohttp.ContentTypeError:
						logger.exception("Wiki seems to be resulting in non-json content.")
						await local_wiki.fail_add(db_wiki[1], 410)
						continue
					except Exception:
						# Was a bare `except:` — that also caught
						# asyncio.CancelledError (a BaseException since 3.8)
						# raised during the awaits above and could prevent a
						# clean shutdown.
						logger.exception("On loading json of response.")
						continue
				if extended:
					await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs)
				if db_wiki[5] is None:  # new wiki, just get the last rc to not spam the channel
					if len(recent_changes) > 0:
						DBHandler.add(db_wiki[1], recent_changes[-1]["rcid"])
					else:
						DBHandler.add(db_wiki[1], 0)
					DBHandler.update_db()
					continue
				categorize_events = {}
				targets = generate_targets(db_wiki[1])
				paths = get_paths(db_wiki[1], recent_changes_resp)
				for change in recent_changes:
					await process_cats(change, local_wiki, mw_msgs, categorize_events)
				for change in recent_changes:  # Yeah, second loop since the categories require to be all loaded up
					if change["rcid"] > db_wiki[5]:
						for target in targets.items():
							await essential_info(change, categorize_events, local_wiki, db_wiki, target, paths,
								recent_changes_resp)
				if recent_changes:
					# `change` is the newest entry from the loop above;
					# store it as the new rcid checkpoint.
					DBHandler.add(db_wiki[1], change["rcid"])
				DBHandler.update_db()
				# Per-wiki cooldown between requests (see calculate_delay).
				await asyncio.sleep(delay=calc_delay)
	except asyncio.CancelledError:
		raise
2020-07-21 12:15:40 +00:00
2020-07-19 13:32:54 +00:00
async def message_sender():
	"""Forever drain the shared message queue, (re)sending queued Discord
	messages; runs as its own task alongside wiki_scanner()."""
	while True:
		await messagequeue.resend_msgs()
2020-07-19 13:32:54 +00:00
2020-07-26 08:00:27 +00:00
def shutdown(loop, signal=None):
	"""Flush pending DB updates and tear the event loop down.

	Invoked from a POSIX signal handler registered in main_loop(), or
	directly on KeyboardInterrupt. The `signal` parameter deliberately
	mirrors the call-site keyword and shadows the stdlib module here.
	"""
	DBHandler.update_db()  # persist anything still queued before exiting
	# Cancel tasks *before* stopping the loop and log afterwards — the old
	# order (loop.stop() first, "has shut down" message before teardown)
	# left the cancellations undeliverable.
	for task in asyncio.all_tasks(loop):
		logger.debug("Killing task {}".format(task.get_name()))
		task.cancel()
	loop.stop()
	logger.info("Script has shut down due to signal {}.".format(signal))
	sys.exit(0)
2020-07-23 19:12:07 +00:00
def global_exception_handler(loop, context):
	"""Global exception handler for asyncio, lets us know when something crashes"""
	# `context["exception"]` is an Exception instance, not a str — the old
	# string concatenation ("..." + msg) raised TypeError in exactly the
	# situation this handler exists for. Lazy %s formatting handles both.
	msg = context.get("exception", context["message"])
	logger.error("Global exception handler: %s", msg)
2020-07-23 19:12:07 +00:00
2020-07-19 13:32:54 +00:00
async def main_loop():
	"""Entry coroutine: install signal handlers, then run the scanner and
	the message sender until cancelled or interrupted."""
	# get_running_loop() is the correct call inside a coroutine;
	# get_event_loop() here was deprecated and only worked by accident.
	loop = asyncio.get_running_loop()
	try:
		signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
		for s in signals:
			# Bind `s` as a default so each handler closes over its own signal.
			loop.add_signal_handler(
				s, lambda s=s: shutdown(loop, signal=s))
	except AttributeError:
		# SIGHUP does not exist on Windows; fall back to the
		# KeyboardInterrupt handling below.
		logger.info("Running on Windows huh? This complicates things")
		signals = (signal.SIGBREAK, signal.SIGTERM, signal.SIGINT)
	loop.set_exception_handler(global_exception_handler)
	try:
		task1 = asyncio.create_task(wiki_scanner())
		task2 = asyncio.create_task(message_sender())
		await task1
		await task2
	except KeyboardInterrupt:
		shutdown(loop)
2020-07-19 13:32:54 +00:00
2020-07-21 12:15:40 +00:00
2020-07-19 13:32:54 +00:00
# Program entry point: run the main coroutine until shutdown() stops the loop.
asyncio.run(main_loop())