Added code

This commit is contained in:
Frisk 2020-07-26 10:00:27 +02:00
parent c04982f78d
commit 7541e776db
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
5 changed files with 124 additions and 77 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/src/__pycache__

View file

@ -1,6 +1,8 @@
import logging.config import logging.config
from src.config import settings from src.config import settings
import sqlite3 import sqlite3
import sys
import signal
from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info
import asyncio, aiohttp import asyncio, aiohttp
from src.misc import get_paths from src.misc import get_paths
@ -49,58 +51,64 @@ def generate_targets(wiki_url: str) -> defaultdict:
async def wiki_scanner(): async def wiki_scanner():
while True: try:
calc_delay = calculate_delay() while True:
fetch_all = db_cursor.execute('SELECT * FROM rcgcdw GROUP BY wiki') calc_delay = calculate_delay()
for db_wiki in fetch_all.fetchall(): fetch_all = db_cursor.execute('SELECT * FROM rcgcdw GROUP BY wiki')
logger.debug("Wiki {}".format(db_wiki[3])) for db_wiki in fetch_all.fetchall():
extended = False logger.debug("Wiki {}".format(db_wiki[3]))
if db_wiki[3] not in all_wikis: extended = False
logger.debug("New wiki: {}".format(db_wiki[3])) if db_wiki[3] not in all_wikis:
all_wikis[db_wiki[3]] = Wiki() logger.debug("New wiki: {}".format(db_wiki[3]))
local_wiki = all_wikis[db_wiki[3]] # set a reference to a wiki object from memory all_wikis[db_wiki[3]] = Wiki()
if local_wiki.mw_messages is None: local_wiki = all_wikis[db_wiki[3]] # set a reference to a wiki object from memory
extended = True if local_wiki.mw_messages is None:
logger.debug("test") extended = True
try: logger.debug("test")
wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[3]) try:
await local_wiki.check_status(db_wiki[3], wiki_response.status) wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[3])
except (WikiServerError, WikiError): await local_wiki.check_status(db_wiki[3], wiki_response.status)
logger.exception("Exeption when fetching the wiki") except (WikiServerError, WikiError):
continue # ignore this wiki if it throws errors logger.exception("Exeption when fetching the wiki")
try: continue # ignore this wiki if it throws errors
recent_changes_resp = await wiki_response.json(encoding="UTF-8") try:
if "error" in recent_changes_resp or "errors" in recent_changes_resp: recent_changes_resp = await wiki_response.json(encoding="UTF-8")
# TODO Remove on some errors (example "code": "readapidenied") if "error" in recent_changes_resp or "errors" in recent_changes_resp:
raise WikiError # TODO Remove on some errors (example "code": "readapidenied")
recent_changes = recent_changes_resp['query']['recentchanges'] raise WikiError
recent_changes.reverse() recent_changes = recent_changes_resp['query']['recentchanges']
except: recent_changes.reverse()
logger.exception("On loading json of response.") except asyncio.exceptions.TimeoutError:
continue logger.debug("Timeout on fetching {}.".format(db_wiki[3]))
if extended:
await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs)
if db_wiki[6] is None: # new wiki, just get the last rc to not spam the channel
if len(recent_changes) > 0:
DBHandler.add(db_wiki[3], recent_changes[-1]["rcid"])
continue continue
else: except:
DBHandler.add(db_wiki[3], 0) logger.exception("On loading json of response.")
continue continue
categorize_events = {} if extended:
targets = generate_targets(db_wiki[3]) await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs)
paths = get_paths(db_wiki[3], recent_changes_resp) if db_wiki[6] is None: # new wiki, just get the last rc to not spam the channel
for change in recent_changes: if len(recent_changes) > 0:
await process_cats(change, local_wiki, mw_msgs, categorize_events) DBHandler.add(db_wiki[3], recent_changes[-1]["rcid"])
for change in recent_changes: # Yeah, second loop since the categories require to be all loaded up continue
if change["rcid"] > db_wiki[6]: else:
for target in targets.items(): DBHandler.add(db_wiki[3], 0)
await essential_info(change, categorize_events, local_wiki, db_wiki, target, paths, continue
recent_changes_resp) categorize_events = {}
if recent_changes: targets = generate_targets(db_wiki[3])
DBHandler.add(db_wiki[3], change["rcid"]) paths = get_paths(db_wiki[3], recent_changes_resp)
DBHandler.update_db() for change in recent_changes:
await asyncio.sleep(delay=calc_delay) await process_cats(change, local_wiki, mw_msgs, categorize_events)
for change in recent_changes: # Yeah, second loop since the categories require to be all loaded up
if change["rcid"] > db_wiki[6]:
for target in targets.items():
await essential_info(change, categorize_events, local_wiki, db_wiki, target, paths,
recent_changes_resp)
if recent_changes:
DBHandler.add(db_wiki[3], change["rcid"])
DBHandler.update_db()
await asyncio.sleep(delay=calc_delay)
except asyncio.CancelledError:
return
async def message_sender(): async def message_sender():
@ -108,17 +116,26 @@ async def message_sender():
await messagequeue.resend_msgs() await messagequeue.resend_msgs()
def shutdown(loop, signal=None):
DBHandler.update_db()
loop.stop()
logger.info("Script has shut down due to signal {}.".format(signal))
for task in asyncio.all_tasks(loop):
task.cancel()
sys.exit(0)
def global_exception_handler(loop, context): def global_exception_handler(loop, context):
"""Global exception handler for asyncio, lets us know when something crashes""" """Global exception handler for asyncio, lets us know when something crashes"""
msg = context.get("exception", context["message"]) msg = context.get("exception", context["message"])
logger.error(msg) logger.error("Global exception handler:" + msg)
#requests.post("https://discord.com/api/webhooks/" + settings["monitoring_webhook"],
# data=DiscordMessage("embed", "exception", None, content=
# "[RcGcDb] Exception detected, function might have shut down! Exception: {}".format(msg), wiki=None))
async def main_loop(): async def main_loop():
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
for s in signals:
loop.add_signal_handler(
s, lambda s=s: shutdown(loop, signal=s))
loop.set_exception_handler(global_exception_handler) loop.set_exception_handler(global_exception_handler)
task1 = asyncio.create_task(wiki_scanner()) task1 = asyncio.create_task(wiki_scanner())
task2 = asyncio.create_task(message_sender()) task2 = asyncio.create_task(message_sender())

View file

@ -15,7 +15,7 @@ logger = logging.getLogger("rcgcdb.discord")
# User facing webhook functions # User facing webhook functions
def wiki_removal(wiki_id, status): async def wiki_removal(wiki_id, status):
for observer in db_cursor.execute('SELECT * FROM rcgcdw WHERE wiki = ?', (wiki_id,)): for observer in db_cursor.execute('SELECT * FROM rcgcdw WHERE wiki = ?', (wiki_id,)):
def _(string: str) -> str: def _(string: str) -> str:
"""Our own translation string to make it compatible with async""" """Our own translation string to make it compatible with async"""
@ -23,7 +23,13 @@ def wiki_removal(wiki_id, status):
reasons = {410: _("wiki deletion"), 404: _("wiki deletion"), 401: _("wiki becoming inaccessible"), reasons = {410: _("wiki deletion"), 404: _("wiki deletion"), 401: _("wiki becoming inaccessible"),
402: _("wiki becoming inaccessible"), 403: _("wiki becoming inaccessible")} 402: _("wiki becoming inaccessible"), 403: _("wiki becoming inaccessible")}
reason = reasons.get(status, _("unknown error")) reason = reasons.get(status, _("unknown error"))
send_to_discord_webhook(DiscordMessage("compact", "webhook/remove", webhook_url=[observer[2]], content=_("The webhook for {} has been removed due to {}.".format(wiki_id, reason)), wiki=None)) await send_to_discord_webhook(DiscordMessage("compact", "webhook/remove", webhook_url=[observer[2]], content=_("The webhook for {} has been removed due to {}.".format(wiki_id, reason)), wiki=None))
header = settings["header"]
header['Content-Type'] = 'application/json'
header['X-Audit-Log-Reason'] = "Wiki becoming unavailable"
async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(5.0)) as session:
await session.delete("https://discord.com/api/webhooks/"+observer[2])
async def webhook_removal_monitor(webhook_url: list, reason: int): async def webhook_removal_monitor(webhook_url: list, reason: int):
await send_to_discord_webhook_monitoring(DiscordMessage("compact", "webhook/remove", None, content="The webhook {} has been removed due to {}.".format("https://discord.com/api/webhooks/" + webhook_url[0], reason), wiki=None), await send_to_discord_webhook_monitoring(DiscordMessage("compact", "webhook/remove", None, content="The webhook {} has been removed due to {}.".format("https://discord.com/api/webhooks/" + webhook_url[0], reason), wiki=None),
@ -97,32 +103,32 @@ class DiscordMessage:
# Monitoring webhook functions # Monitoring webhook functions
def wiki_removal_monitor(wiki_id, status): async def wiki_removal_monitor(wiki_id, status):
pass await send_to_discord_webhook_monitoring(DiscordMessage("compact", "webhook/remove", content="Removing {} because {}.".format(wiki_id, status), webhook_url=[None], wiki=None))
async def send_to_discord_webhook_monitoring(data: DiscordMessage, session: aiohttp.ClientSession): async def send_to_discord_webhook_monitoring(data: DiscordMessage):
header = settings["header"] header = settings["header"]
header['Content-Type'] = 'application/json' header['Content-Type'] = 'application/json'
try: async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(5.0)) as session:
result = await session.post("https://discord.com/api/webhooks/"+settings["monitoring_webhook"], data=repr(data),
headers=header)
except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError):
logger.exception("Could not send the message to Discord")
return 3
async def send_to_discord_webhook(data: DiscordMessage, session: aiohttp.ClientSession):
header = settings["header"]
header['Content-Type'] = 'application/json'
for webhook in data.webhook_url:
try: try:
result = await session.post("https://discord.com/api/webhooks/"+webhook, data=repr(data), result = await session.post("https://discord.com/api/webhooks/"+settings["monitoring_webhook"], data=repr(data))
headers=header)
except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError): except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError):
logger.exception("Could not send the message to Discord") logger.exception("Could not send the message to Discord")
return 3 return 3
return await handle_discord_http(result.status, repr(data), await result.text(), data)
async def send_to_discord_webhook(data: DiscordMessage):
header = settings["header"]
header['Content-Type'] = 'application/json'
async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(5.0)) as session:
for webhook in data.webhook_url:
try:
result = await session.post("https://discord.com/api/webhooks/"+webhook, data=repr(data))
except (aiohttp.ClientConnectionError, aiohttp.ServerConnectionError):
logger.exception("Could not send the message to Discord")
return 3
return await handle_discord_http(result.status, repr(data), await result.text(), data)
async def handle_discord_http(code, formatted_embed, result, dmsg): async def handle_discord_http(code, formatted_embed, result, dmsg):

23
src/request_tracking.py Normal file
View file

@ -0,0 +1,23 @@
import aiohttp
import logging
from src.config import settings
logger = logging.getLogger("rcgcdb.request_tracking")
class WikiRequestTracking:
def __init__(self):
self.current_timeout = 0
async def add_timeout(self, time: float):
self.current_timeout += time
def is_fandom(self, url):
if any(x in url for x in ("fandom.com", "gamepedia.com", "wikia.org")):
return True
return False
async def on_request_start(session, trace_config_ctx, params):
if
trace_config = aiohttp.TraceConfig()
trace_config.on_request_start.append(on_request_start)

View file

@ -40,7 +40,7 @@ class Wiki:
"rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user", "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
"rclimit": amount, "rctype": "edit|new|log|external", "siprop": "namespaces|general"} "rclimit": amount, "rctype": "edit|new|log|external", "siprop": "namespaces|general"}
try: try:
async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(5.0)) as session: async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) as session:
response = await session.get(url_path, params=params) response = await session.get(url_path, params=params)
except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError): except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError):
logger.exception("A connection error occurred while requesting {}".format(url_path)) logger.exception("A connection error occurred while requesting {}".format(url_path))
@ -73,8 +73,8 @@ class Wiki:
raise WikiServerError raise WikiServerError
async def remove(self, wiki_id, reason): async def remove(self, wiki_id, reason):
src.discord.wiki_removal(wiki_id, reason) await src.discord.wiki_removal(wiki_id, reason)
src.discord.wiki_removal_monitor(wiki_id, reason) await src.discord.wiki_removal_monitor(wiki_id, reason)
db_cursor.execute("DELETE FROM rcgcdw WHERE wiki = ?", (wiki_id,)) db_cursor.execute("DELETE FROM rcgcdw WHERE wiki = ?", (wiki_id,))
logger.warning("{} rows affected by DELETE FROM rcgcdw WHERE wiki = {}".format(db_cursor.rowcount, wiki_id)) logger.warning("{} rows affected by DELETE FROM rcgcdw WHERE wiki = {}".format(db_cursor.rowcount, wiki_id))
db_connection.commit() db_connection.commit()