# RcGcDb/src/domain.py

from __future__ import annotations

import asyncio
import logging
import sys
import time
import traceback
from collections import OrderedDict
from functools import cache
from typing import TYPE_CHECKING, Optional

import aiohttp

# Project-local imports keep their original relative order so import-time side effects are unchanged.
from src.discord.message import DiscordMessage
from src.config import settings
from src.argparser import command_line_args
# from src.discussions import Discussions
from src.statistics import Log, LogType

logger = logging.getLogger("rcgcdb.domain")

if TYPE_CHECKING:
    # Needed only for annotations; guarded so these modules are not imported at runtime.
    import src.wiki
    import src.irc_feed
class Domain:
    """Group of wikis that share one domain and are polled by a single asyncio task.

    Wikis live in an ``OrderedDict`` keyed by their ``script_url``. Every scan moves the scanned wiki to the
    end of that dict, so the first entry is always the one that has waited the longest for a check.
    """

    def __init__(self, name: str):
        """:parameter name - domain name used for logging and as the asyncio task name"""
        self.name = name  # This should be always in format of topname.extension for example fandom.com
        self.task: Optional[asyncio.Task] = None
        self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
        self.irc: Optional[src.irc_feed.AioIRCCat] = None
        self.failures = 0  # Number of scheduler exceptions reported to monitoring so far
        # self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None

    def __iter__(self):
        """Iterate over the keys (script URLs) of the wikis in this domain"""
        return iter(self.wikis)

    def __str__(self) -> str:
        return (f"<Domain name='{self.name}' task='{self.task}' wikis='{self.wikis}' "
                f"irc='{self.irc.connection.connected if self.irc else False}' failures={self.failures} "
                f"calculated_delay={self.calculate_sleep_time(len(self)) if not self.irc else 'handled by IRC scheduler'}>")

    def __repr__(self):
        return self.__str__()

    def __getitem__(self, item):
        """Return the wiki stored under the key `item`, consistently with the keys yielded by __iter__.

        :raises KeyError - when there is no wiki under that key"""
        return self.wikis[item]

    def __len__(self):
        """Number of wikis in this domain"""
        return len(self.wikis)

    def destroy(self):
        """Destroy the domain do all of the tasks that should make sure there is no leftovers before being collected by GC"""
        if self.irc:
            logger.debug("Leaving IRC due to destroy() for domain {}".format(self.name))
            self.irc.connection.die("Leaving")
        # if self.discussions_handler:
        #     self.discussions_handler.close()
        if self.task:
            self.task.cancel()

    def get_wiki(self, item: str, default=None) -> Optional[src.wiki.Wiki]:
        """Return the wiki stored under the key `item` or `default` when there is none"""
        return self.wikis.get(item, default)

    def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
        """Sets IRC"""
        self.irc = irc_client

    def stop_task(self):
        """Cancels the task, does nothing when no task has been started yet"""
        if self.task:
            self.task.cancel()  # Be aware that cancelling the task may take time

    def run_domain(self):
        """Starts asyncio task for domain"""
        # NOTE(review): a task that finished without being cancelled still counts as existing here,
        # so such domain is never restarted - confirm this is intended.
        if not self.task or self.task.cancelled():
            self.task = asyncio.create_task(self.run_wiki_check(), name=self.name)
        else:
            logger.error(f"Tried to start a task for domain {self.name} however the task already exists!")

    def remove_wiki(self, script_url: str):
        """Removes a wiki from the domain list.

        :parameter script_url - key of the wiki to remove
        :raises KeyError - when there is no wiki under that key"""
        self.wikis.pop(script_url)

    async def add_wiki(self, wiki: src.wiki.Wiki, first=False):
        """Adds a wiki to domain list.

        If a wiki with the same script_url is already present, the stored object is kept and only its targets are updated.

        :parameter wiki - Wiki object
        :parameter first (optional) - bool indicating if wikis should be added as first or last in the ordered dict"""
        wiki.set_domain(self)
        if wiki.script_url in self.wikis:
            await self.wikis[wiki.script_url].update_targets()
        else:
            self.wikis[wiki.script_url] = wiki
            await wiki.update_targets()
            if first:
                self.wikis.move_to_end(wiki.script_url, last=False)
        logger.debug(f"Added new wiki {wiki.script_url} to domain {self.name}")

    async def run_wiki_scan(self, wiki: src.wiki.Wiki, reason: Optional[str] = None):
        """Scans a single wiki, records why it has been scanned and moves the wiki to the end of the queue.

        :parameter wiki - Wiki object to scan
        :parameter reason (optional) - text stored in the wiki statistics as the reason of the scan"""
        await wiki.scan()
        wiki.statistics.update(Log(type=LogType.SCAN_REASON, title=str(reason)))
        self.wikis.move_to_end(wiki.script_url)

    async def irc_scheduler(self):
        """Single pass of the scheduler for domains with IRC feed.

        First scans every wiki reported by IRC, then scans wikis from the front of the queue for as long as they
        have not been checked for more than the "irc_overtime" setting (3600 by default).

        :raises asyncio.exceptions.CancelledError - when more than 2 failures have been recorded outside of debug mode"""
        try:
            while True:
                try:
                    wiki_url = self.irc.updated_wikis.pop()
                except KeyError:  # Nothing (more) has been reported by IRC
                    break
                try:
                    wiki = self.wikis[wiki_url]
                except KeyError:
                    logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
                    continue
                await self.run_wiki_scan(wiki, "IRC feed event")
            # Iterate until hitting return, we don't have to iterate using for since we are sending wiki to the end anyways
            # Looping on self.wikis makes a domain without wikis simply idle instead of failing with StopIteration
            while self.wikis:
                wiki = next(iter(self.wikis.values()))
                if (int(time.time()) - (wiki.statistics.last_checked_rc or 0)) > settings.get("irc_overtime", 3600):
                    await self.run_wiki_scan(wiki, "IRC backup check")
                else:
                    return  # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
        except Exception as e:
            if command_line_args.debug:
                logger.exception("IRC scheduler task for domain {} failed!".format(self.name))
            else:
                self.failures += 1
                traceback.print_exc()
                await self.send_exception_to_monitoring(e)
                if self.failures > 2:
                    raise asyncio.exceptions.CancelledError

    async def regular_scheduler(self):
        """Scheduler for domains without IRC feed, sleeps and then scans the wiki at the front of the queue in a loop.

        NOTE(review): the loop is inside of the try block, so the first exception ends this coroutine - confirm this is intended.

        :raises asyncio.exceptions.CancelledError - when more than 2 failures have been recorded outside of debug mode"""
        try:
            while True:
                await asyncio.sleep(self.calculate_sleep_time(len(self)))  # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
                if not self.wikis:
                    continue  # Nothing to scan, keep idling instead of failing with StopIteration
                await self.run_wiki_scan(next(iter(self.wikis.values())), "regular check")
        except Exception as e:
            if command_line_args.debug:
                logger.exception("Regular scheduler task for domain {} failed!".format(self.name))
            else:
                # Count the failure first so the reported "Failure count" includes it, same as in irc_scheduler
                self.failures += 1
                await self.send_exception_to_monitoring(e)
                if self.failures > 2:
                    raise asyncio.exceptions.CancelledError

    # Deliberately not wrapped in functools.cache - a cache on a method keeps every Domain (self) alive forever
    def calculate_sleep_time(self, queue_length: int):
        """Returns the delay between checks: 150 lowered by 25 for every wiki in the queue, but never less than 1.

        :parameter queue_length - number of wikis in the domain"""
        return max((-25 * queue_length) + 150, 1)

    async def run_wiki_check(self):
        """Runs appropriate scheduler depending on existence of IRC"""
        if self.irc:
            try:
                while True:
                    await self.irc_scheduler()
                    await asyncio.sleep(10.0)
            except asyncio.exceptions.CancelledError:
                # Cancelled (or given up after repeated failures), release the resources
                for wiki in self.wikis.values():
                    await wiki.session.close()
                self.irc.connection.disconnect()
        else:
            try:
                await self.regular_scheduler()
            except asyncio.exceptions.CancelledError:
                for wiki in self.wikis.values():
                    await wiki.session.close()

    async def send_exception_to_monitoring(self, ex: Exception):
        """Sends an exception summary with the current failure count to the monitoring Discord webhook.

        This is best effort - connection problems and timeouts are logged and not raised.

        :parameter ex - exception to report"""
        discord_message = DiscordMessage("embed", "generic", [""])
        discord_message["title"] = "Domain scheduler exception for {} (recovered)".format(self.name)
        # The (type, value) form is accepted both before and after the Python 3.10 signature change
        discord_message["content"] = "".join(traceback.format_exception_only(type(ex), ex))[0:1995]
        discord_message.add_field("Failure count", str(self.failures))
        discord_message.finish_embed_message()
        # Copy the headers so the shared settings are not modified
        header = {**settings["header"], 'Content-Type': 'application/json', 'X-RateLimit-Precision': "millisecond"}
        try:
            async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(total=6)) as session:
                async with session.post("https://discord.com/api/webhooks/{}".format(settings["monitoring_webhook"]),
                                        data=repr(discord_message)) as resp:
                    pass
        except (aiohttp.ClientError, asyncio.TimeoutError):
            logger.exception("Couldn't communicate with Discord as a result of Server Error when trying to signal domain task issue!")