From 49f12ed17227c5da0d1f635e0788f10b2c4d749c Mon Sep 17 00:00:00 2001 From: Frisk Date: Sun, 9 Oct 2022 14:10:08 +0200 Subject: [PATCH] Further fixes --- src/api/client.py | 25 ++++++++++---- src/domain.py | 76 +++++++++++++++++++++++++++++-------------- src/domain_manager.py | 6 ++-- src/irc_feed.py | 1 + src/mw_messages.py | 2 +- src/queue_handler.py | 2 +- src/wiki.py | 15 ++++++--- 7 files changed, 86 insertions(+), 41 deletions(-) diff --git a/src/api/client.py b/src/api/client.py index a0bae05..4569043 100644 --- a/src/api/client.py +++ b/src/api/client.py @@ -30,17 +30,17 @@ class Client: A client for interacting with RcGcDw when creating formatters or hooks. """ def __init__(self, hooks, wiki): - URLS = src.misc.get_paths(wiki.script_url,) self._formatters = hooks self.__recent_changes: Wiki = wiki - self.WIKI_API_PATH: str = src.misc.WIKI_API_PATH - self.WIKI_ARTICLE_PATH: str = src.misc.WIKI_ARTICLE_PATH - self.WIKI_SCRIPT_PATH: str = src.misc.WIKI_SCRIPT_PATH - self.WIKI_JUST_DOMAIN: str = src.misc.WIKI_JUST_DOMAIN + self.WIKI_API_PATH: Optional[str] = None + self.WIKI_ARTICLE_PATH: Optional[str] = None + self.WIKI_SCRIPT_PATH: str = wiki.script_url + self.WIKI_JUST_DOMAIN: Optional[str] = None self.content_parser = src.misc.ContentParser self.tags = self.__recent_changes.tags self.LinkParser: type(src.misc.LinkParser) = src.misc.LinkParser self.scheduler: sched.scheduler = sched.scheduler() + self._last_request: Optional[dict] = None #self.make_api_request: src.rc.wiki.__recent_changes.api_request = self.__recent_changes.api_request def schedule(self, function: Callable, *args: Any, every: Optional[float] = None, at: Optional[str] = None, @@ -77,12 +77,25 @@ class Client: def refresh_internal_data(self): """Refreshes internal storage data for wiki tags and MediaWiki messages.""" - self.__recent_changes.init_info() + self.__recent_changes.recache_requested = True def create_article_path(self, article: str) -> str: """Takes the string and creates an URL with it as the article name""" return self.WIKI_ARTICLE_PATH.replace("$1", article) + @property + def last_request(self): + return self._last_request + + @last_request.setter + def last_request(self, request: dict): + if not self.WIKI_ARTICLE_PATH: + urls = src.misc.get_paths(self.WIKI_SCRIPT_PATH, request) + self.WIKI_API_PATH = urls[0] + self.WIKI_ARTICLE_PATH = urls[2] + self.WIKI_JUST_DOMAIN = urls[3] + self._last_request = request + @property def namespaces(self) -> dict: """Return a dict of namespaces, if None return empty dict""" diff --git a/src/domain.py b/src/domain.py index 67989a6..3839bf2 100644 --- a/src/domain.py +++ b/src/domain.py @@ -4,15 +4,16 @@ import logging from collections import OrderedDict from src.config import settings from typing import TYPE_CHECKING, Optional +from src.argparser import command_line_args from functools import cache # from src.discussions import Discussions from statistics import Log, LogType +import src.wiki_ratelimiter logger = logging.getLogger("rcgcdb.domain") if TYPE_CHECKING: import src.wiki - import src.wiki_ratelimiter import src.irc_feed @@ -23,7 +24,7 @@ class Domain: self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict() self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter() self.irc: Optional[src.irc_feed.AioIRCCat] = None - self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None + # self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None def __iter__(self): return iter(self.wikis) @@ -65,16 +66,17 @@ class Domain: def remove_wiki(self, script_url: str): self.wikis.pop(script_url) - def add_wiki(self, wiki: src.wiki.Wiki, first=False): + async def add_wiki(self, wiki: src.wiki.Wiki, first=False): """Adds a wiki to domain list. :parameter wiki - Wiki object :parameter first (optional) - bool indicating if wikis should be added as first or last in the ordered dict""" wiki.set_domain(self) if wiki.script_url in self.wikis: - self.wikis[wiki.script_url].update_targets() + await self.wikis[wiki.script_url].update_targets() else: self.wikis[wiki.script_url] = wiki + await wiki.update_targets() if first: self.wikis.move_to_end(wiki.script_url, last=False) @@ -86,27 +88,42 @@ class Domain: self.rate_limiter.timeout_add(1.0) async def irc_scheduler(self): - while True: - try: - wiki_url = self.irc.updated_wikis.pop() - except KeyError: - break - try: - wiki = self.wikis[wiki_url] - except KeyError: - logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!") - continue - await self.run_wiki_scan(wiki) - for wiki in self.wikis.values(): - if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600): + try: + while True: + try: + wiki_url = self.irc.updated_wikis.pop() + except KeyError: + break + try: + wiki = self.wikis[wiki_url] + except KeyError: + logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!") + continue await self.run_wiki_scan(wiki) + for wiki in self.wikis.values(): + if (wiki.statistics.last_checked_rc or 0) < settings.get("irc_overtime", 3600): + await self.run_wiki_scan(wiki) + else: + return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while + except: + if command_line_args.debug: + logger.exception("IRC task for domain {} failed!".format(self.name)) else: - return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while + # TODO Write + pass + async def regular_scheduler(self): - while True: - await asyncio.sleep(self.calculate_sleep_time(len(self))) # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis - await self.run_wiki_scan(next(iter(self.wikis.values()))) + try: + while True: + await asyncio.sleep(self.calculate_sleep_time(len(self))) # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis + await self.run_wiki_scan(next(iter(self.wikis.values()))) + except: + if command_line_args.debug: + logger.exception("IRC task for domain {} failed!".format(self.name)) + else: + # TODO Write + pass @cache def calculate_sleep_time(self, queue_length: int): @@ -115,8 +132,17 @@ class Domain: async def run_wiki_check(self): """Runs appropriate scheduler depending on existence of IRC""" if self.irc: - while True: - await self.irc_scheduler() - await asyncio.sleep(10.0) + try: + while True: + await self.irc_scheduler() + await asyncio.sleep(10.0) + except asyncio.exceptions.CancelledError: + for wiki in self.wikis.values(): + await wiki.session.close() + await self.irc.connection.disconnect() else: - await self.regular_scheduler() + try: + await self.regular_scheduler() + except asyncio.exceptions.CancelledError: + for wiki in self.wikis.values(): + await wiki.session.close() diff --git a/src/domain_manager.py b/src/domain_manager.py index 7043e06..391dc08 100644 --- a/src/domain_manager.py +++ b/src/domain_manager.py @@ -45,10 +45,10 @@ class DomainManager: :parameter wiki - Wiki object to be added""" wiki_domain = self.get_domain(wiki.script_url) try: - self.domains[wiki_domain].add_wiki(wiki) + await self.domains[wiki_domain].add_wiki(wiki) except KeyError: new_domain = await self.new_domain(wiki_domain) - new_domain.add_wiki(wiki) + await new_domain.add_wiki(wiki) def remove_domain(self, domain): domain.destoy() @@ -78,7 +78,7 @@ class DomainManager: domain_object = Domain(name) for irc_server in settings["irc_servers"].keys(): if name in settings["irc_servers"][irc_server]["domains"]: - domain_object.set_irc(AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], domain_object)) + domain_object.set_irc(AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], domain_object, None, None)) break # Allow only one IRC for a domain self.domains[name] = domain_object return self.domains[name] diff --git a/src/irc_feed.py b/src/irc_feed.py index dfb5663..93b1291 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -14,6 +14,7 @@ logger = logging.getLogger("rcgcdw.irc_feed") if TYPE_CHECKING: from src.domain import Domain + class AioIRCCat(irc.client_aio.AioSimpleIRCClient): def connect(self, *args, **kwargs): super().connect(*args, **kwargs) diff --git a/src/mw_messages.py b/src/mw_messages.py index 89defb5..77a7795 100644 --- a/src/mw_messages.py +++ b/src/mw_messages.py @@ -13,7 +13,7 @@ class MWMessages: message_sets[self.mw_id] = mc_messages def __getitem__(self, item): - message_sets[self.mw_id].get(item, "============") + return message_sets[self.mw_id].get(item, "============") def __iter__(self): for key, item in message_sets[self.mw_id].items(): diff --git a/src/queue_handler.py b/src/queue_handler.py index 66c7ac8..6a9b1c8 100644 --- a/src/queue_handler.py +++ b/src/queue_handler.py @@ -19,7 +19,7 @@ class UpdateDB: def clear_list(self): self.updated.clear() - async def fetch_rows(self, SQLstatement: str, args: Union[str, int]) -> collections.abc.AsyncIterable: + async def fetch_rows(self, SQLstatement: str, *args: Union[str, int]) -> collections.abc.AsyncIterable: async with db.pool().acquire() as connection: async with connection.transaction(): async for row in connection.cursor(SQLstatement, *args): diff --git a/src/wiki.py b/src/wiki.py index 18b4d98..0c4e27f 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -36,10 +36,11 @@ if TYPE_CHECKING: MESSAGE_LIMIT = settings.get("message_limit", 30) + class Wiki: def __init__(self, script_url: str, rc_id: Optional[int], discussion_id: Optional[int]): self.script_url: str = script_url - self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) + self.session: aiohttp.ClientSession = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) self.statistics: Statistics = Statistics(rc_id, discussion_id) self.mw_messages: Optional[MWMessages] = None self.tags: dict[str, Optional[str]] = {} # Tag can be None if hidden @@ -48,8 +49,8 @@ class Wiki: self.targets: Optional[defaultdict[Settings, list[str]]] = None self.client: Client = Client(formatter_hooks, self) self.message_history: list[StackedDiscordMessage] = list() - - self.update_targets() + self.namespaces: Optional[dict] = None + self.recache_requested: bool = False @property def rc_id(self): @@ -265,12 +266,13 @@ class Wiki: request = await self.fetch_wiki(amount=amount) self.client.last_request = request except WikiServerError as e: - # If WikiServerError comes up 2 times in recent 2 minutes, this will reraise the exception, otherwise waits 2 seconds + # If WikiServerError comes up 2 times in recent 2 minutes, this will reraise the exception, otherwise waits 2 seconds and retries self.statistics.update(Log(type=LogType.CONNECTION_ERROR, title=str(e.exception))) if self.statistics.recent_connection_errors() > 1: raise await asyncio.sleep(2.0) - if not self.mw_messages: + continue + if not self.mw_messages or self.recache_requested: process_cachable(request, self) try: recent_changes = request["query"]["recentchanges"] @@ -312,6 +314,7 @@ class Wiki: messagequeue.add_messages(message_list) return + @cache def prepare_settings(display_mode: int) -> dict: """Prepares dict of RcGcDw compatible settings based on a template and display mode of given call""" @@ -338,6 +341,8 @@ def process_cachable(response: dict, wiki_object: Wiki) -> None: wiki_object.tags[tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text() except KeyError: wiki_object.tags[tag["name"]] = None + wiki_object.namespaces = response["query"]["namespaces"] + wiki_object.recache_requested = False async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: namedtuple("Settings", ["lang", "display"]), webhooks: list) -> DiscordMessage: