From 04f45c33e9b7c966e8dda52c0a9cd63db8d12cf5 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sun, 30 May 2021 19:15:37 +0200 Subject: [PATCH] continued work --- src/bot.py | 4 +-- src/domain.py | 58 +++++++++++++++++++++++++++++++++++++------ src/domain_manager.py | 1 - src/irc_feed.py | 27 ++++++++++---------- 4 files changed, 66 insertions(+), 24 deletions(-) diff --git a/src/bot.py b/src/bot.py index 9e1fa6e..e8b9036 100644 --- a/src/bot.py +++ b/src/bot.py @@ -41,7 +41,7 @@ main_tasks: dict = {} # Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests # 2. Easier to code -async def populate_allwikis(): +async def populate_wikis(): async with db.pool().acquire() as connection: async with connection.transaction(): async for db_wiki in connection.cursor('SELECT DISTINCT wiki, rcid, postid FROM rcgcdw'): @@ -554,7 +554,7 @@ async def main_loop(): nest_asyncio.apply(loop) await db.setup_connection() logger.debug("Connection type: {}".format(db.connection)) - await populate_allwikis() + await populate_wikis() try: signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT) for s in signals: diff --git a/src/domain.py b/src/domain.py index 3d2a157..580aa24 100644 --- a/src/domain.py +++ b/src/domain.py @@ -1,22 +1,24 @@ from __future__ import annotations import asyncio +import logging from collections import OrderedDict +from src.config import settings from typing import TYPE_CHECKING, Optional - +logger = logging.getLogger("rcgcdb.domain") if TYPE_CHECKING: import src.wiki import src.wiki_ratelimiter - import irc.client_aio + import src.irc_feed class Domain: def __init__(self, name: str): self.name = name # This should be always in format of topname.extension for example fandom.com - self.task: asyncio.Task = self.create_task() + self.task: Optional[asyncio.Task] = None self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict() self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter() - self.irc = None + self.irc: Optional[src.irc_feed.AioIRCCat] = None def __iter__(self): return iter(self.wikis) @@ -24,9 +26,18 @@ class Domain: def __getitem__(self, item): return - def set_irc(self, irc_client: irc.client_aio.AioSimpleIRCClient): + def __len__(self): + return len(self.wikis) + + def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]: + return self.wikis.get(item, default) + + def set_irc(self, irc_client: src.irc_feed.AioIRCCat): self.irc = irc_client + def run_domain(self): + self.task = asyncio.create_task(self.run_wiki_check()) + def add_wiki(self, wiki: src.wiki.Wiki, first=False): """Adds a wiki to domain list. @@ -36,8 +47,39 @@ class Domain: if first: self.wikis.move_to_end(wiki.script_url, last=False) - def create_task(self) -> asyncio.Task: - return asyncio.create_task(self.run_wiki_check()) + async def run_wiki_scan(self, wiki: src.wiki.Wiki): + await self.rate_limiter.timeout_wait() + await wiki.scan() + self.wikis.move_to_end(wiki.script_url) + self.rate_limiter.timeout_add(1.0) + + async def irc_scheduler(self): + while 1: + try: + wiki_url = self.irc.updated_wikis.pop() + except KeyError: + break + try: + wiki = self.wikis[wiki_url] + except KeyError: + logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!") + continue + await self.run_wiki_scan(wiki) + for wiki in self.wikis.values(): + if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600): + await self.run_wiki_scan(wiki) + else: + return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while + + async def regular_scheduler(self): + while 1: + additional_time = max((-25*len(self))+150, 0) + + async def run_wiki_check(self): - raise NotImplementedError + if self.irc: + while: + await self.irc_scheduler() + else: + await self.regular_scheduler() diff --git a/src/domain_manager.py b/src/domain_manager.py index dfbb0c2..9ac0999 100644 --- a/src/domain_manager.py +++ b/src/domain_manager.py @@ -31,7 +31,6 @@ class DomainManager: return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:]) async def new_domain(self, name: str) -> Domain: - irc = None domain_object = Domain(name) for irc_server in settings["irc_servers"].keys(): if name in settings["irc_servers"][irc_server]["domains"]: diff --git a/src/irc_feed.py b/src/irc_feed.py index 80205a3..7229b5c 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -1,8 +1,12 @@ from __future__ import annotations + +import asyncio +import types + import irc.client_aio import json import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Callable from urllib.parse import urlparse, quote logger = logging.getLogger("rcgcdw.irc_feed") @@ -15,11 +19,11 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): super().connect(*args, **kwargs) self.connection_details = (args, kwargs) - def __init__(self, targets: dict[str, str], domain_object: Domain): + def __init__(self, targets: dict[str, str], domain_object: Domain, rc_callback: Callable, discussion_callback: Callable): irc.client_aio.SimpleIRCClient.__init__(self) self.targets = targets - self.updated = set() # Storage for edited wikis - self.updated_discussions = set() + self.updated_wikis: set[str] = set() + self.discussion_callback = discussion_callback self.domain = domain_object self.connection.buffer_class.errors = "replace" # Ignore encoding errors self.connection_details = None @@ -50,12 +54,10 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): # print(message) url = urlparse(message) full_url = "https://"+url.netloc + recognize_langs(url.path) - try: - if self.domain[full_url].rc_id != -1: - self.updated.add(full_url) - logger.debug("New website appended to the list! {}".format(full_url)) - except KeyError: - pass + wiki = self.domain.get_wiki(full_url) + if wiki and wiki.rc_id != -1: + self.updated_wikis.add(full_url) + logger.debug("New website appended to the list! {}".format(full_url)) def parse_fandom_discussion(self, message: str): @@ -67,9 +69,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): if post.get('action', 'unknown') != "deleted": # ignore deletion events url = urlparse(post.get('url')) full_url ="https://"+ url.netloc + recognize_langs(url.path) - if full_url in self.domain: # POSSIBLE MEMORY LEAK AS WE DON'T HAVE A WAY TO CHECK IF WIKI IS LOOKING FOR DISCUSSIONS OR NOT - self.updated_discussions.add("https://"+full_url) - logger.debug("New website appended to the list (discussions)! {}".format(full_url)) + if full_url in self.domain: + self.discussion_callback(full_url) def recognize_langs(path):