Route wiki scans through Wiki.scan() and add a shared message-set registry.

* src/domain.py: run_wiki_scan() hands the domain rate limiter to Wiki.scan();
  the scheduler delay is computed by Domain.calculate_sleep_time(), a cached
  staticmethod so the cache is keyed on the queue length only and never holds
  Domain instances.
* src/mw_messages.py: new MWMessages class; equal message sets share one id in
  the module-level message_sets dict.
* src/wiki.py: MWMessages is imported from src.mw_messages like the other
  src.* imports; Wiki.remove() and first_fetch_done are added; fetch_wiki()
  drops its rate limiter handling; Wiki.scan() is a coroutine because it
  awaits fetch_wiki().

NOTE(review): indentation and blank context lines were restored from the hunk
line counts - confirm against the target files (tabs vs. spaces) or apply with
whitespace tolerance.

diff --git a/src/domain.py b/src/domain.py
index 20a4f6a..2d51979 100644
--- a/src/domain.py
+++ b/src/domain.py
@@ -4,6 +4,7 @@ import logging
 from collections import OrderedDict
 from src.config import settings
 from typing import TYPE_CHECKING, Optional
+from functools import cache
 
 logger = logging.getLogger("rcgcdb.domain")
 if TYPE_CHECKING:
@@ -56,7 +57,7 @@ class Domain:
 
     async def run_wiki_scan(self, wiki: src.wiki.Wiki):
         await self.rate_limiter.timeout_wait()
-        await wiki.scan()
+        await wiki.scan(self.rate_limiter)
         self.wikis.move_to_end(wiki.script_url)
         self.rate_limiter.timeout_add(1.0)
 
@@ -80,9 +81,14 @@ class Domain:
 
     async def regular_scheduler(self):
         while True:
-            await asyncio.sleep(max((-25*len(self))+150, 1)) # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
+            await asyncio.sleep(self.calculate_sleep_time(len(self))) # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
             await self.run_wiki_scan(self.wikis.pop())
 
+    @staticmethod
+    @cache
+    def calculate_sleep_time(queue_length: int) -> int:
+        """Return the scheduler delay in seconds: 150 minus 25 per queued wiki, never below 1."""
+        return max((-25 * queue_length) + 150, 1)
 
     async def run_wiki_check(self):
         if self.irc:
diff --git a/src/mw_messages.py b/src/mw_messages.py
new file mode 100644
index 0000000..d285ac0
--- /dev/null
+++ b/src/mw_messages.py
@@ -0,0 +1,15 @@
+# Why overthink it? Module executes once, I can have one global
+message_sets: dict[int, dict] = dict()
+
+
+class MWMessages:
+    """Reference (mw_id) to a message set kept in message_sets; equal sets share one id."""
+    def __init__(self, mc_messages: dict):
+        for key, message_set in message_sets.items():
+            if message_set == mc_messages:
+                self.mw_id = key
+                break
+        else:
+            self.mw_id = len(message_sets)
+            message_sets[self.mw_id] = mc_messages
+
diff --git a/src/wiki.py b/src/wiki.py
index 96c3399..7edb440 100644
--- a/src/wiki.py
+++ b/src/wiki.py
@@ -1,6 +1,8 @@
 from dataclasses import dataclass
 import re
 import logging, aiohttp
+
+from src.mw_messages import MWMessages
 from src.exceptions import *
 from src.database import db
 from src.formatters.rc import embed_formatter, compact_formatter
@@ -16,25 +18,37 @@ from src.config import settings
 # noinspection PyPackageRequirements
 from bs4 import BeautifulSoup
 from collections import OrderedDict
-from typing import Union
+from typing import Union, Optional
 
 logger = logging.getLogger("rcgcdb.wiki")
 
+
 class Wiki:
     def __init__(self, script_url: str, rc_id: int, discussion_id: int):
         self.script_url: str = script_url
         self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0))
         self.statistics: Statistics = Statistics(rc_id, discussion_id)
         self.fail_times: int = 0
-        self.mw_messages: MWMessagesHashmap = MWMessagesHashmap()
+        self.mw_messages: Optional[MWMessages] = None # TODO Need to initialize MWMessages() somewhere
+        self.first_fetch_done: bool = False
 
     @property
     def rc_id(self):
         return self.statistics.last_action
 
-    def downtime_controller(self, down):
+    @staticmethod
+    async def remove(wiki_url, reason):
+        logger.info("Removing a wiki {}".format(wiki_url))
+        await src.discord.wiki_removal(wiki_url, reason)
+        await src.discord.wiki_removal_monitor(wiki_url, reason)
+        async with db.pool().acquire() as connection:
+            result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', wiki_url)
+        logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, wiki_url))
+
+    def downtime_controller(self, down): # TODO Finish this one
         if down:
             self.fail_times += 1
+
         else:
             self.fail_times -= 1
 
@@ -118,12 +132,11 @@ class Wiki:
             except KeyError:
                 logger.exception("KeyError while iterating over json_path, full response: {}".format(request.json()))
                 raise
+        self.first_fetch_done = True
         return request_json
 
-    async def fetch_wiki(self, extended, script_path, session: aiohttp.ClientSession, ratelimiter: RateLimiter,
-                         amount=20) -> aiohttp.ClientResponse:
-        await ratelimiter.timeout_wait()
-        if extended:
+    async def fetch_wiki(self, amount=20) -> dict:
+        if self.first_fetch_done is False:
             params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                   "meta": "allmessages|siteinfo",
                                   "utf8": 1, "tglimit": "max", "tgprop": "displayname",
@@ -139,12 +152,23 @@ class Wiki:
                                   "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"})
         try:
             response = await self.api_request(params=params)
-            ratelimiter.timeout_add(1.0)
         except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
             logger.error("A connection error occurred while requesting {}".format(params))
             raise WikiServerError
         return response
 
+    async def scan(self, rate_limiter=None):
+        """Fetch the wiki and report the outcome to downtime_controller(); rate_limiter is accepted for Domain.run_wiki_scan() and not used yet."""
+        try:
+            request = await self.fetch_wiki()
+        except WikiServerError:
+            self.downtime_controller(True)
+            return # TODO Add a log entry?
+        else:
+            self.downtime_controller(False)
+        if not self.mw_messages:
+            mw_messages = request.get("query")
+
 @dataclass
 class Wiki_old:
     mw_messages: int = None