RcGcDb/src/domain.py

102 lines
3.6 KiB
Python
Raw Normal View History

2021-05-28 17:16:14 +00:00
from __future__ import annotations
import asyncio
2021-05-30 17:15:37 +00:00
import logging
2021-05-30 13:31:51 +00:00
from collections import OrderedDict
2021-05-30 17:15:37 +00:00
from src.config import settings
2021-05-28 17:16:14 +00:00
from typing import TYPE_CHECKING, Optional
2021-06-22 19:42:32 +00:00
from functools import cache
2021-05-30 17:15:37 +00:00
logger = logging.getLogger("rcgcdb.domain")
2021-05-28 17:16:14 +00:00
if TYPE_CHECKING:
import src.wiki
import src.wiki_ratelimiter
2021-05-30 17:15:37 +00:00
import src.irc_feed
2021-05-28 17:16:14 +00:00
class Domain:
    """A group of wikis that share one domain and are scanned by a single asyncio task.

    Wikis are kept in an ``OrderedDict`` keyed by their ``script_url``. After every scan the
    scanned wiki is moved to the end of the dict, so the wikis at the front are the ones that
    have gone the longest without being scanned.
    """

    def __init__(self, name: str):
        """Create an empty domain.

        :parameter name - domain name, expected in the format topname.extension (for example fandom.com)
        """
        # The module-level imports of src.wiki_ratelimiter are guarded by TYPE_CHECKING and are
        # therefore not bound at runtime; import the class here so the constructor can use it.
        from src.wiki_ratelimiter import RateLimiter

        self.name = name  # This should be always in format of topname.extension for example fandom.com
        self.task: Optional[asyncio.Task] = None  # Task running run_wiki_check, created by run_domain
        self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()  # script_url -> Wiki
        self.rate_limiter: src.wiki_ratelimiter.RateLimiter = RateLimiter()
        self.irc: Optional[src.irc_feed.AioIRCCat] = None  # Set through set_irc; None selects the regular scheduler

    def __iter__(self):
        """Iterate over the script URLs of the wikis in their current scan order."""
        return iter(self.wikis)

    def __getitem__(self, item):
        """Return the wiki stored under the script URL ``item``.

        :raises KeyError - when no wiki with that script URL belongs to this domain
        """
        return self.wikis[item]

    def __len__(self):
        """Return the number of wikis in this domain."""
        return len(self.wikis)

    def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]:
        """Return the wiki stored under the script URL ``item`` or ``default`` when it is missing."""
        return self.wikis.get(item, default)

    def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
        """Attach an IRC client to the domain; run_wiki_check uses the IRC scheduler when one is set."""
        self.irc = irc_client

    def stop_task(self):
        """Cancel the domain task. Does nothing when no task has been started yet."""
        if self.task is None:
            return
        self.task.cancel()  # Be aware that cancelling the task may take time

    def run_domain(self):
        """Start the domain task unless one is still running.

        Must be called while an event loop is running, since it uses asyncio.create_task.
        """
        # done() is also true for cancelled tasks, and additionally lets us restart a task
        # that has finished or died with an exception instead of refusing forever.
        if self.task is None or self.task.done():
            self.task = asyncio.create_task(self.run_wiki_check())
        else:
            logger.error(f"Tried to start a task for domain {self.name} however the task already exists!")

    def remove_wiki(self, script_url: str):
        """Remove the wiki stored under ``script_url`` from the domain.

        :raises KeyError - when no wiki with that script URL belongs to this domain
        """
        self.wikis.pop(script_url)

    def add_wiki(self, wiki: src.wiki.Wiki, first=False):
        """Adds a wiki to domain list.

        :parameter wiki - Wiki object
        :parameter first (optional) - bool indicating if wikis should be added as first or last in the ordered dict"""
        wiki.set_domain(self)
        self.wikis[wiki.script_url] = wiki
        if first:
            self.wikis.move_to_end(wiki.script_url, last=False)

    async def run_wiki_scan(self, wiki: src.wiki.Wiki):
        """Scan a single wiki, honouring the domain rate limiter.

        After the scan the wiki is moved to the end of the ordered dict and the rate limiter
        is given an additional timeout of one second.
        """
        await self.rate_limiter.timeout_wait()
        await wiki.scan()
        self.wikis.move_to_end(wiki.script_url)
        self.rate_limiter.timeout_add(1.0)

    async def irc_scheduler(self):
        """Run one scheduling pass for a domain that has an IRC client attached.

        First scans every wiki reported in ``self.irc.updated_wikis``, then walks the wikis
        from the front of the ordered dict and scans those that pass the ``irc_overtime`` check,
        stopping at the first wiki that does not.
        """
        while True:
            try:
                # presumably updated_wikis is a set of script URLs (pop() without arguments
                # raising KeyError when empty) - verify against src.irc_feed
                wiki_url = self.irc.updated_wikis.pop()
            except KeyError:
                break
            try:
                wiki = self.wikis[wiki_url]
            except KeyError:
                logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
                continue
            await self.run_wiki_scan(wiki)
        # Iterate over a snapshot: run_wiki_scan reorders self.wikis and an OrderedDict
        # raises RuntimeError when it is mutated while being iterated.
        for wiki in list(self.wikis.values()):
            if wiki.script_url not in self.wikis:
                continue  # Removed from the domain while we were awaiting an earlier scan
            # NOTE(review): last_checked_rc is compared directly against the irc_overtime value;
            # if it holds a timestamp rather than an age this needs an elapsed-time calculation - confirm.
            if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600):
                await self.run_wiki_scan(wiki)
            else:
                return  # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while

    async def regular_scheduler(self):
        """Endlessly scan the wiki at the front of the ordered dict, sleeping between scans."""
        while True:
            # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
            await asyncio.sleep(self.calculate_sleep_time(len(self)))
            if not self.wikis:
                continue  # Nothing to scan yet, wait for wikis to be added
            # Take the first wiki without removing it: OrderedDict.pop() requires a key, and
            # run_wiki_scan needs the wiki to still be in the dict to move it to the end.
            await self.run_wiki_scan(next(iter(self.wikis.values())))

    def calculate_sleep_time(self, queue_length: int):
        """Return the number of seconds to sleep between scans for a domain with ``queue_length`` wikis.

        The value decreases by 25 for every wiki, starting from 150, and never drops below 1.
        Deliberately not cached: the arithmetic is trivial and caching an instance method
        would keep every Domain instance alive for the lifetime of the cache.
        """
        return max((-25 * queue_length) + 150, 1)

    async def run_wiki_check(self):
        """Entry point of the domain task: run the IRC scheduler every 10 seconds when an IRC
        client is attached, otherwise run the regular scheduler."""
        if self.irc:
            while True:
                await self.irc_scheduler()
                await asyncio.sleep(10.0)
        else:
            await self.regular_scheduler()