Further fixes

This commit is contained in:
Frisk 2022-10-09 14:10:08 +02:00
parent a947971858
commit 49f12ed172
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
7 changed files with 86 additions and 41 deletions

View file

@ -30,17 +30,17 @@ class Client:
A client for interacting with RcGcDw when creating formatters or hooks.
"""
def __init__(self, hooks, wiki):
URLS = src.misc.get_paths(wiki.script_url,)
self._formatters = hooks
self.__recent_changes: Wiki = wiki
self.WIKI_API_PATH: str = src.misc.WIKI_API_PATH
self.WIKI_ARTICLE_PATH: str = src.misc.WIKI_ARTICLE_PATH
self.WIKI_SCRIPT_PATH: str = src.misc.WIKI_SCRIPT_PATH
self.WIKI_JUST_DOMAIN: str = src.misc.WIKI_JUST_DOMAIN
self.WIKI_API_PATH: Optional[str] = None
self.WIKI_ARTICLE_PATH: Optional[str] = None
self.WIKI_SCRIPT_PATH: str = wiki.script_url
self.WIKI_JUST_DOMAIN: Optional[str] = None
self.content_parser = src.misc.ContentParser
self.tags = self.__recent_changes.tags
self.LinkParser: type(src.misc.LinkParser) = src.misc.LinkParser
self.scheduler: sched.scheduler = sched.scheduler()
self._last_request: Optional[dict] = None
#self.make_api_request: src.rc.wiki.__recent_changes.api_request = self.__recent_changes.api_request
def schedule(self, function: Callable, *args: Any, every: Optional[float] = None, at: Optional[str] = None,
@ -77,12 +77,25 @@ class Client:
def refresh_internal_data(self):
"""Refreshes internal storage data for wiki tags and MediaWiki messages."""
self.__recent_changes.init_info()
self.__recent_changes.recache_requested = True
def create_article_path(self, article: str) -> str:
    """Return the article URL obtained by substituting *article* for the ``$1`` placeholder.

    The placeholder is looked up in ``self.WIKI_ARTICLE_PATH``; every
    occurrence of ``$1`` is replaced.

    NOTE(review): WIKI_ARTICLE_PATH appears to start out as None and to be
    filled in by the ``last_request`` setter - confirm that no caller can
    reach this method before the first request has been stored.
    """
    path_template = self.WIKI_ARTICLE_PATH
    return path_template.replace("$1", article)
@property
def last_request(self):
    """The most recently stored request payload (whatever was last assigned)."""
    return self._last_request

@last_request.setter
def last_request(self, request: dict):
    """Store *request* and, while the article path is still unset, derive the wiki paths from it.

    The paths are resolved through ``src.misc.get_paths`` using the script
    path and the request; elements 0, 2 and 3 of its result are kept as the
    API path, the article path and the bare domain respectively.
    """
    article_path_known = bool(self.WIKI_ARTICLE_PATH)
    if not article_path_known:
        resolved_paths = src.misc.get_paths(self.WIKI_SCRIPT_PATH, request)
        self.WIKI_API_PATH = resolved_paths[0]
        self.WIKI_ARTICLE_PATH = resolved_paths[2]
        self.WIKI_JUST_DOMAIN = resolved_paths[3]
    self._last_request = request
@property
def namespaces(self) -> dict:
"""Return a dict of namespaces, if None return empty dict"""

View file

@ -4,15 +4,16 @@ import logging
from collections import OrderedDict
from src.config import settings
from typing import TYPE_CHECKING, Optional
from src.argparser import command_line_args
from functools import cache
# from src.discussions import Discussions
from statistics import Log, LogType
import src.wiki_ratelimiter
logger = logging.getLogger("rcgcdb.domain")
if TYPE_CHECKING:
import src.wiki
import src.wiki_ratelimiter
import src.irc_feed
@ -23,7 +24,7 @@ class Domain:
self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter()
self.irc: Optional[src.irc_feed.AioIRCCat] = None
self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None
# self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None
def __iter__(self):
return iter(self.wikis)
@ -65,16 +66,17 @@ class Domain:
def remove_wiki(self, script_url: str):
    """Drop the wiki registered under *script_url* from this domain.

    :parameter script_url - key of the wiki in self.wikis
    :raises KeyError - when no wiki is registered under that URL"""
    del self.wikis[script_url]
def add_wiki(self, wiki: src.wiki.Wiki, first=False):
async def add_wiki(self, wiki: src.wiki.Wiki, first=False):
"""Adds a wiki to domain list.
:parameter wiki - Wiki object
:parameter first (optional) - bool indicating if wikis should be added as first or last in the ordered dict"""
wiki.set_domain(self)
if wiki.script_url in self.wikis:
self.wikis[wiki.script_url].update_targets()
await self.wikis[wiki.script_url].update_targets()
else:
self.wikis[wiki.script_url] = wiki
await wiki.update_targets()
if first:
self.wikis.move_to_end(wiki.script_url, last=False)
@ -86,6 +88,7 @@ class Domain:
self.rate_limiter.timeout_add(1.0)
async def irc_scheduler(self):
try:
while True:
try:
wiki_url = self.irc.updated_wikis.pop()
@ -98,15 +101,29 @@ class Domain:
continue
await self.run_wiki_scan(wiki)
for wiki in self.wikis.values():
if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600):
if (wiki.statistics.last_checked_rc or 0) < settings.get("irc_overtime", 3600):
await self.run_wiki_scan(wiki)
else:
return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
except:
if command_line_args.debug:
logger.exception("IRC task for domain {} failed!".format(self.name))
else:
# TODO Write
pass
async def regular_scheduler(self):
    """Scan this domain's wikis one at a time in an endless loop.

    Each iteration sleeps for ``calculate_sleep_time(len(self))`` and then
    runs ``run_wiki_scan`` on the first wiki in ``self.wikis``.

    Cancellation is propagated to the caller so that its cleanup handler can
    run; any other failure ends the loop and is logged (with a traceback
    when the debug command line flag is set).

    :raises asyncio.CancelledError - when the surrounding task is cancelled"""
    try:
        while True:
            # To make sure that we don't spam domains with one wiki every second
            # we calculate a sane timeout for domains with few wikis
            await asyncio.sleep(self.calculate_sleep_time(len(self)))
            await self.run_wiki_scan(next(iter(self.wikis.values())))
    except asyncio.CancelledError:
        # CancelledError derives from BaseException; it must not be swallowed here,
        # otherwise the task can never be cancelled and the caller never cleans up.
        raise
    except Exception:
        if command_line_args.debug:
            logger.exception("Regular task for domain {} failed!".format(self.name))
        else:
            # TODO Write
            # Until proper reporting exists, at least leave a trace that the scheduler stopped.
            logger.error("Regular task for domain {} failed!".format(self.name))
@cache
def calculate_sleep_time(self, queue_length: int):
@ -115,8 +132,17 @@ class Domain:
async def run_wiki_check(self):
    """Drive the scheduler matching this domain: IRC-based when self.irc is set, regular otherwise.

    With IRC, irc_scheduler is re-run in a loop with a 10 second pause
    between runs. Without IRC, regular_scheduler is awaited once. When the
    task is cancelled, the session of every wiki is closed (and, for IRC
    domains, the IRC connection is disconnected); the cancellation itself
    is absorbed and the coroutine returns normally."""
    if not self.irc:
        try:
            await self.regular_scheduler()
        except asyncio.exceptions.CancelledError:
            for tracked_wiki in self.wikis.values():
                await tracked_wiki.session.close()
        return

    try:
        while True:
            await self.irc_scheduler()
            await asyncio.sleep(10.0)
    except asyncio.exceptions.CancelledError:
        for tracked_wiki in self.wikis.values():
            await tracked_wiki.session.close()
        await self.irc.connection.disconnect()

View file

@ -45,10 +45,10 @@ class DomainManager:
:parameter wiki - Wiki object to be added"""
wiki_domain = self.get_domain(wiki.script_url)
try:
self.domains[wiki_domain].add_wiki(wiki)
await self.domains[wiki_domain].add_wiki(wiki)
except KeyError:
new_domain = await self.new_domain(wiki_domain)
new_domain.add_wiki(wiki)
await new_domain.add_wiki(wiki)
def remove_domain(self, domain):
domain.destoy()
@ -78,7 +78,7 @@ class DomainManager:
domain_object = Domain(name)
for irc_server in settings["irc_servers"].keys():
if name in settings["irc_servers"][irc_server]["domains"]:
domain_object.set_irc(AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], domain_object))
domain_object.set_irc(AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], domain_object, None, None))
break # Allow only one IRC for a domain
self.domains[name] = domain_object
return self.domains[name]

View file

@ -14,6 +14,7 @@ logger = logging.getLogger("rcgcdw.irc_feed")
if TYPE_CHECKING:
from src.domain import Domain
class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
def connect(self, *args, **kwargs):
super().connect(*args, **kwargs)

View file

@ -13,7 +13,7 @@ class MWMessages:
message_sets[self.mw_id] = mc_messages
def __getitem__(self, item):
message_sets[self.mw_id].get(item, "============")
return message_sets[self.mw_id].get(item, "============")
def __iter__(self):
for key, item in message_sets[self.mw_id].items():

View file

@ -19,7 +19,7 @@ class UpdateDB:
def clear_list(self):
self.updated.clear()
async def fetch_rows(self, SQLstatement: str, args: Union[str, int]) -> collections.abc.AsyncIterable:
async def fetch_rows(self, SQLstatement: str, *args: Union[str, int]) -> collections.abc.AsyncIterable:
async with db.pool().acquire() as connection:
async with connection.transaction():
async for row in connection.cursor(SQLstatement, *args):

View file

@ -36,10 +36,11 @@ if TYPE_CHECKING:
MESSAGE_LIMIT = settings.get("message_limit", 30)
class Wiki:
def __init__(self, script_url: str, rc_id: Optional[int], discussion_id: Optional[int]):
self.script_url: str = script_url
self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0))
self.session: aiohttp.ClientSession = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0))
self.statistics: Statistics = Statistics(rc_id, discussion_id)
self.mw_messages: Optional[MWMessages] = None
self.tags: dict[str, Optional[str]] = {} # Tag can be None if hidden
@ -48,8 +49,8 @@ class Wiki:
self.targets: Optional[defaultdict[Settings, list[str]]] = None
self.client: Client = Client(formatter_hooks, self)
self.message_history: list[StackedDiscordMessage] = list()
self.update_targets()
self.namespaces: Optional[dict] = None
self.recache_requested: bool = False
@property
def rc_id(self):
@ -265,12 +266,13 @@ class Wiki:
request = await self.fetch_wiki(amount=amount)
self.client.last_request = request
except WikiServerError as e:
# If WikiServerError comes up 2 times in recent 2 minutes, this will reraise the exception, otherwise waits 2 seconds
# If WikiServerError comes up 2 times in recent 2 minutes, this will reraise the exception, otherwise waits 2 seconds and retries
self.statistics.update(Log(type=LogType.CONNECTION_ERROR, title=str(e.exception)))
if self.statistics.recent_connection_errors() > 1:
raise
await asyncio.sleep(2.0)
if not self.mw_messages:
continue
if not self.mw_messages or self.recache_requested:
process_cachable(request, self)
try:
recent_changes = request["query"]["recentchanges"]
@ -312,6 +314,7 @@ class Wiki:
messagequeue.add_messages(message_list)
return
@cache
def prepare_settings(display_mode: int) -> dict:
"""Prepares dict of RcGcDw compatible settings based on a template and display mode of given call"""
@ -338,6 +341,8 @@ def process_cachable(response: dict, wiki_object: Wiki) -> None:
wiki_object.tags[tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
except KeyError:
wiki_object.tags[tag["name"]] = None
wiki_object.namespaces = response["query"]["namespaces"]
wiki_object.recache_requested = False
async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: namedtuple("Settings", ["lang", "display"]), webhooks: list) -> DiscordMessage: