From 090a14c6c4ae2c3fb95d4884f94a696a5bc0ab88 Mon Sep 17 00:00:00 2001 From: Frisk Date: Fri, 4 Nov 2022 15:59:26 +0100 Subject: [PATCH] Updates and fixes --- src/api/client.py | 2 +- src/api/util.py | 2 +- src/bot.py | 1 - src/database.py | 14 +------------- src/discord/queue.py | 4 +--- src/domain.py | 10 +++------- src/domain_manager.py | 19 ++++++++++--------- src/irc_feed.py | 7 ++++--- src/misc.py | 29 +++++++++++++++++------------ src/queue_handler.py | 1 + src/statistics.py | 2 +- src/wiki.py | 8 +++++--- src/wiki_ratelimiter.py | 22 ---------------------- 13 files changed, 45 insertions(+), 76 deletions(-) delete mode 100644 src/wiki_ratelimiter.py diff --git a/src/api/client.py b/src/api/client.py index aadea37..9f48085 100644 --- a/src/api/client.py +++ b/src/api/client.py @@ -105,7 +105,7 @@ class Client: return dict() def parse_links(self, summary: str): - link_parser = self.LinkParser() + link_parser = self.LinkParser(self.WIKI_JUST_DOMAIN) link_parser.feed(summary) return link_parser.new_string diff --git a/src/api/util.py b/src/api/util.py index 9422bf3..f95af0d 100644 --- a/src/api/util.py +++ b/src/api/util.py @@ -51,7 +51,7 @@ def sanitize_to_url(text: str) -> str: # TODO ) replaces needed? def parse_mediawiki_changes(ctx: Context, content: str, embed: DiscordMessage) -> None: """Parses MediaWiki changes and adds them to embed as fields "Added" and "Removed" """ - edit_diff = ctx.client.content_parser() + edit_diff = ctx.client.content_parser(ctx._) edit_diff.feed(content) if edit_diff.small_prev_del: if edit_diff.small_prev_del.replace("~~", "").replace("__", "").isspace(): diff --git a/src/bot.py b/src/bot.py index 0d288b9..8239710 100644 --- a/src/bot.py +++ b/src/bot.py @@ -16,7 +16,6 @@ from src.database import db from src.exceptions import * from src.queue_handler import dbmanager from src.wiki import Wiki, process_cats, essential_feeds -from src.wiki_ratelimiter import RateLimiter from src.domain_manager import domains diff --git a/src/database.py b/src/database.py index 3278c09..9469982 100644 --- a/src/database.py +++ b/src/database.py @@ -34,17 +34,5 @@ class db_connection: def pool(self) -> asyncpg.Pool: return self.connection_pool - # Tried to make it a decorator but tbh won't probably work - # async def in_transaction(self, func): - # async def single_transaction(): - # async with self.connection.acquire() as connection: - # async with connection.transaction(): - # await func() - # return single_transaction - # async def query(self, string, *arg): - # async with self.connection.acquire() as connection: - # async with connection.transaction(): - # return connection.cursor(string, *arg) - -db = db_connection() \ No newline at end of file +db = db_connection() diff --git a/src/discord/queue.py b/src/discord/queue.py index bbaf930..81fd131 100644 --- a/src/discord/queue.py +++ b/src/discord/queue.py @@ -114,14 +114,12 @@ class MessageQueue: async def pack_massages(self, messages: list[QueueEntry], current_pack=None) -> AsyncGenerator[tuple[StackedDiscordMessage, int, str], None]: """Pack messages into StackedDiscordMessage. It's an async generator""" - # TODO Rebuild to support DELETE and PATCH messages for index, message in enumerate(messages): if message.method == "POST": if current_pack is None: current_pack = StackedDiscordMessage(0 if message.discord_message.message_type == "compact" else 1, message.wiki) else: - # message.discord_message. # TODO Where do we store method? yield message.discord_message, index, message.method message = message.discord_message try: @@ -152,7 +150,7 @@ class MessageQueue: self.global_rate_limit = True await asyncio.sleep(e.remaining / 1000) return - for queue_message in messages[max(index-len(msg.message_list), 0):max(index, 1)]: # mark messages as delivered + for queue_message in messages[max(index-len(msg.message_list), 0):index+1]: # mark messages as delivered queue_message.confirm_sent_status(webhook_url) if client_error is False: msg.webhook = webhook_url diff --git a/src/domain.py b/src/domain.py index 0d4ce77..29f704c 100644 --- a/src/domain.py +++ b/src/domain.py @@ -8,7 +8,6 @@ from src.argparser import command_line_args from functools import cache # from src.discussions import Discussions from statistics import Log, LogType -import src.wiki_ratelimiter logger = logging.getLogger("rcgcdb.domain") @@ -22,7 +21,6 @@ class Domain: self.name = name # This should be always in format of topname.extension for example fandom.com self.task: Optional[asyncio.Task] = None self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict() - self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter() self.irc: Optional[src.irc_feed.AioIRCCat] = None # self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None @@ -38,7 +36,7 @@ class Domain: def destroy(self): """Destroy the domain – do all of the tasks that should make sure there is no leftovers before being collected by GC""" if self.irc: - self.irc.connection.disconnect("Leaving") + self.irc.connection.die("Leaving") if self.discussions_handler: self.discussions_handler.close() if self.task: @@ -81,11 +79,9 @@ class Domain: self.wikis.move_to_end(wiki.script_url, last=False) async def run_wiki_scan(self, wiki: src.wiki.Wiki, reason: Optional[int] = None): - await self.rate_limiter.timeout_wait() await wiki.scan() wiki.statistics.update(Log(type=LogType.SCAN_REASON, title=str(reason))) self.wikis.move_to_end(wiki.script_url) - self.rate_limiter.timeout_add(1.0) async def irc_scheduler(self): try: @@ -108,7 +104,7 @@ class Domain: return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while except: if command_line_args.debug: - logger.exception("IRC task for domain {} failed!".format(self.name)) + logger.exception("IRC scheduler task for domain {} failed!".format(self.name)) else: # TODO Write pass @@ -140,7 +136,7 @@ class Domain: except asyncio.exceptions.CancelledError: for wiki in self.wikis.values(): await wiki.session.close() - await self.irc.connection.disconnect() + self.irc.connection.disconnect() else: try: await self.regular_scheduler() diff --git a/src/domain_manager.py b/src/domain_manager.py index 391dc08..07dddc4 100644 --- a/src/domain_manager.py +++ b/src/domain_manager.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING from urllib.parse import urlparse, urlunparse import logging import asyncpg @@ -9,9 +9,7 @@ from src.config import settings from src.domain import Domain from src.irc_feed import AioIRCCat - -if TYPE_CHECKING: - from src.wiki import Wiki +from src.wiki import Wiki logger = logging.getLogger("rcgcdb.domain_manager") @@ -23,6 +21,7 @@ class DomainManager: """Callback for database listener. Used to update our domain cache on changes such as new wikis or removed wikis""" # TODO Write a trigger for pub/sub in database/Wiki-Bot repo split_payload = payload.split(" ") + logger.debug("Received pub/sub message: {}".format(payload)) if len(split_payload) < 2: raise ValueError("Improper pub/sub message! Pub/sub payload: {}".format(payload)) if split_payload[0] == "ADD": @@ -30,8 +29,8 @@ class DomainManager: elif split_payload[0] == "REMOVE": try: results = await connection.fetch("SELECT * FROM rcgcdw WHERE wiki = $1;", split_payload[1]) - if len(results) > 0: - return + if len(results) > 0: # If there are still webhooks for this wiki - just update its targets + await self.return_domain(self.get_domain(split_payload[1])).get_wiki(split_payload[1]).update_targets() except asyncpg.IdleSessionTimeoutError: logger.error("Couldn't check amount of webhooks with {} wiki!".format(split_payload[1])) return @@ -50,9 +49,9 @@ class DomainManager: new_domain = await self.new_domain(wiki_domain) await new_domain.add_wiki(wiki) - def remove_domain(self, domain): - domain.destoy() - del self.domains[domain] + def remove_domain(self, domain: Domain): + domain.destroy() + del self.domains[domain.name] def remove_wiki(self, script_url: str): wiki_domain = self.get_domain(script_url) @@ -79,6 +78,8 @@ class DomainManager: for irc_server in settings["irc_servers"].keys(): if name in settings["irc_servers"][irc_server]["domains"]: domain_object.set_irc(AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], domain_object, None, None)) + domain_object.irc.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"], + settings["irc_servers"][irc_server]["irc_name"], ircname=settings["irc_servers"][irc_server]["irc_nickname"]) break # Allow only one IRC for a domain self.domains[name] = domain_object return self.domains[name] diff --git a/src/irc_feed.py b/src/irc_feed.py index 93b1291..7ef4184 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -44,7 +44,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): c.nick(c.get_nickname() + "_") def on_disconnect(self, connection, event): - self.connect(*self.connection_details[0], **self.connection_details[1]) # attempt to reconnect + # self.connect(*self.connection_details[0], **self.connection_details[1]) # attempt to reconnect + pass def parse_fandom_message(self, message: str): message = message.split("\x035*\x03") @@ -71,8 +72,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): if post.get('action', 'unknown') != "deleted": # ignore deletion events url = urlparse(post.get('url')) full_url ="https://"+ url.netloc + recognize_langs(url.path) - if full_url in self.domain: - self.discussion_callback(full_url) + # if full_url in self.domain: + # self.discussion_callback(full_url) def recognize_langs(path): diff --git a/src/misc.py b/src/misc.py index bd14a4f..cc52b2b 100644 --- a/src/misc.py +++ b/src/misc.py @@ -2,6 +2,7 @@ from html.parser import HTMLParser import base64, re import logging +from typing import Callable from urllib.parse import urlparse, urlunparse from src.i18n import langs @@ -28,7 +29,10 @@ class LinkParser(HTMLParser): new_string = "" recent_href = "" - WIKI_JUST_DOMAIN = "" + + def __init__(self, DOMAIN_URL): + self.WIKI_JUST_DOMAIN = DOMAIN_URL + super().__init__() def handle_starttag(self, tag, attrs): for attr in attrs: @@ -59,16 +63,17 @@ class LinkParser(HTMLParser): pass -LinkParse = LinkParser() +# LinkParse = LinkParser() -def parse_link(domain: str, to_parse: str) -> str: - """Because I have strange issues using the LinkParser class myself, this is a helper function - to utilize the LinkParser properly""" - LinkParse.WIKI_JUST_DOMAIN = domain - LinkParse.new_string = "" - LinkParse.feed(to_parse) - LinkParse.recent_href = "" - return LinkParse.new_string + +# def parse_link(domain: str, to_parse: str) -> str: +# """Because I have strange issues using the LinkParser class myself, this is a helper function +# to utilize the LinkParser properly""" +# LinkParse.WIKI_JUST_DOMAIN = domain +# LinkParse.new_string = "" +# LinkParse.feed(to_parse) +# LinkParse.recent_href = "" +# return LinkParse.new_string def link_formatter(link: str) -> str: @@ -117,9 +122,9 @@ class ContentParser(HTMLParser): small_prev_ins = "" small_prev_del = "" - def __init__(self, lang): + def __init__(self, lang: Callable): super().__init__() - self.more = langs[lang]["misc"].gettext("\n__And more__") + self.more = lang("\n__And more__") self.ins_length = len(self.more) self.del_length = len(self.more) diff --git a/src/queue_handler.py b/src/queue_handler.py index 6a9b1c8..0bbd8df 100644 --- a/src/queue_handler.py +++ b/src/queue_handler.py @@ -28,6 +28,7 @@ class UpdateDB: async def update_db(self): try: while True: + logger.debug("Running DB check") if self.updated: async with db.pool().acquire() as connection: async with connection.transaction(): diff --git a/src/statistics.py b/src/statistics.py index 731a1b8..2814625 100644 --- a/src/statistics.py +++ b/src/statistics.py @@ -44,7 +44,7 @@ class Statistics: self.logs: LimitedList[Log] = LimitedList() def update(self, *args: Log, **kwargs: dict[str, Union[float, int]]): - for key, value in kwargs: + for key, value in kwargs.items(): self.__setattr__(key, value) for log in args: self.logs.append(log) diff --git a/src/wiki.py b/src/wiki.py index 5a211fd..9962276 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -242,7 +242,7 @@ class Wiki: """Synchronous function based on api_request created for compatibility reasons with RcGcDw API""" try: if isinstance(params, str): - request = self.session_requests.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=10, allow_redirects=allow_redirects) + request = self.session_requests.get(self.script_url + "api.php" + params + "&errorformat=raw", timeout=10, allow_redirects=allow_redirects) elif isinstance(params, OrderedDict): request = self.session_requests.get(self.script_url + "api.php", params=params, timeout=10, allow_redirects=allow_redirects) else: @@ -357,6 +357,8 @@ class Wiki: message.wiki = self message_list.append(QueueEntry(message, webhooks, self)) messagequeue.add_messages(message_list) + self.statistics.update(last_action=highest_id) + dbmanager.add(("UPDATE rcgcdw SET rcid = $1 WHERE wiki = $2", (highest_id, self.script_url))) # If this is not enough for the future, save rcid in message sending function to make sure we always send all of the changes return @@ -394,7 +396,7 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ """This function takes more vital information, communicates with a formatter and constructs DiscordMessage with it. It creates DiscordMessageMetadata object, LinkParser and Context. Prepares a comment """ from src.misc import LinkParser - LinkParser = LinkParser() + LinkParser = LinkParser(wiki.client.WIKI_ARTICLE_PATH) metadata = DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None), page_id=change.get("pageid", None)) context = Context("embed" if display_options.display > 0 else "compact", "recentchanges", webhooks, wiki.client, langs[display_options.lang]["rc_formatters"], prepare_settings(display_options.display)) @@ -446,7 +448,7 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ else: raise if identification_string in ("delete/delete", "delete/delete_redir"): # TODO Move it into a hook? - wiki.delete_messages(dict(pageid=change.get("pageid"))) + wiki.delete_messages(dict(page_id=change.get("pageid"))) elif identification_string == "delete/event": logparams = change.get('logparams', {"ids": []}) if settings["appearance"]["mode"] == "embed": diff --git a/src/wiki_ratelimiter.py b/src/wiki_ratelimiter.py deleted file mode 100644 index f67bf63..0000000 --- a/src/wiki_ratelimiter.py +++ /dev/null @@ -1,22 +0,0 @@ -import logging, time, asyncio - -logger = logging.getLogger("rcgcdw.ratelimiter") - -class RateLimiter: - def __init__(self): - self.timeout_until = 0 - - def timeout_add(self, timeout: float): - """This function sets a new timeout""" - self.timeout_until = time.time() + timeout - #logger.debug("Added {} timeout".format(timeout)) - - async def timeout_wait(self): - """This awaitable calculates the time to wait according to timeout_until, does not wait if it's past the timeout to not skip a cycle""" - calculated_timeout = self.timeout_until - time.time() - #logger.debug("Waiting {}".format(calculated_timeout)) - if calculated_timeout > 0: - await asyncio.sleep(calculated_timeout) - - def timeout_raw(self): - return self.timeout_until - time.time() \ No newline at end of file