From 928db6a86d77d9c2debd1ff771ce102078862b5e Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 20 Feb 2021 16:33:44 +0100 Subject: [PATCH] Fixed all immediately visible issues --- src/bot.py | 29 ++++++++++++++++++++++------- src/irc_feed.py | 35 +++++++++++++++++++++-------------- src/wiki.py | 1 + 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/bot.py b/src/bot.py index 8753c5b..b16acdb 100644 --- a/src/bot.py +++ b/src/bot.py @@ -72,7 +72,7 @@ class RcQueue: else: for irc_server in settings["irc_servers"].keys(): if group in settings["irc_servers"][irc_server]["domains"]: - irc_connection = AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) + irc_connection = AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], all_wikis) for domain in settings["irc_servers"][irc_server]["domains"]: self.irc_mapping[domain] = irc_connection irc_connection.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"], settings["irc_servers"][irc_server]["irc_name"]) @@ -92,7 +92,10 @@ class RcQueue: all_wikis[wiki].rc_active = -1 if not self[group]["query"]: # if there is no wiki left in the queue, get rid of the task logger.debug(f"{group} no longer has any wikis queued!") - await self.stop_task_group(group) + if not self.check_if_domain_in_db(group): + await self.stop_task_group(group) + else: + logger.debug(f"But there are still wikis for it in DB!") async def stop_task_group(self, group): self[group]["task"].cancel() @@ -102,7 +105,7 @@ class RcQueue: fetch_all = db_cursor.execute( 'SELECT ROWID, webhook, wiki, lang, display, rcid FROM rcgcdw WHERE rcid != -1 GROUP BY wiki ORDER BY ROWID ASC') for wiki in fetch_all.fetchall(): - if get_domain(db_wiki["wiki"]) == domain: + if get_domain(wiki["wiki"]) == domain: return True return False @@ -159,13 +162,22 @@ class RcQueue: try: current_domain: dict = self[domain] if current_domain["irc"]: - if db_wiki["wiki"] not in 
current_domain["irc"].updated and all_wikis[db_wiki["wiki"]].last_updated+settings["irc_overtime"] > time.time(): + logger.info('CURRENT STATUS:') + logger.info("DOMAIN LIST FOR IRC: {}".format(current_domain["irc"].updated)) + logger.info("CURRENT DOMAIN INFO: {}".format(domain)) + logger.info("IS WIKI IN A LIST?: {}".format(db_wiki["wiki"] in current_domain["irc"].updated)) + logger.info("LAST CHECK FOR THE WIKI {} IS {}".format(db_wiki["wiki"], all_wikis[db_wiki["wiki"]].last_check)) + if db_wiki["wiki"] not in current_domain["irc"].updated and all_wikis[db_wiki["wiki"]].last_check+settings["irc_overtime"] > time.time(): continue # if domain has IRC, has not been updated, and it was updated less than an hour ago else: # otherwise remove it from the list - current_domain["irc"].updated.remove(db_wiki["wiki"]) + try: + current_domain["irc"].updated.remove(db_wiki["wiki"]) + except KeyError: + pass # this is to be expected when third condition is not met above if not db_wiki["ROWID"] < current_domain["last_rowid"]: current_domain["query"].append(QueuedWiki(db_wiki["wiki"], 20)) except KeyError: + raise await self.start_group(domain, [QueuedWiki(db_wiki["wiki"], 20)]) logger.info("A new domain group ({}) has been added since last time, adding it to the domain_list and starting a task...".format(domain)) except ListFull: @@ -380,10 +392,13 @@ async def discussion_handler(): fetch_all = db_cursor.execute( "SELECT wiki, rcid, postid FROM rcgcdw WHERE postid != '-1' OR postid IS NULL GROUP BY wiki") for db_wiki in fetch_all.fetchall(): - if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and all_wikis[db_wiki["wiki"]].last_updated+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic + if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and all_wikis[db_wiki["wiki"]].last_discussion_check+settings["irc_overtime"] > time.time(): # I 
swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic continue else: - rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"]) + try: + rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"]) + except KeyError: + pass # to be expected header = settings["header"] header["Accept"] = "application/hal+json" async with aiohttp.ClientSession(headers=header, diff --git a/src/irc_feed.py b/src/irc_feed.py index 8e1098f..6c19597 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -1,23 +1,27 @@ import irc.client_aio import json +import logging from urllib.parse import urlparse, quote +logger = logging.getLogger("rcgcdw.irc_feed") + + class AioIRCCat(irc.client_aio.AioSimpleIRCClient): def __init__(self, targets, all_wikis): - irc.client.SimpleIRCClient.__init__(self) + irc.client_aio.SimpleIRCClient.__init__(self) self.targets = targets - self.updated = [] # Storage for edited wikis - self.updated_discussions = [] + self.updated = set() # Storage for edited wikis + self.updated_discussions = set() self.wikis = all_wikis def on_welcome(self, connection, event): # Join IRC channels for channel in self.targets.values(): connection.join(channel) - def on_pubmsg(self, channel, event): - if channel == self.targets["rc"]: + def on_pubmsg(self, connection, event): + if event.target == self.targets["rc"]: self.parse_fandom_message(' '.join(event.arguments)) - elif channel == self.targets["discussion"]: + elif event.target == self.targets["discussion"]: self.parse_fandom_discussion(' '.join(event.arguments)) def on_nicknameinuse(self, c, e): @@ -32,24 +36,27 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): message = message[0][half + 3:].strip() # print(message) url = urlparse(message) - full_url = url.netloc + recognize_langs(url.path) - if full_url in self.wikis: - self.updated.append(full_url) + full_url = "https://"+url.netloc + recognize_langs(url.path) + if full_url in self.wikis and 
self.wikis[full_url].rc_active != -1: + self.updated.add(full_url) + logger.debug("New website appended to the list! {}".format(full_url)) - def parse_discussions_message(self, message): + def parse_fandom_discussion(self, message): post = json.loads(message) if post.get('action', 'unknown') != "deleted": # ignore deletion events url = urlparse(post.get('url')) - full_url = url.netloc + recognize_langs(url.path) - self.updated_discussions.append(full_url) + full_url ="https://"+ url.netloc + recognize_langs(url.path) + if full_url in self.wikis: # POSSIBLE MEMORY LEAK AS WE DON'T HAVE A WAY TO CHECK IF WIKI IS LOOKING FOR DISCUSSIONS OR NOT + self.updated_discussions.add("https://"+full_url) + logger.debug("New website appended to the list! {}".format(full_url)) def recognize_langs(path): lang = "" new_path = path.split("/") if len(new_path)>2: - if new_path[1] != "wiki": + if new_path[1] not in ("wiki", "f"): lang = "/"+new_path[1] - return lang + return lang+"/" diff --git a/src/wiki.py b/src/wiki.py index d72dad3..1bf3a89 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -190,6 +190,7 @@ async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict): mw_msgs[key] = msgs # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one local_wiki.mw_messages = key + # db_wiki: webhook, wiki, lang, display, rcid, postid async def essential_info(change: dict, changed_categories, local_wiki: Wiki, target: tuple, paths: tuple, request: dict, rate_limiter: RateLimiter) -> src.discord.DiscordMessage: