From e1aeaaaaba062deea1983ec557e833f3cec99b0d Mon Sep 17 00:00:00 2001 From: Frisk Date: Wed, 20 Jan 2021 16:45:17 +0100 Subject: [PATCH 01/16] Added updates --- src/bot.py | 6 ++++++ src/irc_feed.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 src/irc_feed.py diff --git a/src/bot.py b/src/bot.py index 59839a8..b39aa76 100644 --- a/src/bot.py +++ b/src/bot.py @@ -18,6 +18,7 @@ from src.queue_handler import DBHandler from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info, essential_feeds from src.discord import DiscordMessage, generic_msg_sender_exception_logger, stack_message_list from src.wiki_ratelimiter import RateLimiter +from src.irc_feed import AioIRCCat logging.config.dictConfig(settings["logging"]) @@ -64,6 +65,11 @@ class RcQueue: async def start_group(self, group, initial_wikis): """Starts a task for given domain group""" if group not in self.domain_list: + if group in settings["irc_servers"]: + irc_connection = AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) + irc_connection.connect(settings["irc_servers"][group]["irc_host"], settings["irc_servers"][group]["irc_port"], "RcGcDb") + else: + irc_connection = None self.domain_list[group] = {"task": asyncio.create_task(scan_group(group)), "last_rowid": 0, "query": LimitedList(initial_wikis), "rate_limiter": RateLimiter()} logger.debug(self.domain_list[group]) else: diff --git a/src/irc_feed.py b/src/irc_feed.py new file mode 100644 index 0000000..2d8654b --- /dev/null +++ b/src/irc_feed.py @@ -0,0 +1,47 @@ +import irc.client_aio +from urllib.parse import urlparse, quote + +class AioIRCCat(irc.client_aio.AioSimpleIRCClient): + def __init__(self, targets, all_wikis): + irc.client.SimpleIRCClient.__init__(self) + self.targets = targets + self.updated = [] # Storage for edited wikis + self.wikis = all_wikis + + def on_welcome(self, connection, event): # Join IRC channels + for channel in 
self.targets.values(): + connection.join(channel) + + def on_pubmsg(self, channel, event): + + + def on_nicknameinuse(self, c, e): + c.nick(c.get_nickname() + "_") + + async def parse_fandom_message(self, message): + raw_msg = message + message = message.split("\x035*\x03") + try: + user = message[1][4:].strip().strip(chr(3)) + except IndexError: + return + # print(asyncio.all_tasks()) + half = message[0].find("\x0302http") + if half == -1: + return + message = message[0][half + 3:].strip() + # print(message) + url = urlparse(message) + full_url = url.netloc + recognize_langs(url.path) + if full_url in self.wikis: + self.updated.append(full_url) + +def recognize_langs(path): + lang = "" + new_path = path.split("/") + if len(new_path)>2: + if new_path[1] != "wiki": + lang = "/"+new_path[1] + return lang + + From 49c36c9688b6f3d1a5e7a754b887e951d2cd97a9 Mon Sep 17 00:00:00 2001 From: Frisk Date: Thu, 21 Jan 2021 14:40:55 +0100 Subject: [PATCH 02/16] Some sort of progress --- src/bot.py | 16 ++++++++++++---- src/irc_feed.py | 10 ++++------ src/wiki.py | 1 + 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/bot.py b/src/bot.py index b39aa76..1b9bee2 100644 --- a/src/bot.py +++ b/src/bot.py @@ -4,6 +4,7 @@ import logging.config import signal import traceback import nest_asyncio +import time from collections import defaultdict, namedtuple from typing import Generator @@ -65,12 +66,14 @@ class RcQueue: async def start_group(self, group, initial_wikis): """Starts a task for given domain group""" if group not in self.domain_list: - if group in settings["irc_servers"]: - irc_connection = AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) - irc_connection.connect(settings["irc_servers"][group]["irc_host"], settings["irc_servers"][group]["irc_port"], "RcGcDb") + for irc_server in settings["irc_servers"].keys(): + if group in settings["irc_servers"]["irc_server"]["domains"]: + irc_connection = 
AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) + irc_connection.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"], settings["irc_servers"][irc_server]["irc_name"]) + break else: irc_connection = None - self.domain_list[group] = {"task": asyncio.create_task(scan_group(group)), "last_rowid": 0, "query": LimitedList(initial_wikis), "rate_limiter": RateLimiter()} + self.domain_list[group] = {"task": asyncio.create_task(scan_group(group)), "last_rowid": 0, "query": LimitedList(initial_wikis), "rate_limiter": RateLimiter(), "irc": irc_connection} logger.debug(self.domain_list[group]) else: raise KeyError @@ -149,6 +152,11 @@ class RcQueue: continue try: current_domain: dict = self[domain] + if current_domain["irc"]: + if db_wiki["wiki"] not in current_domain["irc"].updated and all_wikis[db_wiki["wiki"]].last_updated+settings["irc_overtime"] > time.time(): + continue # if domain has IRC, has not been updated, and it was updated less than an hour ago + else: # otherwise remove it from the list + current_domain["irc"].updated.remove(db_wiki["wiki"]) if not db_wiki["ROWID"] < current_domain["last_rowid"]: current_domain["query"].append(QueuedWiki(db_wiki["wiki"], 20)) except KeyError: diff --git a/src/irc_feed.py b/src/irc_feed.py index 2d8654b..20cff7f 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -13,18 +13,16 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): connection.join(channel) def on_pubmsg(self, channel, event): - + if channel == self.targets["rc"]: + self.parse_fandom_message(' '.join(event.arguments)) + elif channel == self.targets["discussion"]: + self.parse_fandom_discussion(' '.join(event.arguments)) def on_nicknameinuse(self, c, e): c.nick(c.get_nickname() + "_") async def parse_fandom_message(self, message): - raw_msg = message message = message.split("\x035*\x03") - try: - user = message[1][4:].strip().strip(chr(3)) - except IndexError: - return # 
print(asyncio.all_tasks()) half = message[0].find("\x0302http") if half == -1: diff --git a/src/wiki.py b/src/wiki.py index 148a7a1..7cdde43 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -24,6 +24,7 @@ class Wiki: fail_times: int = 0 # corresponding to amount of times connection with wiki failed for client reasons (400-499) session: aiohttp.ClientSession = None rc_active: int = 0 + last_check: float = 0.0 @staticmethod async def fetch_wiki(extended, script_path, session: aiohttp.ClientSession, ratelimiter: RateLimiter, amount=20) -> aiohttp.ClientResponse: From d4c44da1263a89f816b97fb3c02e36245bfb2094 Mon Sep 17 00:00:00 2001 From: MarkusRost <2701034-MarkusRost@users.noreply.gitlab.com> Date: Fri, 22 Jan 2021 10:29:44 +0000 Subject: [PATCH 03/16] Fix feeds --- src/bot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bot.py b/src/bot.py index 59839a8..18577d9 100644 --- a/src/bot.py +++ b/src/bot.py @@ -374,9 +374,9 @@ async def discussion_handler(): continue # ignore this wiki if it throws errors try: discussion_feed_resp = await feeds_response.json(encoding="UTF-8") - if "title" in discussion_feed_resp: + if "error" in discussion_feed_resp: error = discussion_feed_resp["error"] - if error == "site doesn't exists": # Discussions disabled + if error == "NotFoundException": # Discussions disabled if db_wiki["rcid"] != -1: # RC feed is disabled db_cursor.execute("UPDATE rcgcdw SET postid = ? WHERE wiki = ?", ("-1", db_wiki["wiki"],)) From 0ef302ec7f7da31d6f84a42cc335d9e6e1f6e8b7 Mon Sep 17 00:00:00 2001 From: MarkusRost <2701034-MarkusRost@users.noreply.gitlab.com> Date: Sat, 23 Jan 2021 22:14:41 +0000 Subject: [PATCH 04/16] Fix message for log action "managewiki/undelete" See https://github.com/miraheze/ManageWiki/blob/master/i18n/en.json#L41. Also mentioned on Discord. 
--- src/formatters/rc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/formatters/rc.py b/src/formatters/rc.py index 1f9420a..9ee8a40 100644 --- a/src/formatters/rc.py +++ b/src/formatters/rc.py @@ -388,7 +388,7 @@ async def compact_formatter(action, change, parsed_comment, categories, recent_c author=author, author_url=author_url, group_name=group_name, comment=parsed_comment ) elif action == "managewiki/undelete": - content = _("[{author}]({author_url}) restored a wiki *{wiki_name}*{comment}").format( + content = _("[{author}]({author_url}) undeleted a wiki *{wiki_name}*{comment}").format( author=author, author_url=author_url, wiki_name=change["logparams"].get("wiki", _("Unknown")), comment=parsed_comment ) elif action == "managewiki/unlock": @@ -1018,7 +1018,7 @@ async def embed_formatter(action, change, parsed_comment, categories, recent_cha embed["title"] = _("Modified \"{usergroup_name}\" usergroup").format(usergroup_name=group_name) link = create_article_path(change["title"], WIKI_ARTICLE_PATH) elif action == "managewiki/undelete": - embed["title"] = _("Restored a \"{wiki}\" wiki").format(wiki=change["logparams"].get("wiki", _("Unknown"))) + embed["title"] = _("Undeleted a \"{wiki}\" wiki").format(wiki=change["logparams"].get("wiki", _("Unknown"))) link = create_article_path(change["title"], WIKI_ARTICLE_PATH) elif action == "managewiki/unlock": embed["title"] = _("Unlocked a \"{wiki}\" wiki").format(wiki=change["logparams"].get("wiki", _("Unknown"))) From 6b8a2e217be129e1499c939aa7d16c70ed985b27 Mon Sep 17 00:00:00 2001 From: Frisk Date: Wed, 27 Jan 2021 18:48:46 +0100 Subject: [PATCH 05/16] I have no productivity today so I'm just commiting some more work I'll have to rework anyways --- src/bot.py | 19 +++++++++++++------ src/irc_feed.py | 12 +++++++++++- src/wiki.py | 1 + 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/bot.py b/src/bot.py index 1b9bee2..826ae7e 100644 --- a/src/bot.py +++ b/src/bot.py @@ 
-62,17 +62,23 @@ class RcQueue: def __init__(self): self.domain_list = {} self.to_remove = [] + self.irc_mapping = {} async def start_group(self, group, initial_wikis): """Starts a task for given domain group""" if group not in self.domain_list: - for irc_server in settings["irc_servers"].keys(): - if group in settings["irc_servers"]["irc_server"]["domains"]: - irc_connection = AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) - irc_connection.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"], settings["irc_servers"][irc_server]["irc_name"]) - break + if group in self.irc_mapping: # Hopefully there are no race conditions.... + irc_connection = self.irc_mapping[group] else: - irc_connection = None + for irc_server in settings["irc_servers"].keys(): + if group in settings["irc_servers"][irc_server]["domains"]: + irc_connection = AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) + for domain in settings["irc_servers"][irc_server]["domains"]: + self.irc_mapping[domain] = irc_connection + irc_connection.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"], settings["irc_servers"][irc_server]["irc_name"]) + break + else: + irc_connection = None self.domain_list[group] = {"task": asyncio.create_task(scan_group(group)), "last_rowid": 0, "query": LimitedList(initial_wikis), "rate_limiter": RateLimiter(), "irc": irc_connection} logger.debug(self.domain_list[group]) else: @@ -285,6 +291,7 @@ async def scan_group(group: str): targets = generate_targets(queued_wiki.url, "AND (rcid != -1 OR rcid IS NULL)") paths = get_paths(queued_wiki.url, recent_changes_resp) new_events = 0 + local_wiki.last_check = time.time() # on successful check, save new last check time for change in recent_changes: if change["rcid"] > local_wiki.rc_active and queued_wiki.amount != 450: new_events += 1 diff --git a/src/irc_feed.py 
b/src/irc_feed.py index 20cff7f..8e1098f 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -1,4 +1,5 @@ import irc.client_aio +import json from urllib.parse import urlparse, quote class AioIRCCat(irc.client_aio.AioSimpleIRCClient): @@ -6,6 +7,7 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): irc.client.SimpleIRCClient.__init__(self) self.targets = targets self.updated = [] # Storage for edited wikis + self.updated_discussions = [] self.wikis = all_wikis def on_welcome(self, connection, event): # Join IRC channels @@ -21,7 +23,7 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): def on_nicknameinuse(self, c, e): c.nick(c.get_nickname() + "_") - async def parse_fandom_message(self, message): + def parse_fandom_message(self, message): message = message.split("\x035*\x03") # print(asyncio.all_tasks()) half = message[0].find("\x0302http") @@ -34,6 +36,14 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): if full_url in self.wikis: self.updated.append(full_url) + def parse_discussions_message(self, message): + post = json.loads(message) + if post.get('action', 'unknown') != "deleted": # ignore deletion events + url = urlparse(post.get('url')) + full_url = url.netloc + recognize_langs(url.path) + self.updated_discussions.append(full_url) + + def recognize_langs(path): lang = "" new_path = path.split("/") diff --git a/src/wiki.py b/src/wiki.py index 7cdde43..d72dad3 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -25,6 +25,7 @@ class Wiki: session: aiohttp.ClientSession = None rc_active: int = 0 last_check: float = 0.0 + last_discussion_check: float = 0.0 @staticmethod async def fetch_wiki(extended, script_path, session: aiohttp.ClientSession, ratelimiter: RateLimiter, amount=20) -> aiohttp.ClientResponse: From 01c58417c7983813c8e5f2ee74a39638e07bf656 Mon Sep 17 00:00:00 2001 From: Frisk Date: Tue, 2 Feb 2021 02:28:14 +0100 Subject: [PATCH 06/16] wrote 4 lines for discussions, after a long break <3 --- src/bot.py | 4 ++++ 1 file changed, 4 insertions(+) 
diff --git a/src/bot.py b/src/bot.py index 826ae7e..8753c5b 100644 --- a/src/bot.py +++ b/src/bot.py @@ -380,6 +380,10 @@ async def discussion_handler(): fetch_all = db_cursor.execute( "SELECT wiki, rcid, postid FROM rcgcdw WHERE postid != '-1' OR postid IS NULL GROUP BY wiki") for db_wiki in fetch_all.fetchall(): + if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and all_wikis[db_wiki["wiki"]].last_updated+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic + continue + else: + rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"]) header = settings["header"] header["Accept"] = "application/hal+json" async with aiohttp.ClientSession(headers=header, From 928db6a86d77d9c2debd1ff771ce102078862b5e Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 20 Feb 2021 16:33:44 +0100 Subject: [PATCH 07/16] Fixed all immediatly visible issues --- src/bot.py | 29 ++++++++++++++++++++++------- src/irc_feed.py | 35 +++++++++++++++++++++-------------- src/wiki.py | 1 + 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/bot.py b/src/bot.py index 8753c5b..b16acdb 100644 --- a/src/bot.py +++ b/src/bot.py @@ -72,7 +72,7 @@ class RcQueue: else: for irc_server in settings["irc_servers"].keys(): if group in settings["irc_servers"][irc_server]["domains"]: - irc_connection = AioIRCCat(settings["irc_servers"]["group"]["irc_channel_mapping"], all_wikis) + irc_connection = AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], all_wikis) for domain in settings["irc_servers"][irc_server]["domains"]: self.irc_mapping[domain] = irc_connection irc_connection.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"], settings["irc_servers"][irc_server]["irc_name"]) @@ -92,7 +92,10 @@ class RcQueue: all_wikis[wiki].rc_active = -1 if not self[group]["query"]: # if there is no wiki left in 
the queue, get rid of the task logger.debug(f"{group} no longer has any wikis queued!") - await self.stop_task_group(group) + if not self.check_if_domain_in_db(group): + await self.stop_task_group(group) + else: + logger.debug(f"But there are still wikis for it in DB!") async def stop_task_group(self, group): self[group]["task"].cancel() @@ -102,7 +105,7 @@ class RcQueue: fetch_all = db_cursor.execute( 'SELECT ROWID, webhook, wiki, lang, display, rcid FROM rcgcdw WHERE rcid != -1 GROUP BY wiki ORDER BY ROWID ASC') for wiki in fetch_all.fetchall(): - if get_domain(db_wiki["wiki"]) == domain: + if get_domain(wiki["wiki"]) == domain: return True return False @@ -159,13 +162,22 @@ class RcQueue: try: current_domain: dict = self[domain] if current_domain["irc"]: - if db_wiki["wiki"] not in current_domain["irc"].updated and all_wikis[db_wiki["wiki"]].last_updated+settings["irc_overtime"] > time.time(): + logger.info('CURRENT STATUS:') + logger.info("DOMAIN LIST FOR IRC: {}".format(current_domain["irc"].updated)) + logger.info("CURRENT DOMAIN INFO: {}".format(domain)) + logger.info("IS WIKI IN A LIST?: {}".format(db_wiki["wiki"] in current_domain["irc"].updated)) + logger.info("LAST CHECK FOR THE WIKI {} IS {}".format(db_wiki["wiki"], all_wikis[db_wiki["wiki"]].last_check)) + if db_wiki["wiki"] not in current_domain["irc"].updated and all_wikis[db_wiki["wiki"]].last_check+settings["irc_overtime"] > time.time(): continue # if domain has IRC, has not been updated, and it was updated less than an hour ago else: # otherwise remove it from the list - current_domain["irc"].updated.remove(db_wiki["wiki"]) + try: + current_domain["irc"].updated.remove(db_wiki["wiki"]) + except KeyError: + pass # this is to be expected when third condition is not met above if not db_wiki["ROWID"] < current_domain["last_rowid"]: current_domain["query"].append(QueuedWiki(db_wiki["wiki"], 20)) except KeyError: + raise await self.start_group(domain, [QueuedWiki(db_wiki["wiki"], 20)]) logger.info("A 
new domain group ({}) has been added since last time, adding it to the domain_list and starting a task...".format(domain)) except ListFull: @@ -380,10 +392,13 @@ async def discussion_handler(): fetch_all = db_cursor.execute( "SELECT wiki, rcid, postid FROM rcgcdw WHERE postid != '-1' OR postid IS NULL GROUP BY wiki") for db_wiki in fetch_all.fetchall(): - if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and all_wikis[db_wiki["wiki"]].last_updated+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic + if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and all_wikis[db_wiki["wiki"]].last_discussion_check+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic continue else: - rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"]) + try: + rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"]) + except KeyError: + pass # to be expected header = settings["header"] header["Accept"] = "application/hal+json" async with aiohttp.ClientSession(headers=header, diff --git a/src/irc_feed.py b/src/irc_feed.py index 8e1098f..6c19597 100644 --- a/src/irc_feed.py +++ b/src/irc_feed.py @@ -1,23 +1,27 @@ import irc.client_aio import json +import logging from urllib.parse import urlparse, quote +logger = logging.getLogger("rcgcdw.irc_feed") + + class AioIRCCat(irc.client_aio.AioSimpleIRCClient): def __init__(self, targets, all_wikis): - irc.client.SimpleIRCClient.__init__(self) + irc.client_aio.SimpleIRCClient.__init__(self) self.targets = targets - self.updated = [] # Storage for edited wikis - self.updated_discussions = [] + self.updated = set() # Storage for edited wikis + self.updated_discussions = set() self.wikis = all_wikis def on_welcome(self, connection, event): # Join IRC channels for channel in 
self.targets.values(): connection.join(channel) - def on_pubmsg(self, channel, event): - if channel == self.targets["rc"]: + def on_pubmsg(self, connection, event): + if event.target == self.targets["rc"]: self.parse_fandom_message(' '.join(event.arguments)) - elif channel == self.targets["discussion"]: + elif event.target == self.targets["discussion"]: self.parse_fandom_discussion(' '.join(event.arguments)) def on_nicknameinuse(self, c, e): @@ -32,24 +36,27 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient): message = message[0][half + 3:].strip() # print(message) url = urlparse(message) - full_url = url.netloc + recognize_langs(url.path) - if full_url in self.wikis: - self.updated.append(full_url) + full_url = "https://"+url.netloc + recognize_langs(url.path) + if full_url in self.wikis and self.wikis[full_url].rc_active != -1: + self.updated.add(full_url) + logger.debug("New website appended to the list! {}".format(full_url)) - def parse_discussions_message(self, message): + def parse_fandom_discussion(self, message): post = json.loads(message) if post.get('action', 'unknown') != "deleted": # ignore deletion events url = urlparse(post.get('url')) - full_url = url.netloc + recognize_langs(url.path) - self.updated_discussions.append(full_url) + full_url ="https://"+ url.netloc + recognize_langs(url.path) + if full_url in self.wikis: # POSSIBLE MEMORY LEAK AS WE DON'T HAVE A WAY TO CHECK IF WIKI IS LOOKING FOR DISCUSSIONS OR NOT + self.updated_discussions.add("https://"+full_url) + logger.debug("New website appended to the list! 
{}".format(full_url)) def recognize_langs(path): lang = "" new_path = path.split("/") if len(new_path)>2: - if new_path[1] != "wiki": + if new_path[1] not in ("wiki", "f"): lang = "/"+new_path[1] - return lang + return lang+"/" diff --git a/src/wiki.py b/src/wiki.py index d72dad3..1bf3a89 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -190,6 +190,7 @@ async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict): mw_msgs[key] = msgs # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one local_wiki.mw_messages = key + # db_wiki: webhook, wiki, lang, display, rcid, postid async def essential_info(change: dict, changed_categories, local_wiki: Wiki, target: tuple, paths: tuple, request: dict, rate_limiter: RateLimiter) -> src.discord.DiscordMessage: From 8da53cabdbcfb97e2ffa0e61aee99b6ac6d84d7e Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 20 Feb 2021 19:18:59 +0100 Subject: [PATCH 08/16] Remove debug code --- src/bot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bot.py b/src/bot.py index b16acdb..b9bcd98 100644 --- a/src/bot.py +++ b/src/bot.py @@ -177,7 +177,6 @@ class RcQueue: if not db_wiki["ROWID"] < current_domain["last_rowid"]: current_domain["query"].append(QueuedWiki(db_wiki["wiki"], 20)) except KeyError: - raise await self.start_group(domain, [QueuedWiki(db_wiki["wiki"], 20)]) logger.info("A new domain group ({}) has been added since last time, adding it to the domain_list and starting a task...".format(domain)) except ListFull: From b2eba790c0304d72ed33aa11d987fef0d67336d1 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 20 Feb 2021 19:26:49 +0100 Subject: [PATCH 09/16] Updated settings example --- settings.json.example | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/settings.json.example b/settings.json.example index 3b5ca77..67dd2f2 100644 --- a/settings.json.example +++ b/settings.json.example @@ -7,6 +7,17 @@ "database_path": "rcgcdb.db", "monitoring_webhook": 
"111111111111111111/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "support": "https://discord.gg/v77RTk5", + "irc_overtime": 3600, + "irc_servers": { + "your custom name for the farm": { + "domains": ["wikipedia.org", "otherwikipedia.org"], + "irc_host": "randomIRC.domain.com", + "irc_port": "6667", + "irc_nickname": "BotIRCNickname", + "irc_name": "BotIRCName", + "irc_channel_mapping": {"rc": "#rcchannel", "discussion": "#discussionchannel"} + } + }, "logging": { "version": 1, "disable_existing_loggers": false, From dabf9ef6d4c6bf2b40e9444ec30dd2d66539bb71 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 20 Feb 2021 19:35:39 +0100 Subject: [PATCH 10/16] Updated requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 1c45373..c752fe3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ beautifulsoup4 >= 4.6.0; python_version >= '3.6' aiohttp >= 3.6.2 lxml >= 4.2.1 -nest-asyncio >= 1.4.0 \ No newline at end of file +nest-asyncio >= 1.4.0 +irc >= 19.0.1 \ No newline at end of file From 4c1101e5195df58c57b24dd10cd5ee776f855a30 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sun, 21 Feb 2021 16:51:43 +0100 Subject: [PATCH 11/16] Fix failing discussion check --- src/bot.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bot.py b/src/bot.py index b9bcd98..f8034dd 100644 --- a/src/bot.py +++ b/src/bot.py @@ -391,7 +391,12 @@ async def discussion_handler(): fetch_all = db_cursor.execute( "SELECT wiki, rcid, postid FROM rcgcdw WHERE postid != '-1' OR postid IS NULL GROUP BY wiki") for db_wiki in fetch_all.fetchall(): - if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and all_wikis[db_wiki["wiki"]].last_discussion_check+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic + try: + local_wiki = 
all_wikis[db_wiki["wiki"]] # set a reference to a wiki object from memory + except KeyError: + local_wiki = all_wikis[db_wiki["wiki"]] = Wiki() + local_wiki.rc_active = db_wiki["rcid"] + if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and local_wiki.last_discussion_check+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic continue else: try: @@ -402,11 +407,6 @@ async def discussion_handler(): header["Accept"] = "application/hal+json" async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(6.0)) as session: - try: - local_wiki = all_wikis[db_wiki["wiki"]] # set a reference to a wiki object from memory - except KeyError: - local_wiki = all_wikis[db_wiki["wiki"]] = Wiki() - local_wiki.rc_active = db_wiki["rcid"] try: feeds_response = await local_wiki.fetch_feeds(db_wiki["wiki"], session) except (WikiServerError, WikiError): From 96e678a047c72e1e512a6561becfa4a56cd1b169 Mon Sep 17 00:00:00 2001 From: Markus-Rost Date: Sun, 21 Feb 2021 17:49:14 +0100 Subject: [PATCH 12/16] Escape discussions titles because roblox --- src/formatters/discussions.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/formatters/discussions.py b/src/formatters/discussions.py index b7ea5b5..156bc88 100644 --- a/src/formatters/discussions.py +++ b/src/formatters/discussions.py @@ -45,20 +45,20 @@ async def feeds_compact_formatter(post_type, post, message_target, wiki, article else: logger.warning("No entry for {event} with params: {params}".format(event=thread_funnel, params=post)) event_type = "unknown" - message = msg_text.format(author=author, author_url=author_url, title=post["title"], url=wiki, threadId=post["threadId"], forumName=post["forumName"]) + message = msg_text.format(author=author, author_url=author_url, title=escape_formatting(post["title"]), url=wiki, threadId=post["threadId"], 
forumName=post["forumName"]) else: event_type = "discussion/forum/reply" - message = _("[{author}]({author_url}) created a [reply](<{url}f/p/{threadId}/r/{postId}>) to [{title}](<{url}f/p/{threadId}>) in {forumName}").format(author=author, author_url=author_url, url=wiki, threadId=post["threadId"], postId=post["id"], title=post["_embedded"]["thread"][0]["title"], forumName=post["forumName"]) + message = _("[{author}]({author_url}) created a [reply](<{url}f/p/{threadId}/r/{postId}>) to [{title}](<{url}f/p/{threadId}>) in {forumName}").format(author=author, author_url=author_url, url=wiki, threadId=post["threadId"], postId=post["id"], title=escape_formatting(post["_embedded"]["thread"][0]["title"]), forumName=post["forumName"]) elif post_type == "WALL": user_wall = _("unknown") # Fail safe if post["forumName"].endswith(' Message Wall'): user_wall = post["forumName"][:-13] if not post["isReply"]: event_type = "discussion/wall/post" - message = _("[{author}]({author_url}) created [{title}](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}>) on [{user}'s Message Wall](<{url}wiki/Message_Wall:{user_wall}>)").format(author=author, author_url=author_url, title=post["title"], url=wiki, user=user_wall, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"]) + message = _("[{author}]({author_url}) created [{title}](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}>) on [{user}'s Message Wall](<{url}wiki/Message_Wall:{user_wall}>)").format(author=author, author_url=author_url, title=escape_formatting(post["title"]), url=wiki, user=user_wall, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"]) else: event_type = "discussion/wall/reply" - message = _("[{author}]({author_url}) created a [reply](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}#{replyId}>) to [{title}](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}>) on [{user}'s Message Wall](<{url}wiki/Message_Wall:{user_wall}>)").format(author=author, 
author_url=author_url, url=wiki, title=post["_embedded"]["thread"][0]["title"], user=user_wall, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"], replyId=post["id"]) + message = _("[{author}]({author_url}) created a [reply](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}#{replyId}>) to [{title}](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}>) on [{user}'s Message Wall](<{url}wiki/Message_Wall:{user_wall}>)").format(author=author, author_url=author_url, url=wiki, title=escape_formatting(post["_embedded"]["thread"][0]["title"]), user=user_wall, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"], replyId=post["id"]) elif post_type == "ARTICLE_COMMENT": if article_page is None: article_page = {"title": _("unknown"), "fullUrl": wiki} # No page known @@ -112,11 +112,11 @@ async def feeds_embed_formatter(post_type, post, message_target, wiki, article_p if post_type == "FORUM": if not post["isReply"]: embed["url"] = "{url}f/p/{threadId}".format(url=wiki, threadId=post["threadId"]) - embed["title"] = _("Created \"{title}\"").format(title=post["title"]) + embed["title"] = _("Created \"{title}\"").format(title=escape_formatting(post["title"])) thread_funnel = post.get("funnel") if thread_funnel == "POLL": embed.event_type = "discussion/forum/poll" - embed["title"] = _("Created a poll \"{title}\"").format(title=post["title"]) + embed["title"] = _("Created a poll \"{title}\"").format(title=escape_formatting(post["title"])) if message_target[0][1] > 1: poll = post["poll"] image_type = False @@ -128,7 +128,7 @@ async def feeds_embed_formatter(post_type, post, message_target, wiki, article_p inline=True) elif thread_funnel == "QUIZ": embed.event_type = "discussion/forum/quiz" - embed["title"] = _("Created a quiz \"{title}\"").format(title=post["title"]) + embed["title"] = _("Created a quiz \"{title}\"").format(title=escape_formatting(post["title"])) if message_target[0][1] > 1: quiz = 
post["_embedded"]["quizzes"][0] embed["description"] = quiz["title"] @@ -149,7 +149,7 @@ async def feeds_embed_formatter(post_type, post, message_target, wiki, article_p embed.add_field(_("Tags"), ", ".join(tag_displayname)) else: embed.event_type = "discussion/forum/reply" - embed["title"] = _("Replied to \"{title}\"").format(title=post["_embedded"]["thread"][0]["title"]) + embed["title"] = _("Replied to \"{title}\"").format(title=escape_formatting(post["_embedded"]["thread"][0]["title"])) embed["url"] = "{url}f/p/{threadId}/r/{postId}".format(url=wiki, threadId=post["threadId"], postId=post["id"]) elif post_type == "WALL": user_wall = _("unknown") # Fail safe @@ -158,11 +158,11 @@ async def feeds_embed_formatter(post_type, post, message_target, wiki, article_p if not post["isReply"]: embed.event_type = "discussion/wall/post" embed["url"] = "{url}wiki/Message_Wall:{user_wall}?threadId={threadId}".format(url=wiki, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"]) - embed["title"] = _("Created \"{title}\" on {user}'s Message Wall").format(title=post["title"], user=user_wall) + embed["title"] = _("Created \"{title}\" on {user}'s Message Wall").format(title=escape_formatting(post["title"]), user=user_wall) else: embed.event_type = "discussion/wall/reply" embed["url"] = "{url}wiki/Message_Wall:{user_wall}?threadId={threadId}#{replyId}".format(url=wiki, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"], replyId=post["id"]) - embed["title"] = _("Replied to \"{title}\" on {user}'s Message Wall").format(title=post["_embedded"]["thread"][0]["title"], user=user_wall) + embed["title"] = _("Replied to \"{title}\" on {user}'s Message Wall").format(title=escape_formatting(post["_embedded"]["thread"][0]["title"]), user=user_wall) elif post_type == "ARTICLE_COMMENT": if article_page is None: article_page = {"title": _("unknown"), "fullUrl": wiki} # No page known From d7661469a2c3ddd2a5b1211ac7e6fc78bccab316 Mon Sep 17 00:00:00 
2001 From: Frisk Date: Mon, 8 Mar 2021 18:33:54 +0100 Subject: [PATCH 13/16] Move logs to debug --- src/bot.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/bot.py b/src/bot.py index f8034dd..c3fc1bb 100644 --- a/src/bot.py +++ b/src/bot.py @@ -162,11 +162,11 @@ class RcQueue: try: current_domain: dict = self[domain] if current_domain["irc"]: - logger.info('CURRENT STATUS:') - logger.info("DOMAIN LIST FOR IRC: {}".format(current_domain["irc"].updated)) - logger.info("CURRENT DOMAIN INFO: {}".format(domain)) - logger.info("IS WIKI IN A LIST?: {}".format(db_wiki["wiki"] in current_domain["irc"].updated)) - logger.info("LAST CHECK FOR THE WIKI {} IS {}".format(db_wiki["wiki"], all_wikis[db_wiki["wiki"]].last_check)) + logger.debug('CURRENT STATUS:') + logger.debug("DOMAIN LIST FOR IRC: {}".format(current_domain["irc"].updated)) + logger.debug("CURRENT DOMAIN INFO: {}".format(domain)) + logger.debug("IS WIKI IN A LIST?: {}".format(db_wiki["wiki"] in current_domain["irc"].updated)) + logger.debug("LAST CHECK FOR THE WIKI {} IS {}".format(db_wiki["wiki"], all_wikis[db_wiki["wiki"]].last_check)) if db_wiki["wiki"] not in current_domain["irc"].updated and all_wikis[db_wiki["wiki"]].last_check+settings["irc_overtime"] > time.time(): continue # if domain has IRC, has not been updated, and it was updated less than an hour ago else: # otherwise remove it from the list From 5e08b4a5013e844efc617aa686ff9d6c00d89ef3 Mon Sep 17 00:00:00 2001 From: Frisk Date: Mon, 8 Mar 2021 19:00:12 +0100 Subject: [PATCH 14/16] Start ignoring the additional delay for wikis with IRC feed. 
We have IRC feed only for more active domains so this additional delay does more harm than good --- src/bot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/bot.py b/src/bot.py index c3fc1bb..c43ef57 100644 --- a/src/bot.py +++ b/src/bot.py @@ -249,7 +249,8 @@ async def scan_group(group: str): while True: try: async with rcqueue.retrieve_next_queued(group) as queued_wiki: # acquire next wiki in queue - await asyncio.sleep(calculate_delay_for_group(len(rcqueue[group]["query"]))) + if "irc" not in rcqueue[group]: + await asyncio.sleep(calculate_delay_for_group(len(rcqueue[group]["query"]))) logger.debug("Wiki {}".format(queued_wiki.url)) local_wiki = all_wikis[queued_wiki.url] # set a reference to a wiki object from memory extended = False @@ -346,7 +347,7 @@ async def scan_group(group: str): except asyncio.CancelledError: return except QueueEmpty: - await asyncio.sleep(21.0) + await asyncio.sleep(10.0) continue From a37a7d9365991f92a442e0f24bffed11dfc6d25f Mon Sep 17 00:00:00 2001 From: MarkusRost <2701034-MarkusRost@users.noreply.gitlab.com> Date: Tue, 16 Mar 2021 20:36:11 +0000 Subject: [PATCH 15/16] follow redirects --- src/wiki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wiki.py b/src/wiki.py index 148a7a1..e89fd3e 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -68,7 +68,7 @@ class Wiki: await ratelimiter.timeout_wait() try: async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) as session: - request = await session.get(url, allow_redirects=False) + request = await session.get(url) ratelimiter.timeout_add(1.0) request.raise_for_status() json_request = await request.json(encoding="UTF-8") From 155c50a561148b54424ea52367e4896e11c685e2 Mon Sep 17 00:00:00 2001 From: Markus-Rost Date: Tue, 16 Mar 2021 21:46:31 +0100 Subject: [PATCH 16/16] bump version bump version for IRC --- src/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.py
b/src/config.py index cffd432..2a2ddc2 100644 --- a/src/config.py +++ b/src/config.py @@ -4,7 +4,7 @@ try: # load settings with open("settings.json", encoding="utf8") as sfile: settings = json.load(sfile) if "user-agent" in settings["header"]: - settings["header"]["user-agent"] = settings["header"]["user-agent"].format(version="1.0") # set the version in the useragent + settings["header"]["user-agent"] = settings["header"]["user-agent"].format(version="1.1") # set the version in the useragent except FileNotFoundError: logging.critical("No config file could be found. Please make sure settings.json is in the directory.") sys.exit(1) \ No newline at end of file