From 19730dfcdb58f3b51295ae2d36239e14d80c2d51 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sun, 19 Jul 2020 15:32:54 +0200 Subject: [PATCH] Added code --- src/bot.py | 25 +++-- src/formatters/rc.py | 34 ++---- src/misc.py | 247 +++++++++++++++++++++++++++++++++++++++++++ src/msgqueue.py | 49 +++++++++ src/wiki.py | 17 +-- 5 files changed, 333 insertions(+), 39 deletions(-) create mode 100644 src/misc.py create mode 100644 src/msgqueue.py diff --git a/src/bot.py b/src/bot.py index 7f3aa6f..0d05ae6 100644 --- a/src/bot.py +++ b/src/bot.py @@ -1,7 +1,7 @@ import logging.config from src.config import settings import sqlite3 -from src.wiki import Wiki, process_cats, process_mwmsgs +from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info import asyncio, aiohttp from src.exceptions import * from src.database import db_cursor @@ -19,7 +19,7 @@ mw_msgs: dict = {} # will have the type of id: tuple # Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests # 2. 
Easier to code -for wiki in db_cursor.execute('SELECT ROWID, * FROM wikis'): +for wiki in db_cursor.execute('SELECT ROWID, * FROM rcgcdw'): all_wikis[wiki[0]] = Wiki() # Start queueing logic @@ -31,10 +31,10 @@ def calculate_delay() -> float: else: return min_delay -async def main_loop(): +async def wiki_scanner(): calc_delay = calculate_delay() - for db_wiki in db_cursor.execute('SELECT ROWID, * FROM wikis'): + for db_wiki in db_cursor.execute('SELECT ROWID, * FROM rcgcdw'): extended = False if wiki[0] not in all_wikis: logger.debug("New wiki: {}".format(wiki[1])) @@ -43,7 +43,7 @@ async def main_loop(): if local_wiki.mw_messages is None: extended = True try: - wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[3], db_wiki[4]) + wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[4]) await local_wiki.check_status(wiki[0], wiki_response.status, db_wiki[1]) except (WikiServerError, WikiError): continue # ignore this wiki if it throws errors @@ -56,7 +56,7 @@ async def main_loop(): if extended: await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs) categorize_events = {} - if db_wiki[6] is None: # new wiki, just get the last rc to not spam the channel + if db_wiki[7] is None: # new wiki, just get the last rc to not spam the channel if len(recent_changes) > 0: DBHandler.add(db_wiki[0], recent_changes[-1]["rcid"]) continue @@ -66,8 +66,17 @@ async def main_loop(): for change in recent_changes: await process_cats(change, local_wiki, mw_msgs, categorize_events) for change in recent_changes: # Yeah, second loop since the categories require to be all loaded up - if change["rcid"] < db_wiki[6]: - + if change["rcid"] < db_wiki[7]: + await essential_info(change, categorize_events, local_wiki, db_wiki) await asyncio.sleep(delay=calc_delay) + +async def message_sender(): + pass + +async def main_loop(): + task1 = asyncio.create_task(wiki_scanner()) + task2 = asyncio.create_task(message_sender()) + +asyncio.run(main_loop()) diff --git 
a/src/formatters/rc.py b/src/formatters/rc.py index bfd6e8e..bd39117 100644 --- a/src/formatters/rc.py +++ b/src/formatters/rc.py @@ -3,23 +3,25 @@ import math import re import time import logging +import base64 +from config import settings +from src.misc import link_formatter, create_article_path, LinkParser, profile_field_name, ContentParser, DiscordMessage from urllib.parse import quote_plus +# from html.parser import HTMLParser from bs4 import BeautifulSoup #from src.configloader import settings #from src.misc import link_formatter, create_article_path, WIKI_SCRIPT_PATH, send_to_discord, DiscordMessage, safe_read, \ # WIKI_API_PATH, ContentParser, profile_field_name, LinkParser -from src.i18n import lang +from src.i18n import langs #from src.rc import recent_changes, pull_comment -ngettext = lang.ngettext logger = logging.getLogger("rcgcdw.rc_formatters") #from src.rcgcdw import recent_changes, ngettext, logger, profile_field_name, LinkParser, pull_comment -LinkParser = LinkParser() - def compact_formatter(action, change, parsed_comment, categories, recent_changes): + LinkParser = LinkParser("domain") if action != "suppressed": author_url = link_formatter(create_article_path("User:{user}".format(user=change["user"]))) author = change["user"] @@ -308,32 +310,14 @@ def compact_formatter(action, change, parsed_comment, categories, recent_changes def embed_formatter(action, change, parsed_comment, categories, recent_changes): + LinkParser = LinkParser() embed = DiscordMessage("embed", action, settings["webhookURL"]) + WIKI_API_PATH = if parsed_comment is None: parsed_comment = _("No description provided") if action != "suppressed": if "anon" in change: - author_url = create_article_path("Special:Contributions/{user}".format(user=change["user"].replace(" ", "_"))) # Replace here needed in case of #75 - logger.debug("current user: {} with cache of IPs: {}".format(change["user"], recent_changes.map_ips.keys())) - if change["user"] not in 
list(recent_changes.map_ips.keys()): - contibs = safe_read(recent_changes.safe_request( - "{wiki}?action=query&format=json&list=usercontribs&uclimit=max&ucuser={user}&ucstart={timestamp}&ucprop=".format( - wiki=WIKI_API_PATH, user=change["user"], timestamp=change["timestamp"])), "query", "usercontribs") - if contibs is None: - logger.warning( - "WARNING: Something went wrong when checking amount of contributions for given IP address") - change["user"] = change["user"] + "(?)" - else: - recent_changes.map_ips[change["user"]] = len(contibs) - logger.debug("Current params user {} and state of map_ips {}".format(change["user"], recent_changes.map_ips)) - change["user"] = "{author} ({contribs})".format(author=change["user"], contribs=len(contibs)) - else: - logger.debug( - "Current params user {} and state of map_ips {}".format(change["user"], recent_changes.map_ips)) - if action in ("edit", "new"): - recent_changes.map_ips[change["user"]] += 1 - change["user"] = "{author} ({amount})".format(author=change["user"], - amount=recent_changes.map_ips[change["user"]]) + author_url = create_article_path("Special:Contributions/{user}".format(user=change["user"].replace(" ", "_"))) else: author_url = create_article_path("User:{}".format(change["user"].replace(" ", "_"))) embed.set_author(change["user"], author_url) diff --git a/src/misc.py b/src/misc.py new file mode 100644 index 0000000..cd1d2f7 --- /dev/null +++ b/src/misc.py @@ -0,0 +1,247 @@ +from html.parser import HTMLParser +import base64, re +from src.config import settings +import json +import logging +from collections import defaultdict +import random +import math +profile_fields = {"profile-location": _("Location"), "profile-aboutme": _("About me"), "profile-link-google": _("Google link"), "profile-link-facebook":_("Facebook link"), "profile-link-twitter": _("Twitter link"), "profile-link-reddit": _("Reddit link"), "profile-link-twitch": _("Twitch link"), "profile-link-psn": _("PSN link"), "profile-link-vk": _("VK 
link"), "profile-link-xbl": _("XBL link"), "profile-link-steam": _("Steam link"), "profile-link-discord": _("Discord handle"), "profile-link-battlenet": _("Battle.net handle")} +logger = logging.getLogger("rcgcdw.misc") + +class DiscordMessage(): + """A class defining a typical Discord JSON representation of webhook payload.""" + def __init__(self, message_type: str, event_type: str, webhook_url: str, content=None): + self.webhook_object = dict(allowed_mentions={"parse": []}, avatar_url=settings["avatars"].get(message_type, "")) + self.webhook_url = webhook_url + + if message_type == "embed": + self.__setup_embed() + elif message_type == "compact": + self.webhook_object["content"] = content + + self.event_type = event_type + + def __setitem__(self, key, value): + """Set item is used only in embeds.""" + try: + self.embed[key] = value + except NameError: + raise TypeError("Tried to assign a value when message type is plain message!") + + def __getitem__(self, item): + return self.embed[item] + + def __repr__(self): + """Return the Discord webhook object ready to be sent""" + return json.dumps(self.webhook_object) + + def __setup_embed(self): + self.embed = defaultdict(dict) + if "embeds" not in self.webhook_object: + self.webhook_object["embeds"] = [self.embed] + else: + self.webhook_object["embeds"].append(self.embed) + self.embed["color"] = None + + def add_embed(self): + self.finish_embed() + self.__setup_embed() + + def finish_embed(self): + if self.embed["color"] is None: + if settings["appearance"]["embed"].get(self.event_type, {"color": None})["color"] is None: + self.embed["color"] = random.randrange(1, 16777215) + else: + self.embed["color"] = settings["appearance"]["embed"][self.event_type]["color"] + else: + self.embed["color"] = math.floor(self.embed["color"]) + + def set_author(self, name, url, icon_url=""): + self.embed["author"]["name"] = name + self.embed["author"]["url"] = url + self.embed["author"]["icon_url"] = icon_url + + def add_field(self, 
name, value, inline=False): + if "fields" not in self.embed: + self.embed["fields"] = [] + self.embed["fields"].append(dict(name=name, value=value, inline=inline)) + + def set_avatar(self, url): + self.webhook_object["avatar_url"] = url + + def set_name(self, name): + self.webhook_object["username"] = name + + +class LinkParser(HTMLParser): + new_string = "" + recent_href = "" + + def __init__(self, domain): + super().__init__() + self.WIKI_JUST_DOMAIN = domain + + def handle_starttag(self, tag, attrs): + for attr in attrs: + if attr[0] == 'href': + self.recent_href = attr[1] + if self.recent_href.startswith("//"): + self.recent_href = "https:{rest}".format(rest=self.recent_href) + elif not self.recent_href.startswith("http"): + self.recent_href = self.WIKI_JUST_DOMAIN + self.recent_href + self.recent_href = self.recent_href.replace(")", "\\)") + elif attr[0] == 'data-uncrawlable-url': + self.recent_href = attr[1].encode('ascii') + self.recent_href = base64.b64decode(self.recent_href) + self.recent_href = self.WIKI_JUST_DOMAIN + self.recent_href.decode('ascii') + + def handle_data(self, data): + if self.recent_href: + self.new_string = self.new_string + "[{}](<{}>)".format(data, self.recent_href) + self.recent_href = "" + else: + self.new_string = self.new_string + data + + def handle_comment(self, data): + self.new_string = self.new_string + data + + def handle_endtag(self, tag): + # logger.debug(self.new_string) + pass + + +def link_formatter(link: str) -> str: + """Formats a link to not embed it""" + return "<" + re.sub(r"([)])", "\\\\\\1", link).replace(" ", "_") + ">" + + +def escape_formatting(data: str) -> str: + """Escape Discord formatting""" + return re.sub(r"([`_*~<>{}@/|\\])", "\\\\\\1", data, 0) + + +def create_article_path(article: str, WIKI_ARTICLE_PATH: str) -> str: + """Takes the string and creates an URL with it as the article name""" + return WIKI_ARTICLE_PATH.replace("$1", article) + + +def profile_field_name(name, embed): + try: + return 
profile_fields[name] + except KeyError: + if embed: + return _("Unknown") + else: + return _("unknown") + + +class ContentParser(HTMLParser): + more = _("\n__And more__") + current_tag = "" + small_prev_ins = "" + small_prev_del = "" + ins_length = len(more) + del_length = len(more) + added = False + + def handle_starttag(self, tagname, attribs): + if tagname == "ins" or tagname == "del": + self.current_tag = tagname + if tagname == "td" and 'diff-addedline' in attribs[0]: + self.current_tag = tagname + "a" + if tagname == "td" and 'diff-deletedline' in attribs[0]: + self.current_tag = tagname + "d" + if tagname == "td" and 'diff-marker' in attribs[0]: + self.added = True + + def handle_data(self, data): + data = re.sub(r"([`_*~<>{}@/|\\])", "\\\\\\1", data, 0) + if self.current_tag == "ins" and self.ins_length <= 1000: + self.ins_length += len("**" + data + '**') + if self.ins_length <= 1000: + self.small_prev_ins = self.small_prev_ins + "**" + data + '**' + else: + self.small_prev_ins = self.small_prev_ins + self.more + if self.current_tag == "del" and self.del_length <= 1000: + self.del_length += len("~~" + data + '~~') + if self.del_length <= 1000: + self.small_prev_del = self.small_prev_del + "~~" + data + '~~' + else: + self.small_prev_del = self.small_prev_del + self.more + if (self.current_tag == "afterins" or self.current_tag == "tda") and self.ins_length <= 1000: + self.ins_length += len(data) + if self.ins_length <= 1000: + self.small_prev_ins = self.small_prev_ins + data + else: + self.small_prev_ins = self.small_prev_ins + self.more + if (self.current_tag == "afterdel" or self.current_tag == "tdd") and self.del_length <= 1000: + self.del_length += len(data) + if self.del_length <= 1000: + self.small_prev_del = self.small_prev_del + data + else: + self.small_prev_del = self.small_prev_del + self.more + if self.added: + if data == '+' and self.ins_length <= 1000: + self.ins_length += 1 + if self.ins_length <= 1000: + self.small_prev_ins = 
self.small_prev_ins + '\n' + else: + self.small_prev_ins = self.small_prev_ins + self.more + if data == '−' and self.del_length <= 1000: + self.del_length += 1 + if self.del_length <= 1000: + self.small_prev_del = self.small_prev_del + '\n' + else: + self.small_prev_del = self.small_prev_del + self.more + self.added = False + + def handle_endtag(self, tagname): + if tagname == "ins": + self.current_tag = "afterins" + elif tagname == "del": + self.current_tag = "afterdel" + else: + self.current_tag = "" + + +class RecentChangesClass(): + """Store various data and functions related to wiki and fetching of Recent Changes""" + def __init__(self): + self.tags = {} + self.mw_messages = {} + self.namespaces = None + self.session = session + + @staticmethod + def handle_mw_errors(request): + if "errors" in request: + logger.error(request["errors"]) + raise MWError + return request + + def safe_request(self, url): + try: + request = self.session.get(url, timeout=10, allow_redirects=False) + except requests.exceptions.Timeout: + logger.warning("Reached timeout error for request on link {url}".format(url=url)) + self.downtime_controller() + return None + except requests.exceptions.ConnectionError: + logger.warning("Reached connection error for request on link {url}".format(url=url)) + self.downtime_controller() + return None + except requests.exceptions.ChunkedEncodingError: + logger.warning("Detected faulty response from the web server for request on link {url}".format(url=url)) + self.downtime_controller() + return None + else: + if 499 < request.status_code < 600: + self.downtime_controller() + return None + elif request.status_code == 302: + logger.warning("Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or Gamepedia is giving us the false value. 
Please provide the real URL to the wiki, current URL redirects to {}".format(request.next.url)) + return request + + def init_info(self): + return \ No newline at end of file diff --git a/src/msgqueue.py b/src/msgqueue.py new file mode 100644 index 0000000..4e4bb5d --- /dev/null +++ b/src/msgqueue.py @@ -0,0 +1,49 @@ +import asyncio, logging +logger = logging.getLogger("rcgcdw.msgqueue") + +class MessageQueue: + """Message queue class for undelivered messages""" + def __init__(self): + self._queue = [] + + def __repr__(self): + return self._queue + + def __len__(self): + return len(self._queue) + + def __iter__(self): + return iter(self._queue) + + def clear(self): + self._queue.clear() + + def add_message(self, message): + self._queue.append(message) + + def cut_messages(self, item_num): + self._queue = self._queue[item_num:] + + async def resend_msgs(self): + if self._queue: + logger.info( + "{} messages waiting to be delivered to Discord due to Discord throwing errors/no connection to Discord servers.".format( + len(self._queue))) + for num, item in enumerate(self._queue): + logger.debug( + "Trying to send a message to Discord from the queue with id of {} and content {}".format(str(num), + str(item))) + if send_to_discord_webhook(item) < 2: + logger.debug("Sending message succeeded") + await asyncio.sleep(2.5) + else: + logger.debug("Sending message failed") + break + else: + self.clear() + logger.debug("Queue emptied, all messages delivered") + self.cut_messages(num) + logger.debug(self._queue) + + +messagequeue = MessageQueue() \ No newline at end of file diff --git a/src/wiki.py b/src/wiki.py index 7dbf2ae..94bf839 100644 --- a/src/wiki.py +++ b/src/wiki.py @@ -5,18 +5,23 @@ import logging, aiohttp from src.exceptions import * from src.database import db_cursor, db_connection from src.formatters.rc import embed_formatter, compact_formatter +from src.misc import LinkParser, RecentChangesClass from i18n import langs import src.discord logger = 
logging.getLogger("rcgcdb.wiki") +supported_logs = ["protect/protect", "protect/modify", "protect/unprotect", "upload/overwrite", "upload/upload", "delete/delete", "delete/delete_redir", "delete/restore", "delete/revision", "delete/event", "import/upload", "import/interwiki", "merge/merge", "move/move", "move/move_redir", "protect/move_prot", "block/block", "block/unblock", "block/reblock", "rights/rights", "rights/autopromote", "abusefilter/modify", "abusefilter/create", "interwiki/iw_add", "interwiki/iw_edit", "interwiki/iw_delete", "curseprofile/comment-created", "curseprofile/comment-edited", "curseprofile/comment-deleted", "curseprofile/comment-purged", "curseprofile/profile-edited", "curseprofile/comment-replied", "contentmodel/change", "sprite/sprite", "sprite/sheet", "sprite/slice", "managetags/create", "managetags/delete", "managetags/activate", "managetags/deactivate", "tag/update", "cargo/createtable", "cargo/deletetable", "cargo/recreatetable", "cargo/replacetable", "upload/revert"] + + + @dataclass class Wiki: mw_messages: int = None fail_times: int = 0 # corresponding to amount of times connection with wiki failed for client reasons (400-499) - async def fetch_wiki(self, extended, script_path, api_path) -> aiohttp.ClientResponse: - url_path = script_path + api_path + async def fetch_wiki(self, extended, script_path) -> aiohttp.ClientResponse: + url_path = script_path + "api.php" amount = 20 if extended: params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges", @@ -117,11 +122,13 @@ async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict): mw_msgs[key] = msgs # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one local_wiki.mw_messages = key -def essential_info(change, changed_categories, local_wiki, db_wiki): +async def essential_info(change, changed_categories, local_wiki, db_wiki): """Prepares essential information for both embed and 
compact message format.""" + recent_changes = RecentChangesClass() + LinkParser = LinkParser("domain") logger.debug(change) lang = langs[db_wiki[1]] - appearance_mode = embed_formatter + appearance_mode = embed_formatter # TODO Add changing depending on the DB entry if ("actionhidden" in change or "suppressed" in change): # if event is hidden using suppression appearance_mode("suppressed", change, "", changed_categories, recent_changes) return @@ -151,6 +158,4 @@ def essential_info(change, changed_categories, local_wiki, db_wiki): else: logger.warning("This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(change)) return - if identification_string in settings["ignored"]: - return appearance_mode(identification_string, change, parsed_comment, changed_categories, recent_changes) \ No newline at end of file