from __future__ import annotations

import base64
import functools
import json
import logging
import re
from functools import cache
from html.parser import HTMLParser
from typing import Callable
from urllib.parse import urlparse, urlunparse

from src.config import settings

logger = logging.getLogger("rcgcdw.misc")


def get_paths(wiki: str, request) -> tuple:
    """Prepares wiki paths for the functions

    :param wiki: URL prefix of the wiki; "api.php" is appended to it directly
        (presumably it ends with "/" - verify against callers)
    :param request: mapping from which request["query"]["general"]["articlepath"] is read
    :returns a tuple of (WIKI_API_PATH, WIKI_SCRIPT_PATH, WIKI_ARTICLE_PATH, WIKI_JUST_DOMAIN)
    """
    parsed_url = urlparse(wiki)
    # scheme + netloc only, e.g. "https://example.org"
    WIKI_JUST_DOMAIN = urlunparse((*parsed_url[0:2], "", "", "", ""))
    WIKI_API_PATH = wiki + "api.php"
    WIKI_SCRIPT_PATH = wiki
    WIKI_ARTICLE_PATH = WIKI_JUST_DOMAIN + request["query"]["general"]["articlepath"]
    return WIKI_API_PATH, WIKI_SCRIPT_PATH, WIKI_ARTICLE_PATH, WIKI_JUST_DOMAIN


def get_domain(url: str) -> str:
    """Get domain of given URL

    :returns the last two dot-separated labels of the URL's network location,
        something like gamepedia.com, fandom.com
    """
    # Split only the network location: splitting the whole "scheme://host" string
    # leaked the scheme into the result for hosts with fewer than three labels.
    host = urlparse(url).netloc
    return ".".join(host.split(".")[-2:])


def run_hooks(hooks, *arguments):
    """Call every hook with the given arguments.

    A failing hook is logged and skipped while the "error_tolerance" setting
    (default 1) is above 0; otherwise its exception is re-raised.
    """
    for hook in hooks:
        try:
            hook(*arguments)
        except Exception:
            # Only ordinary errors are tolerated; KeyboardInterrupt/SystemExit
            # must keep propagating regardless of the tolerance setting.
            if settings.get("error_tolerance", 1) > 0:
                logger.exception("On running a pre hook, ignoring pre-hook")
            else:
                raise


class LinkParser(HTMLParser):
    """HTMLParser that rebuilds fed HTML as text with escaped formatting,
    turning linked text into "[text](<url>)" form.

    The result accumulates in new_string.
    """

    new_string = ""
    recent_href = ""

    def __init__(self, DOMAIN_URL: str):
        # Prefix put in front of hrefs that do not start with "http" or "//"
        self.WIKI_JUST_DOMAIN = DOMAIN_URL
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Remember the link target of the tag (if any) for the next piece of text."""
        for name, value in attrs:
            if value is None:
                # HTMLParser reports attributes without a value (e.g. <a href>) as None
                continue
            if name == 'href':
                href = value
                if href.startswith("//"):
                    # protocol-relative link
                    href = "https:{rest}".format(rest=href)
                elif not href.startswith("http"):
                    href = self.WIKI_JUST_DOMAIN + href
                # keep a ")" in the URL from terminating the "(<...>)" link syntax
                self.recent_href = href.replace(")", "\\)")
            elif name == 'data-uncrawlable-url':
                # the attribute carries a base64 encoded path
                decoded = base64.b64decode(value.encode('ascii'))
                self.recent_href = self.WIKI_JUST_DOMAIN + decoded.decode('ascii')

    def handle_data(self, data):
        """Append text, as a link when a link target is pending."""
        if self.recent_href:
            self.new_string = self.new_string + "[{}](<{}>)".format(escape_formatting(data), self.recent_href)
            self.recent_href = ""
        else:
            self.new_string = self.new_string + escape_formatting(data)

    def handle_comment(self, data):
        """Append the content of an HTML comment as escaped text."""
        self.new_string = self.new_string + escape_formatting(data)

    def handle_endtag(self, tag):
        # logger.debug(self.new_string)
        pass


# LinkParse = LinkParser()

# def parse_link(domain: str, to_parse: str) -> str:
#     """Because I have strange issues using the LinkParser class myself, this is a helper function
#     to utilize the LinkParser properly"""
#     LinkParse.WIKI_JUST_DOMAIN = domain
#     LinkParse.new_string = ""
#     LinkParse.feed(to_parse)
#     LinkParse.recent_href = ""
#     return LinkParse.new_string


def link_formatter(link: str) -> str:
    """Formats a link to not embed it

    Closing parentheses get a backslash in front, spaces become underscores
    and the whole link is wrapped in "<" and ">".
    """
    escaped = re.sub(r"([)])", "\\\\\\1", link)
    return "<" + escaped.replace(" ", "_") + ">"


def escape_formatting(data: str) -> str:
    """Escape Discord formatting

    :returns data with a backslash in front of every formatting character,
        or an empty string when data is None
    """
    if data is None:
        return ""
    # No explicit count: replacing all matches is the default, and passing
    # count positionally is deprecated since Python 3.13.
    return re.sub(r"([`_*~:<>{}@/|\\\[\]\(\)])", "\\\\\\1", data)


def create_article_path(article: str, WIKI_ARTICLE_PATH: str) -> str:
    """Takes the string and creates an URL with it as the article name

    :param article: article name to put into the path
    :param WIKI_ARTICLE_PATH: path template containing "$1" as the placeholder
    """
    article = article.replace(" ", "_").replace("%", "%25").replace("\\", "%5C")
    if "?" in WIKI_ARTICLE_PATH:
        # the title lands inside a query string, so "&" would start a new parameter
        article = article.replace("&", "%26")
    else:
        # the title lands inside the path, so "?" would start the query string
        article = article.replace("?", "%3F")
    return WIKI_ARTICLE_PATH.replace("$1", article)


def profile_field_name(name, embed, _):
    """Return the translated display name of a profile field.

    :param name: field key such as "profile-location"
    :param embed: picks the capitalised fallback ("Unknown") over the lowercase one
    :param _: translation function applied to every label
    """
    profile_fields = {
        "profile-location": _("Location"),
        "profile-aboutme": _("About me"),
        "profile-link-google": _("Google link"),
        "profile-link-facebook": _("Facebook link"),
        "profile-link-twitter": _("Twitter link"),
        "profile-link-reddit": _("Reddit link"),
        "profile-link-twitch": _("Twitch link"),
        "profile-link-psn": _("PSN link"),
        "profile-link-vk": _("VK link"),
        "profile-link-xbl": _("XBL link"),
        "profile-link-steam": _("Steam link"),
        "profile-link-discord": _("Discord handle"),
        "profile-link-battlenet": _("Battle.net handle"),
    }
    if name in profile_fields:
        return profile_fields[name]
    return _("Unknown") if embed else _("unknown")


def class_searcher(attribs: list) -> str:
    """Function to return classes of given element in HTMLParser on handle_starttag

    :returns a string with all of the classes of element
    """
    for attr in attribs:
        if attr[0] == "class":
            # a valueless class attribute is reported as None; keep the str contract
            return attr[1] or ""
    return ""


class ContentParser(HTMLParser):
    """ContentParser is an implementation of HTMLParser that parses output of action=compare&prop=diff API request
    for two MediaWiki revisions. It extracts the following:
    small_prev_ins - storing up to 1000 characters of added text
    small_prev_del - storing up to 1000 characters of removed text
    ins_length - storing length of inserted text
    del_length - storing length of deleted text

    Both length counters start at the length of the "and more" suffix and are
    compared against 1000 before any text is stored.
    """
    current_tag = ""
    last_ins = None
    last_del = None
    empty = False
    small_prev_ins = ""
    small_prev_del = ""

    def __init__(self, lang: Callable):
        super().__init__()
        # lang is called as a translation function for the truncation notice
        self.more = lang("\n__And more__")
        self.ins_length = len(self.more)
        self.del_length = len(self.more)

    def handle_starttag(self, tagname, attribs):
        """Track which kind of diff content the following text belongs to."""
        if tagname == "ins" or tagname == "del":
            self.current_tag = tagname
        if tagname == "td":
            classes = class_searcher(attribs).split(' ')
            # a new line buffer is only opened while the length limit is not exceeded
            if "diff-addedline" in classes and self.ins_length <= 1000:
                self.current_tag = "tda"
                self.last_ins = ""
            if "diff-deletedline" in classes and self.del_length <= 1000:
                self.current_tag = "tdd"
                self.last_del = ""
            if "diff-empty" in classes:
                self.empty = True

    def handle_data(self, data):
        """Append escaped text to the current line buffer.

        The counter is increased first and the text is stored only when the
        counter is still within 1000 afterwards.
        """
        data = escape_formatting(data)
        if self.current_tag == "ins" and self.ins_length <= 1000:
            self.ins_length += len("**" + data + "**")
            if self.ins_length <= 1000:
                self.last_ins = self.last_ins + "**" + data + "**"
        if self.current_tag == "del" and self.del_length <= 1000:
            self.del_length += len("~~" + data + "~~")
            if self.del_length <= 1000:
                self.last_del = self.last_del + "~~" + data + "~~"
        if self.current_tag == "tda" and self.ins_length <= 1000:
            self.ins_length += len(data)
            if self.ins_length <= 1000:
                self.last_ins = self.last_ins + data
        if self.current_tag == "tdd" and self.del_length <= 1000:
            self.del_length += len(data)
            if self.del_length <= 1000:
                self.last_del = self.last_del + data

    def handle_endtag(self, tagname):
        """Restore the enclosing context; on the end of a row flush the line buffers."""
        if tagname == "ins":
            self.current_tag = "tda"
        elif tagname == "del":
            self.current_tag = "tdd"
        elif tagname == "td":
            self.current_tag = ""
        elif tagname == "tr":
            if self.last_ins is not None:
                self.ins_length += 1  # the "\n" joining the lines
                if self.empty and not self.last_ins.isspace():
                    # The row had a diff-empty cell: mark the whole line instead,
                    # turning the inner "**" markers into "__".
                    if "**" in self.last_ins:
                        self.last_ins = self.last_ins.replace("**", "__")
                    self.ins_length += 4
                    self.last_ins = "**" + self.last_ins + "**"
                self.small_prev_ins = self.small_prev_ins + "\n" + self.last_ins
                if self.ins_length > 1000:
                    self.small_prev_ins = self.small_prev_ins + self.more
                self.last_ins = None
            if self.last_del is not None:
                self.del_length += 1  # the "\n" joining the lines
                if self.empty and not self.last_del.isspace():
                    if "~~" in self.last_del:
                        self.last_del = self.last_del.replace("~~", "__")
                    self.del_length += 4
                    self.last_del = "~~" + self.last_del + "~~"
                self.small_prev_del = self.small_prev_del + "\n" + self.last_del
                if self.del_length > 1000:
                    self.small_prev_del = self.small_prev_del + self.more
                self.last_del = None
            self.empty = False


@cache
def prepare_settings(display_mode: int) -> dict:
    """Prepares dict of RcGcDw compatible settings based on a template and display mode of given call

    NOTE(review): the result is cached per display_mode, so every caller gets
    the same dict object - it should be treated as read-only.
    """
    # JSON is UTF-8 by specification; do not rely on the locale's default encoding
    with open("src/api/template_settings.json", "r", encoding="utf-8") as template_json:
        template = json.load(template_json)
    template["appearance"]["embed"]["embed_images"] = display_mode > 1
    template["appearance"]["embed"]["show_edit_changes"] = display_mode > 2
    return template


class LimitedList(list):
    """List which drops its oldest element on append once queue_limit elements are stored."""

    def __init__(self, *args, limit=settings.get("queue_limit", 30)):
        list.__init__(self, *args)
        self.queue_limit = limit

    def append(self, obj) -> None:
        # ">=" so the list holds at most queue_limit items after the append
        # (">" let it settle at queue_limit + 1); never pop from an empty list.
        if self and len(self) >= self.queue_limit:
            self.pop(0)
        super().append(obj)

    def __repr__(self):
        return "\n".join([str(x) for x in self])


def catch_exceptions(passed_context):
    """Decorator factory for coroutine functions which catches exceptions raised by them.

    A caught exception is logged together with passed_context and the wrapped
    coroutine returns None instead of raising.

    NOTE(review): the original handler body was missing (a syntax error);
    logging is the conservative completion - confirm the intended handling.
    """
    def wrapper(func):
        @functools.wraps(func)
        async def wrapped(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception:
                logger.exception(
                    "Exception caught while running %s (context: %r)",
                    getattr(func, "__qualname__", repr(func)),
                    passed_context,
                )
                return None
        return wrapped
    return wrapper