From 3a23b5c0039141baf820f1d1acdc10233e2fe5af Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 24 Apr 2021 14:32:23 +0200 Subject: [PATCH] Reworked default request function to new fancy one --- extensions/base/mediawiki.py | 4 +- src/api/client.py | 33 +++++++-- src/exceptions.py | 30 +++++++- src/misc.py | 15 ++++ src/rc.py | 131 +++++++++++++++++++++++++---------- src/rc_formatters.py | 6 +- src/rcgcdw.py | 24 +++---- 7 files changed, 184 insertions(+), 59 deletions(-) diff --git a/extensions/base/mediawiki.py b/extensions/base/mediawiki.py index eba45f2..657ea47 100644 --- a/extensions/base/mediawiki.py +++ b/extensions/base/mediawiki.py @@ -60,12 +60,12 @@ class base(): space=" " if "bot" in change or (action == "edit" and "minor" in change) or action == "new" else "") if settings["appearance"]["embed"]["show_edit_changes"]: if action == "new": - changed_content = safe_read(recent_changes.safe_request( + changed_content = safe_read(recent_changes._safe_request( "{wiki}?action=compare&format=json&fromtext=&torev={diff}&topst=1&prop=diff".format( wiki=ctx.client.WIKI_API_PATH, diff=change["revid"] )), "compare", "*") else: - changed_content = safe_read(recent_changes.safe_request( + changed_content = safe_read(recent_changes._safe_request( "{wiki}?action=compare&format=json&fromrev={oldrev}&torev={diff}&topst=1&prop=diff".format( wiki=ctx.client.WIKI_API_PATH, diff=change["revid"], oldrev=change["old_revid"] )), "compare", "*") diff --git a/src/api/client.py b/src/api/client.py index 7028154..8e9d0f1 100644 --- a/src/api/client.py +++ b/src/api/client.py @@ -16,7 +16,8 @@ import src.rcgcdw import src.rc import src.misc - +from typing import Union +from collections import OrderedDict class Client: """ @@ -24,19 +25,41 @@ class Client: """ def __init__(self): self._formatters = src.rcgcdw.formatter_hooks - self.__recent_changes = src.rc.recent_changes + self.__recent_changes = src.rc.wiki self.WIKI_API_PATH = src.misc.WIKI_API_PATH self.WIKI_ARTICLE_PATH = src.misc.WIKI_ARTICLE_PATH self.WIKI_SCRIPT_PATH = src.misc.WIKI_SCRIPT_PATH self.WIKI_JUST_DOMAIN = src.misc.WIKI_JUST_DOMAIN self.content_parser = src.misc.ContentParser + #self.make_api_request: src.rc.wiki.__recent_changes.api_request = self.__recent_changes.api_request def refresh_internal_data(self): """Refreshes internal storage data for wiki tags and MediaWiki messages.""" self.__recent_changes.init_info() + def make_api_request(self, params: Union[str, OrderedDict], *json_path: list[str], timeout: int=10, allow_redirects: bool=False): + """Method to GET request data from the wiki's API with error handling including recognition of MediaWiki errors. + + Parameters: + + params (str, OrderedDict): a string or collections.OrderedDict object containing query parameters + json_path (str): *args taking strings as values. After request is parsed as json it will extract data from given json path + timeout (int, float) (default=10): int or float limiting time required for receiving a full response from a server before returning TimeoutError + allow_redirects (bool) (default=False): switches whether the request should follow redirects or not + + Returns: + + request_content (dict): a dict resulting from json extraction of HTTP GET request with given json_path + OR + One of the following exceptions: + ServerError: When connection with the wiki failed due to server error + ClientError: When connection with the wiki failed due to client error + KeyError: When json_path contained keys that weren't found in response JSON response + BadRequest: When params argument is of wrong type + MediaWikiError: When MediaWiki returns an error + """ + return self.__recent_changes.api_request(params, *json_path, timeout, allow_redirects) - - -client = Client() \ No newline at end of file +client = Client() +client.make_api_request() \ No newline at end of file diff --git a/src/exceptions.py b/src/exceptions.py index 2d84775..6f4ac72 100644 --- a/src/exceptions.py +++ b/src/exceptions.py @@ -25,4 +25,32 @@ class ArticleCommentError(Exception): class FormatterBreaksAPISpec(Exception): def __init__(self, field): self.message = f"Formatter doesn't specify {field}!" - super().__init__(self.message) \ No newline at end of file + super().__init__(self.message) + + +class ServerError(Exception): + """Exception for when a request fails because of Server error""" + pass + + +class ClientError(Exception): + """Exception for when a request failes because of Client error""" + + def __init__(self, request): + self.message = f"Client have made wrong request! {request.status_code}: {request.reason}. {request.text}" + super().__init__(self.message) + + +class BadRequest(Exception): + """When type of parameter given to request making method is invalid""" + def __init__(self, object_type): + self.message = f"params must be either a strong or OrderedDict object, not {type(object_type)}!" + super().__init__(self.message) + + +class MediaWikiError(Exception): + """When MediaWiki responds with an error""" + def __init__(self, errors): + self.message = f"MediaWiki returned the following errors: {errors}!" + super().__init__(self.message) + diff --git a/src/misc.py b/src/misc.py index 1c6581c..6acbb19 100644 --- a/src/misc.py +++ b/src/misc.py @@ -22,6 +22,7 @@ import requests from src.configloader import settings from src.discord.message import DiscordMessage, DiscordMessageMetadata from src.discord.queue import messagequeue, send_to_discord +from src.exceptions import MediaWikiError from src.i18n import misc AUTO_SUPPRESSION_ENABLED = settings.get("auto_suppression", {"enabled": False}).get("enabled") @@ -208,6 +209,20 @@ def safe_read(request, *keys): return request +def parse_mw_request_info(request_data: dict, url: str): + """A function parsing request JSON message from MediaWiki logging all warnings and raising on MediaWiki errors""" + # any([True for k in request_data.keys() if k in ("error", "errors")]) + errors: list = request_data.get("errors", {}) # Is it ugly? I don't know tbh + if errors: + raise MediaWikiError(str(errors)) + warnings: list = request_data.get("warnings", {}) + if warnings: + for warning in warnings: + misc_logger.warning("MediaWiki returned the following warning: {code} - {text} on {url}.".format( + code=warning["code"], text=warning.get("text", warning.get("*", "")), url=url + )) + return request_data + def add_to_dict(dictionary, key): if key in dictionary: dictionary[key] += 1 diff --git a/src/rc.py b/src/rc.py index 0910b65..676ac86 100644 --- a/src/rc.py +++ b/src/rc.py @@ -23,10 +23,12 @@ import requests from bs4 import BeautifulSoup from src.configloader import settings -from src.misc import WIKI_SCRIPT_PATH, WIKI_API_PATH, datafile, send_simple, safe_read, LinkParser, AUTO_SUPPRESSION_ENABLED +from src.misc import WIKI_SCRIPT_PATH, WIKI_API_PATH, datafile, send_simple, safe_read, LinkParser, \ + AUTO_SUPPRESSION_ENABLED, parse_mw_request_info from src.discord.queue import messagequeue -from src.exceptions import MWError +from src.exceptions import MWError, BadRequest, ClientError, ServerError, MediaWikiError from src.session import session +from typing import Union # from src.rc_formatters import compact_formatter, embed_formatter, compact_abuselog_formatter, embed_abuselog_formatter from src.i18n import rc from collections import OrderedDict @@ -37,25 +39,9 @@ storage = datafile logger = logging.getLogger("rcgcdw.rc") -supported_logs = {"protect/protect", "protect/modify", "protect/unprotect", "upload/overwrite", "upload/upload", - "delete/delete", "delete/delete_redir", "delete/restore", "delete/revision", "delete/event", - "import/upload", "import/interwiki", "merge/merge", "move/move", "move/move_redir", - "protect/move_prot", "block/block", "block/unblock", "block/reblock", "rights/rights", - "rights/autopromote", "abusefilter/modify", "abusefilter/create", "interwiki/iw_add", - "interwiki/iw_edit", "interwiki/iw_delete", "curseprofile/comment-created", - "curseprofile/comment-edited", "curseprofile/comment-deleted", "curseprofile/comment-purged", - "curseprofile/profile-edited", "curseprofile/comment-replied", "contentmodel/change", "sprite/sprite", - "sprite/sheet", "sprite/slice", "managetags/create", "managetags/delete", "managetags/activate", - "managetags/deactivate", "cargo/createtable", "cargo/deletetable", - "cargo/recreatetable", "cargo/replacetable", "upload/revert", "newusers/create", - "newusers/autocreate", "newusers/create2", "newusers/byemail", "newusers/newusers", - "managewiki/settings", "managewiki/delete", "managewiki/lock", "managewiki/unlock", - "managewiki/namespaces", "managewiki/namespaces-delete", "managewiki/rights", "managewiki/undelete"} - - LinkParser = LinkParser() -class Recent_Changes_Class(object): +class Wiki(object): """Store verious data and functions related to wiki and fetching of Recent Changes""" def __init__(self): self.map_ips = {} @@ -129,7 +115,7 @@ class Recent_Changes_Class(object): """Make a typical MW request for rc/abuselog If succeeds return the .json() of request and if not raises ConnectionError""" - request = self.safe_request(WIKI_API_PATH, params=self.construct_params(amount)) + request = self._safe_request(WIKI_API_PATH, params=self.construct_params(amount)) if request is not None: try: request = request.json() @@ -178,30 +164,30 @@ class Recent_Changes_Class(object): "There were too many new events, but the limit was high enough we don't care anymore about fetching them all.") if change["type"] == "categorize": if "commenthidden" not in change: - if len(recent_changes.mw_messages.keys()) > 0: + if len(wiki.mw_messages.keys()) > 0: cat_title = change["title"].split(':', 1)[1] # I so much hate this, blame Markus for making me do this if change["revid"] not in categorize_events: categorize_events[change["revid"]] = {"new": set(), "removed": set()} comment_to_match = re.sub(r'<.*?a>', '', change["parsedcomment"]) - if recent_changes.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or \ - recent_changes.mw_messages[ + if wiki.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or \ + wiki.mw_messages[ "recentchanges-page-added-to-category-bundled"] in comment_to_match: categorize_events[change["revid"]]["new"].add(cat_title) logger.debug("Matched {} to added category for {}".format(cat_title, change["revid"])) - elif recent_changes.mw_messages[ + elif wiki.mw_messages[ "recentchanges-page-removed-from-category"] in comment_to_match or \ - recent_changes.mw_messages[ + wiki.mw_messages[ "recentchanges-page-removed-from-category-bundled"] in comment_to_match: categorize_events[change["revid"]]["removed"].add(cat_title) logger.debug("Matched {} to removed category for {}".format(cat_title, change["revid"])) else: logger.debug( "Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format( - recent_changes.mw_messages["recentchanges-page-added-to-category"], - recent_changes.mw_messages["recentchanges-page-removed-from-category"], - recent_changes.mw_messages["recentchanges-page-removed-from-category-bundled"], - recent_changes.mw_messages["recentchanges-page-added-to-category-bundled"], + wiki.mw_messages["recentchanges-page-added-to-category"], + wiki.mw_messages["recentchanges-page-removed-from-category"], + wiki.mw_messages["recentchanges-page-removed-from-category-bundled"], + wiki.mw_messages["recentchanges-page-added-to-category-bundled"], comment_to_match)) else: logger.warning( @@ -264,7 +250,9 @@ class Recent_Changes_Class(object): abuselog_last_id = self.prepare_abuse_log(abuselog) return rc_last_id, abuselog_last_id - def safe_request(self, url, params=None): + def _safe_request(self, url, params=None): + """This method is depreciated, please use api_request""" + logger.warning("safe_request is depreciated, please use api_request or own requests request") try: if params: request = self.session.get(url, params=params, timeout=10, allow_redirects=False) @@ -291,6 +279,76 @@ class Recent_Changes_Class(object): sys.exit(0) return request + def api_request(self, params: Union[str, OrderedDict], *json_path: list[str], timeout: int=10, allow_redirects: bool=False): + """Method to GET request data from the wiki's API with error handling including recognition of MediaWiki errors. + + Parameters: + + params (str, OrderedDict): a string or collections.OrderedDict object containing query parameters + json_path (str): *args taking strings as values. After request is parsed as json it will extract data from given json path + timeout (int, float) (default=10): int or float limiting time required for receiving a full response from a server before returning TimeoutError + allow_redirects (bool) (default=False): switches whether the request should follow redirects or not + + Returns: + + request_content (dict): a dict resulting from json extraction of HTTP GET request with given json_path + OR + One of the following exceptions: + ServerError: When connection with the wiki failed due to server error + ClientError: When connection with the wiki failed due to client error + KeyError: When json_path contained keys that weren't found in response JSON response + BadRequest: When params argument is of wrong type + MediaWikiError: When MediaWiki returns an error + """ + # Making request + try: + if isinstance(params, str): + request = self.session.get(WIKI_API_PATH + params, timeout=timeout, allow_redirects=allow_redirects) + elif isinstance(params, OrderedDict): + request = self.session.get(WIKI_API_PATH, params=params, timeout=timeout, allow_redirects=allow_redirects) + else: + raise BadRequest(params) + except requests.exceptions.Timeout: + logger.warning("Reached timeout error for request on link {url}".format(url=WIKI_API_PATH+str(params))) + self.downtime_controller(True) + raise ServerError + except requests.exceptions.ConnectionError: + logger.warning("Reached connection error for request on link {url}".format(url=WIKI_API_PATH+str(params))) + self.downtime_controller(True) + raise ServerError + except requests.exceptions.ChunkedEncodingError: + logger.warning("Detected faulty response from the web server for request on link {url}".format(url=WIKI_API_PATH+str(params))) + self.downtime_controller(True) + raise ServerError + # Catching HTTP errors + if 499 < request.status_code < 600: + self.downtime_controller(True) + raise ServerError + elif request.status_code == 302: + logger.critical( + "Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or is giving us the false value. Please provide the real URL to the wiki, current URL redirects to {}".format( + request.next.url)) + sys.exit(0) + elif 399 < request.status_code < 500: + logger.error("Request returned ClientError status code on {url}".format(url=request.url)) + raise ClientError(request) + else: + # JSON Extraction + try: + request_json = parse_mw_request_info(request.json(), request.url) + for item in request_json: + request_json = request_json[item] + except ValueError: + logger.warning("ValueError when extracting JSON data on {url}".format(url=request.url)) + self.downtime_controller(True) + raise ServerError + except MediaWikiError: + logger.exception("MediaWiki error on request: {}".format(request.url)) + raise + except KeyError: + raise + return request_json + def check_connection(self, looped=False): online = 0 for website in ["https://google.com", "https://instagram.com", "https://steamcommunity.com"]: @@ -308,7 +366,7 @@ class Recent_Changes_Class(object): if not looped: while 1: # recursed loop, check for connection (every 10 seconds) as long as three services are down, don't do anything else if self.check_connection(looped=True): - recent_changes.fetch(amount=settings["limitrefetch"]) + wiki.fetch(amount=settings["limitrefetch"]) break time.sleep(10) return False @@ -347,7 +405,7 @@ class Recent_Changes_Class(object): clean_entries() def init_info(self): - startup_info = safe_read(self.safe_request( + startup_info = safe_read(self._safe_request( "{wiki}?action=query&format=json&uselang=content&list=tags&meta=allmessages%7Csiteinfo&utf8=1&tglimit=max&tgprop=displayname&ammessages=recentchanges-page-added-to-category%7Crecentchanges-page-removed-from-category%7Crecentchanges-page-added-to-category-bundled%7Crecentchanges-page-removed-from-category-bundled&amenableparser=1&amincludelocal=1&siprop=namespaces".format( wiki=WIKI_API_PATH)), "query") if startup_info: @@ -375,7 +433,7 @@ class Recent_Changes_Class(object): def pull_comment(self, comment_id): try: - comment = self.handle_mw_errors(self.safe_request( + comment = self.handle_mw_errors(self._safe_request( "{wiki}?action=comment&do=getRaw&comment_id={comment}&format=json".format(wiki=WIKI_API_PATH, comment=comment_id)).json())[ "text"] @@ -393,13 +451,14 @@ class Recent_Changes_Class(object): return "" -recent_changes = Recent_Changes_Class() +wiki = Wiki() + def essential_info(change, changed_categories): """Prepares essential information for both embed and compact message format.""" logger.debug(change) if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]: # if event is hidden using suppression - appearance_mode("suppressed", change, "", changed_categories, recent_changes) + appearance_mode("suppressed", change, "", changed_categories, wiki) return if "commenthidden" not in change: LinkParser.feed(change["parsedcomment"]) @@ -431,7 +490,7 @@ def essential_info(change, changed_categories): return if identification_string in settings["ignored"]: return - appearance_mode(identification_string, change, parsed_comment, changed_categories, recent_changes) + appearance_mode(identification_string, change, parsed_comment, changed_categories, wiki) def abuselog_processing(entry, recent_changes): abuselog_appearance_mode(entry, recent_changes) \ No newline at end of file diff --git a/src/rc_formatters.py b/src/rc_formatters.py index a46d099..d0354e7 100644 --- a/src/rc_formatters.py +++ b/src/rc_formatters.py @@ -53,7 +53,7 @@ def format_user(change, recent_changes, action): user=change["user"].replace(" ", "_"))) # Replace here needed in case of #75 logger.debug("current user: {} with cache of IPs: {}".format(change["user"], recent_changes.map_ips.keys())) if change["user"] not in list(recent_changes.map_ips.keys()): - contibs = safe_read(recent_changes.safe_request( + contibs = safe_read(recent_changes._safe_request( "{wiki}?action=query&format=json&list=usercontribs&uclimit=max&ucuser={user}&ucstart={timestamp}&ucprop=".format( wiki=WIKI_API_PATH, user=change["user"], timestamp=change["timestamp"])), "query", "usercontribs") if contibs is None: @@ -715,7 +715,7 @@ def embed_formatter(action, change, parsed_comment, categories, recent_changes): elif action in ("upload/overwrite", "upload/upload", "upload/revert"): # sending files license = None - urls = safe_read(recent_changes.safe_request( + urls = safe_read(recent_changes._safe_request( "{wiki}?action=query&format=json&prop=imageinfo&list=&meta=&titles={filename}&iiprop=timestamp%7Curl%7Carchivename&iilimit=5".format( wiki=WIKI_API_PATH, filename=change["title"])), "query", "pages") link = create_article_path(change["title"]) @@ -755,7 +755,7 @@ def embed_formatter(action, change, parsed_comment, categories, recent_changes): else: embed["title"] = _("Uploaded {name}").format(name=change["title"]) if settings["license_detection"]: - article_content = safe_read(recent_changes.safe_request( + article_content = safe_read(recent_changes._safe_request( "{wiki}?action=query&format=json&prop=revisions&titles={article}&rvprop=content".format( wiki=WIKI_API_PATH, article=quote_plus(change["title"], safe=''))), "query", "pages") if article_content is None: diff --git a/src/rcgcdw.py b/src/rcgcdw.py index a2effc0..8792d3d 100644 --- a/src/rcgcdw.py +++ b/src/rcgcdw.py @@ -28,7 +28,7 @@ from src.misc import add_to_dict, datafile, \ WIKI_API_PATH, create_article_path from src.discord.queue import send_to_discord from src.discord.message import DiscordMessage, DiscordMessageMetadata -from src.rc import recent_changes +from src.rc import wiki from src.exceptions import MWError from src.i18n import rcgcdw @@ -74,18 +74,18 @@ def day_overview_request(): passes = 0 continuearg = "" while not complete and passes < 10: - request = recent_changes.safe_request( + request = wiki._safe_request( "{wiki}?action=query&format=json&list=recentchanges&rcend={timestamp}Z&rcprop=title%7Ctimestamp%7Csizes%7Cloginfo%7Cuser&rcshow=!bot&rclimit=500&rctype=edit%7Cnew%7Clog{continuearg}".format( wiki=WIKI_API_PATH, timestamp=timestamp, continuearg=continuearg)) if request: try: request = request.json() - request = recent_changes.handle_mw_errors(request) + request = wiki.handle_mw_errors(request) rc = request['query']['recentchanges'] continuearg = request["continue"]["rccontinue"] if "continue" in request else None except ValueError: logger.warning("ValueError in fetching changes") - recent_changes.downtime_controller(True) + wiki.downtime_controller(True) complete = 2 except KeyError: logger.warning("Wiki returned %s" % request) @@ -168,10 +168,10 @@ def day_overview(): if item["type"] == "edit": edits += 1 changed_bytes += item["newlen"] - item["oldlen"] - if (recent_changes.namespaces is not None and "content" in recent_changes.namespaces.get(str(item["ns"]), {})) or item["ns"] == 0: + if (wiki.namespaces is not None and "content" in wiki.namespaces.get(str(item["ns"]), {})) or item["ns"] == 0: articles = add_to_dict(articles, item["title"]) elif item["type"] == "new": - if "content" in (recent_changes.namespaces is not None and recent_changes.namespaces.get(str(item["ns"]), {})) or item["ns"] == 0: + if "content" in (wiki.namespaces is not None and wiki.namespaces.get(str(item["ns"]), {})) or item["ns"] == 0: new_articles += 1 changed_bytes += item["newlen"] elif item["type"] == "log": @@ -217,17 +217,17 @@ def day_overview(): # Log in and download wiki information try: if settings["wiki_bot_login"] and settings["wiki_bot_password"]: - recent_changes.log_in() + wiki.log_in() time.sleep(2.0) - recent_changes.init_info() + wiki.init_info() except requests.exceptions.ConnectionError: logger.critical("A connection can't be established with the wiki. Exiting...") sys.exit(1) time.sleep(3.0) # this timeout is to prevent timeouts. It seems Fandom does not like our ~2-3 request in under a second if settings["rc_enabled"]: logger.info("Script started! Fetching newest changes...") - recent_changes.fetch(amount=settings["limitrefetch"] if settings["limitrefetch"] != -1 else settings["limit"]) - schedule.every(settings["cooldown"]).seconds.do(recent_changes.fetch) + wiki.fetch(amount=settings["limitrefetch"] if settings["limitrefetch"] != -1 else settings["limit"]) + schedule.every(settings["cooldown"]).seconds.do(wiki.fetch) if settings["overview"]: try: overview_time = time.strptime(settings["overview_time"], '%H:%M') @@ -241,7 +241,7 @@ if settings["rc_enabled"]: except ValueError: logger.error("Invalid time format! Currentely: {}. Note: It needs to be in HH:MM format.".format( settings["overview_time"])) - schedule.every().day.at("00:00").do(recent_changes.clear_cache) + schedule.every().day.at("00:00").do(wiki.clear_cache) else: logger.info("Script started! RC is disabled however, this means no recent changes will be sent :c") @@ -254,7 +254,7 @@ if 1 == 2: # additional translation strings in unreachable code if TESTING: logger.debug("DEBUGGING ") storage["rcid"] = 1 - recent_changes.fetch(amount=5) + wiki.fetch(amount=5) day_overview() import src.discussions src.discussions.fetch_discussions()