Some restructuring done

This commit is contained in:
Frisk 2020-10-16 22:35:29 +02:00
parent b507ac85b7
commit 1db1d5b61b
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
2 changed files with 132 additions and 102 deletions

232
src/rc.py
View file

@ -110,6 +110,7 @@ class Recent_Changes_Class(object):
def fetch(self, amount=settings["limit"]): def fetch(self, amount=settings["limit"]):
messagequeue.resend_msgs() messagequeue.resend_msgs()
rcrequest = self.fetch_recentchanges_request(amount)
last_check = self.fetch_changes(amount=amount) last_check = self.fetch_changes(amount=amount)
# If the request succeeds the last_check will be the last rcid from recentchanges query # If the request succeeds the last_check will be the last rcid from recentchanges query
if last_check is not None: if last_check is not None:
@ -122,7 +123,24 @@ class Recent_Changes_Class(object):
logger.debug("Most recent rcid is: {}".format(self.recent_id)) logger.debug("Most recent rcid is: {}".format(self.recent_id))
return self.recent_id return self.recent_id
def fetch_recentchanges_request(self, amount):
    """Query the wiki API for recentchanges/abuselog and decode the reply.

    The GET parameters come from construct_params(amount) and the request is
    sent through safe_request.

    Returns the value produced by the response's .json().
    Raises ConnectionError when safe_request returns None, or when decoding
    the body raises ValueError (in which case the failure is also logged and
    reported to downtime_controller)."""
    response = self.safe_request(WIKI_API_PATH, params=self.construct_params(amount))
    if response is None:
        # safe_request produced no response object, so there is nothing to decode
        raise ConnectionError
    try:
        decoded = response.json()
    except ValueError:
        logger.warning("ValueError in fetching changes")
        logger.warning("Changes URL:" + response.url)
        self.downtime_controller(True)
        raise ConnectionError
    return decoded
def construct_params(self, amount): def construct_params(self, amount):
"""Constructs GET parameters for recentchanges/abuselog fetching feature"""
params = OrderedDict(action="query", format="json") params = OrderedDict(action="query", format="json")
params["list"] = "recentchanges|abuselog" if settings.get("show_abuselog", False) else "recentchanges" params["list"] = "recentchanges|abuselog" if settings.get("show_abuselog", False) else "recentchanges"
params["rcshow"] = "" if settings.get("show_bots", False) else "!bot" params["rcshow"] = "" if settings.get("show_bots", False) else "!bot"
@ -134,6 +152,75 @@ class Recent_Changes_Class(object):
params["aflprop"] = "ids|user|title|action|result|timestamp|hidden|revid|filter" params["aflprop"] = "ids|user|title|action|result|timestamp|hidden|revid|filter"
return params return params
def prepare_rc(self, changes: list, clean: bool, amount: int):
    """Process a batch of recent changes entries.

    The list is reversed in place and then walked twice: the first pass counts
    new events and collects category additions/removals per revision, the
    second pass caches each unseen change and forwards it to essential_info.

    :param changes: list of recentchanges entries (dicts read by keys such as
        "rcid", "type", "title", "revid", "parsedcomment"); reversed in place.
    :param clean: when true, new events are not counted and a change is only
        forwarded if self.recent_id == 0 and its rcid is above self.file_id.
    :param amount: the limit that was used for the request that produced
        `changes`; decides whether a bigger re-fetch is attempted.
    :return: the result of self.fetch(...) when the number of new events
        reaches settings["limit"] and amount is below 500; otherwise the rcid
        of the last entry of the (reversed) list, or None if the list is empty.
    """
    categorize_events = {}
    new_events = 0
    changes.reverse()
    # First pass: count unseen events and gather the categorize events, so the
    # second pass can attach category changes to the edits they belong to.
    for change in changes:
        if not (change["rcid"] in self.ids or change["rcid"] < self.recent_id) and not clean:
            new_events += 1
            logger.debug(
                "New event: {}".format(change["rcid"]))
            if new_events == settings["limit"]:
                if amount < 500:
                    # call the function again with max limit for more results, ignore the ones in this request
                    logger.debug("There were too many new events, requesting max amount of events from the wiki.")
                    return self.fetch(amount=5000 if self.logged_in else 500)
                else:
                    logger.debug(
                        "There were too many new events, but the limit was high enough we don't care anymore about fetching them all.")
        if change["type"] == "categorize":
            if "commenthidden" not in change:
                if len(recent_changes.mw_messages.keys()) > 0:
                    cat_title = change["title"].split(':', 1)[1]
                    # Whether a page was added to or removed from the category is
                    # decided by matching the stored MediaWiki messages against the
                    # parsed comment once its link markup has been stripped.
                    if change["revid"] not in categorize_events:
                        categorize_events[change["revid"]] = {"new": set(), "removed": set()}
                    comment_to_match = re.sub(r'<.*?a>', '', change["parsedcomment"])
                    if recent_changes.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or \
                            recent_changes.mw_messages[
                                "recentchanges-page-added-to-category-bundled"] in comment_to_match:
                        categorize_events[change["revid"]]["new"].add(cat_title)
                        logger.debug("Matched {} to added category for {}".format(cat_title, change["revid"]))
                    elif recent_changes.mw_messages[
                        "recentchanges-page-removed-from-category"] in comment_to_match or \
                            recent_changes.mw_messages[
                                "recentchanges-page-removed-from-category-bundled"] in comment_to_match:
                        categorize_events[change["revid"]]["removed"].add(cat_title)
                        logger.debug("Matched {} to removed category for {}".format(cat_title, change["revid"]))
                    else:
                        logger.debug(
                            "Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format(
                                recent_changes.mw_messages["recentchanges-page-added-to-category"],
                                recent_changes.mw_messages["recentchanges-page-removed-from-category"],
                                recent_changes.mw_messages["recentchanges-page-removed-from-category-bundled"],
                                recent_changes.mw_messages["recentchanges-page-added-to-category-bundled"],
                                comment_to_match))
                else:
                    logger.warning(
                        "Init information not available, could not read category information. Please restart the bot.")
            else:
                logger.debug("Log entry got suppressed, ignoring entry.")
    # Second pass: cache and forward every change that has not been seen yet.
    for change in changes:
        if change["rcid"] in self.ids or change["rcid"] < self.recent_id:
            logger.debug("Change ({}) is in ids or is lower than recent_id {}".format(change["rcid"],
                                                                                      self.recent_id))
            continue
        logger.debug(self.ids)
        logger.debug(self.recent_id)
        self.add_cache(change)
        if clean and not (self.recent_id == 0 and change["rcid"] > self.file_id):
            logger.debug("Rejected {val}".format(val=change["rcid"]))
            continue
        essential_info(change, categorize_events.get(change.get("revid"), None))
    # Report the rcid of the last entry walked so the caller can record it;
    # an empty batch yields None instead of touching an unbound loop variable.
    return changes[-1]["rcid"] if changes else None
def prepare_abuse_log(self, abuse_log: list):
    """Pass every abuse log entry to abuselog_processing.

    :param abuse_log: list of abuse log entries; it is reversed in place and
        the entries are then processed in that reversed order.
    :return: None
    """
    abuse_log.reverse()
    for entry in abuse_log:
        abuselog_processing(entry, self)
def fetch_changes(self, amount, clean=False): def fetch_changes(self, amount, clean=False):
"""Fetches the :amount: of changes from the wiki. """Fetches the :amount: of changes from the wiki.
Returns None on error and int of rcid of latest change if succeeded""" Returns None on error and int of rcid of latest change if succeeded"""
@ -141,96 +228,28 @@ class Recent_Changes_Class(object):
if len(self.ids) == 0: if len(self.ids) == 0:
logger.debug("ids is empty, triggering clean fetch") logger.debug("ids is empty, triggering clean fetch")
clean = True clean = True
raw_changes = self.safe_request(WIKI_API_PATH, params=self.construct_params(amount)) try:
# action=query&format=json&list=recentchanges%7Cabuselog&rcprop=title%7Credirect%7Ctimestamp%7Cids%7Cloginfo%7Cparsedcomment%7Csizes%7Cflags%7Ctags%7Cuser&rcshow=!bot&rclimit=20&rctype=edit%7Cnew%7Clog%7Cexternal&afllimit=10&aflprop=ids%7Cuser%7Ctitle%7Caction%7Cresult%7Ctimestamp%7Chidden%7Crevid%7Cfilter request_json = self.fetch_recentchanges_request(amount)
if raw_changes: except ConnectionError:
return
try:
rc = request_json["query"]['recentchanges']
except KeyError:
logger.warning("Path query.recentchanges not found inside request body. Skipping...")
return
else:
self.prepare_rc(rc, clean, amount)
if settings["show_abuselog"]:
try: try:
raw_changes = raw_changes.json() abuselog = request_json["query"]["abuselog"] # While LYBL approach would be more performant when abuselog is not in request body, I prefer this approach for its clarity
changes = raw_changes['query']['recentchanges'] except KeyError:
# {"batchcomplete":"","warnings":{"query":{"*":"Unrecognized value for parameter \"list\": abuselog."}}} if "warnings" in request_json:
changes.reverse() warnings = request_json.get("warnings", {"query": {"*": ""}})
if "warnings" in raw_changes: if "Unrecognized value for parameter \"list\": abuselog." in warnings["query"]["*"]:
warnings = raw_changes.get("warnings", {"query": {"*": ""}})
if warnings["query"]["*"] == "Unrecognized value for parameter \"list\": abuselog.":
settings["show_abuselog"] = False settings["show_abuselog"] = False
logger.warning("AbuseLog extension is not enabled on the wiki. Disabling the function...") logger.warning("AbuseLog extension is not enabled on the wiki. Disabling the function...")
except ValueError:
logger.warning("ValueError in fetching changes")
logger.warning("Changes URL:" + raw_changes.url)
self.downtime_controller()
return None
except KeyError:
logger.warning("Wiki returned %s" % (raw_changes))
return None
else: else:
if self.downtimecredibility > 0: self.prepare_abuse_log(abuselog)
self.downtimecredibility -= 1
if self.streak > -1:
self.streak += 1
if self.streak > 8:
self.streak = -1
send_simple("down_detector", _("Connection to {wiki} seems to be stable now.").format(wiki=settings["wikiname"]),
_("Connection status"), settings["avatars"]["connection_restored"])
# In the first for loop we analize the categorize events and figure if we will need more changes to fetch
# in order to cover all of the edits
categorize_events = {}
new_events = 0
for change in changes:
if not (change["rcid"] in self.ids or change["rcid"] < self.recent_id) and not clean:
new_events += 1
logger.debug(
"New event: {}".format(change["rcid"]))
if new_events == settings["limit"]:
if amount < 500:
# call the function again with max limit for more results, ignore the ones in this request
logger.debug("There were too many new events, requesting max amount of events from the wiki.")
return self.fetch(amount=5000 if self.logged_in else 500)
else:
logger.debug(
"There were too many new events, but the limit was high enough we don't care anymore about fetching them all.")
if change["type"] == "categorize":
if "commenthidden" not in change:
if len(recent_changes.mw_messages.keys()) > 0:
cat_title = change["title"].split(':', 1)[1]
# I so much hate this, blame Markus for making me do this
if change["revid"] not in categorize_events:
categorize_events[change["revid"]] = {"new": set(), "removed": set()}
comment_to_match = re.sub(r'<.*?a>', '', change["parsedcomment"])
if recent_changes.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or recent_changes.mw_messages["recentchanges-page-added-to-category-bundled"] in comment_to_match:
categorize_events[change["revid"]]["new"].add(cat_title)
logger.debug("Matched {} to added category for {}".format(cat_title, change["revid"]))
elif recent_changes.mw_messages["recentchanges-page-removed-from-category"] in comment_to_match or recent_changes.mw_messages["recentchanges-page-removed-from-category-bundled"] in comment_to_match:
categorize_events[change["revid"]]["removed"].add(cat_title)
logger.debug("Matched {} to removed category for {}".format(cat_title, change["revid"]))
else:
logger.debug("Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format(recent_changes.mw_messages["recentchanges-page-added-to-category"], recent_changes.mw_messages["recentchanges-page-removed-from-category"], recent_changes.mw_messages["recentchanges-page-removed-from-category-bundled"], recent_changes.mw_messages["recentchanges-page-added-to-category-bundled"], comment_to_match))
else:
logger.warning("Init information not available, could not read category information. Please restart the bot.")
else:
logger.debug("Log entry got suppressed, ignoring entry.")
# if change["revid"] in categorize_events:
# categorize_events[change["revid"]].append(cat_title)
# else:
# logger.debug("New category '{}' for {}".format(cat_title, change["revid"]))
# categorize_events[change["revid"]] = {cat_title: }
for change in changes:
if change["rcid"] in self.ids or change["rcid"] < self.recent_id:
logger.debug("Change ({}) is in ids or is lower than recent_id {}".format(change["rcid"],
self.recent_id))
continue
logger.debug(self.ids)
logger.debug(self.recent_id)
self.add_cache(change)
if clean and not (self.recent_id == 0 and change["rcid"] > self.file_id):
logger.debug("Rejected {val}".format(val=change["rcid"]))
continue
essential_info(change, categorize_events.get(change.get("revid"), None))
if "abuselog" in raw_changes["query"]:
abuse_log = raw_changes['query']['recentchanges']
abuse_log.reverse()
for entry in abuse_log:
abuselog_processing(entry, self)
return change["rcid"]
def safe_request(self, url, params=None): def safe_request(self, url, params=None):
try: try:
@ -240,19 +259,19 @@ class Recent_Changes_Class(object):
request = self.session.get(url, timeout=10, allow_redirects=False) request = self.session.get(url, timeout=10, allow_redirects=False)
except requests.exceptions.Timeout: except requests.exceptions.Timeout:
logger.warning("Reached timeout error for request on link {url}".format(url=url)) logger.warning("Reached timeout error for request on link {url}".format(url=url))
self.downtime_controller() self.downtime_controller(True)
return None return None
except requests.exceptions.ConnectionError: except requests.exceptions.ConnectionError:
logger.warning("Reached connection error for request on link {url}".format(url=url)) logger.warning("Reached connection error for request on link {url}".format(url=url))
self.downtime_controller() self.downtime_controller(True)
return None return None
except requests.exceptions.ChunkedEncodingError: except requests.exceptions.ChunkedEncodingError:
logger.warning("Detected faulty response from the web server for request on link {url}".format(url=url)) logger.warning("Detected faulty response from the web server for request on link {url}".format(url=url))
self.downtime_controller() self.downtime_controller(True)
return None return None
else: else:
if 499 < request.status_code < 600: if 499 < request.status_code < 600:
self.downtime_controller() self.downtime_controller(True)
return None return None
elif request.status_code == 302: elif request.status_code == 302:
logger.critical("Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or Gamepedia is giving us the false value. Please provide the real URL to the wiki, current URL redirects to {}".format(request.next.url)) logger.critical("Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or Gamepedia is giving us the false value. Please provide the real URL to the wiki, current URL redirects to {}".format(request.next.url))
@ -282,20 +301,31 @@ class Recent_Changes_Class(object):
return False return False
return True return True
def downtime_controller(self): def downtime_controller(self, down):
if not settings["show_updown_messages"]: if not settings["show_updown_messages"]:
return return
if self.streak > -1: # reset the streak of successful connections when bad one happens if down:
self.streak = 0 if self.streak > -1: # reset the streak of successful connections when bad one happens
if self.downtimecredibility < 60:
self.downtimecredibility += 15
else:
if (
time.time() - self.last_downtime) > 1800 and self.check_connection(): # check if last downtime happened within 30 minutes, if yes, don't send a message
send_simple("down_detector", _("{wiki} seems to be down or unreachable.").format(wiki=settings["wikiname"]),
_("Connection status"), settings["avatars"]["connection_failed"])
self.last_downtime = time.time()
self.streak = 0 self.streak = 0
if self.downtimecredibility < 60:
self.downtimecredibility += 15
else:
if (
time.time() - self.last_downtime) > 1800 and self.check_connection(): # check if last downtime happened within 30 minutes, if yes, don't send a message
send_simple("down_detector", _("{wiki} seems to be down or unreachable.").format(wiki=settings["wikiname"]),
_("Connection status"), settings["avatars"]["connection_failed"])
self.last_downtime = time.time()
self.streak = 0
else:
if self.downtimecredibility > 0:
self.downtimecredibility -= 1
if self.streak > -1:
self.streak += 1
if self.streak > 8:
self.streak = -1
send_simple("down_detector", _("Connection to {wiki} seems to be stable now.").format(
wiki=settings["wikiname"]),
_("Connection status"), settings["avatars"]["connection_restored"])
def clear_cache(self): def clear_cache(self):
self.map_ips = {} self.map_ips = {}

View file

@ -78,7 +78,7 @@ def day_overview_request():
continuearg = request["continue"]["rccontinue"] if "continue" in request else None continuearg = request["continue"]["rccontinue"] if "continue" in request else None
except ValueError: except ValueError:
logger.warning("ValueError in fetching changes") logger.warning("ValueError in fetching changes")
recent_changes.downtime_controller() recent_changes.downtime_controller(True)
complete = 2 complete = 2
except KeyError: except KeyError:
logger.warning("Wiki returned %s" % (request)) logger.warning("Wiki returned %s" % (request))