Further restructuring

Removed the rcid cache (the stability of this approach still needs to be tested)
Added support for #142
Reorganized code to be more readable (?)
This commit is contained in:
Frisk 2020-10-18 01:45:06 +02:00
parent 1db1d5b61b
commit 1d302cc3ec
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
4 changed files with 75 additions and 56 deletions

View file

@ -33,10 +33,10 @@ discussion_logger = logging.getLogger("rcgcdw.disc")
# Create a variable in datafile if it doesn't exist yet (in files <1.10) # Create a variable in datafile if it doesn't exist yet (in files <1.10)
if "discussion_id" not in datafile.data: if "discussion_id" not in datafile.data:
datafile.data["discussion_id"] = 0 datafile["discussion_id"] = 0
datafile.save_datafile() datafile.save_datafile()
storage = datafile.data storage = datafile
fetch_url = "https://services.fandom.com/discussion/{wikiid}/posts?sortDirection=descending&sortKey=creation_date&limit={limit}".format(wikiid=settings["fandom_discussions"]["wiki_id"], limit=settings["fandom_discussions"]["limit"]) fetch_url = "https://services.fandom.com/discussion/{wikiid}/posts?sortDirection=descending&sortKey=creation_date&limit={limit}".format(wikiid=settings["fandom_discussions"]["wiki_id"], limit=settings["fandom_discussions"]["limit"])
domain = prepare_paths(settings["fandom_discussions"]["wiki_url"], dry=True) # Shutdown if the path for discussions is wrong domain = prepare_paths(settings["fandom_discussions"]["wiki_url"], dry=True) # Shutdown if the path for discussions is wrong
@ -52,7 +52,7 @@ def fetch_discussions():
discussion_logger.warning("ValueError in fetching discussions") discussion_logger.warning("ValueError in fetching discussions")
return None return None
except KeyError: except KeyError:
discussion_logger.warning("Wiki returned %s" % (request_json.json())) discussion_logger.warning("Wiki returned %s" % (request.json()))
return None return None
else: else:
if request_json: if request_json:

View file

@ -30,7 +30,7 @@ _ = misc.gettext
misc_logger = logging.getLogger("rcgcdw.misc") misc_logger = logging.getLogger("rcgcdw.misc")
data_template = {"rcid": 99999999999, "discussion_id": 0, "abuse_log_id": 0, data_template = {"rcid": None, "discussion_id": 0, "abuse_log_id": None,
"daily_overview": {"edits": None, "new_files": None, "admin_actions": None, "bytes_changed": None, "daily_overview": {"edits": None, "new_files": None, "admin_actions": None, "bytes_changed": None,
"new_articles": None, "unique_editors": None, "day_score": None, "days_tracked": 0}} "new_articles": None, "unique_editors": None, "day_score": None, "days_tracked": 0}}
@ -45,6 +45,7 @@ class DataFile:
"""Data class whose instance is shared by multiple modules so they remain consistent and do not cause too many IO operations.""" """Data class whose instance is shared by multiple modules so they remain consistent and do not cause too many IO operations."""
def __init__(self): def __init__(self):
self.data = self.load_datafile() self.data = self.load_datafile()
self.changed = False
@staticmethod @staticmethod
def generate_datafile(): def generate_datafile():
@ -70,13 +71,24 @@ class DataFile:
def save_datafile(self): def save_datafile(self):
"""Overwrites the data.json file with given dictionary""" """Overwrites the data.json file with given dictionary"""
if self.changed is False: # don't cause unnecessary write operations
return
try: try:
with open("data.json", "w") as data_file: with open("data.json", "w") as data_file:
data_file.write(json.dumps(self.data, indent=4)) data_file.write(json.dumps(self.data, indent=4))
self.changed = False
except PermissionError: except PermissionError:
misc_logger.critical("Could not modify a data file (no permissions). No way to store last edit.") misc_logger.critical("Could not modify a data file (no permissions). No way to store last edit.")
sys.exit(1) sys.exit(1)
def __setitem__(self, instance, value):
self.data[instance] = value
self.changed = True
def __getitem__(self, item):
return self.data[item]
class MessageQueue: class MessageQueue:
"""Message queue class for undelivered messages""" """Message queue class for undelivered messages"""

106
src/rc.py
View file

@ -15,7 +15,7 @@ from collections import OrderedDict
_ = rc.gettext _ = rc.gettext
storage = datafile.data storage = datafile
logger = logging.getLogger("rcgcdw.rc") logger = logging.getLogger("rcgcdw.rc")
@ -38,10 +38,7 @@ LinkParser = LinkParser()
class Recent_Changes_Class(object): class Recent_Changes_Class(object):
"""Store various data and functions related to wiki and fetching of Recent Changes""" """Store various data and functions related to wiki and fetching of Recent Changes"""
def __init__(self): def __init__(self):
self.ids = []
self.map_ips = {} self.map_ips = {}
self.recent_id = 0
self.recent_abuse_id = 0
self.downtimecredibility = 0 self.downtimecredibility = 0
self.last_downtime = 0 self.last_downtime = 0
self.tags = {} self.tags = {}
@ -51,10 +48,7 @@ class Recent_Changes_Class(object):
self.namespaces = None self.namespaces = None
self.session = session self.session = session
self.logged_in = False self.logged_in = False
if settings["limitrefetch"] != -1: self.initial_run_complete = False
self.file_id = storage["rcid"]
else:
self.file_id = 999999999 # such value won't cause trouble, and it will make sure no refetch happen
@staticmethod @staticmethod
def handle_mw_errors(request): def handle_mw_errors(request):
@ -102,26 +96,24 @@ class Recent_Changes_Class(object):
except: except:
logger.error("Logging in have not succeeded") logger.error("Logging in have not succeeded")
def add_cache(self, change):
self.ids.append(change["rcid"])
# self.recent_id = change["rcid"]
if len(self.ids) > settings["limitrefetch"] + 5:
self.ids.pop(0)
def fetch(self, amount=settings["limit"]): def fetch(self, amount=settings["limit"]):
messagequeue.resend_msgs() messagequeue.resend_msgs()
rcrequest = self.fetch_recentchanges_request(amount)
last_check = self.fetch_changes(amount=amount) last_check = self.fetch_changes(amount=amount)
# If the request succeeds the last_check will be the last rcid from recentchanges query
if last_check is not None: if last_check is not None:
self.recent_id = last_check storage["rcid"] = last_check[0] if last_check[0] else storage["rcid"]
# Assigns self.recent_id the last rcid if request succeeded, otherwise set the id from the file storage["abuse_log_id"] = last_check[1] if last_check[1] else storage["abuse_log_id"]
if settings["limitrefetch"] != -1 and self.recent_id != self.file_id and self.recent_id != 0: # if saving to database is disabled, don't save the recent_id storage.save_datafile()
self.file_id = self.recent_id self.initial_run_complete = True
storage["rcid"] = self.recent_id # If the request succeeds the last_check will be the last rcid from recentchanges query
datafile.save_datafile() # if last_check is not None:
logger.debug("Most recent rcid is: {}".format(self.recent_id)) # self.recent_id = last_check
return self.recent_id # # Assigns self.recent_id the last rcid if request succeeded, otherwise set the id from the file
# if settings["limitrefetch"] != -1 and self.recent_id != self.file_id and self.recent_id != 0: # if saving to database is disabled, don't save the recent_id
# self.file_id = self.recent_id
# storage["rcid"] = self.recent_id
# datafile.save_datafile()
# logger.debug("Most recent rcid is: {}".format(self.recent_id))
# return self.recent_id
def fetch_recentchanges_request(self, amount): def fetch_recentchanges_request(self, amount):
"""Make a typical MW request for rc/abuselog """Make a typical MW request for rc/abuselog
@ -152,17 +144,21 @@ class Recent_Changes_Class(object):
params["aflprop"] = "ids|user|title|action|result|timestamp|hidden|revid|filter" params["aflprop"] = "ids|user|title|action|result|timestamp|hidden|revid|filter"
return params return params
def prepare_rc(self, changes: list, clean: bool, amount: int): def prepare_rc(self, changes: list, amount: int):
"""Processes recent changes messages""" """Processes recent changes messages"""
if not changes:
return None
categorize_events = {} categorize_events = {}
new_events = 0 new_events = 0
changes.reverse() changes.reverse()
recent_id = storage["rcid"]
dry_run = True if recent_id is None else False
for change in changes: for change in changes:
if not (change["rcid"] in self.ids or change["rcid"] < self.recent_id) and not clean: if not dry_run and not (change["rcid"] <= recent_id):
new_events += 1 new_events += 1
logger.debug( logger.debug(
"New event: {}".format(change["rcid"])) "New event: {}".format(change["rcid"]))
if new_events == settings["limit"]: if new_events == settings["limit"] and not (amount == settings["limitrefetch"] and self.initial_run_complete is False):
if amount < 500: if amount < 500:
# call the function again with max limit for more results, ignore the ones in this request # call the function again with max limit for more results, ignore the ones in this request
logger.debug("There were too many new events, requesting max amount of events from the wiki.") logger.debug("There were too many new events, requesting max amount of events from the wiki.")
@ -202,32 +198,41 @@ class Recent_Changes_Class(object):
"Init information not available, could not read category information. Please restart the bot.") "Init information not available, could not read category information. Please restart the bot.")
else: else:
logger.debug("Log entry got suppressed, ignoring entry.") logger.debug("Log entry got suppressed, ignoring entry.")
for change in changes: if not dry_run:
if change["rcid"] in self.ids or change["rcid"] < self.recent_id: for change in changes:
logger.debug("Change ({}) is in ids or is lower than recent_id {}".format(change["rcid"], if change["rcid"] <= recent_id:
self.recent_id)) logger.debug("Change ({}) is lower or equal to recent_id {}".format(change["rcid"], recent_id))
continue continue
logger.debug(self.ids) logger.debug(recent_id)
logger.debug(self.recent_id) essential_info(change, categorize_events.get(change.get("revid"), None))
self.add_cache(change)
if clean and not (self.recent_id == 0 and change["rcid"] > self.file_id):
logger.debug("Rejected {val}".format(val=change["rcid"]))
continue
essential_info(change, categorize_events.get(change.get("revid"), None))
def prepare_abuse_log(self, abuse_log: list):
abuse_log.reverse()
for entry in abuse_log:
abuselog_processing(entry, self)
return change["rcid"] return change["rcid"]
def fetch_changes(self, amount, clean=False): def prepare_abuse_log(self, abuse_log: list):
if not abuse_log:
return None
abuse_log.reverse()
recent_id = storage["abuse_log_id"]
dryrun = True if recent_id is None else False
for entry in abuse_log:
if dryrun:
continue
if entry["id"] <= recent_id:
continue
abuselog_processing(entry, self)
return entry["id"]
# def filter_logic(self, clean_status, change_id, file_id):
# """Function that filters which changes should be sent and which not. Returns True if to send, False otherwise"""
# if clean_status and not change_id > file_id:
# return False
# return True
def fetch_changes(self, amount):
"""Fetches the :amount: of changes from the wiki. """Fetches the :amount: of changes from the wiki.
Returns None on error and int of rcid of latest change if succeeded""" Returns None on error and int of rcid of latest change if succeeded"""
global logged_in global logged_in
if len(self.ids) == 0: rc_last_id = None
logger.debug("ids is empty, triggering clean fetch") abuselog_last_id = None
clean = True
try: try:
request_json = self.fetch_recentchanges_request(amount) request_json = self.fetch_recentchanges_request(amount)
except ConnectionError: except ConnectionError:
@ -238,7 +243,7 @@ class Recent_Changes_Class(object):
logger.warning("Path query.recentchanges not found inside request body. Skipping...") logger.warning("Path query.recentchanges not found inside request body. Skipping...")
return return
else: else:
self.prepare_rc(rc, clean, amount) rc_last_id = self.prepare_rc(rc, amount)
if settings["show_abuselog"]: if settings["show_abuselog"]:
try: try:
abuselog = request_json["query"]["abuselog"] # While LYBL approach would be more performant when abuselog is not in request body, I prefer this approach for its clarity abuselog = request_json["query"]["abuselog"] # While LYBL approach would be more performant when abuselog is not in request body, I prefer this approach for its clarity
@ -249,7 +254,8 @@ class Recent_Changes_Class(object):
settings["show_abuselog"] = False settings["show_abuselog"] = False
logger.warning("AbuseLog extension is not enabled on the wiki. Disabling the function...") logger.warning("AbuseLog extension is not enabled on the wiki. Disabling the function...")
else: else:
self.prepare_abuse_log(abuselog) abuselog_last_id = self.prepare_abuse_log(abuselog)
return rc_last_id, abuselog_last_id
def safe_request(self, url, params=None): def safe_request(self, url, params=None):
try: try:

View file

@ -57,9 +57,10 @@ def compact_abuselog_formatter(change, recent_changes):
action = "abuselog/{}".format(change["result"]) action = "abuselog/{}".format(change["result"])
author_url = link_formatter(create_article_path("User:{user}".format(user=change["user"]))) author_url = link_formatter(create_article_path("User:{user}".format(user=change["user"])))
author = change["user"] author = change["user"]
message = _("[{author}]({author_url}) triggered {abuse_filter}, performing the action \"{action}\" on {target}. Action taken: {result}.").format( message = _("[{author}]({author_url}) triggered *{abuse_filter}*, performing the action \"{action}\" on *[{target}]({target_url})* - action taken: {result}.").format(
author=author, author_url=author_url, abuse_filter=change["filter"], author=author, author_url=author_url, abuse_filter=change["filter"],
action=abusefilter_actions.get(change["action"], _("Unknown")), target=change.get("title", _("Unknown")), action=abusefilter_actions.get(change["action"], _("Unknown")), target=change.get("title", _("Unknown")),
target_url=create_article_path(change.get("title", _("Unknown"))),
result=abusefilter_results.get(change["result"], _("Unknown"))) result=abusefilter_results.get(change["result"], _("Unknown")))
send_to_discord(DiscordMessage("compact", action, settings["webhookURL"], content=message)) send_to_discord(DiscordMessage("compact", action, settings["webhookURL"], content=message))
@ -130,7 +131,7 @@ def compact_formatter(action, change, parsed_comment, categories, recent_changes
link = link_formatter(create_article_path("Special:Contributions/{user}".format(user=user))) link = link_formatter(create_article_path("Special:Contributions/{user}".format(user=user)))
except ValueError: except ValueError:
link = link_formatter(create_article_path(change["title"])) link = link_formatter(create_article_path(change["title"]))
if change["logparams"]["duration"] in ["infinite", "infinity"]: if change["logparams"]["duration"] in ["infinite", "infinity", "indefinite", "never"]:
block_time = _("for infinity and beyond") block_time = _("for infinity and beyond")
else: else:
english_length = re.sub(r"(\d+)", "", change["logparams"][ english_length = re.sub(r"(\d+)", "", change["logparams"][