RcGcDw/src/rc.py
2020-08-08 03:23:01 +02:00

357 lines
17 KiB
Python

import re
import sys
import time
import logging
import requests
from bs4 import BeautifulSoup
from src.configloader import settings
from src.misc import WIKI_SCRIPT_PATH, WIKI_API_PATH, messagequeue, datafile, send_simple, safe_read, LinkParser
from src.exceptions import MWError
from src.session import session
from src.rc_formatters import compact_formatter, embed_formatter
from src.i18n import rc
# Module-wide helpers: the logger for this file, the shared data mapping exposed
# by src.misc.datafile, and the gettext shortcut used to translate messages.
logger = logging.getLogger("rcgcdw.rc")
storage = datafile.data
_ = rc.gettext
# "<logtype>/<logaction>" identifiers that essential_info() is willing to pass on
# to the formatters; any other log event is reported as not implemented.
supported_logs = [
    "protect/protect", "protect/modify", "protect/unprotect",
    "upload/overwrite", "upload/upload",
    "delete/delete", "delete/delete_redir", "delete/restore", "delete/revision", "delete/event",
    "import/upload", "import/interwiki",
    "merge/merge",
    "move/move", "move/move_redir",
    "protect/move_prot",
    "block/block", "block/unblock", "block/reblock",
    "rights/rights", "rights/autopromote",
    "abusefilter/modify", "abusefilter/create",
    "interwiki/iw_add", "interwiki/iw_edit", "interwiki/iw_delete",
    "curseprofile/comment-created", "curseprofile/comment-edited", "curseprofile/comment-deleted",
    "curseprofile/comment-purged", "curseprofile/profile-edited", "curseprofile/comment-replied",
    "contentmodel/change",
    "sprite/sprite", "sprite/sheet", "sprite/slice",
    "managetags/create", "managetags/delete", "managetags/activate", "managetags/deactivate",
    "tag/update",
    "cargo/createtable", "cargo/deletetable", "cargo/recreatetable", "cargo/replacetable",
    "upload/revert",
    "newusers/create", "newusers/autocreate", "newusers/create2", "newusers/byemail", "newusers/newusers",
]
# Set the proper formatter: bail out first when the configured mode is unknown,
# then bind the formatter function matching the mode.
if settings["appearance"]["mode"] not in ("embed", "compact"):
    logger.critical("Unknown formatter!")
    sys.exit(1)
appearance_mode = embed_formatter if settings["appearance"]["mode"] == "embed" else compact_formatter
# From here on the name LinkParser refers to one shared parser instance, not the imported class.
LinkParser = LinkParser()
class Recent_Changes_Class(object):
    """Store various data and functions related to the wiki and fetching of Recent Changes."""

    def __init__(self):
        """Set up empty caches and counters and pick the rcid to resume from."""
        self.ids = []  # rcids already handled, trimmed by add_cache()
        self.map_ips = {}  # emptied by clear_cache()
        self.recent_id = 0  # value returned by the last successful fetch_changes() call
        self.downtimecredibility = 0  # raised by downtime_controller(), lowered on successful fetches
        self.last_downtime = 0  # time.time() of the last "wiki is down" notification
        self.tags = {}  # tag name -> display name (None when the tag has none), filled by init_info()
        self.groups = {}
        self.streak = -1  # -1: not counting; otherwise successful fetches since the last failure
        self.mw_messages = {}  # interface message name -> text, filled by init_info()
        self.namespaces = None
        self.session = session
        self.logged_in = False
        if settings["limitrefetch"] != -1:
            self.file_id = storage["rcid"]
        else:
            self.file_id = 999999999  # such value won't cause trouble, and it will make sure no refetch happen

    @staticmethod
    def handle_mw_errors(request):
        """Return ``request`` unchanged, or raise MWError when it contains an "errors" key.

        ``request`` is expected to be the decoded JSON body of an API response.
        """
        if "errors" in request:
            logger.error(request["errors"])
            raise MWError
        return request

    def log_in(self):
        """Log in to the wiki with the configured bot password.

        Sets ``self.logged_in`` to True on success. Every failure is logged
        and leaves ``self.logged_in`` untouched; nothing is returned.
        """
        # session.cookies.clear()
        if '@' not in settings["wiki_bot_login"]:
            logger.error(
                "Please provide proper nickname for login from {wiki}Special:BotPasswords".format(
                    wiki=WIKI_SCRIPT_PATH))
            return
        if len(settings["wiki_bot_password"]) != 32:
            logger.error(
                "Password seems incorrect. It should be 32 characters long! Grab it from {wiki}Special:BotPasswords".format(
                    wiki=WIKI_SCRIPT_PATH))
            return
        logger.info("Trying to log in to {wiki}...".format(wiki=WIKI_SCRIPT_PATH))
        try:
            # Decode the body before checking it: handle_mw_errors() inspects the
            # JSON mapping, it cannot find anything in a raw Response object.
            token_data = self.handle_mw_errors(
                self.session.post(WIKI_API_PATH,
                                  data={'action': 'query', 'format': 'json', 'utf8': '', 'meta': 'tokens',
                                        'type': 'login'}).json())
            login_data = self.handle_mw_errors(
                self.session.post(WIKI_API_PATH,
                                  data={'action': 'login', 'format': 'json', 'utf8': '',
                                        'lgname': settings["wiki_bot_login"],
                                        'lgpassword': settings["wiki_bot_password"],
                                        'lgtoken': token_data['query']['tokens']['logintoken']}).json())
        except (ValueError, KeyError, TypeError, MWError):
            # Undecodable body, unexpected response layout or an error reported by the wiki.
            logger.error("Logging in have not succeeded")
            return
        try:
            succeeded = login_data['login']['result'] == "Success"
        except (KeyError, TypeError):
            succeeded = False
        if succeeded:
            logger.info("Successfully logged in")
            self.logged_in = True
        else:
            logger.error("Logging in have not succeeded")

    def add_cache(self, change):
        """Remember the rcid of ``change``, dropping the oldest entry once the cache is full."""
        self.ids.append(change["rcid"])
        # self.recent_id = change["rcid"]
        if len(self.ids) > settings["limitrefetch"] + 5:
            self.ids.pop(0)

    def fetch(self, amount=settings["limit"]):
        """Resend queued messages, fetch up to ``amount`` changes and store the newest rcid.

        Returns ``self.recent_id`` after the fetch.
        """
        messagequeue.resend_msgs()
        last_check = self.fetch_changes(amount=amount)
        # If the request succeeds the last_check will be the last rcid from recentchanges query
        if last_check is not None:
            self.recent_id = last_check
        # Assigns self.recent_id the last rcid if request succeeded, otherwise set the id from the file
        if settings["limitrefetch"] != -1 and self.recent_id != self.file_id and self.recent_id != 0:  # if saving to database is disabled, don't save the recent_id
            self.file_id = self.recent_id
            storage["rcid"] = self.recent_id
            datafile.save_datafile()
        logger.debug("Most recent rcid is: {}".format(self.recent_id))
        return self.recent_id

    def fetch_changes(self, amount, clean=False):
        """Fetch up to ``amount`` changes from the wiki and hand the new ones to essential_info().

        ``clean`` is forced to True while the rcid cache is empty; in that mode
        changes are cached but only announced when no fetch has succeeded yet
        and their rcid is above the stored one.

        Returns None on error or when the wiki reported no changes, otherwise
        the rcid of the latest change. When as many new events as
        ``settings["limit"]`` are seen and ``amount`` is below 500, the result
        of a larger ``self.fetch()`` call is returned instead.
        """
        if len(self.ids) == 0:
            logger.debug("ids is empty, triggering clean fetch")
            clean = True
        response = self.safe_request(
            "{wiki}?action=query&format=json&list=recentchanges{show_bots}&rcprop=title%7Credirect%7Ctimestamp%7Cids%7Cloginfo%7Cparsedcomment%7Csizes%7Cflags%7Ctags%7Cuser&rclimit={amount}&rctype=edit%7Cnew%7Clog%7Cexternal{categorize}".format(
                wiki=WIKI_API_PATH, amount=amount, categorize="%7Ccategorize" if settings["show_added_categories"] else "", show_bots="&rcshow=!bot" if settings["show_bots"] is False else ""))
        if not response:  # None from safe_request(), or a response requests considers not ok
            return None
        try:
            changes = response.json()['query']['recentchanges']
            changes.reverse()
        except ValueError:
            logger.warning("ValueError in fetching changes")
            logger.warning("Changes URL:" + response.url)
            self.downtime_controller()
            return None
        except KeyError:
            logger.warning("Wiki returned %s" % (response.json()))
            return None
        self._note_successful_fetch()
        # In the first for loop we analyze the categorize events and figure if we will need more changes to fetch
        # in order to cover all of the edits
        categorize_events = {}
        new_events = 0
        for change in changes:
            if not (change["rcid"] in self.ids or change["rcid"] < self.recent_id) and not clean:
                new_events += 1
                logger.debug(
                    "New event: {}".format(change["rcid"]))
                if new_events == settings["limit"]:
                    if amount < 500:
                        # call the function again with max limit for more results, ignore the ones in this request
                        logger.debug("There were too many new events, requesting max amount of events from the wiki.")
                        return self.fetch(amount=5000 if self.logged_in else 500)
                    else:
                        logger.debug(
                            "There were too many new events, but the limit was high enough we don't care anymore about fetching them all.")
            if change["type"] == "categorize":
                self._record_categorize_event(change, categorize_events)
        for change in changes:
            if change["rcid"] in self.ids or change["rcid"] < self.recent_id:
                logger.debug("Change ({}) is in ids or is lower than recent_id {}".format(change["rcid"],
                                                                                          self.recent_id))
                continue
            logger.debug(self.ids)
            logger.debug(self.recent_id)
            self.add_cache(change)
            if clean and not (self.recent_id == 0 and change["rcid"] > self.file_id):
                logger.debug("Rejected {val}".format(val=change["rcid"]))
                continue
            essential_info(change, categorize_events.get(change.get("revid"), None))
        if not changes:
            # Nothing was returned, so there is no "latest" change to report.
            return None
        return changes[-1]["rcid"]

    def _note_successful_fetch(self):
        """Lower the downtime counter and announce recovery after enough successful fetches."""
        if self.downtimecredibility > 0:
            self.downtimecredibility -= 1
            if self.streak > -1:
                self.streak += 1
            if self.streak > 8:
                self.streak = -1
                send_simple("down_detector", _("Connection to {wiki} seems to be stable now.").format(wiki=settings["wikiname"]),
                            _("Connection status"), settings["avatars"]["connection_restored"])

    def _comment_has_message(self, comment, *message_names):
        """Return True when any of the named interface messages occurs in ``comment``."""
        for name in message_names:
            # Messages the wiki reported as missing are never stored by init_info().
            message = self.mw_messages.get(name)
            if message is not None and message in comment:
                return True
        return False

    def _record_categorize_event(self, change, categorize_events):
        """Add the category touched by a "categorize" change to ``categorize_events[revid]``."""
        if "commenthidden" in change:
            logger.debug("Log entry got suppressed, ignoring entry.")
            return
        if not self.mw_messages:
            logger.warning("Init information not available, could not read category information. Please restart the bot.")
            return
        cat_title = change["title"].split(':', 1)[1]
        revision_events = categorize_events.setdefault(change["revid"], {"new": set(), "removed": set()})
        # Whether the page was added or removed is inferred by matching the
        # comment (links stripped) against the wiki's own interface messages.
        comment_to_match = re.sub(r'<.*?a>', '', change["parsedcomment"])
        if self._comment_has_message(comment_to_match, "recentchanges-page-added-to-category",
                                     "recentchanges-page-added-to-category-bundled"):
            revision_events["new"].add(cat_title)
            logger.debug("Matched {} to added category for {}".format(cat_title, change["revid"]))
        elif self._comment_has_message(comment_to_match, "recentchanges-page-removed-from-category",
                                       "recentchanges-page-removed-from-category-bundled"):
            revision_events["removed"].add(cat_title)
            logger.debug("Matched {} to removed category for {}".format(cat_title, change["revid"]))
        else:
            logger.debug("Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format(
                self.mw_messages.get("recentchanges-page-added-to-category"),
                self.mw_messages.get("recentchanges-page-removed-from-category"),
                self.mw_messages.get("recentchanges-page-removed-from-category-bundled"),
                self.mw_messages.get("recentchanges-page-added-to-category-bundled"),
                comment_to_match))

    def safe_request(self, url):
        """GET ``url`` through the shared session without following redirects.

        Returns the response, or None after a timeout, connection error,
        chunked-encoding error or 5xx status (each of which is reported to
        downtime_controller()). A 302 response terminates the program.
        """
        try:
            request = self.session.get(url, timeout=10, allow_redirects=False)
        except requests.exceptions.Timeout:
            logger.warning("Reached timeout error for request on link {url}".format(url=url))
            self.downtime_controller()
            return None
        except requests.exceptions.ConnectionError:
            logger.warning("Reached connection error for request on link {url}".format(url=url))
            self.downtime_controller()
            return None
        except requests.exceptions.ChunkedEncodingError:
            logger.warning("Detected faulty response from the web server for request on link {url}".format(url=url))
            self.downtime_controller()
            return None
        if 499 < request.status_code < 600:
            self.downtime_controller()
            return None
        if request.status_code == 302:
            logger.critical("Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or Gamepedia is giving us the false value. Please provide the real URL to the wiki, current URL redirects to {}".format(request.next.url))
            sys.exit(0)
        return request

    def check_connection(self, looped=False):
        """Check whether any of three well-known websites can be reached.

        Returns True when at least one request succeeded. Otherwise the
        downtime counter is reset and, unless ``looped`` is set, the call
        blocks, re-checking every 10 seconds, until the connection is back;
        it then refetches changes and returns False.
        """
        online = 0
        for website in ["https://google.com", "https://instagram.com", "https://steamcommunity.com"]:
            try:
                requests.get(website, timeout=10)
                online += 1
            except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
                pass
        if online < 1:
            logger.error("Failure when checking Internet connection at {time}".format(
                time=time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())))
            self.downtimecredibility = 0
            if not looped:
                while True:  # recursed loop, check for connection (every 10 seconds) as long as three services are down, don't do anything else
                    if self.check_connection(looped=True):
                        self.fetch(amount=settings["limitrefetch"])
                        break
                    time.sleep(10)
            return False
        return True

    def downtime_controller(self):
        """Register a failed wiki request and send a "wiki is down" message when warranted.

        Does nothing when ``settings["show_updown_messages"]`` is off.
        """
        if not settings["show_updown_messages"]:
            return
        if self.streak > -1:  # reset the streak of successful connections when bad one happens
            self.streak = 0
        if self.downtimecredibility < 60:
            self.downtimecredibility += 15
        else:
            # check if last downtime happened within 30 minutes, if yes, don't send a message
            if (time.time() - self.last_downtime) > 1800 and self.check_connection():
                send_simple("down_detector", _("{wiki} seems to be down or unreachable.").format(wiki=settings["wikiname"]),
                            _("Connection status"), settings["avatars"]["connection_failed"])
                self.last_downtime = time.time()
                self.streak = 0

    def clear_cache(self):
        """Empty the ``map_ips`` cache."""
        self.map_ips = {}

    def init_info(self):
        """Fetch tags, category-related interface messages and namespaces from the wiki.

        Fills ``self.tags``, ``self.mw_messages`` and ``self.namespaces``;
        problems are only logged.
        """
        startup_info = safe_read(self.safe_request(
            "{wiki}?action=query&format=json&uselang=content&list=tags&meta=allmessages%7Csiteinfo&utf8=1&tglimit=max&tgprop=displayname&ammessages=recentchanges-page-added-to-category%7Crecentchanges-page-removed-from-category%7Crecentchanges-page-added-to-category-bundled%7Crecentchanges-page-removed-from-category-bundled&amenableparser=1&amincludelocal=1&siprop=namespaces".format(
                wiki=WIKI_API_PATH)), "query")
        if startup_info:
            if "tags" in startup_info and "allmessages" in startup_info:
                for tag in startup_info["tags"]:
                    try:
                        self.tags[tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
                    except KeyError:
                        self.tags[tag["name"]] = None  # Tags with no display name are hidden and should not appear on RC as well
                for message in startup_info["allmessages"]:
                    if "missing" not in message:  # ignore missing strings
                        self.mw_messages[message["name"]] = message["*"]
                    else:
                        logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
                # Drop [[wiki links]] so the messages can be matched against link-stripped comments.
                for key, message in self.mw_messages.items():
                    if key.startswith("recentchanges-page-"):
                        self.mw_messages[key] = re.sub(r'\[\[.*?\]\]', '', message)
                self.namespaces = startup_info["namespaces"]
                logger.info("Gathered information about the tags and interface messages.")
            else:
                logger.warning("Could not retrieve initial wiki information. Some features may not work correctly!")
                logger.debug(startup_info)
        else:
            logger.error("Could not retrieve initial wiki information. Possibly internet connection issue?")

    def pull_comment(self, comment_id):
        """Return the raw text of the comment ``comment_id`` from the wiki's comment API.

        Text longer than 1000 characters is cut to 1000 and marked with an
        ellipsis. Returns an empty string when the comment cannot be retrieved.
        """
        try:
            comment = self.handle_mw_errors(self.safe_request(
                "{wiki}?action=comment&do=getRaw&comment_id={comment}&format=json".format(wiki=WIKI_API_PATH,
                                                                                           comment=comment_id)).json())[
                "text"]
            logger.debug("Got the following comment from the API: {}".format(comment))
        except MWError:
            pass  # already logged by handle_mw_errors()
        except (TypeError, AttributeError, ValueError):
            # No response at all (safe_request() returned None) or a body that is not usable JSON.
            logger.exception("Could not resolve the comment text.")
        except KeyError:
            logger.exception("CurseProfile extension API did not respond with a valid comment content.")
        else:
            if len(comment) > 1000:
                comment = comment[0:1000] + "…"
            return comment
        return ""
# Module-level instance; essential_info() below hands it to the active formatter.
recent_changes = Recent_Changes_Class()
def essential_info(change, changed_categories):
    """Work out the event identifier and comment for ``change`` and call the formatter.

    ``change`` is one recentchanges entry; ``changed_categories`` is passed
    through to the formatter untouched. Nothing is sent for categorize
    entries, unsupported events, or identifiers listed in
    ``settings["ignored"]``.
    """
    logger.debug(change)
    hidden_by_suppression = "actionhidden" in change or "suppressed" in change
    if hidden_by_suppression and "suppressed" not in settings["ignored"]:  # if event is hidden using suppression
        appearance_mode("suppressed", change, "", changed_categories, recent_changes)
        return

    if "commenthidden" in change:
        parsed_comment = _("~~hidden~~")
    else:
        LinkParser.feed(change["parsedcomment"])
        raw_comment = LinkParser.new_string
        LinkParser.new_string = ""  # reset the shared parser's buffer for the next comment
        # Put a backslash in front of the characters matched by the pattern.
        parsed_comment = re.sub(r"(`|_|\*|~|{|}|\|\|)", "\\\\\\1", raw_comment, count=0)
    parsed_comment = parsed_comment or None

    event_type = change["type"]
    if event_type == "categorize":
        return
    if event_type in ("edit", "new"):
        logger.debug("List of categories in essential_info: {}".format(changed_categories))
        if "userhidden" in change:
            change["user"] = _("hidden")
        identification_string = event_type
    elif event_type == "log":
        identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"])
        if identification_string not in supported_logs:
            logger.warning(
                "This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(
                    change))
            return
    else:
        logger.warning("This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(change))
        return

    if identification_string not in settings["ignored"]:
        appearance_mode(identification_string, change, parsed_comment, changed_categories, recent_changes)