RcGcDw/src/rcgcdw.py

360 lines
16 KiB
Python
Raw Normal View History

2018-06-15 13:56:35 +00:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
# This file is part of Recent changes Goat compatible Discord webhook (RcGcDw).
2018-06-21 23:56:04 +00:00
# RcGcDw is free software: you can redistribute it and/or modify
2019-02-13 08:14:08 +00:00
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
2018-09-30 16:14:44 +00:00
# (at your option) any later version.
2018-06-21 23:56:04 +00:00
# RcGcDw is distributed in the hope that it will be useful,
2018-09-30 16:14:44 +00:00
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2019-02-13 08:14:08 +00:00
# GNU General Public License for more details.
2018-06-21 23:56:04 +00:00
2019-02-13 08:14:08 +00:00
# You should have received a copy of the GNU General Public License
# along with RcGcDw. If not, see <http://www.gnu.org/licenses/>.
2018-06-21 23:56:04 +00:00
2018-09-30 16:14:44 +00:00
# WARNING! SHITTY CODE AHEAD. ENTER ONLY IF YOU ARE SURE YOU CAN TAKE IT
# You have been warned
2022-06-15 17:43:24 +00:00
import time, logging.config, requests, datetime, math, os.path, sys, importlib
2020-07-07 11:21:49 +00:00
import src.misc
2021-11-18 16:22:16 +00:00
import src.configloader
2023-01-10 21:36:20 +00:00
from src.migrations import *
2021-05-14 17:16:14 +00:00
from collections import defaultdict, Counter, OrderedDict
2021-11-18 16:27:02 +00:00
from src.argparser import command_args
2021-04-25 11:20:58 +00:00
from typing import Optional
import src.api.client
2021-04-25 11:20:58 +00:00
from src.api.context import Context
2021-05-02 23:46:40 +00:00
from src.api.hooks import formatter_hooks, pre_hooks, post_hooks
2022-01-02 19:59:17 +00:00
from src.misc import add_to_dict, datafile, run_hooks
from src.api.util import default_message
from src.discord.queue import send_to_discord
from src.discord.message import DiscordMessage, DiscordMessageMetadata
2022-01-02 19:59:17 +00:00
from src.exceptions import ServerError, MediaWikiError, NoFormatter
from src.i18n import rcgcdw, formatters_i18n
2021-04-25 11:20:58 +00:00
from src.wiki import Wiki
2021-11-18 16:22:16 +00:00
# Module-wide configuration and translation shortcuts used by every function below.
settings = src.configloader.settings
_ = rcgcdw.gettext
ngettext = rcgcdw.ngettext
TESTING = command_args.test  # debug mode, pipeline testing
# Falsy when the "auto_suppression" section is absent or has no truthy "enabled" key.
AUTO_SUPPRESSION_ENABLED = settings.get("auto_suppression", {"enabled": False}).get("enabled")
if AUTO_SUPPRESSION_ENABLED:
    # These names exist only when auto suppression is on; callers check
    # AUTO_SUPPRESSION_ENABLED before using them.
    from src.discord.redaction import delete_messages, redact_messages, find_middle_next
# Prepare logging
logging.config.dictConfig(settings["logging"])
logger = logging.getLogger("rcgcdw")
# NOTE(review): this writes the complete settings mapping to the debug log, which includes
# keys such as "webhookURL" and "wiki_bot_password" - consider redacting them.
logger.debug("Current settings: {settings}".format(settings=settings))
def load_extensions():
    """Import the extensions package; importing it is all that is needed for extensions to register.

    The package name is read from the "extensions_dir" setting and defaults to "extensions".
    When the import fails, the error is logged and the process exits with status 1.
    """
    package_name = settings.get('extensions_dir', 'extensions')
    try:
        importlib.import_module(package_name, 'extensions')
    except ImportError:
        logger.critical("No extensions module found. What's going on?")
        # logger.exception attaches the traceback of the ImportError being handled
        logger.exception("Error:")
        sys.exit(1)
2020-10-18 09:39:16 +00:00
storage = datafile
# One-time migration: when limitrefetch is not -1 and a legacy lastchange.txt exists,
# copy the rcid it holds into the data file and delete the legacy file.
if settings["limitrefetch"] != -1 and os.path.exists("lastchange.txt"):
    with open("lastchange.txt", 'r', encoding="utf-8") as sfile:
        logger.info("Converting old lastchange.txt file into new data storage data.json...")
        storage["rcid"] = int(sfile.read().strip())
    datafile.save_datafile()
    # Delete only after the handle has been closed: removing a file that is still open
    # fails on Windows.
    os.remove("lastchange.txt")
2018-09-30 16:14:44 +00:00
2021-04-25 11:20:58 +00:00
def no_formatter(ctx: Context, change: dict) -> None:
    """Fallback formatter: log which event has no formatter and abort handling of it.

    :param ctx: context of the event; only ctx.event is read, for the log message.
    :param change: the change that could not be formatted (unused).
    :raises NoFormatter: always.
    """
    logger.warning(f"There is no formatter specified for {ctx.event}! Ignoring event.")
    raise NoFormatter()


# Registered under its own key so it can be looked up like any other formatter.
formatter_hooks["no_formatter"] = no_formatter
2021-05-14 17:16:14 +00:00
def day_overview_request() -> list:
    """Fetch the recent changes of the last 24 hours.

    Pages through list=recentchanges via wiki.retried_api_request, following "rccontinue"
    for at most 10 requests with a 3 second pause between them.

    :return: the concatenated "recentchanges" entries of every page that was fetched.
    """
    logger.info("Fetching daily overview... This may take up to 30 seconds!")
    day_ago = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(hours=24)
    timestamp = day_ago.isoformat(timespec='milliseconds')
    result = []
    continuearg: Optional[str] = None
    for passes in range(1, 11):
        params = OrderedDict(dict(action="query", format="json", list="recentchanges", rcend=timestamp,
                                  rcprop="title|timestamp|sizes|loginfo|user|userid", rcshow="!bot",
                                  rclimit="max", rctype="edit|new|log", rccontinue=continuearg))
        request = wiki.retried_api_request(params)
        result.extend(request['query']['recentchanges'])
        if "continue" not in request:
            # Last page reached, nothing more to ask for
            return result
        continuearg = request["continue"].get("rccontinue", None)
        logger.debug(
            "continuing requesting next pages of recent changes with {} passes and continuearg being {}".format(
                passes, continuearg))
        time.sleep(3.0)
    logger.debug("quit the loop because there been too many passes")
    return result
2018-06-21 23:35:24 +00:00
2018-09-30 16:14:44 +00:00
2021-05-14 17:16:14 +00:00
def daily_overview_sync(data: dict) -> dict:
    """Fold today's numbers into the stored daily overview data and return display strings.

    On the first tracked day the values are stored as they are and returned as plain strings.
    On later days each value is combined with the stored one through src.misc.weighted_average
    (weighted by the number of days tracked so far), the result is stored, and the returned
    text shows both today's value and the new average.

    :param data: mapping of statistic name to today's value.
    :return: mapping of statistic name to the text to display.
    """
    tracked = storage["daily_overview"]
    days_tracked = tracked["days_tracked"]
    if days_tracked == 0:
        tracked.update(data)
        formatted = {name: str(value) for name, value in data.items()}
    else:
        formatted = {}
        for name, value in data.items():
            average = src.misc.weighted_average(tracked[name], days_tracked, value)
            formatted[name] = _("{value} (avg. {avg})").format(value=value, avg=average)
            tracked[name] = average
    tracked["days_tracked"] += 1
    datafile.save_datafile()
    return formatted
2018-09-30 16:14:44 +00:00
def day_overview(client):
    """Build the "Daily overview" embed for the last 24 hours and send it to Discord.

    :param client: object whose create_article_path() is used to build the embed links.
    :return: None. Nothing is sent when fetching the changes fails, or when there were no
        changes and the "send_empty_overview" setting is false.
    """
    try:
        result = day_overview_request()
    except (ServerError, MediaWikiError):
        logger.error("Couldn't complete Daily Overview as requests for changes resulted in errors.")
        return

    def in_content_namespace(namespace_id) -> bool:
        """Return True for namespace 0 or a namespace that wiki.namespaces marks with "content"."""
        if namespace_id == 0:
            return True
        # wiki.namespaces may be None; check that before doing the membership test.
        return wiki.namespaces is not None and "content" in wiki.namespaces.get(str(namespace_id), {})

    activity = defaultdict(dict)
    hours = defaultdict(dict)
    articles = defaultdict(dict)
    edits = files = admin = changed_bytes = new_articles = 0
    active_articles = []
    embed = DiscordMessage("embed", "daily_overview", settings["webhookURL"])
    embed["title"] = _("Daily overview")
    embed["url"] = client.create_article_path("Special:Statistics")
    embed.set_author(settings["wikiname"], client.create_article_path(""))
    if not result:
        if not settings["send_empty_overview"]:
            return  # no changes in this day
        embed["description"] = _("No activity")
    else:
        for item in result:
            if "actionhidden" in item or "suppressed" in item or "userhidden" in item:
                # Such entries still carry a type (edit/new/log) but many other values are
                # hidden, which would end in a KeyError below, so they are not counted.
                continue
            activity = add_to_dict(activity, item["user"])
            # Bucket the entry by the UTC hour it happened in
            hours = add_to_dict(hours, datetime.datetime.strptime(item["timestamp"], "%Y-%m-%dT%H:%M:%SZ").replace(
                minute=0, second=0, tzinfo=datetime.timezone.utc))
            if item["type"] == "edit":
                edits += 1
                changed_bytes += item["newlen"] - item["oldlen"]
                if in_content_namespace(item["ns"]):
                    articles = add_to_dict(articles, item["title"])
            elif item["type"] == "new":
                if in_content_namespace(item["ns"]):
                    new_articles += 1
                changed_bytes += item["newlen"]
            elif item["type"] == "log":
                if item["logtype"] == item["logaction"] == "upload":
                    files += 1
                if item["logtype"] in ["delete", "merge", "block", "protect", "import", "rights",
                                       "abusefilter", "interwiki", "managetags"]:
                    admin += 1
        overall = round(new_articles + edits * 0.1 + files * 0.3 + admin * 0.1 + math.fabs(changed_bytes * 0.001), 2)
        if activity:
            active_users = []
            for user, numberu in Counter(activity).most_common(3):  # find most active users
                active_users.append(user + ngettext(" ({} action)", " ({} actions)", numberu).format(numberu))
            for article, numbere in Counter(articles).most_common(3):  # find most edited articles
                active_articles.append(article + ngettext(" ({} edit)", " ({} edits)", numbere).format(numbere))
            v = hours.values()
            active_hours = []
            # Take every hour that ties for the highest count, so all of them share one amount
            for hour, numberh in Counter(hours).most_common(list(v).count(max(v))):  # find most active hours
                active_hours.append("<t:" + str(int(hour.timestamp())) + ":t>")
                houramount = ngettext(" ({} action)", " ({} actions)", numberh).format(numberh)
        else:
            active_users = active_hours = [_("But nobody came")]  # a reference to my favorite game of all the time, sorry ^_^
            houramount = ""
        if not active_articles:
            active_articles = [_("But nobody came")]
        messages = daily_overview_sync({"edits": edits, "new_files": files, "admin_actions": admin,
                                        "bytes_changed": changed_bytes, "new_articles": new_articles,
                                        "unique_editors": len(activity), "day_score": overall})
        fields = (
            (ngettext("Most active user", "Most active users", len(active_users)), ', '.join(active_users)),
            (ngettext("Most edited article", "Most edited articles", len(active_articles)), ', '.join(active_articles)),
            (_("Edits made"), messages["edits"]), (_("New files"), messages["new_files"]),
            (_("Admin actions"), messages["admin_actions"]), (_("Bytes changed"), messages["bytes_changed"]),
            (_("New articles"), messages["new_articles"]), (_("Unique contributors"), messages["unique_editors"]),
            (ngettext("Most active hour", "Most active hours", len(active_hours)), ', '.join(active_hours) + houramount),
            (_("Day score"), messages["day_score"])
        )
        for name, value in fields:
            embed.add_field(name, value, inline=True)
    embed.finish_embed()
    send_to_discord(embed, meta=DiscordMessageMetadata("POST"))
2018-06-21 23:35:24 +00:00
2018-09-30 16:14:44 +00:00
2021-04-25 11:20:58 +00:00
def rc_processor(change, changed_categories):
    """Prepares essential information for both embed and compact message format.

    Works out the event name of a recent change, runs the pre hooks, lets the matching
    formatter build the message, applies auto suppression for deletion events, runs the
    post hooks and hands the message over to send_to_discord.

    :param change: a single recent change entry; "user" is overwritten when the user is hidden.
    :param changed_categories: category changes passed to the context for edit/new events.
    :return: None. Returns early when the event is ignored by the settings, cleared by a
        pre hook, of type "categorize", or has no formatter.
    :raises Exception: whatever the formatter raised, when "error_tolerance" is 0 or lower.
    """
    from src.misc import LinkParser
    link_parser = LinkParser(client.WIKI_JUST_DOMAIN)
    metadata = DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None),
                                      page_id=change.get("pageid", None))
    logger.debug(change)
    context = Context(settings["appearance"]["mode"], "recentchanges", settings["webhookURL"], client,
                      formatters_i18n, settings)
    discord_message: Optional[DiscordMessage]
    if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]:  # if event is hidden using suppression
        context.event = "suppressed"
        run_hooks(pre_hooks, context, change)
        if not context.event:
            return
        try:
            discord_message = default_message(context.event, formatter_hooks)(context, change)
        except NoFormatter:
            return
        except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must never be swallowed here
            if settings.get("error_tolerance", 1) > 0:
                discord_message = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
    else:
        if "commenthidden" not in change:
            link_parser.feed(change.get("parsedcomment", ""))
            parsed_comment = link_parser.new_string
        else:
            parsed_comment = _("~~hidden~~")
        if not parsed_comment and context.message_type == "embed" and settings["appearance"].get("embed", {}).get(
                "show_no_description_provided", True):
            parsed_comment = _("No description provided")
        context.set_parsedcomment(parsed_comment)
        if "userhidden" in change:
            change["user"] = _("hidden")
        if change.get("ns", -1) in settings.get("ignored_namespaces", ()):
            return
        # Read the type once with a fallback, so an entry without a type reaches the
        # "unknown" branch below instead of failing with a KeyError.
        change_type = change.get("type", "unknown")
        if change_type in ["edit", "new"]:
            logger.debug("List of categories in essential_info: {}".format(changed_categories))
            identification_string = change_type
            context.set_categories(changed_categories)
        elif change_type == "categorize":
            return
        elif change_type == "log":
            identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"],
                                                                   logaction=change["logaction"])
        else:
            identification_string = change_type  # If event doesn't have a type
        if identification_string in settings["ignored"]:
            return
        context.event = identification_string
        run_hooks(pre_hooks, context, change)
        if not context.event:
            return
        try:
            discord_message = default_message(context.event, formatter_hooks)(context, change)
        except NoFormatter:
            return
        except Exception:  # see the note on the first formatter call
            if settings.get("error_tolerance", 1) > 0:
                discord_message = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
        if context.event in ("delete/delete", "delete/delete_redir") and AUTO_SUPPRESSION_ENABLED:  # TODO Move it into a hook?
            delete_messages(dict(pageid=change.get("pageid")))
        elif context.event == "delete/event" and AUTO_SUPPRESSION_ENABLED:
            logparams = change.get('logparams', {"ids": []})
            if settings["appearance"]["mode"] == "embed":
                redact_messages(logparams.get("ids", []), 1, logparams.get("new", {}))
            else:
                for logid in logparams.get("ids", []):
                    delete_messages(dict(logid=logid))
        elif context.event == "delete/revision" and AUTO_SUPPRESSION_ENABLED:
            logparams = change.get('logparams', {"ids": []})
            if logparams.get("type", "") in ("revision", "logging", "oldimage"):
                if settings["appearance"]["mode"] == "embed":
                    redact_messages(logparams.get("ids", []), 0, logparams.get("new", {}))
                    if "content" in logparams.get("new", {}) and settings.get("appearance", {}).get("embed", {}).get(
                            "show_edit_changes", False):  # Also redact revisions in the middle and next ones in case of content (diffs leak)
                        redact_messages(find_middle_next(logparams.get("ids", []), change.get("pageid", -1)), 0,
                                        {"content": ""})
                else:
                    for revid in logparams.get("ids", []):
                        delete_messages(dict(revid=revid))
    run_hooks(post_hooks, discord_message, metadata, context, change)
    if discord_message:
        discord_message.finish_embed()
    # Called even when the formatter failed and discord_message is None
    send_to_discord(discord_message, metadata)
2018-06-21 23:35:24 +00:00
def abuselog_processing(entry):
    """Format a single abuse log entry and send it to Discord.

    The message goes to the "abuselog_webhookURL" setting when it is present and to
    "webhookURL" otherwise.

    :param entry: the abuse log entry handed to the hooks and the formatter.
    :return: None. Returns early when "abuselog" is ignored by the settings, when a pre hook
        clears the event, or when no formatter exists.
    :raises Exception: whatever the formatter raised, when "error_tolerance" is 0 or lower.
    """
    action = "abuselog"
    if action in settings["ignored"]:
        return
    context = Context(settings["appearance"]["mode"], "abuselog",
                      settings.get("abuselog_webhookURL", settings["webhookURL"]), client, formatters_i18n, settings)
    context.event = action
    run_hooks(pre_hooks, context, entry)
    if not context.event:
        return
    discord_message: Optional[DiscordMessage]
    try:
        discord_message = default_message(context.event, formatter_hooks)(context, entry)
    except NoFormatter:
        return
    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must never be swallowed here
        if settings.get("error_tolerance", 1) > 0:
            discord_message = None  # It's handled by send_to_discord, we still want other code to run
        else:
            raise
    metadata = DiscordMessageMetadata("POST")
    run_hooks(post_hooks, discord_message, metadata, context, entry)
    # discord_message is None after a tolerated formatter error; calling finish_embed() on it
    # would raise AttributeError, so guard it the same way rc_processor does.
    if discord_message:
        discord_message.finish_embed()
    send_to_discord(discord_message, metadata)
2018-09-30 16:14:44 +00:00
2019-04-16 11:54:01 +00:00
load_extensions()
# Create the wiki and client objects, then log in and download wiki information
wiki = Wiki(rc_processor, abuselog_processing)
client = src.api.client.Client(formatter_hooks, wiki)
if TESTING or settings["fandom_discussions"]["enabled"]:
    import src.discussions
    src.discussions.inject_client(client)  # Not the prettiest but gets the job done
try:
    # Logging in is attempted only when both bot credentials are configured
    if settings["wiki_bot_login"] and settings["wiki_bot_password"]:
        wiki.log_in()
        time.sleep(2.0)
    wiki.init_info()
except requests.exceptions.ConnectionError:
    logger.critical("A connection can't be established with the wiki. Exiting...")
    sys.exit(1)
time.sleep(3.0)  # this timeout is to prevent timeouts. It seems Fandom does not like our ~2-3 request in under a second
if not settings["rc_enabled"]:
    logger.info("Script started! RC is disabled however, this means no recent changes will be sent :c")
else:
    logger.info("Script started! Fetching newest changes...")
    # A limitrefetch of -1 means the regular "limit" setting is used for the first fetch
    wiki.fetch(amount=settings["limit"] if settings["limitrefetch"] == -1 else settings["limitrefetch"])
    client.schedule(wiki.fetch, every=settings["cooldown"])
    if settings["overview"]:
        try:
            # Parsing validates the configured time; it is then re-emitted zero padded as HH:MM
            overview_time = time.strptime(settings["overview_time"], '%H:%M')
            client.schedule(day_overview, client,
                            at="{}:{}".format(str(overview_time.tm_hour).zfill(2),
                                              str(overview_time.tm_min).zfill(2)))
            del overview_time
        except ValueError:
            logger.error("Invalid time format! Currentely: {}. Note: It needs to be in HH:MM format.".format(
                settings["overview_time"]))
    client.schedule(wiki.clear_cache, at="00:00")
2018-09-30 16:14:44 +00:00
# noinspection PyUnreachableCode
2018-06-18 15:26:03 +00:00
2018-09-30 16:14:44 +00:00
2019-02-13 09:12:18 +00:00
if TESTING:
    # Test mode: run the recent changes fetch, the daily overview and the discussions fetch
    # once each, then exit with status 0 instead of entering the scheduler loop.
    logger.debug("DEBUGGING ")
    storage["rcid"] = 1  # overwrite the stored rcid before the test fetch
    wiki.fetch(amount=5)
    day_overview(client)
    import src.discussions
    src.discussions.fetch_discussions()
    logger.info("Test has succeeded without premature exceptions.")
    sys.exit(0)
# Main loop: run the scheduled jobs once a second until interrupted.
while True:
    try:
        # The sleep is inside the try block on purpose: the loop spends nearly all of its
        # time sleeping, so that is where Ctrl-C almost always arrives.
        time.sleep(1.0)
        client.scheduler.run()
    except KeyboardInterrupt:
        logger.info("Shutting down...")
        break