from __future__ import annotations

import functools
import time
import re
import logging
import asyncio

import aiohttp
import requests

from api.util import default_message
from src.misc import prepare_settings
from src.discord.queue import messagequeue, QueueEntry
from mw_messages import MWMessages
from src.exceptions import *
from src.queue_handler import dbmanager
from src.api.hooks import formatter_hooks
from src.api.client import Client
from src.api.context import Context
from src.discord.message import DiscordMessage, DiscordMessageMetadata, StackedDiscordMessage
from src.i18n import langs
from src.statistics import Statistics, Log, LogType
from src.config import settings
# noinspection PyPackageRequirements
from bs4 import BeautifulSoup
from collections import OrderedDict, defaultdict, namedtuple
from typing import Union, Optional, TYPE_CHECKING

Settings = namedtuple("Settings", ["lang", "display"])
logger = logging.getLogger("rcgcdb.wiki")

# wiki_removal_reasons = {410: _("wiki deleted"), 404: _("wiki deleted"), 401: _("wiki inaccessible"),
#                         402: _("wiki inaccessible"), 403: _("wiki inaccessible"), 1000: _("discussions disabled")}

if TYPE_CHECKING:
    from src.domain import Domain

MESSAGE_LIMIT = settings.get("message_limit", 30)
class Wiki:
    def __init__(self, script_url: str, rc_id: Optional[int], discussion_id: Optional[str]):
        self.script_url: str = script_url
        self.session: aiohttp.ClientSession = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(total=6))
        self.statistics: Statistics = Statistics(rc_id, discussion_id)
        self.mw_messages: Optional[MWMessages] = None
        self.tags: dict[str, Optional[str]] = {}  # Tag can be None if hidden
        self.first_fetch_done: bool = False
        self.domain: Optional[Domain] = None
        self.rc_targets: Optional[defaultdict[Settings, list[str]]] = None
        self.discussion_targets: Optional[defaultdict[Settings, list[str]]] = None
        self.client: Client = Client(formatter_hooks, self)
        self.message_history: list[StackedDiscordMessage] = list()
        self.namespaces: Optional[dict] = None
        self.recache_requested: bool = False
        self.session_requests = requests.Session()
        self.session_requests.headers.update(settings["header"])
    @property
    def rc_id(self):
        return self.statistics.last_action

    @property
    def discussion_id(self):
        return self.statistics.last_post

    @property
    def last_request(self):
        return self.statistics.last_request

    @last_request.setter
    def last_request(self, value):
        self.statistics.last_request = value

# async def remove(self, reason):
# logger.info("Removing a wiki {}".format(self.script_url))
# await src.discord.wiki_removal(self.script_url, reason)
# await src.discord.wiki_removal_monitor(self.script_url, reason)
# async with db.pool().acquire() as connection:
# result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', self.script_url)
# logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, self.script_url))
def add_message(self, message: StackedDiscordMessage):
self.message_history.append(message)
if len(self.message_history) > MESSAGE_LIMIT*len(self.rc_targets):
self.message_history = self.message_history[len(self.message_history)-MESSAGE_LIMIT*len(self.rc_targets):]
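
    # Hedged usage sketch (assumes "stacked" is a StackedDiscordMessage and the
    # wiki has two rc_targets, so the history is capped at 2 * MESSAGE_LIMIT
    # entries; appending past the cap drops the oldest messages):
    #   wiki.add_message(stacked)
    #   assert len(wiki.message_history) <= 2 * MESSAGE_LIMIT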
def set_domain(self, domain: Domain):
self.domain = domain
# def find_middle_next(ids: List[str], pageid: int) -> set: # TODO Properly re-implement for RcGcDb
# """To address #235 RcGcDw should now remove diffs in next revs relative to redacted revs to protect information in revs that revert revdeleted information.
#
# :arg ids - list
# :arg pageid - int
#
# :return list"""
# ids = [int(x) for x in ids]
# result = set()
# ids.sort() # Just to be sure, sort the list to make sure it's always sorted
# messages = db_cursor.execute("SELECT revid FROM event WHERE pageid = ? AND revid >= ? ORDER BY revid",
# (pageid, ids[0],))
# all_in_page = [x[0] for x in messages.fetchall()]
# for id in ids:
# try:
# result.add(all_in_page[all_in_page.index(id) + 1])
# except (KeyError, ValueError):
# logger.debug(f"Value {id} not in {all_in_page} or no value after that.")
# return result - set(ids)
def search_message_history(self, params: dict) -> list[tuple[StackedDiscordMessage, list[int]]]:
"""Search self.message_history for messages which match all properties in params and return them in a list"""
output = []
for message in self.message_history:
returned_matches_for_stacked = message.filter(params)
if returned_matches_for_stacked:
output.append((message, [x[0] for x in returned_matches_for_stacked]))
return output
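
    # Hedged usage sketch (filter keys mirror the DiscordMessageMetadata fields
    # used elsewhere in this file, such as page_id/rev_id/log_id; the value 42
    # is invented). The second tuple element lists the indexes of the matching
    # messages within each stack:
    #   for stacked_message, message_ids in wiki.search_message_history({"page_id": 42}):
    #       ...
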
def delete_messages(self, params: dict):
"""Delete certain messages from message_history which DiscordMessageMetadata matches all properties in params"""
# Delete all messages with given IDs
for stacked_message, ids in self.search_message_history(params):
stacked_message.delete_message_by_id(ids)
# If all messages were removed, send a DELETE to Discord
if len(stacked_message.message_list) == 0:
messagequeue.add_message(QueueEntry(stacked_message, [stacked_message.webhook], self, method="DELETE"))
else:
messagequeue.add_message(QueueEntry(stacked_message, [stacked_message.webhook], self, method="PATCH"))
    def redact_messages(self, ids: list[int], mode: str, censored_properties: dict):
        # ids can refer to multiple events, and search does not support additive mode, so we have to loop it for all ids
        for revlogid in ids:
            for stacked_message, message_ids in self.search_message_history({mode: revlogid}):
                for message in [message for num, message in enumerate(stacked_message.message_list) if num in message_ids]:
                    if "user" in censored_properties and "url" in message["author"]:
                        message["author"]["name"] = _("hidden")
                        message["author"].pop("url")
                    if "action" in censored_properties and "url" in message:
                        message["title"] = _("~~hidden~~")
                        message["embed"].pop("url")
                    if "content" in censored_properties and "fields" in message:
                        message["embed"].pop("fields")
                    if "comment" in censored_properties:
                        message["description"] = _("~~hidden~~")
                messagequeue.add_message(QueueEntry(stacked_message, [stacked_message.webhook], self, method="PATCH"))
# async def downtime_controller(self, down, reason=None):
# if down:
# self.fail_times += 1
# if self.fail_times > 20:
# await self.remove(reason)
# else:
# self.fail_times -= 1
    async def update_targets(self) -> None:
        """This function generates all possible variations of outputs that we need to generate messages for.

        Fills self.rc_targets and self.discussion_targets with defaultdict[Settings, list[str]] structures
        mapping the settings of the webhooks to the lists of webhooks using those settings."""
        target_settings: defaultdict[Settings, list[str]] = defaultdict(list)
        discussion_targets: defaultdict[Settings, list[str]] = defaultdict(list)
        async for webhook in dbmanager.fetch_rows("SELECT webhook, lang, display, rcid, postid FROM rcgcdw WHERE wiki = $1", self.script_url):
            if webhook['rcid'] == -1 and webhook['postid'] == '-1':
                await self.remove_wiki_from_db(4)
            if webhook['rcid'] != -1:
                target_settings[Settings(webhook["lang"], webhook["display"])].append(webhook["webhook"])
            if webhook['postid'] != '-1':
                discussion_targets[Settings(webhook["lang"], webhook["display"])].append(webhook["webhook"])
        self.rc_targets = target_settings
        self.discussion_targets = discussion_targets
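
    # Hedged illustration (values invented): after update_targets() runs,
    # rc_targets may look like
    #   {Settings(lang="en", display=1): ["<webhook_id>/<webhook_token>"],
    #    Settings(lang="de", display=0): ["<webhook_id>/<webhook_token>", ...]}
    # so each change is formatted once per distinct (lang, display) pair and
    # then fanned out to every webhook sharing those settings.
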
    def parse_mw_request_info(self, request_data: dict, url: str):
        """Parse a JSON response from MediaWiki, logging all warnings and raising MediaWikiError on MediaWiki errors"""
        errors: list = request_data.get("errors", [])
        if errors:
            raise MediaWikiError(str(errors))
        warnings: list = request_data.get("warnings", [])
        if warnings:
            for warning in warnings:
                logger.warning("MediaWiki returned the following warning: {code} - {text} on {url}.".format(
                    code=warning["code"], text=warning.get("text", warning.get("*", "")), url=url
                ))
        return request_data
    async def api_request(self, params: Union[str, OrderedDict], *json_path: str, timeout: int = 10,
                          allow_redirects: bool = False) -> dict:
        """Method to GET request data from the wiki's API with error handling, including recognition of MediaWiki errors.

        Parameters:
            params (str, OrderedDict): a string or collections.OrderedDict object containing query parameters
            json_path (str): *args taking strings as values. After the request is parsed as JSON, data is extracted following the given JSON path
            timeout (int, float) (default=10): time limit in seconds for receiving a full response from the server before raising TimeoutError
            allow_redirects (bool) (default=False): switches whether the request should follow redirects or not

        Returns:
            request_content (dict): a dict resulting from JSON extraction of the HTTP GET request with given json_path

        Raises:
            ServerError: When the connection with the wiki failed due to a server error
            ClientError: When the connection with the wiki failed due to a client error
            KeyError: When json_path contained keys that weren't found in the response JSON
            BadRequest: When the params argument is of the wrong type
            MediaWikiError: When MediaWiki returns an error
        """
        # Making request
        try:
            if isinstance(params, str):
                request = await self.session.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=timeout,
                                                 allow_redirects=allow_redirects)
            elif isinstance(params, OrderedDict):
                params["errorformat"] = "raw"
                request = await self.session.get(self.script_url + "api.php", params=params, timeout=timeout,
                                                 allow_redirects=allow_redirects)
            else:
                raise BadRequest(params)
        except (aiohttp.ServerConnectionError, aiohttp.ServerTimeoutError) as exc:
            logger.warning("Reached {error} error for request on link {url}".format(error=repr(exc),
                                                                                    url=self.script_url + str(params)))
            raise ServerError
        # Catching HTTP errors
        if 499 < request.status < 600:
            raise ServerError
        elif request.status == 302:
            logger.critical(
                "Redirect detected! Either the wiki address set in the script settings (wiki field) is incorrect, the wiki got removed, or it is returning a false value. Please provide the real URL to the wiki; the current URL redirects to {}".format(
                    request.url))
        elif 399 < request.status < 500:
            logger.error("Request returned ClientError status code on {url}".format(url=request.url))
            self.statistics.update(Log(type=LogType.HTTP_ERROR, title="{} error".format(request.status), details=str(request.headers) + "\n" + str(request.url)))
            raise ClientError(request)
        else:
            # JSON Extraction
            try:
                request_json = self.parse_mw_request_info(await request.json(encoding="UTF-8"), str(request.url))
                for item in json_path:
                    request_json = request_json[item]
            except ValueError:
                logger.warning("ValueError when extracting JSON data on {url}".format(url=request.url))
                raise ServerError
            except MediaWikiError:
                logger.exception("MediaWiki error on request: {}".format(request.url))
                raise
            except KeyError:
                logger.exception("KeyError while iterating over json_path, full response: {}".format(await request.text()))
                raise
            return request_json
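
    # Hedged usage sketch (parameter values are illustrative; must be awaited
    # inside the event loop owning this Wiki's session):
    #   general = await wiki.api_request(
    #       OrderedDict(action="query", format="json", meta="siteinfo", siprop="general"),
    #       "query", "general")
    # The trailing "query", "general" json_path arguments unwrap the response so
    # only the siteinfo "general" mapping is returned.
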
    def sync_api_request(self, params: Union[str, OrderedDict], *json_path: str, timeout: int = 10,
                         allow_redirects: bool = False) -> dict:
        """Synchronous version of api_request, created for compatibility with the RcGcDw API. Note that unlike
        api_request, string params are expected to start with "?"."""
        try:
            if isinstance(params, str):
                request = self.session_requests.get(self.script_url + "api.php" + params + "&errorformat=raw", timeout=timeout, allow_redirects=allow_redirects)
            elif isinstance(params, OrderedDict):
                request = self.session_requests.get(self.script_url + "api.php", params=params, timeout=timeout, allow_redirects=allow_redirects)
            else:
                raise BadRequest(params)
        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.exceptions.ChunkedEncodingError) as exc:
            logger.warning("Reached {error} error for request on link {url}".format(error=repr(exc),
                                                                                    url=self.client.WIKI_API_PATH + str(params)))
            raise ServerError
        if 499 < request.status_code < 600:
            raise ServerError
        elif request.status_code == 302:
            logger.critical(
                "Redirect detected! Either the wiki address set in the script settings (wiki field) is incorrect, the wiki got removed, or it is returning a false value. Please provide the real URL to the wiki; the current URL redirects to {}".format(
                    request.url))
        elif 399 < request.status_code < 500:
            logger.error("Request returned ClientError status code on {url}".format(url=request.url))
            self.statistics.update(Log(type=LogType.HTTP_ERROR, title="{} error".format(request.status_code), details=str(request.headers) + "\n" + str(request.url)))
            raise ClientError(request)
        else:
            try:
                request_json = self.parse_mw_request_info(request.json(), request.url)
                for item in json_path:
                    request_json = request_json[item]
            except ValueError:
                logger.warning("ValueError when extracting JSON data on {url}".format(url=request.url))
                raise ServerError
            except MediaWikiError:
                logger.exception("MediaWiki error on request: {}".format(request.url))
                raise
            except KeyError:
                logger.exception("KeyError while iterating over json_path, full response: {}".format(request.json()))
                raise
            return request_json
    async def fetch_wiki(self, amount=10) -> dict:
        """Fetch recent changes and site info from the wiki, and additionally tags and MediaWiki messages on the first fetch."""
        if self.mw_messages is None:
            params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                  "meta": "allmessages|siteinfo",
                                  "utf8": 1, "tglimit": "max", "tgprop": "displayname",
                                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                                  "rclimit": amount, "rcshow": "!bot", "rctype": "edit|new|log|categorize",
                                  "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                                  "amenableparser": 1, "amincludelocal": 1, "siprop": "namespaces|general"})
        else:
            params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                  "meta": "siteinfo", "utf8": 1, "rcshow": "!bot",
                                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                                  "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"})
        try:
            response = await self.api_request(params=params)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
            logger.error("A connection error occurred while requesting {}".format(params))
            raise WikiServerError(e)
        return response
    async def scan(self, amount=10):
        """Main routine for fetching and processing the RecentChanges of a wiki.

        :raises WikiServerError
        """
        while True:  # Trap event in case there are more changes needed to be fetched
            try:
                request = await self.fetch_wiki(amount=amount)
                self.client.last_request = request
            except WikiServerError as e:
                # If WikiServerError came up twice within the recent 2 minutes, reraise the exception, otherwise wait 2 seconds and retry
                self.statistics.update(Log(type=LogType.CONNECTION_ERROR, title=str(e.exception)))
                if self.statistics.recent_connection_errors() > 1:
                    raise
                await asyncio.sleep(2.0)
                continue
            if not self.mw_messages or self.recache_requested:
                process_cachable(request, self)
            try:
                recent_changes = request["query"]["recentchanges"]
                recent_changes.reverse()
            except KeyError:
                raise WikiError
            if self.rc_id in (0, None, -1):
                if len(recent_changes) > 0:
                    self.statistics.update(last_action=recent_changes[-1]["rcid"])
                    dbmanager.add(("UPDATE rcgcdw SET rcid = $1 WHERE wiki = $2 AND ( rcid != -1 OR rcid IS NULL )",
                                   (recent_changes[-1]["rcid"], self.script_url)))
                else:
                    self.statistics.update(last_action=0)
                    dbmanager.add(("UPDATE rcgcdw SET rcid = 0 WHERE wiki = $1 AND ( rcid != -1 OR rcid IS NULL )", (self.script_url,)))
                return  # TODO Add a log entry?
            categorize_events = {}
            new_events = 0
            self.statistics.last_checked_rc = int(time.time())
            highest_id = self.rc_id  # Pretty sure that will be faster
            for change in recent_changes:
                if change["rcid"] > highest_id and amount != 450:
                    new_events += 1
                    if new_events == 10:
                        # call the function again with max limit for more results, ignore the ones in this request
                        logger.debug("There were too many new events, queuing wiki with 450 limit.")
                        amount = 450
                        break
                await process_cats(change, self, categorize_events)
            else:  # adequate amount of changes
                message_list = []  # Collect all messages so they can be efficiently merged in the Discord message sender
                for change in recent_changes:  # Yeah, second loop since the categories require to be all loaded up
                    if change["rcid"] > self.rc_id:
                        if highest_id is None or change["rcid"] > highest_id:  # make sure highest_id is really the highest rcid, but allow entries with potentially lesser rcids to come after without breaking the cycle
                            highest_id = change["rcid"]
                        for combination, webhooks in self.rc_targets.items():
                            message = await rc_processor(self, change, categorize_events.get(change.get("revid"), None), combination, webhooks)
                            if message is None:
                                break
                            message.wiki = self
                            message_list.append(QueueEntry(message, webhooks, self))
                messagequeue.add_messages(message_list)
                self.statistics.update(last_action=highest_id)
                dbmanager.add(("UPDATE rcgcdw SET rcid = $1 WHERE wiki = $2 AND ( rcid != -1 OR rcid IS NULL )", (highest_id, self.script_url)))  # If this is not enough for the future, save rcid in the message sending function to make sure we always send all of the changes
                return
    async def remove_webhook_from_db(self, reason: str):
        raise NotImplementedError

    async def remove_wiki_from_db(self, reason: str):
        raise NotImplementedError  # TODO
    async def fetch_discussions(self, params: dict) -> tuple[aiohttp.ClientResponse, dict]:
        header = settings["header"].copy()  # copy so the shared settings dict is not mutated
        header["Accept"] = "application/hal+json"
        async with aiohttp.ClientSession(headers=header,
                                         timeout=aiohttp.ClientTimeout(6.0)) as session:
            url_path = "{wiki}wikia.php".format(wiki=self.script_url)
            try:
                feeds_response = await session.get(url_path, params=params)
                feeds_response.raise_for_status()
            except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError,
                    aiohttp.ClientResponseError, aiohttp.TooManyRedirects) as e:
                logger.error("A connection error occurred while requesting {}".format(url_path))
                raise WikiServerError(e)
            return feeds_response, await feeds_response.json(encoding="UTF-8")
    def pull_comment(self, comment_id):
        try:
            comment = self.sync_api_request("?action=comment&do=getRaw&comment_id={comment}&format=json".format(comment=comment_id), "text")
            logger.debug("Got the following comment from the API: {}".format(comment))
        except (ServerError, MediaWikiError):
            pass
        except (BadRequest, ClientError):
            logger.exception("Some kind of issue while creating a request (most likely client error).")
        except KeyError:
            logger.exception("CurseProfile extension API did not respond with valid comment content.")
        else:
            if len(comment) > 1000:
                comment = comment[0:1000] + "…"
            return comment
        return ""


def process_cachable(response: dict, wiki_object: Wiki) -> None:
    """This function processes cacheable objects such as MediaWiki system messages and wiki tag display names to be used
    for processing of DiscordMessages, and saves them in a wiki object."""
    mw_messages = response.get("query", {}).get("allmessages", [])
    final_mw_messages = dict()
    for msg in mw_messages:
        if "missing" not in msg:  # ignore missing strings
            final_mw_messages[msg["name"]] = re.sub(r'\[\[.*?]]', '', msg["*"])
        else:
            logger.warning("Could not fetch the MW message translation for: {}".format(msg["name"]))
    wiki_object.mw_messages = MWMessages(final_mw_messages)
    for tag in response["query"]["tags"]:
        try:
            wiki_object.tags[tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
        except KeyError:
            wiki_object.tags[tag["name"]] = None
    wiki_object.namespaces = response["query"]["namespaces"]
    wiki_object.recache_requested = False
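
# Hedged illustration (shapes inferred from the code above; values invented):
# after process_cachable() the wiki object holds, for example,
#   wiki_object.tags == {"mw-new-redirect": "New redirect", "hiddentag": None}
# with tag display names reduced to plain text and hidden tags stored as None,
# while wiki_object.mw_messages holds the category-change system messages with
# [[links]] stripped out.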


async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: Settings, webhooks: list) -> Optional[DiscordMessage]:
    """This function takes the vital information, communicates with a formatter and constructs a DiscordMessage with it.
    It creates the DiscordMessageMetadata, LinkParser and Context objects and prepares the comment."""
    from src.misc import LinkParser
    link_parser = LinkParser(wiki.client.WIKI_ARTICLE_PATH)
    metadata = DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None),
                                      page_id=change.get("pageid", None))
    context = Context("embed" if display_options.display > 0 else "compact", "recentchanges", webhooks, wiki.client, langs[display_options.lang]["formatters"], prepare_settings(display_options.display))
    if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]:  # if the event is hidden using suppression
        context.event = "suppressed"
        try:
            discord_message: Optional[DiscordMessage] = await asyncio.get_event_loop().run_in_executor(
                None, functools.partial(default_message("suppressed", context.message_type, formatter_hooks), context, change))
        except Exception:
            if settings.get("error_tolerance", 1) > 0:
                discord_message: Optional[DiscordMessage] = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
    else:
        if "commenthidden" not in change:
            link_parser.feed(change.get("parsedcomment", ""))
            parsed_comment = link_parser.new_string
        else:
            parsed_comment = context._("~~hidden~~")
        if not parsed_comment and context.message_type == "embed" and settings["appearance"].get("embed", {}).get(
                "show_no_description_provided", True):
            parsed_comment = context._("No description provided")
        context.set_parsedcomment(parsed_comment)
        if "userhidden" in change:
            change["user"] = context._("hidden")
        if change.get("ns", -1) in settings.get("ignored_namespaces", ()):
            return
        if change["type"] in ["edit", "new"]:
            logger.debug("List of categories in essential_info: {}".format(changed_categories))
            identification_string = change["type"]
            context.set_categories(changed_categories)
        elif change["type"] == "categorize":
            return
        elif change["type"] == "log":
            identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"],
                                                                   logaction=change["logaction"])
        else:
            identification_string = change.get("type", "unknown")  # If the event doesn't have a type
        if identification_string in settings["ignored"]:
            return
        context.event = identification_string
        try:
            discord_message: Optional[DiscordMessage] = await asyncio.get_event_loop().run_in_executor(None,
                functools.partial(default_message(identification_string, context.message_type, formatter_hooks), context,
                                  change))
        except Exception:
            if settings.get("error_tolerance", 1) > 0:
                discord_message: Optional[DiscordMessage] = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
    if identification_string in ("delete/delete", "delete/delete_redir"):  # TODO Move it into a hook?
        wiki.delete_messages(dict(page_id=change.get("pageid")))
    elif identification_string == "delete/event":
        logparams = change.get('logparams', {"ids": []})
        if settings["appearance"]["mode"] == "embed":
            wiki.redact_messages(logparams.get("ids", []), "log_id", logparams.get("new", {}))
        else:
            for logid in logparams.get("ids", []):
                wiki.delete_messages(dict(log_id=logid))
    elif identification_string == "delete/revision":
        logparams = change.get('logparams', {"ids": []})
        if settings["appearance"]["mode"] == "embed":
            wiki.redact_messages(logparams.get("ids", []), "rev_id", logparams.get("new", {}))
        else:
            for revid in logparams.get("ids", []):
                wiki.delete_messages(dict(rev_id=revid))
    if discord_message:  # TODO How to react when None? (crash in formatter), probably bad handling atm
        discord_message.finish_embed()
        discord_message.metadata = metadata
        return discord_message


async def process_cats(event: dict, local_wiki: Wiki, categorize_events: dict):
    """Process categories based on local MW messages."""
    if event["type"] == "categorize":
        if "commenthidden" not in event:
            if local_wiki.mw_messages is not None:
                cat_title = event["title"].split(':', 1)[1]
                # I so much hate this, blame Markus for making me do this
                if event["revid"] not in categorize_events:
                    categorize_events[event["revid"]] = {"new": set(), "removed": set()}
                comment_to_match = re.sub(r'<.*?a>', '', event["parsedcomment"])
                if local_wiki.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or local_wiki.mw_messages["recentchanges-page-added-to-category-bundled"] in comment_to_match:  # Added to category
                    categorize_events[event["revid"]]["new"].add(cat_title)
                    # logger.debug("Matched {} to added category for {}".format(cat_title, event["revid"]))
                elif local_wiki.mw_messages["recentchanges-page-removed-from-category"] in comment_to_match or local_wiki.mw_messages["recentchanges-page-removed-from-category-bundled"] in comment_to_match:  # Removed from category
                    categorize_events[event["revid"]]["removed"].add(cat_title)
                    # logger.debug("Matched {} to removed category for {}".format(cat_title, event["revid"]))
                else:
                    logger.debug(
                        "Unknown match for category change with messages {} and comment_to_match {}".format(local_wiki.mw_messages, comment_to_match))
            else:
                logger.warning(
                    "Init information not available, could not read category information. Please restart the bot.")
        else:
            logger.debug("Log entry got suppressed, ignoring entry.")
# This function has been removed. While its implementation seems sound, it should be considered only if we find
# performance concerns with RcGcDb.
# async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict):
# """
# This function is made to parse the initial wiki extended information to update local_wiki.mw_messages that stores the key
# to mw_msgs that is a dict storing id: tuple where tuple is a set of MW messages for categories.
# The reason it's constructed this way is to prevent duplication of data in memory so Markus doesn't complain about
# high RAM usage. It does however affect CPU performance as every wiki requires to check the list for the matching
# tuples of MW messages.
#
# :param wiki_response:
# :param local_wiki:
# :param mw_msgs:
# :return:
# """
# msgs = []
# for message in wiki_response["query"]["allmessages"]:
# if not "missing" in message: # ignore missing strings
# msgs.append((message["name"], re.sub(r'\[\[.*?\]\]', '', message["*"])))
# else:
# logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
# msgs = tuple(msgs)
# for key, set in mw_msgs.items():
# if msgs == set:
# local_wiki.mw_messages = key
# return
# # if same entry is not in mw_msgs
# key = len(mw_msgs)
# mw_msgs[key] = msgs # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one
# local_wiki.mw_messages = key