# RcGcDb/src/wiki.py
from __future__ import annotations
import time
from dataclasses import dataclass
import re
import logging
import aiohttp

from src.api.util import default_message
from src.mw_messages import MWMessages
from src.exceptions import *
from src.database import db
from src.queue_handler import DBHandler
from src.formatters.rc import embed_formatter, compact_formatter
from src.formatters.discussions import feeds_embed_formatter, feeds_compact_formatter
from src.api.hooks import formatter_hooks
from src.api.client import Client
from src.api.context import Context
from src.misc import parse_link
from src.i18n import langs
from src.wiki_ratelimiter import RateLimiter
from src.statistics import Statistics, Log, LogType
import src.discord
import asyncio
from src.config import settings
# noinspection PyPackageRequirements
from bs4 import BeautifulSoup
from collections import OrderedDict, defaultdict, namedtuple
from typing import Union, Optional, TYPE_CHECKING

logger = logging.getLogger("rcgcdb.wiki")
wiki_removal_reasons = {410: _("wiki deleted"), 404: _("wiki deleted"), 401: _("wiki inaccessible"),
                        402: _("wiki inaccessible"), 403: _("wiki inaccessible"), 1000: _("discussions disabled")}

if TYPE_CHECKING:
    from src.domain import Domain


class Wiki:
    def __init__(self, script_url: str, rc_id: Optional[int], discussion_id: Optional[int]):
        self.script_url: str = script_url
        self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(total=6.0))
        self.statistics: Statistics = Statistics(rc_id, discussion_id)
        self.mw_messages: Optional[MWMessages] = None
        self.first_fetch_done: bool = False
        self.domain: Optional[Domain] = None
        self.tags: dict = {}  # cache of tag displaynames, filled in by scan()
        self.client: Client = Client(self)

    @property
    def rc_id(self):
        return self.statistics.last_action
    # async def remove(self, reason):
    #     logger.info("Removing a wiki {}".format(self.script_url))
    #     await src.discord.wiki_removal(self.script_url, reason)
    #     await src.discord.wiki_removal_monitor(self.script_url, reason)
    #     async with db.pool().acquire() as connection:
    #         result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', self.script_url)
    #         logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, self.script_url))

    def set_domain(self, domain: Domain):
        self.domain = domain

    # async def downtime_controller(self, down, reason=None):
    #     if down:
    #         self.fail_times += 1
    #         if self.fail_times > 20:
    #             await self.remove(reason)
    #     else:
    #         self.fail_times -= 1
    async def generate_targets(self) -> defaultdict[namedtuple, list[str]]:
        """This function generates all possible variations of outputs for which we need to generate messages.

        :returns defaultdict[namedtuple, list[str]] - where namedtuple carries the settings shared by the webhooks in the list"""
        Settings = namedtuple("Settings", ["lang", "display"])
        target_settings: defaultdict[Settings, list[str]] = defaultdict(list)
        async for webhook in DBHandler.fetch_rows("SELECT webhook, lang, display FROM rcgcdw WHERE wiki = $1 AND (rcid != -1 OR rcid IS NULL)", self.script_url):
            target_settings[Settings(webhook["lang"], webhook["display"])].append(webhook["webhook"])
        return target_settings
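    # Illustrative sketch (not part of the runtime path): generate_targets() groups
    # webhooks by their shared settings, so one formatted message can be fanned out
    # to every webhook with the same language and display mode. Values below are
    # invented for the example.
    #
    #   targets = {
    #       Settings(lang="en", display=1): ["webhook-token-a", "webhook-token-b"],
    #       Settings(lang="de", display=0): ["webhook-token-c"],
    #   }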
    def parse_mw_request_info(self, request_data: dict, url: str):
        """A function parsing a request JSON message from MediaWiki, logging all warnings and raising on MediaWiki errors"""
        # any([True for k in request_data.keys() if k in ("error", "errors")])
        errors: list = request_data.get("errors", [])  # Is it ugly? I don't know tbh
        if errors:
            raise MediaWikiError(str(errors))
        warnings: list = request_data.get("warnings", [])
        if warnings:
            for warning in warnings:
                logger.warning("MediaWiki returned the following warning: {code} - {text} on {url}.".format(
                    code=warning["code"], text=warning.get("text", warning.get("*", "")), url=url
                ))
        return request_data
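    # Illustrative sketch of the payload parse_mw_request_info() expects: when an
    # errorformat is requested on the query, MediaWiki attaches "errors"/"warnings"
    # lists next to the regular result. The exact field set depends on the
    # errorformat; the values here are invented for the example.
    #
    #   {
    #       "warnings": [{"code": "unrecognizedparams", "module": "main", "text": "..."}],
    #       "query": {...}
    #   }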
    async def api_request(self, params: Union[str, OrderedDict], *json_path: str, timeout: int = 10,
                          allow_redirects: bool = False) -> dict:
        """Method to GET request data from the wiki's API with error handling, including recognition of MediaWiki errors.

        Parameters:
            params (str, OrderedDict): a string or collections.OrderedDict object containing query parameters
            json_path (str): *args taking strings as values. After the request is parsed as JSON, data will be extracted following the given JSON path
            timeout (int, float) (default=10): time limit in seconds for receiving a full response from the server before raising TimeoutError
            allow_redirects (bool) (default=False): switches whether the request should follow redirects or not

        Returns:
            request_content (dict): a dict resulting from JSON extraction of the HTTP GET request with the given json_path

        Raises:
            ServerError: when the connection with the wiki failed due to a server error
            ClientError: when the connection with the wiki failed due to a client error
            KeyError: when json_path contained keys that weren't found in the response JSON
            BadRequest: when the params argument is of the wrong type
            MediaWikiError: when MediaWiki returns an error
        """
        # Making request
        try:
            if isinstance(params, str):  # TODO Make it so there are some default arguments like warning/error format appended
                request = await self.session.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=timeout,
                                                 allow_redirects=allow_redirects)
            elif isinstance(params, OrderedDict):
                params["errorformat"] = "raw"
                request = await self.session.get(self.script_url + "api.php", params=params, timeout=timeout,
                                                 allow_redirects=allow_redirects)
            else:
                raise BadRequest(params)
        except (aiohttp.ServerConnectionError, aiohttp.ServerTimeoutError) as exc:
            logger.warning("Reached {error} error for request on link {url}".format(error=repr(exc),
                                                                                    url=self.script_url + str(params)))
            raise ServerError
        # Catching HTTP errors
        if 499 < request.status < 600:
            raise ServerError
        elif request.status == 302:
            logger.critical(
                "Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect, the wiki got removed, or it is giving us a false value. Please provide the real URL to the wiki; the current URL redirects to {}".format(
                    request.url))
        elif 399 < request.status < 500:
            logger.error("Request returned ClientError status code on {url}".format(url=request.url))
            if request.status in wiki_removal_reasons:
                self.statistics.update(Log(type=LogType.HTTP_ERROR, title="{} error".format(request.status), details=str(request.headers) + "\n" + str(request.url)))
            raise ClientError(request)
        else:
            # JSON Extraction
            try:
                request_json = self.parse_mw_request_info(await request.json(encoding="UTF-8"), str(request.url))
                for item in json_path:
                    request_json = request_json[item]
            except ValueError:
                logger.warning("ValueError when extracting JSON data on {url}".format(url=request.url))
                raise ServerError
            except MediaWikiError:
                logger.exception("MediaWiki error on request: {}".format(request.url))
                raise
            except KeyError:
                logger.exception("KeyError while iterating over json_path, full response: {}".format(await request.json()))
                raise
            return request_json
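    # Illustrative usage sketch (variable names invented for the example): fetching
    # site metadata and drilling into the response with a json_path, relying on
    # api_request() to surface MediaWiki errors as exceptions.
    #
    #   params = OrderedDict({"action": "query", "format": "json", "meta": "siteinfo", "siprop": "general"})
    #   general = await wiki.api_request(params, "query", "general")
    #   logger.debug(general["sitename"])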
    async def fetch_wiki(self, amount=10) -> dict:
        if self.mw_messages is None:
            params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                  "meta": "allmessages|siteinfo",
                                  "utf8": 1, "tglimit": "max", "tgprop": "displayname",
                                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                                  "rclimit": amount, "rcshow": "!bot", "rctype": "edit|new|log|categorize",
                                  "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                                  "amenableparser": 1, "amincludelocal": 1, "siprop": "namespaces|general"})
        else:
            params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                  "meta": "siteinfo", "utf8": 1,
                                  "tglimit": "max", "rcshow": "!bot", "tgprop": "displayname",
                                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                                  "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"})
        try:
            response = await self.api_request(params=params)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.error("A connection error occurred while requesting {}".format(params))
            raise WikiServerError
        return response
    async def scan(self, amount=10):
        while True:  # Trap event in case there are more changes needed to be fetched
            try:
                request = await self.fetch_wiki(amount=amount)
                self.client.last_request = request
            except WikiServerError as e:
                self.statistics.update(Log(type=LogType.CONNECTION_ERROR, title=str(e)))  # We need more details in WikiServerError exception
                return
            if not self.mw_messages:
                mw_messages = request.get("query", {}).get("allmessages", [])
                final_mw_messages = dict()
                for msg in mw_messages:
                    if "missing" not in msg:  # ignore missing strings
                        final_mw_messages[msg["name"]] = re.sub(r'\[\[.*?]]', '', msg["*"])
                    else:
                        logger.warning("Could not fetch the MW message translation for: {}".format(msg["name"]))
                self.mw_messages = MWMessages(final_mw_messages)
            try:
                recent_changes = request["query"]["recentchanges"]
                recent_changes.reverse()
            except KeyError:
                raise WikiError
            if self.rc_id in (0, None, -1):
                if len(recent_changes) > 0:
                    self.statistics.last_action = recent_changes[-1]["rcid"]
                    DBHandler.add(("UPDATE rcgcdw SET rcid = $1 WHERE wiki = $2 AND ( rcid != -1 OR rcid IS NULL )",
                                   (recent_changes[-1]["rcid"], self.script_url)))
                else:
                    self.statistics.last_action = 0
                    DBHandler.add(("UPDATE rcgcdw SET rcid = 0 WHERE wiki = $1 AND ( rcid != -1 OR rcid IS NULL )", (self.script_url,)))
                return  # TODO Add a log entry?
            categorize_events = {}
            new_events = 0
            self.statistics.last_checked_rc = int(time.time())
            highest_id = self.rc_id  # Pretty sure that will be faster
            for change in recent_changes:
                if change["rcid"] > highest_id and amount != 450:
                    new_events += 1
                    if new_events == 10:
                        # call the function again with max limit for more results, ignore the ones in this request
                        logger.debug("There were too many new events, queuing wiki with 450 limit.")
                        amount = 450
                        break
                await process_cats(change, self, categorize_events)
            else:  # adequate amount of changes
                for tag in request["query"]["tags"]:
                    try:
                        self.tags[tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
                    except KeyError:
                        self.tags[tag["name"]] = None  # Tags with no displayname
                targets = await self.generate_targets()  # TODO Cache this in Wiki and update based on Redis updates
                message_list = defaultdict(list)
                for change in recent_changes:  # Yeah, second loop since the categories require to be all loaded up
                    if change["rcid"] > self.rc_id:
                        if highest_id is None or change["rcid"] > highest_id:  # make sure highest_id is really the highest rcid, but allow entries with potentially lesser rcids to come after without breaking the cycle
                            highest_id = change["rcid"]
                        for combination, webhooks in targets.items():
                            message, metadata = await rc_processor(self, change, categorize_events, combination, webhooks)
                break
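    # Illustrative sketch of the recentchanges payload scan() iterates over, as
    # returned by the MediaWiki query API (values invented for the example):
    #
    #   {"query": {"recentchanges": [
    #       {"rcid": 1234, "type": "edit", "title": "Some page", "revid": 567,
    #        "user": "Editor", "parsedcomment": "...", "timestamp": "2021-07-04T11:39:50Z"}
    #   ], "tags": [{"name": "mw-undo", "displayname": "Undo"}]}}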


async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: namedtuple("Settings", ["lang", "display"]), webhooks: list) -> tuple[src.discord.DiscordMessage, src.discord.DiscordMessageMetadata]:
    from src.misc import LinkParser
    link_parser = LinkParser()
    metadata = src.discord.DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None),
                                                  page_id=change.get("pageid", None))
    context = Context(display_options, webhooks, wiki.client)
    if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]:  # if event is hidden using suppression
        context.event = "suppressed"
        try:
            discord_message: Optional[src.discord.DiscordMessage] = default_message("suppressed", display_options.display, formatter_hooks)(context, change)
        except NoFormatter:
            return None, metadata
        except Exception:
            if settings.get("error_tolerance", 1) > 0:
                discord_message: Optional[src.discord.DiscordMessage] = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
    else:
        if "commenthidden" not in change:
            link_parser.feed(change.get("parsedcomment", ""))
            parsed_comment = link_parser.new_string
        else:
            parsed_comment = _("~~hidden~~")
        if not parsed_comment and context.message_type == "embed" and settings["appearance"].get("embed", {}).get(
                "show_no_description_provided", True):
            parsed_comment = _("No description provided")
        context.set_parsedcomment(parsed_comment)
        if "userhidden" in change:
            change["user"] = _("hidden")
        if change.get("ns", -1) in settings.get("ignored_namespaces", ()):
            return None, metadata
        if change["type"] in ["edit", "new"]:
            logger.debug("List of categories in essential_info: {}".format(changed_categories))
            identification_string = change["type"]
            context.set_categories(changed_categories)
        elif change["type"] == "categorize":
            return None, metadata
        elif change["type"] == "log":
            identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"],
                                                                   logaction=change["logaction"])
        else:
            identification_string = change.get("type", "unknown")  # If event doesn't have a type
        if identification_string in settings["ignored"]:
            return None, metadata
        context.event = identification_string
        try:
            discord_message: Optional[src.discord.DiscordMessage] = default_message(identification_string, display_options.display, formatter_hooks)(context, change)
        except Exception:
            if settings.get("error_tolerance", 1) > 0:
                discord_message: Optional[src.discord.DiscordMessage] = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
        if identification_string in ("delete/delete", "delete/delete_redir"):  # TODO Move it into a hook?
            delete_messages(dict(pageid=change.get("pageid")))
        elif identification_string == "delete/event":
            logparams = change.get('logparams', {"ids": []})
            if settings["appearance"]["mode"] == "embed":
                redact_messages(logparams.get("ids", []), 1, logparams.get("new", {}))
            else:
                for logid in logparams.get("ids", []):
                    delete_messages(dict(logid=logid))
        elif identification_string == "delete/revision":
            logparams = change.get('logparams', {"ids": []})
            if settings["appearance"]["mode"] == "embed":
                redact_messages(logparams.get("ids", []), 0, logparams.get("new", {}))
            else:
                for revid in logparams.get("ids", []):
                    delete_messages(dict(revid=revid))
    if discord_message is not None:
        discord_message.finish_embed()
    return discord_message, metadata
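# Illustrative usage sketch (variable names invented): rc_processor() yields one
# message per (settings, webhooks) combination produced by generate_targets().
#
#   for combination, webhooks in (await wiki.generate_targets()).items():
#       message, metadata = await rc_processor(wiki, change, categorize_events, combination, webhooks)
#       # message is None when a formatter failed and error_tolerance permits it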


@dataclass
class Wiki_old:
    mw_messages: int = None
    fail_times: int = 0  # corresponding to amount of times connection with wiki failed for client reasons (400-499)
    session: aiohttp.ClientSession = None
    rc_active: int = 0
    last_check: float = 0.0
    last_discussion_check: float = 0.0
    @staticmethod
    async def fetch_wiki(extended, script_path, session: aiohttp.ClientSession, ratelimiter: RateLimiter, amount=20) -> aiohttp.ClientResponse:
        await ratelimiter.timeout_wait()
        url_path = script_path + "api.php"
        if extended:
            params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                      "meta": "allmessages|siteinfo",
                      "utf8": 1, "tglimit": "max", "tgprop": "displayname",
                      "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                      "rclimit": amount, "rcshow": "!bot", "rctype": "edit|new|log|categorize",
                      "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                      "amenableparser": 1, "amincludelocal": 1, "siprop": "namespaces|general"}
        else:
            params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                      "meta": "siteinfo", "utf8": 1,
                      "tglimit": "max", "rcshow": "!bot", "tgprop": "displayname",
                      "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                      "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"}
        try:
            response = await session.get(url_path, params=params)
            ratelimiter.timeout_add(1.0)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.error("A connection error occurred while requesting {}".format(url_path))
            raise WikiServerError
        return response
    @staticmethod
    async def fetch_feeds(wiki, session: aiohttp.ClientSession) -> aiohttp.ClientResponse:
        url_path = "{wiki}wikia.php".format(wiki=wiki)
        params = {"controller": "DiscussionPost", "method": "getPosts", "includeCounters": "false", "sortDirection": "descending", "sortKey": "creation_date", "limit": 20}
        try:
            response = await session.get(url_path, params=params)
            response.raise_for_status()
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError, aiohttp.ClientResponseError):
            logger.error("A connection error occurred while requesting {}".format(url_path))
            raise WikiServerError
        return response
    @staticmethod
    async def safe_request(url, ratelimiter, *keys):
        await ratelimiter.timeout_wait()
        try:
            async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(total=6.0)) as session:
                request = await session.get(url)
                ratelimiter.timeout_add(1.0)
                request.raise_for_status()
                json_request = await request.json(encoding="UTF-8")
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.error("Reached connection error for request on link {url}".format(url=url))
        else:
            try:
                for item in keys:
                    json_request = json_request[item]
            except KeyError:
                logger.warning(
                    "Failure while extracting data from request on key {key} in {change}".format(key=item, change=json_request))
                return None
            return json_request
    async def fail_add(self, wiki_url, status):
        logger.debug("Increasing fail_times to {}".format(self.fail_times + 3))
        self.fail_times += 3
        if self.fail_times > 9:
            await self.remove(wiki_url, status)
2020-07-19 23:40:20 +00:00
async def check_status(self, wiki_url, status):
if 199 < status < 300:
2020-07-26 21:52:24 +00:00
self.fail_times -= 1
pass
elif 400 < status < 500: # ignore 400 error since this might be our fault
2020-07-26 21:52:24 +00:00
await self.fail_add(wiki_url, status)
2020-07-19 23:40:20 +00:00
logger.warning("Wiki {} responded with HTTP code {}, increased fail_times to {}, skipping...".format(wiki_url, status, self.fail_times))
raise WikiError
elif 499 < status < 600:
2020-07-19 23:40:20 +00:00
logger.warning("Wiki {} responded with HTTP code {}, skipping...".format(wiki_url, status, self.fail_times))
raise WikiServerError
    @staticmethod
    async def remove(wiki_url, reason):
        logger.info("Removing a wiki {}".format(wiki_url))
        await src.discord.wiki_removal(wiki_url, reason)
        await src.discord.wiki_removal_monitor(wiki_url, reason)
        async with db.pool().acquire() as connection:
            result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', wiki_url)
            logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, wiki_url))
    async def pull_comment(self, comment_id, WIKI_API_PATH, rate_limiter):
        try:
            comment = await self.safe_request(
                "{wiki}?action=comment&do=getRaw&comment_id={comment}&format=json".format(wiki=WIKI_API_PATH,
                                                                                          comment=comment_id), rate_limiter, "text")
            logger.debug("Got the following comment from the API: {}".format(comment))
            if comment is None:
                raise TypeError
        except (TypeError, AttributeError):
            logger.exception("Could not resolve the comment text.")
        except KeyError:
            logger.exception("CurseProfile extension API did not respond with a valid comment content.")
        else:
            if len(comment) > 1000:
                comment = comment[0:1000] + "…"
            return comment
        return ""


async def process_cats(event: dict, local_wiki: Wiki, categorize_events: dict):
    """Process categories based on local MW messages."""
    if event["type"] == "categorize":
        if "commenthidden" not in event:
            if local_wiki.mw_messages is not None:
                cat_title = event["title"].split(':', 1)[1]
                # I so much hate this, blame Markus for making me do this
                if event["revid"] not in categorize_events:
                    categorize_events[event["revid"]] = {"new": set(), "removed": set()}
                comment_to_match = re.sub(r'<.*?a>', '', event["parsedcomment"])
                if local_wiki.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or local_wiki.mw_messages["recentchanges-page-added-to-category-bundled"] in comment_to_match:  # Added to category
                    categorize_events[event["revid"]]["new"].add(cat_title)
                    # logger.debug("Matched {} to added category for {}".format(cat_title, event["revid"]))
                elif local_wiki.mw_messages["recentchanges-page-removed-from-category"] in comment_to_match or local_wiki.mw_messages["recentchanges-page-removed-from-category-bundled"] in comment_to_match:  # Removed from category
                    categorize_events[event["revid"]]["removed"].add(cat_title)
                    # logger.debug("Matched {} to removed category for {}".format(cat_title, event["revid"]))
                else:
                    logger.debug(
                        "Unknown match for category change with messages {} and comment_to_match {}".format(local_wiki.mw_messages, comment_to_match))
            else:
                logger.warning(
                    "Init information not available, could not read category information. Please restart the bot.")
        else:
            logger.debug("Log entry got suppressed, ignoring entry.")


async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict):
    """
    This function parses the initial extended wiki information to update local_wiki.mw_messages, which stores the key
    into mw_msgs, a dict mapping id: tuple where the tuple is a set of MW messages for categories.
    The reason it's constructed this way is to prevent duplication of data in memory, so Markus doesn't complain about
    high RAM usage. It does, however, affect CPU performance, as every wiki requires checking the list for the matching
    tuples of MW messages.
    :param wiki_response:
    :param local_wiki:
    :param mw_msgs:
    :return:
    """
    msgs = []
    for message in wiki_response["query"]["allmessages"]:
        if "missing" not in message:  # ignore missing strings
            msgs.append((message["name"], re.sub(r'\[\[.*?\]\]', '', message["*"])))
        else:
            logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
    msgs = tuple(msgs)
    for key, stored_msgs in mw_msgs.items():
        if msgs == stored_msgs:
            local_wiki.mw_messages = key
            return
    # if the same entry is not in mw_msgs
    key = len(mw_msgs)
    mw_msgs[key] = msgs  # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries one by one
    local_wiki.mw_messages = key
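# Illustrative sketch of the shared mw_msgs store process_mwmsgs() deduplicates
# into (keys and message strings invented for the example); wikis with identical
# message tuples end up pointing at the same key:
#
#   mw_msgs = {
#       0: (("recentchanges-page-added-to-category", "added to category"), ...),
#   }
#   local_wiki.mw_messages = 0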


# db_wiki: webhook, wiki, lang, display, rcid, postid
async def essential_info(change: dict, changed_categories, local_wiki: Wiki, target: tuple, paths: tuple, request: dict,
                         rate_limiter: RateLimiter) -> src.discord.DiscordMessage:
    """Prepares essential information for both embed and compact message format."""
    _ = langs[target[0][0]]["wiki"].gettext
    changed_categories = changed_categories.get(change["revid"], None)
    # logger.debug("List of categories in essential_info: {}".format(changed_categories))
    appearance_mode = embed_formatter if target[0][1] > 0 else compact_formatter
    if "actionhidden" in change or "suppressed" in change:  # if event is hidden using suppression
        await appearance_mode("suppressed", change, "", changed_categories, local_wiki, target, paths, rate_limiter)
        return
    if "commenthidden" not in change:
        parsed_comment = parse_link(paths[3], change["parsedcomment"])
    else:
        parsed_comment = _("~~hidden~~")
    if not parsed_comment:
        parsed_comment = None
    if change["type"] in ["edit", "new"]:
        if "userhidden" in change:
            change["user"] = _("hidden")
        identification_string = change["type"]
    elif change["type"] == "log":
        identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"])
    elif change["type"] == "categorize":
        return
    else:
        identification_string = change["type"]
    additional_data = {"namespaces": request["query"]["namespaces"], "tags": {}}
    for tag in request["query"]["tags"]:
        try:
            additional_data["tags"][tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
        except KeyError:
            additional_data["tags"][tag["name"]] = None  # Tags with no displayname
    return await appearance_mode(identification_string, change, parsed_comment, changed_categories, local_wiki, target, paths, rate_limiter, additional_data=additional_data)
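# Illustrative sketch of the `target` tuple essential_info() receives (values
# invented for the example): target[0] holds the shared (lang, display) settings,
# as suggested by generate_targets() above, and the remainder identifies the
# webhooks those settings apply to.
#
#   target = (("en", 1), ["webhook-id/webhook-token"])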


async def essential_feeds(change: dict, comment_pages: dict, db_wiki, target: tuple) -> src.discord.DiscordMessage:
    """Prepares essential information for both embed and compact message format."""
    appearance_mode = feeds_embed_formatter if target[0][1] > 0 else feeds_compact_formatter
    identification_string = change["_embedded"]["thread"][0]["containerType"]
    comment_page = None
    if identification_string == "ARTICLE_COMMENT" and comment_pages is not None:
        comment_page = comment_pages.get(change["forumId"], None)
        if comment_page is not None:
            comment_page["fullUrl"] = "/".join(db_wiki["wiki"].split("/", 3)[:3]) + comment_page["relativeUrl"]
    return await appearance_mode(identification_string, change, target, db_wiki["wiki"], article_page=comment_page)