from dataclasses import dataclass
from typing import Optional
import re
import logging
import aiohttp
from src.exceptions import *
from src.database import db_cursor, db_connection
from src.formatters.rc import embed_formatter, compact_formatter
from src.misc import parse_link
from src.i18n import langs
import src.discord
import asyncio
from src.config import settings
# noinspection PyPackageRequirements
from bs4 import BeautifulSoup

logger = logging.getLogger("rcgcdb.wiki")

# Log events (logtype/logaction) this bot knows how to format
supported_logs = ["protect/protect", "protect/modify", "protect/unprotect", "upload/overwrite", "upload/upload",
                  "delete/delete", "delete/delete_redir", "delete/restore", "delete/revision", "delete/event",
                  "import/upload", "import/interwiki", "merge/merge", "move/move", "move/move_redir",
                  "protect/move_prot", "block/block", "block/unblock", "block/reblock", "rights/rights",
                  "rights/autopromote", "abusefilter/modify", "abusefilter/create", "interwiki/iw_add",
                  "interwiki/iw_edit", "interwiki/iw_delete", "curseprofile/comment-created",
                  "curseprofile/comment-edited", "curseprofile/comment-deleted", "curseprofile/comment-purged",
                  "curseprofile/profile-edited", "curseprofile/comment-replied", "contentmodel/change",
                  "sprite/sprite", "sprite/sheet", "sprite/slice", "managetags/create", "managetags/delete",
                  "managetags/activate", "managetags/deactivate", "tag/update", "cargo/createtable",
                  "cargo/deletetable", "cargo/recreatetable", "cargo/replacetable", "upload/revert"]


@dataclass
class Wiki:
	mw_messages: Optional[int] = None  # key into the global mw_msgs cache, see process_mwmsgs below
	fail_times: int = 0  # number of times the connection to the wiki failed for client-side reasons (HTTP 400-499)
	session: Optional[aiohttp.ClientSession] = None

	@staticmethod
	async def fetch_wiki(extended, script_path, session: aiohttp.ClientSession) -> aiohttp.ClientResponse:
		url_path = script_path + "api.php"
		amount = 20
		if extended:
			params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
				"meta": "allmessages|siteinfo",
				"utf8": 1, "tglimit": "max", "tgprop": "displayname",
				"rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
				"rclimit": amount, "rcshow": "!bot", "rctype": "edit|new|log|categorize",
				"ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
				"amenableparser": 1, "amincludelocal": 1, "siprop": "namespaces|general"}
		else:
			params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
				"meta": "siteinfo", "utf8": 1,
				"tglimit": "max", "rcshow": "!bot", "tgprop": "displayname",
				"rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
				"rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"}
		try:
			response = await session.get(url_path, params=params)
		except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.exceptions.TimeoutError):
			logger.exception("A connection error occurred while requesting {}".format(url_path))
			raise WikiServerError
		return response
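
	# Illustrative call (hypothetical wiki URL and session object; extended=True also pulls MW messages and siteinfo):
	# response = await Wiki.fetch_wiki(True, "https://examplewiki.gamepedia.com/", shared_session)
	# data = await response.json()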

	@staticmethod
	async def safe_request(url, *keys):
		try:
			async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(2.0)) as session:
				request = await session.get(url, timeout=5, allow_redirects=False)
				request.raise_for_status()
				json_request = await request.json(encoding="UTF-8")
		except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, aiohttp.ClientResponseError, asyncio.exceptions.TimeoutError):
			# ClientResponseError also catches the non-2xx statuses surfaced by raise_for_status() above
			logger.exception("Reached connection error for request on link {url}".format(url=url))
		else:
			try:
				for item in keys:
					json_request = json_request[item]
			except KeyError:
				logger.warning(
					"Failure while extracting data from request on key {key} in {change}".format(key=item, change=request))
				return None
			return json_request
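
	# Illustrative usage (hypothetical URL; kept as a comment so nothing runs at import time):
	# tags = await Wiki.safe_request("https://examplewiki.gamepedia.com/api.php?action=query&list=tags&format=json", "query", "tags")
	# Each extra key drills one level into the returned JSON; any failure (connection, bad status, missing key) yields None.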

	async def fail_add(self, wiki_url, status):
		logger.debug("Increasing fail_times to {}".format(self.fail_times+3))
		self.fail_times += 3
		if self.fail_times > 9:
			await self.remove(wiki_url, status)

	async def check_status(self, wiki_url, status):
		if 199 < status < 300:
			self.fail_times = max(self.fail_times - 1, 0)  # reward a successful response, but never drop below zero
		elif 400 < status < 500:  # ignore the 400 status itself since it might be our fault
			await self.fail_add(wiki_url, status)
			logger.warning("Wiki {} responded with HTTP code {}, increased fail_times to {}, skipping...".format(wiki_url, status, self.fail_times))
			raise WikiError
		elif 499 < status < 600:
			logger.warning("Wiki {} responded with HTTP code {}, skipping...".format(wiki_url, status))
			raise WikiServerError
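
	# Worked example of the counters above (hypothetical wiki): a wiki answering 404 four times in a row
	# goes 3 -> 6 -> 9 -> 12 in fail_add; the fourth call crosses the > 9 threshold and triggers remove().
	# Each 2xx response in between subtracts 1 and therefore delays that removal.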

	@staticmethod
	async def remove(wiki_url, reason):
		await src.discord.wiki_removal(wiki_url, reason)
		await src.discord.wiki_removal_monitor(wiki_url, reason)
		db_cursor.execute('DELETE FROM rcgcdw WHERE wiki = ?', (wiki_url,))
		logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(db_cursor.rowcount, wiki_url))
		db_connection.commit()

	async def pull_comment(self, comment_id, WIKI_API_PATH):
		try:
			comment = await self.safe_request(
				"{wiki}?action=comment&do=getRaw&comment_id={comment}&format=json".format(
					wiki=WIKI_API_PATH, comment=comment_id), "text")
			logger.debug("Got the following comment from the API: {}".format(comment))
			if comment is None:
				raise TypeError
		except (TypeError, AttributeError):
			logger.exception("Could not resolve the comment text.")
		except KeyError:
			logger.exception("CurseProfile extension API did not respond with a valid comment content.")
		else:
			if len(comment) > 1000:
				comment = comment[0:1000] + "…"
			return comment
		return ""


async def process_cats(event: dict, local_wiki: Wiki, category_msgs: dict, categorize_events: dict):
	"""Process categories based on local MW messages."""
	if event["type"] == "categorize":
		if "commenthidden" not in event:
			if local_wiki.mw_messages is not None:
				cat_title = event["title"].split(':', 1)[1]
				# I so much hate this, blame Markus for making me do this
				if event["revid"] not in categorize_events:
					categorize_events[event["revid"]] = {"new": set(), "removed": set()}
				comment_to_match = re.sub(r'<.*?a>', '', event["parsedcomment"])
				wiki_cat_mw_messages = category_msgs[local_wiki.mw_messages]
				if wiki_cat_mw_messages[0][1] in comment_to_match or wiki_cat_mw_messages[2][1] in comment_to_match:  # Added to category
					categorize_events[event["revid"]]["new"].add(cat_title)
					logger.debug("Matched {} to added category for {}".format(cat_title, event["revid"]))
				elif wiki_cat_mw_messages[1][1] in comment_to_match or wiki_cat_mw_messages[3][1] in comment_to_match:  # Removed from category
					categorize_events[event["revid"]]["removed"].add(cat_title)
					logger.debug("Matched {} to removed category for {}".format(cat_title, event["revid"]))
				else:
					logger.debug(
						"Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format(
							wiki_cat_mw_messages[0], wiki_cat_mw_messages[1], wiki_cat_mw_messages[2], wiki_cat_mw_messages[3],
							comment_to_match))
			else:
				logger.warning(
					"Init information not available, could not read category information. Please restart the bot.")
		else:
			logger.debug("Log entry got suppressed, ignoring entry.")


async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict):
	"""
	Parse the initial extended wiki information and update local_wiki.mw_messages, which holds a key
	into mw_msgs, a dict mapping id -> tuple of the MW messages used for category changes.
	It's constructed this way to avoid duplicating message data in memory, so Markus doesn't complain about
	high RAM usage. The trade-off is CPU time, since every wiki has to be checked against the stored
	tuples of MW messages for a match.

	:param wiki_response: extended API response containing query.allmessages
	:param local_wiki: Wiki object whose mw_messages key gets updated
	:param mw_msgs: global cache of deduplicated MW message tuples
	:return:
	"""
	msgs = []
	for message in wiki_response["query"]["allmessages"]:
		if "missing" not in message:  # ignore missing strings
			msgs.append((message["name"], re.sub(r'\[\[.*?\]\]', '', message["*"])))
		else:
			logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
	msgs = tuple(msgs)
	for key, stored_msgs in mw_msgs.items():
		if msgs == stored_msgs:
			local_wiki.mw_messages = key
			return
	# no identical entry in mw_msgs, so store a new one
	key = len(mw_msgs)
	mw_msgs[key] = msgs  # a bit messy for sure, but there should never be a reason to remove mw_msgs entries one by one
	local_wiki.mw_messages = key


# db_wiki: webhook, wiki, lang, display, wikiid, rcid, postid
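# e.g. a hypothetical row: ("webhook_id/token", "https://examplewiki.gamepedia.com/", "en", 1, None, 12345678, None)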
async def essential_info(change: dict, changed_categories, local_wiki: Wiki, db_wiki: tuple, target: tuple, paths: tuple, request: dict):
	"""Prepares essential information for both embed and compact message format."""
	def _(string: str) -> str:
		"""Our own gettext wrapper, to keep translations usable from async code"""
		return lang.gettext(string)

	lang = langs[target[0][0]]
	ngettext = lang.ngettext
	# recent_changes = RecentChangesClass()  # TODO Look into replacing RecentChangesClass with local_wiki
	appearance_mode = embed_formatter if target[0][1] > 0 else compact_formatter
	if "actionhidden" in change or "suppressed" in change:  # if event is hidden using suppression
		await appearance_mode("suppressed", change, "", changed_categories, local_wiki, target, _, ngettext, paths)
		return
	if "commenthidden" not in change:
		parsed_comment = parse_link(paths[3], change["parsedcomment"])
		parsed_comment = re.sub(r"(`|_|\*|~|{|}|\|\|)", "\\\\\\1", parsed_comment, 0)
	else:
		parsed_comment = _("~~hidden~~")
	if not parsed_comment:
		parsed_comment = None
	if change["type"] in ["edit", "new"]:
		changed_categories = changed_categories.get(change["revid"], None)
		logger.debug("List of categories in essential_info: {}".format(changed_categories))
		if "userhidden" in change:
			change["user"] = _("hidden")
		identification_string = change["type"]
	elif change["type"] == "log":
		identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"])
	elif change["type"] == "categorize":
		return
	else:
		identification_string = change["type"]
	additional_data = {"namespaces": request["query"]["namespaces"], "tags": {}}
	for tag in request["query"]["tags"]:
		try:
			additional_data["tags"][tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
		except KeyError:
			additional_data["tags"][tag["name"]] = None  # Tags with no display name
	await appearance_mode(identification_string, change, parsed_comment, changed_categories, local_wiki, target, _, ngettext, paths, additional_data=additional_data)