preparing for initial launch

This commit is contained in:
Frisk 2022-09-18 16:38:19 +02:00
parent 0b59ba8b5c
commit e327c90544
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
10 changed files with 225 additions and 1702 deletions

View file

@ -23,4 +23,5 @@ import extensions.base.translate
import extensions.base.discussions
import extensions.base.curseprofile
import extensions.base.interwiki
import extensions.base.renameuser
import extensions.base.renameuser
import extensions.base.rcgcdb

View file

@ -1,4 +1,3 @@
import logging
import json
from src.discord.message import DiscordMessage
from src.api import formatter

View file

@ -9,18 +9,14 @@ from collections import defaultdict, namedtuple
from typing import Generator
from contextlib import asynccontextmanager
from src.discord.queue import messagequeue
from src.argparser import command_line_args
from src.config import settings
from src.database import db_connection
from src.exceptions import *
from src.misc import get_paths, get_domain
from src.msgqueue import messagequeue, send_to_discord
from src.queue_handler import DBHandler
from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info, essential_feeds
from src.discord.discord import DiscordMessage, generic_msg_sender_exception_logger, stack_message_list
from src.queue_handler import UpdateDB
from src.wiki import Wiki, process_cats, essential_feeds
from src.wiki_ratelimiter import RateLimiter
from src.irc_feed import AioIRCCat
from src.domain_manager import domains
@ -74,122 +70,122 @@ async def message_sender():
shutdown(loop=asyncio.get_event_loop())
else:
logger.exception("Exception on DC message sender")
await generic_msg_sender_exception_logger(traceback.format_exc(), "Message sender exception")
# await generic_msg_sender_exception_logger(traceback.format_exc(), "Message sender exception") # TODO
async def discussion_handler():
"""Background task: poll the Fandom Discussions feed of every wiki registered in the
database and relay new posts to Discord.

NOTE(review): leading indentation has been stripped from this dump — the original
nesting must be restored before this function can execute.
"""
await asyncio.sleep(3.0) # Make some time before IRC code is executed, happens only once and saves if inside
try:
while True:
# One pass over every wiki whose Discussions feed is enabled (postid != '-1')
# or not yet initialised (postid IS NULL).
async with db.pool().acquire() as connection:
async with connection.transaction():
async for db_wiki in connection.cursor("SELECT DISTINCT wiki, rcid, postid FROM rcgcdw WHERE postid != '-1' OR postid IS NULL"):
try:
local_wiki = all_wikis[db_wiki["wiki"]] # set a reference to a wiki object from memory
except KeyError:
local_wiki = all_wikis[db_wiki["wiki"]] = Wiki()
local_wiki.rc_active = db_wiki["rcid"]
# Skip this wiki unless IRC reported fresh discussions for it or the
# irc_overtime window since the last check has elapsed.
if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and \
local_wiki.last_discussion_check+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic
continue
else:
try:
rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"])
except KeyError:
pass # to be expected
header = settings["header"]
header["Accept"] = "application/hal+json"
async with aiohttp.ClientSession(headers=header,
timeout=aiohttp.ClientTimeout(6.0)) as session:
try:
feeds_response = await local_wiki.fetch_feeds(db_wiki["wiki"], session)
except (WikiServerError, WikiError):
continue # ignore this wiki if it throws errors
try:
discussion_feed_resp = await feeds_response.json(encoding="UTF-8")
if "error" in discussion_feed_resp:
error = discussion_feed_resp["error"]
if error == "NotFoundException": # Discussions disabled
if db_wiki["rcid"] != -1: # RC feed is disabled
await connection.execute("UPDATE rcgcdw SET postid = $1 WHERE wiki = $2", "-1", db_wiki["wiki"])
else:
await local_wiki.remove(db_wiki["wiki"], 1000)
continue
raise WikiError
discussion_feed = discussion_feed_resp["_embedded"]["doc:posts"]
discussion_feed.reverse()
except aiohttp.ContentTypeError:
logger.exception("Wiki seems to be resulting in non-json content.")
continue
except asyncio.TimeoutError:
logger.debug("Timeout on reading JSON of discussion post feeed.")
continue
except:
# NOTE(review): bare except also traps asyncio.CancelledError here — confirm intended.
logger.exception("On loading json of response.")
continue
if db_wiki["postid"] is None: # new wiki, just get the last post to not spam the channel
if len(discussion_feed) > 0:
DBHandler.add(db_wiki["wiki"], discussion_feed[-1]["id"], True)
else:
DBHandler.add(db_wiki["wiki"], "0", True)
continue
# First pass: collect forum IDs of new article comments so their page names
# can be resolved with a single extra request below.
comment_events = []
targets = await generate_targets(db_wiki["wiki"], "AND NOT postid = '-1'")
for post in discussion_feed:
if post["_embedded"]["thread"][0]["containerType"] == "ARTICLE_COMMENT" and post["id"] > db_wiki["postid"]:
comment_events.append(post["forumId"])
comment_pages: dict = {}
if comment_events:
try:
comment_pages = await local_wiki.safe_request(
"{wiki}wikia.php?controller=FeedsAndPosts&method=getArticleNamesAndUsernames&stablePageIds={pages}&format=json".format(
wiki=db_wiki["wiki"], pages=",".join(comment_events)
), RateLimiter(), "articleNames")
except aiohttp.ClientResponseError: # Fandom can be funny sometimes... See #30
comment_pages = None
except:
if command_line_args.debug:
logger.exception("Exception on Feeds article comment request")
shutdown(loop=asyncio.get_event_loop())
else:
logger.exception("Exception on Feeds article comment request")
await generic_msg_sender_exception_logger(traceback.format_exc(),
"Exception on Feeds article comment request",
Post=str(post)[0:1000], Wiki=db_wiki["wiki"])
message_list = defaultdict(list)
for post in discussion_feed: # Yeah, second loop since the comments require an extra request
if post["id"] > db_wiki["postid"]:
for target in targets.items():
try:
message = await essential_feeds(post, comment_pages, db_wiki, target)
if message is not None:
message_list[target[0]].append(message)
except asyncio.CancelledError:
raise
except:
if command_line_args.debug:
logger.exception("Exception on Feeds formatter")
shutdown(loop=asyncio.get_event_loop())
else:
logger.exception("Exception on Feeds formatter")
await generic_msg_sender_exception_logger(traceback.format_exc(), "Exception in feed formatter", Post=str(post)[0:1000], Wiki=db_wiki["wiki"])
# Lets stack the messages
for messages in message_list.values():
messages = stack_message_list(messages)
for message in messages:
await send_to_discord(message)
if discussion_feed:
# Persists the newest processed post id — relies on `post` retaining its last
# loop value; the feed was reverse()d above, presumably oldest-to-newest — verify.
DBHandler.add(db_wiki["wiki"], post["id"], True)
await asyncio.sleep(delay=2.0) # hardcoded really doesn't need much more
await asyncio.sleep(delay=1.0) # Avoid lock on no wikis
except asyncio.CancelledError:
pass
except:
if command_line_args.debug:
raise # reraise the issue
else:
logger.exception("Exception on Feeds formatter")
await generic_msg_sender_exception_logger(traceback.format_exc(), "Discussion handler task exception", Wiki=db_wiki["wiki"])
# async def discussion_handler():
# await asyncio.sleep(3.0) # Make some time before IRC code is executed, happens only once and saves if inside
# try:
# while True:
# async with db.pool().acquire() as connection:
# async with connection.transaction():
# async for db_wiki in connection.cursor("SELECT DISTINCT wiki, rcid, postid FROM rcgcdw WHERE postid != '-1' OR postid IS NULL"):
# try:
# local_wiki = all_wikis[db_wiki["wiki"]] # set a reference to a wiki object from memory
# except KeyError:
# local_wiki = all_wikis[db_wiki["wiki"]] = Wiki()
# local_wiki.rc_active = db_wiki["rcid"]
# if db_wiki["wiki"] not in rcqueue.irc_mapping["fandom.com"].updated_discussions and \
# local_wiki.last_discussion_check+settings["irc_overtime"] > time.time(): # I swear if another wiki farm ever starts using Fandom discussions I'm gonna use explosion magic
# continue
# else:
# try:
# rcqueue.irc_mapping["fandom.com"].updated_discussions.remove(db_wiki["wiki"])
# except KeyError:
# pass # to be expected
# header = settings["header"]
# header["Accept"] = "application/hal+json"
# async with aiohttp.ClientSession(headers=header,
# timeout=aiohttp.ClientTimeout(6.0)) as session:
# try:
# feeds_response = await local_wiki.fetch_feeds(db_wiki["wiki"], session)
# except (WikiServerError, WikiError):
# continue # ignore this wiki if it throws errors
# try:
# discussion_feed_resp = await feeds_response.json(encoding="UTF-8")
# if "error" in discussion_feed_resp:
# error = discussion_feed_resp["error"]
# if error == "NotFoundException": # Discussions disabled
# if db_wiki["rcid"] != -1: # RC feed is disabled
# await connection.execute("UPDATE rcgcdw SET postid = $1 WHERE wiki = $2", "-1", db_wiki["wiki"])
# else:
# await local_wiki.remove(db_wiki["wiki"], 1000)
# continue
# raise WikiError
# discussion_feed = discussion_feed_resp["_embedded"]["doc:posts"]
# discussion_feed.reverse()
# except aiohttp.ContentTypeError:
# logger.exception("Wiki seems to be resulting in non-json content.")
# continue
# except asyncio.TimeoutError:
# logger.debug("Timeout on reading JSON of discussion post feeed.")
# continue
# except:
# logger.exception("On loading json of response.")
# continue
# if db_wiki["postid"] is None: # new wiki, just get the last post to not spam the channel
# if len(discussion_feed) > 0:
# DBHandler.add(db_wiki["wiki"], discussion_feed[-1]["id"], True)
# else:
# DBHandler.add(db_wiki["wiki"], "0", True)
# continue
# comment_events = []
# targets = await generate_targets(db_wiki["wiki"], "AND NOT postid = '-1'")
# for post in discussion_feed:
# if post["_embedded"]["thread"][0]["containerType"] == "ARTICLE_COMMENT" and post["id"] > db_wiki["postid"]:
# comment_events.append(post["forumId"])
# comment_pages: dict = {}
# if comment_events:
# try:
# comment_pages = await local_wiki.safe_request(
# "{wiki}wikia.php?controller=FeedsAndPosts&method=getArticleNamesAndUsernames&stablePageIds={pages}&format=json".format(
# wiki=db_wiki["wiki"], pages=",".join(comment_events)
# ), RateLimiter(), "articleNames")
# except aiohttp.ClientResponseError: # Fandom can be funny sometimes... See #30
# comment_pages = None
# except:
# if command_line_args.debug:
# logger.exception("Exception on Feeds article comment request")
# shutdown(loop=asyncio.get_event_loop())
# else:
# logger.exception("Exception on Feeds article comment request")
# await generic_msg_sender_exception_logger(traceback.format_exc(),
# "Exception on Feeds article comment request",
# Post=str(post)[0:1000], Wiki=db_wiki["wiki"])
# message_list = defaultdict(list)
# for post in discussion_feed: # Yeah, second loop since the comments require an extra request
# if post["id"] > db_wiki["postid"]:
# for target in targets.items():
# try:
# message = await essential_feeds(post, comment_pages, db_wiki, target)
# if message is not None:
# message_list[target[0]].append(message)
# except asyncio.CancelledError:
# raise
# except:
# if command_line_args.debug:
# logger.exception("Exception on Feeds formatter")
# shutdown(loop=asyncio.get_event_loop())
# else:
# logger.exception("Exception on Feeds formatter")
# await generic_msg_sender_exception_logger(traceback.format_exc(), "Exception in feed formatter", Post=str(post)[0:1000], Wiki=db_wiki["wiki"])
# # Lets stack the messages
# for messages in message_list.values():
# messages = stack_message_list(messages)
# for message in messages:
# await send_to_discord(message)
# if discussion_feed:
# DBHandler.add(db_wiki["wiki"], post["id"], True)
# await asyncio.sleep(delay=2.0) # hardcoded really doesn't need much more
# await asyncio.sleep(delay=1.0) # Avoid lock on no wikis
# except asyncio.CancelledError:
# pass
# except:
# if command_line_args.debug:
# raise # reraise the issue
# else:
# logger.exception("Exception on Feeds formatter")
# await generic_msg_sender_exception_logger(traceback.format_exc(), "Discussion handler task exception", Wiki=db_wiki["wiki"])
def shutdown(loop, signal=None):
@ -197,7 +193,7 @@ def shutdown(loop, signal=None):
loop.remove_signal_handler(signal)
if len(messagequeue) > 0:
logger.warning("Some messages are still queued!")
for task in (main_tasks["wiki_scanner"], main_tasks["discussion_handler"], main_tasks["msg_queue_shield"], main_tasks["database_updates_shield"]):
for task in asyncio.all_tasks(loop):
task.cancel()
loop.run_until_complete(main_tasks["message_sender"])
loop.run_until_complete(main_tasks["database_updates"])
@ -246,10 +242,10 @@ async def main_loop():
# loop.set_exception_handler(global_exception_handler)
try:
main_tasks = {"message_sender": asyncio.create_task(message_sender()),
"discussion_handler": asyncio.create_task(discussion_handler()), "database_updates": asyncio.create_task(DBHandler.update_db())}
"database_updates": asyncio.create_task(DBHandler.update_db())} # "discussion_handler": asyncio.create_task(discussion_handler()),
main_tasks["msg_queue_shield"] = asyncio.shield(main_tasks["message_sender"])
main_tasks["database_updates_shield"] = asyncio.shield(main_tasks["database_updates"])
await asyncio.gather(main_tasks["wiki_scanner"], main_tasks["discussion_handler"], main_tasks["message_sender"], main_tasks["database_updates"])
await asyncio.gather(main_tasks["message_sender"], main_tasks["database_updates"])
except KeyboardInterrupt:
shutdown(loop)
except asyncio.CancelledError:

View file

@ -29,7 +29,7 @@ with open("src/api/template_settings.json", "r") as template_json:
class DiscordMessageMetadata:
def __init__(self, method, log_id = None, page_id = None, rev_id = None, webhook_url = None):
self.method = method
self.method = method # unused, remains for compatibility reasons
self.page_id = page_id
self.log_id = log_id
self.rev_id = rev_id
@ -172,8 +172,9 @@ class StackedDiscordMessage():
self.message_list: list[DiscordMessage] = []
self.length = 0
self.message_type: int = m_type # 0 for compact, 1 for embed
self.discord_callback_message_ids: list[int] = []
self.discord_callback_message_id: int = -1
self.wiki: Wiki = wiki
self.webhook: Optional[str] = None
def __len__(self):
return self.length
@ -188,7 +189,12 @@ class StackedDiscordMessage():
def filter(self, params: dict) -> list[tuple[int, DiscordMessage]]:
"""Filters messages by their metadata"""
return [(num, message) for num, message in enumerate(self.message_list)]
return [(num, message) for num, message in enumerate(self.message_list) if message.matches(params)]
def delete_message_by_id(self, message_ids: list[int]):
"""Remove messages at the given positions from ``message_list``.

The values are used as list indices (``list.pop`` takes an index); they are
processed in descending order so earlier pops do not shift later positions.
NOTE(review): the parameter name suggests Discord message IDs — confirm callers
actually pass indices into ``message_list``.
"""
for message_id in sorted(message_ids, reverse=True):
self.message_list.pop(message_id)
def add_message(self, message: DiscordMessage):
if len(self) + len(message) > 6000 or len(self.message_list) > 9:

View file

@ -38,11 +38,12 @@ logger = logging.getLogger("rcgcdw.discord.queue")
class QueueEntry:
def __init__(self, discord_message, webhooks, wiki):
def __init__(self, discord_message, webhooks, wiki, method="POST"):
self.discord_message: [DiscordMessage, StackedDiscordMessage] = discord_message
self.webhooks: list[str] = webhooks
self._sent_webhooks: set[str] = set()
self.wiki: Wiki = wiki
self.method = method
def check_sent_status(self, webhook: str) -> bool:
"""Checks sent status for given message, if True it means that the message has been sent before to given webhook, otherwise False."""
@ -111,11 +112,17 @@ class MessageQueue:
if self.compare_message_to_dict(item[1], properties):
self._queue.pop(index)
async def pack_massages(self, messages: list[QueueEntry]) -> AsyncGenerator[tuple[StackedDiscordMessage, int]]:
async def pack_massages(self, messages: list[QueueEntry], current_pack=None) -> AsyncGenerator[tuple[StackedDiscordMessage, int, str]]:
"""Pack messages into StackedDiscordMessage. It's an async generator"""
current_pack = StackedDiscordMessage(0 if messages[0].discord_message.message_type == "compact" else 1, messages[0].wiki) # first message
index = -1
# TODO Rebuild to support DELETE and PATCH messages
for index, message in enumerate(messages):
if message.method == "POST":
if current_pack is None:
current_pack = StackedDiscordMessage(0 if message.discord_message.message_type == "compact" else 1,
message.wiki)
else:
# message.discord_message. # TODO Where do we store method?
yield message.discord_message, index, message.method
message = message.discord_message
try:
current_pack.add_message(message)
@ -123,18 +130,18 @@ class MessageQueue:
yield current_pack, index-1
current_pack = StackedDiscordMessage(0 if message.message_type == "compact" else 1, message.wiki) # next messages
current_pack.add_message(message)
yield current_pack, index
yield current_pack, index, "POST"
async def send_msg_set(self, msg_set: tuple[str, list[QueueEntry]]):
webhook_url, messages = msg_set # str("daosdkosakda/adkahfwegr34", list(DiscordMessage, DiscordMessage, DiscordMessage)
async for msg, index in self.pack_massages(messages):
async for msg, index, method in self.pack_massages(messages):
client_error = False
if self.global_rate_limit:
return # if we are globally rate limited just wait for first gblocked request to finish
# Verify that message hasn't been sent before
# noinspection PyTypeChecker
try:
status = await send_to_discord_webhook(msg, webhook_url)
status = await send_to_discord_webhook(msg, webhook_url, method)
except aiohttp.ClientError:
client_error = True
except (aiohttp.ServerConnectionError, aiohttp.ServerTimeoutError):
@ -148,6 +155,7 @@ class MessageQueue:
for queue_message in messages[max(index-len(msg.message_list), 0):index]: # mark messages as delivered
queue_message.confirm_sent_status(webhook_url)
if client_error is False:
msg.webhook = webhook_url
msg.wiki.add_message(msg)
async def resend_msgs(self):
@ -199,7 +207,7 @@ def handle_discord_http(code: int, formatted_embed: str, result: ClientResponse)
raise aiohttp.ServerConnectionError()
async def send_to_discord_webhook(message: [StackedDiscordMessage, DiscordMessageMetadata], webhook_path: str):
async def send_to_discord_webhook(message: [StackedDiscordMessage, DiscordMessageMetadata], webhook_path: str, method: str):
header = settings["header"]
header['Content-Type'] = 'application/json'
header['X-RateLimit-Precision'] = "millisecond"
@ -209,7 +217,7 @@ async def send_to_discord_webhook(message: [StackedDiscordMessage, DiscordMessag
try:
resp_json = await resp.json()
# Add Discord Message ID which we can later use to delete/redact messages if we want
message.discord_callback_message_ids.append(resp_json["id"])
message.discord_callback_message_id = resp_json["id"]
except KeyError:
raise aiohttp.ServerConnectionError(f"Could not get the ID from POST request with message data. Data: {await resp.text()}")
except ContentTypeError:
@ -217,9 +225,9 @@ async def send_to_discord_webhook(message: [StackedDiscordMessage, DiscordMessag
except ValueError:
logger.exception(f"Could not decode JSON response from Discord. Response: {await resp.text()}]")
return handle_discord_http(resp.status, repr(message), resp)
elif message.method == "DELETE":
async with session.request(method=message.method, url=f"https://discord.com/api/webhooks/{webhook_path}") as resp:
pass
elif message.method == "PATCH":
async with session.request(method=message.method, url=f"https://discord.com/api/webhooks/{webhook_path}", data=repr(message)) as resp:
pass
elif method == "DELETE":
async with session.request(method=message.method, url=f"https://discord.com/api/webhooks/{webhook_path}/messages/{message.discord_callback_message_id}") as resp:
return handle_discord_http(resp.status, repr(message), resp)
elif method == "PATCH":
async with session.request(method=message.method, url=f"https://discord.com/api/webhooks/{webhook_path}/messages/{message.discord_callback_message_id}", data=repr(message)) as resp:
return handle_discord_http(resp.status, repr(message), resp)

View file

@ -1,114 +0,0 @@
# This file is part of Recent changes Goat compatible Discord webhook (RcGcDw).
# RcGcDw is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# RcGcDw is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with RcGcDw. If not, see <http://www.gnu.org/licenses/>.
import logging
import json
from typing import List, Union
from src.configloader import settings
from src.discord.message import DiscordMessageMetadata, DiscordMessageRaw
from src.discord.queue import send_to_discord, messagequeue
from src.fileio.database import db_cursor, db_connection
from src.i18n import redaction as redaction_translation
logger = logging.getLogger("rcgcdw.discord.redaction") # TODO Figure out why does this logger do not work
_ = redaction_translation.gettext
#ngettext = redaction_translation.ngettext
def delete_messages(matching_data: dict):
"""Delete previously sent Discord messages whose stored event data matches ``matching_data``.

Builds a ``key = ? AND ...`` WHERE clause from the dict keys, queues one DELETE
webhook request per matching message, drops matching entries still waiting in the
send queue, and finally purges the rows from the local database.
NOTE(review): indentation was lost in this dump; nesting must be restored before execution.
"""
sql_conditions = ""
for key, value in matching_data.items():
sql_conditions += "{} = ? AND".format(key)
else:
sql_conditions = sql_conditions[0:-4] # remove last AND statement
# NOTE(review): the for/else `else` always runs after an uninterrupted loop — it works
# here purely as an "after the loop" marker, not as a conditional branch.
to_delete = db_cursor.execute("SELECT msg_id FROM event WHERE {CON}".format(CON=sql_conditions), list(matching_data.values()))
if len(messagequeue) > 0:
# Also drop matching messages that have not been sent yet.
messagequeue.delete_all_with_matching_metadata(**matching_data)
msg_to_remove = []
logger.debug("Deleting messages for data: {}".format(matching_data))
for message in to_delete:
# Each DELETE goes to the webhook's per-message endpoint.
webhook_url = "{main_webhook}/messages/{message_id}".format(main_webhook=settings["webhookURL"], message_id=message[0])
msg_to_remove.append(message[0])
logger.debug("Removing following message: {}".format(message[0]))
send_to_discord(None, DiscordMessageMetadata("DELETE", webhook_url=webhook_url))
for msg in msg_to_remove:
db_cursor.execute("DELETE FROM messages WHERE message_id = ?", (msg,))
db_connection.commit()
def redact_messages(ids, entry_type: int, to_censor: dict): # : Union[List[Union[str, int]], set[Union[int, str]]]
"""Redact past Discord messages
ids: list of ints
entry_type: int - 0 for revdel, 1 for logdel
to_censor: dict - logparams of message parts to censor

NOTE(review): indentation was lost in this dump; nesting must be restored before execution.
"""
for event_id in ids:
# Fetch the stored message content for this revision (revdel) or log entry (logdel).
if entry_type == 0:
message = db_cursor.execute("SELECT content, message_id FROM messages INNER JOIN event ON event.msg_id = messages.message_id WHERE event.revid = ?;", (event_id, ))
else:
message = db_cursor.execute(
"SELECT content, message_id FROM messages INNER JOIN event ON event.msg_id = messages.message_id WHERE event.logid = ?;",
(event_id,))
# NOTE(review): only embed mode is handled — compact-mode messages fall through untouched.
if settings["appearance"]["mode"] == "embed":
if message is not None:
row = message.fetchone()
try:
message = json.loads(row[0])
new_embed = message["embeds"][0]
except ValueError:
logger.error("Couldn't loads JSON for message data. What happened? Data: {}".format(row[0]))
return
except TypeError:
# fetchone() returned None — the event predates the local cache.
logger.error("Couldn't find entry in the database for RevDel to censor information. This is probably because the script has been recently restarted or cache cleared.")
return
# Strip each censored part from the embed.
if "user" in to_censor and "url" in new_embed["author"]:
new_embed["author"]["name"] = _("hidden")
new_embed["author"].pop("url")
if "action" in to_censor and "url" in new_embed:
new_embed["title"] = _("~~hidden~~")
new_embed.pop("url")
if "content" in to_censor and "fields" in new_embed:
new_embed.pop("fields")
if "comment" in to_censor:
new_embed["description"] = _("~~hidden~~")
# Persist the censored copy locally, then PATCH the original Discord message in place.
message["embeds"][0] = new_embed
db_cursor.execute("UPDATE messages SET content = ? WHERE message_id = ?;", (json.dumps(message), row[1],))
db_connection.commit()
logger.debug(message)
send_to_discord(DiscordMessageRaw(message, settings["webhookURL"]+"/messages/"+str(row[1])), DiscordMessageMetadata("PATCH"))
else:
logger.debug("Could not find message in the database.")
def find_middle_next(ids: List[str], pageid: int) -> set:
    """Return revision IDs that directly follow each redacted revision on a page.

    To address #235, RcGcDw also removes diffs of the revision *next* to each
    redacted revision, protecting information leaked by revisions that revert
    revdeleted content.

    :arg ids: revision IDs (str or int) that were redacted
    :arg pageid: page the revisions belong to
    :return: set of additional revision IDs to redact (the input ids excluded)
    """
    rev_ids = sorted(int(x) for x in ids)
    if not rev_ids:
        # Nothing to do; also avoids an IndexError on rev_ids[0] below.
        return set()
    result = set()
    messages = db_cursor.execute("SELECT revid FROM event WHERE pageid = ? AND revid >= ? ORDER BY revid",
                                 (pageid, rev_ids[0],))
    all_in_page = [x[0] for x in messages.fetchall()]
    for rev_id in rev_ids:
        try:
            # .index() raises ValueError when rev_id isn't tracked; the +1 lookup
            # raises IndexError when rev_id is the newest tracked revision.
            # BUGFIX: the original caught KeyError, which list indexing never
            # raises, so the newest-revision case crashed the caller.
            result.add(all_in_page[all_in_page.index(rev_id) + 1])
        except (IndexError, ValueError):
            logger.debug(f"Value {rev_id} not in {all_in_page} or no value after that.")
    return result - set(rev_ids)

View file

@ -1,274 +0,0 @@
import datetime, logging
import json
from urllib.parse import quote_plus
from src.config import settings
from src.misc import link_formatter, create_article_path, escape_formatting
from src.discord import DiscordMessage
from src.msgqueue import send_to_discord
from src.i18n import langs
logger = logging.getLogger("rcgcdw.discussion_formatters")
async def feeds_compact_formatter(post_type, post, message_target, wiki, article_page=None) -> DiscordMessage:
"""Compact formatter for Fandom discussions.

post_type: "FORUM", "WALL" or "ARTICLE_COMMENT" — anything else produces an "unknown event" message
post: post payload as returned by the Fandom Discussions feed
message_target: target descriptor; message_target[0][0] is used as the language key, message_target[1] as the webhook target
wiki: base URL of the wiki (with trailing slash)
article_page: dict with "title"/"fullUrl" of the commented article, used only for ARTICLE_COMMENT posts
NOTE(review): indentation was lost in this dump; nesting must be restored before execution.
"""
_ = langs[message_target[0][0]]["discussion_formatters"].gettext
message = None
author = _("unknown") # Fail safe
# Resolve the author's display name and profile link; anonymous posters expose only an IP.
if post_type == "FORUM":
if post["createdBy"]["name"]:
author = post["createdBy"]["name"]
author_url = "<{url}f/u/{creatorId}>".format(url=wiki, creatorId=post["creatorId"])
elif post["creatorIp"]:
author = post["creatorIp"][1:]
author_url = "<{url}wiki/Special:Contributions{creatorIp}>".format(url=wiki, creatorIp=post["creatorIp"])
else:
if post["createdBy"]["name"]:
author = post["createdBy"]["name"]
author_url = link_formatter(create_article_path("User:{user}".format(user=author), wiki + "wiki/$1"))
else:
author_url = "<{url}f/u/{creatorId}>".format(url=wiki, creatorId=post["creatorId"])
event_type = "discussion"
# Forum posts: distinguish new threads (by funnel: poll/quiz/text) from replies.
if post_type == "FORUM":
if not post["isReply"]:
thread_funnel = post.get("funnel")
msg_text = _("[{author}]({author_url}) created [{title}](<{url}f/p/{threadId}>) in {forumName}")
if thread_funnel == "POLL":
event_type = "discussion/forum/poll"
msg_text = _("[{author}]({author_url}) created a poll [{title}](<{url}f/p/{threadId}>) in {forumName}")
elif thread_funnel == "QUIZ":
event_type = "discussion/forum/quiz"
msg_text = _("[{author}]({author_url}) created a quiz [{title}](<{url}f/p/{threadId}>) in {forumName}")
elif thread_funnel == "TEXT":
event_type = "discussion/forum/post"
else:
logger.warning("No entry for {event} with params: {params}".format(event=thread_funnel, params=post))
event_type = "unknown"
message = msg_text.format(author=author, author_url=author_url, title=escape_formatting(post["title"]), url=wiki, threadId=post["threadId"], forumName=post["forumName"])
else:
event_type = "discussion/forum/reply"
message = _("[{author}]({author_url}) created a [reply](<{url}f/p/{threadId}/r/{postId}>) to [{title}](<{url}f/p/{threadId}>) in {forumName}").format(author=author, author_url=author_url, url=wiki, threadId=post["threadId"], postId=post["id"], title=escape_formatting(post["_embedded"]["thread"][0]["title"]), forumName=post["forumName"])
# Message Wall posts: derive the wall owner's name from the forum name suffix.
elif post_type == "WALL":
user_wall = _("unknown") # Fail safe
if post["forumName"].endswith(' Message Wall'):
user_wall = post["forumName"][:-13]
if not post["isReply"]:
event_type = "discussion/wall/post"
message = _("[{author}]({author_url}) created [{title}](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}>) on [{user}'s Message Wall](<{url}wiki/Message_Wall:{user_wall}>)").format(author=author, author_url=author_url, title=escape_formatting(post["title"]), url=wiki, user=user_wall, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"])
else:
event_type = "discussion/wall/reply"
message = _("[{author}]({author_url}) created a [reply](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}#{replyId}>) to [{title}](<{url}wiki/Message_Wall:{user_wall}?threadId={threadId}>) on [{user}'s Message Wall](<{url}wiki/Message_Wall:{user_wall}>)").format(author=author, author_url=author_url, url=wiki, title=escape_formatting(post["_embedded"]["thread"][0]["title"]), user=user_wall, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"], replyId=post["id"])
# Article comments: link to the commented page, escaping parentheses for Markdown.
elif post_type == "ARTICLE_COMMENT":
if article_page is None:
article_page = {"title": _("unknown"), "fullUrl": wiki} # No page known
article_page["fullUrl"] = article_page["fullUrl"].replace(")", "\)").replace("()", "\(")
if not post["isReply"]:
event_type = "discussion/comment/post"
message = _("[{author}]({author_url}) created a [comment](<{url}?commentId={commentId}>) on [{article}](<{url}>)").format(author=author, author_url=author_url, url=article_page["fullUrl"], article=article_page["title"], commentId=post["threadId"])
else:
event_type = "discussion/comment/reply"
message = _("[{author}]({author_url}) created a [reply](<{url}?commentId={commentId}&replyId={replyId}>) to a [comment](<{url}?commentId={commentId}>) on [{article}](<{url}>)").format(author=author, author_url=author_url, url=article_page["fullUrl"], article=article_page["title"], commentId=post["threadId"], replyId=post["id"])
else:
# Unrecognised post type: either drop it silently or report it via the support-server notice.
logger.warning("No entry for {event} with params: {params}".format(event=post_type, params=post))
if not settings["support"]:
return
else:
message = _("Unknown event `{event}` by [{author}]({author_url}), report it on the [support server](<{support}>).").format(event=post_type, author=author, author_url=author_url, support=settings["support"])
event_type = "unknown"
return DiscordMessage("compact", event_type, message_target[1], content=message, wiki=wiki)
async def feeds_embed_formatter(post_type: str, post: dict, message_target: tuple, wiki: str, article_page: dict = None) -> DiscordMessage:
    """Embed formatter for Fandom discussions.

    Builds a DiscordMessage embed for a single Fandom discussions event.

    :param post_type: event container type — "FORUM", "WALL" or "ARTICLE_COMMENT"
    :param post: raw discussions post object as returned by the Fandom feeds API
    :param message_target: nested target tuple; message_target[0][0] is the language key,
                           message_target[0][1] is the verbosity/appearance level,
                           message_target[1] is the webhook target
    :param wiki: base URL of the wiki (with trailing slash — URLs below append directly to it)
    :param article_page: page the comment belongs to ({"title": ..., "fullUrl": ...});
                         only meaningful for ARTICLE_COMMENT events, may be None
    :return: a finished DiscordMessage embed
    """
    _ = langs[message_target[0][0]]["discussion_formatters"].gettext
    embed = DiscordMessage("embed", "discussion", message_target[1], wiki=wiki)
    author = _("unknown")  # Fail safe
    # Author attribution: forum posts distinguish registered users from IPs;
    # wall/comment events link the user page instead of the f/u/ profile.
    if post_type == "FORUM":
        if post["createdBy"]["name"]:
            author = post["createdBy"]["name"]
            embed.set_author(author, "{url}f/u/{creatorId}".format(url=wiki, creatorId=post["creatorId"]), icon_url=post["createdBy"]["avatarUrl"])
        elif post["creatorIp"]:
            # creatorIp is prefixed with a slash in the API payload — strip it for display
            author = post["creatorIp"][1:]
            embed.set_author(author, "{url}wiki/Special:Contributions{creatorIp}".format(url=wiki, creatorIp=post["creatorIp"]))
    else:
        if post["createdBy"]["name"]:
            author = post["createdBy"]["name"]
            embed.set_author(author, "{url}wiki/User:{creator}".format(url=wiki, creator=author.replace(" ", "_")), icon_url=post["createdBy"]["avatarUrl"])
        else:
            embed.set_author(author, "{url}f/u/{creatorId}".format(url=wiki, creatorId=post["creatorId"]), icon_url=post["createdBy"]["avatarUrl"])
    # Verbosity level 3 includes the full post body, rendered from Fandom's jsonModel
    if message_target[0][1] == 3:
        if post.get("jsonModel") is not None:
            npost = DiscussionsFromHellParser(post, wiki)
            embed["description"] = npost.parse()
            if npost.image_last:
                # Promote the last embedded image to the embed image and drop its inline URL
                embed["image"]["url"] = npost.image_last
                embed["description"] = embed["description"].replace(npost.image_last, "")
        else:  # Fallback when model is not available
            embed["description"] = post.get("rawContent", "")
    if post["forumName"] is not None:
        embed.set_footer(post["forumName"].replace("_", " "))
    embed["timestamp"] = datetime.datetime.fromtimestamp(post["creationDate"]["epochSecond"], tz=datetime.timezone.utc).isoformat()
    if post_type == "FORUM":
        if not post["isReply"]:
            embed["url"] = "{url}f/p/{threadId}".format(url=wiki, threadId=post["threadId"])
            embed["title"] = _("Created \"{title}\"").format(title=escape_formatting(post["title"]))
            # "funnel" distinguishes the forum post flavour: TEXT, POLL or QUIZ
            thread_funnel = post.get("funnel")
            if thread_funnel == "POLL":
                embed.event_type = "discussion/forum/poll"
                embed["title"] = _("Created a poll \"{title}\"").format(title=escape_formatting(post["title"]))
                if message_target[0][1] > 1:
                    poll = post["poll"]
                    image_type = False
                    # Image polls carry an image per answer; text polls don't — assumes all
                    # answers are the same kind as the first one
                    if poll["answers"][0]["image"] is not None:
                        image_type = True
                    for num, option in enumerate(poll["answers"]):
                        embed.add_field(option["text"] if image_type is True else _("Option {}").format(num+1),
                                        option["text"] if image_type is False else _("__[View image]({image_url})__").format(image_url=option["image"]["url"]),
                                        inline=True)
            elif thread_funnel == "QUIZ":
                embed.event_type = "discussion/forum/quiz"
                embed["title"] = _("Created a quiz \"{title}\"").format(title=escape_formatting(post["title"]))
                if message_target[0][1] > 1:
                    quiz = post["_embedded"]["quizzes"][0]
                    embed["description"] = quiz["title"]
                    if quiz["image"] is not None:
                        embed["image"]["url"] = quiz["image"]
            elif thread_funnel == "TEXT":
                embed.event_type = "discussion/forum/post"
            else:
                logger.warning("No entry for {event} with params: {params}".format(event=thread_funnel, params=post))
                embed.event_type = "unknown"
            # Thread tags are only shown at higher verbosity; fall back to a count when
            # the joined links would overflow Discord's field value limit
            if message_target[0][1] > 1 and post["_embedded"]["thread"][0]["tags"]:
                tag_displayname = []
                for tag in post["_embedded"]["thread"][0]["tags"]:
                    tag_displayname.append("[{title}]({url})".format(title=tag["articleTitle"], url=create_article_path(tag["articleTitle"], wiki + "wiki/$1")))
                if len(", ".join(tag_displayname)) > 1000:
                    embed.add_field(_("Tags"), _("{} tags").format(len(post["_embedded"]["thread"][0]["tags"])))
                else:
                    embed.add_field(_("Tags"), ", ".join(tag_displayname))
        else:
            embed.event_type = "discussion/forum/reply"
            embed["title"] = _("Replied to \"{title}\"").format(title=escape_formatting(post["_embedded"]["thread"][0]["title"]))
            embed["url"] = "{url}f/p/{threadId}/r/{postId}".format(url=wiki, threadId=post["threadId"], postId=post["id"])
    elif post_type == "WALL":
        user_wall = _("unknown")  # Fail safe
        # forumName for walls is "<username> Message Wall" — strip the 13-char suffix
        if post["forumName"].endswith(' Message Wall'):
            user_wall = post["forumName"][:-13].replace("_", " ")
        if not post["isReply"]:
            embed.event_type = "discussion/wall/post"
            embed["url"] = "{url}wiki/Message_Wall:{user_wall}?threadId={threadId}".format(url=wiki, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"])
            embed["title"] = _("Created \"{title}\" on {user}'s Message Wall").format(title=escape_formatting(post["title"]), user=user_wall)
        else:
            embed.event_type = "discussion/wall/reply"
            embed["url"] = "{url}wiki/Message_Wall:{user_wall}?threadId={threadId}#{replyId}".format(url=wiki, user_wall=quote_plus(user_wall.replace(" ", "_")), threadId=post["threadId"], replyId=post["id"])
            embed["title"] = _("Replied to \"{title}\" on {user}'s Message Wall").format(title=escape_formatting(post["_embedded"]["thread"][0]["title"]), user=user_wall)
    elif post_type == "ARTICLE_COMMENT":
        if article_page is None:
            article_page = {"title": _("unknown"), "fullUrl": wiki}  # No page known
        if not post["isReply"]:
            embed.event_type = "discussion/comment/post"
            embed["url"] = "{url}?commentId={commentId}".format(url=article_page["fullUrl"], commentId=post["threadId"])
            embed["title"] = _("Commented on {article}").format(article=article_page["title"])
        else:
            embed.event_type = "discussion/comment/reply"
            embed["url"] = "{url}?commentId={commentId}&replyId={replyId}".format(url=article_page["fullUrl"], commentId=post["threadId"], replyId=post["id"])
            embed["title"] = _("Replied to a comment on {article}").format(article=article_page["title"])
        embed.set_footer(article_page["title"])
    else:
        # Unrecognized container type — log it, and surface the raw payload on the
        # support server if one is configured
        logger.warning("No entry for {event} with params: {params}".format(event=post_type, params=post))
        embed["title"] = _("Unknown event `{event}`").format(event=post_type)
        embed.event_type = "unknown"
        if settings["support"]:
            change_params = "[```json\n{params}\n```]({support})".format(params=json.dumps(post, indent=2), support=settings["support"])
            if len(change_params) > 1000:
                embed.add_field(_("Report this on the support server"), settings["support"])
            else:
                embed.add_field(_("Report this on the support server"), change_params)
    embed.finish_embed()
    return embed
class DiscussionsFromHellParser:
    """This class converts fairly convoluted Fandom jsonModel of a discussion post into Markdown formatted usable thing. Takes string, returns string.
    Kudos to MarkusRost for allowing me to implement this formatter based on his code in Wiki-Bot.

    :param post: raw discussions post object; "jsonModel" holds a JSON-encoded document tree,
                 "_embedded"/"contentImages" maps inline image ids to URLs
    :param wiki: base URL of the wiki, used to build user-mention links
    """
    def __init__(self, post, wiki):
        self.post = post
        self.wiki = wiki
        # NOTE: "jsonModal" is a historical misspelling of jsonModel, kept so external
        # readers of this attribute keep working
        self.jsonModal = json.loads(post.get("jsonModel", "{}"))
        self.markdown_text = ""  # accumulated Markdown output
        self.item_num = 1        # running counter for ordered-list item numbering
        self.image_last = None   # URL of the most recently rendered inline image, if any

    def parse(self) -> str:
        """Main parsing logic.

        Walks the jsonModel content tree and returns the rendered Markdown,
        truncated to 2000 characters (Discord's content limit) with a trailing
        ellipsis when truncation occurs."""
        self.parse_content(self.jsonModal["content"])
        if len(self.markdown_text) > 2000:
            # Fix: append an ellipsis so truncation is visible (previously an empty
            # string was concatenated, leaving no indication the text was cut)
            self.markdown_text = self.markdown_text[0:2000] + "…"
        return self.markdown_text

    def parse_content(self, content, ctype=None):
        """Recursively render a list of jsonModel nodes into self.markdown_text.

        :param content: list of node dicts ({"type": ..., "content": ..., ...})
        :param ctype: type of the parent node, used for list indentation/numbering
                      and code-block handling
        """
        self.image_last = None
        for item in content:
            # List parents prefix each child with indentation / a number
            if ctype == "bulletList":
                self.markdown_text += "\t"
            if ctype == "orderedList":
                self.markdown_text += "\t{num}. ".format(num=self.item_num)
                self.item_num += 1
            if item["type"] == "text":
                if "marks" in item:
                    prefix, suffix = self.convert_marks(item["marks"])
                    self.markdown_text = "{old}{pre}{text}{suf}".format(old=self.markdown_text, pre=prefix, text=escape_formatting(item["text"]), suf=suffix)
                else:
                    if ctype == "code_block":
                        self.markdown_text += item["text"]  # ignore formatting on preformatted text which cannot have additional formatting anyways
                    else:
                        self.markdown_text += escape_formatting(item["text"])
            elif item["type"] == "paragraph":
                if "content" in item:
                    self.parse_content(item["content"], item["type"])
                self.markdown_text += "\n"
            elif item["type"] == "openGraph":
                # Only render standalone openGraph embeds; inline-link ones already
                # appear as links in the surrounding text
                if not item["attrs"].get("wasAddedWithInlineLink", False):
                    self.markdown_text = "{old}{link}\n".format(old=self.markdown_text, link=item["attrs"]["url"])
            elif item["type"] == "image":
                try:
                    logger.debug(item["attrs"]["id"])
                    if item["attrs"]["id"] is not None:
                        # attrs["id"] indexes into the embedded contentImages list
                        self.markdown_text = "{old}{img_url}\n".format(old=self.markdown_text, img_url=self.post["_embedded"]["contentImages"][int(item["attrs"]["id"])]["url"])
                        self.image_last = self.post["_embedded"]["contentImages"][int(item["attrs"]["id"])]["url"]
                except (IndexError, ValueError, TypeError):
                    logger.warning("Image {} not found.".format(item["attrs"]["id"]))
                logger.debug(self.markdown_text)
            elif item["type"] == "code_block":
                self.markdown_text += "```\n"
                if "content" in item:
                    self.parse_content(item["content"], item["type"])
                self.markdown_text += "\n```\n"
            elif item["type"] == "bulletList":
                if "content" in item:
                    self.parse_content(item["content"], item["type"])
            elif item["type"] == "orderedList":
                self.item_num = 1  # numbering restarts for every ordered list
                if "content" in item:
                    self.parse_content(item["content"], item["type"])
            elif item["type"] == "listItem":
                self.parse_content(item["content"], item["type"])

    def convert_marks(self, marks):
        """Translate a node's "marks" (mention/strong/link/em) into a Markdown
        (prefix, suffix) pair wrapped around the node text.

        Suffixes are prepended so nested marks close in reverse order of opening."""
        prefix = ""
        suffix = ""
        for mark in marks:
            if mark["type"] == "mention":
                prefix += "["
                suffix = "]({wiki}f/u/{userid}){suffix}".format(wiki=self.wiki, userid=mark["attrs"]["userId"], suffix=suffix)
            elif mark["type"] == "strong":
                prefix += "**"
                suffix = "**{suffix}".format(suffix=suffix)
            elif mark["type"] == "link":
                prefix += "["
                suffix = "]({link}){suffix}".format(link=mark["attrs"]["href"], suffix=suffix)
            elif mark["type"] == "em":
                prefix += "_"
                suffix = "_" + suffix
        return prefix, suffix

File diff suppressed because it is too large Load diff

View file

@ -1,18 +1,17 @@
import asyncio
import collections
import logging
from typing import Union
from typing import Union, Optional
import asyncpg
from src.database import db
logger = logging.getLogger("rcgcdb.queue_handler")
class UpdateDB:
def __init__(self):
self.updated: list[tuple[str, tuple[Union[str, int]]]] = []
self.db: Optional[] = None
def add(self, sql_expression):
self.updated.append(sql_expression)
@ -21,7 +20,7 @@ class UpdateDB:
self.updated.clear()
async def fetch_rows(self, SQLstatement: str, args: Union[str, int]) -> collections.AsyncIterable:
async with db.pool().acquire() as connection:
async with self.db.pool().acquire() as connection:
async with connection.transaction():
async for row in connection.cursor(SQLstatement, *args):
yield row
@ -30,7 +29,7 @@ class UpdateDB:
try:
while True:
if self.updated:
async with db.pool().acquire() as connection:
async with self.db.pool().acquire() as connection:
async with connection.transaction():
for update in self.updated:
await connection.execute(update[0], *update[1])
@ -38,12 +37,12 @@ class UpdateDB:
await asyncio.sleep(10.0)
except asyncio.CancelledError:
logger.info("Shutting down after updating DB with {} more entries...".format(len(self.updated)))
async with db.pool().acquire() as connection:
async with self.db.pool().acquire() as connection:
async with connection.transaction():
for update in self.updated:
await connection.execute(update[0], *update[1])
self.clear_list()
await db.shutdown_connection()
await self.db.shutdown_connection()
DBHandler = UpdateDB()
dbmanager = UpdateDB()

View file

@ -12,8 +12,7 @@ from api.util import default_message
from src.discord.queue import messagequeue, QueueEntry
from mw_messages import MWMessages
from src.exceptions import *
from src.queue_handler import DBHandler
from src.formatters.discussions import feeds_embed_formatter, feeds_compact_formatter
from src.queue_handler import UpdateDB
from src.api.hooks import formatter_hooks
from src.api.client import Client
from src.api.context import Context
@ -80,6 +79,63 @@ class Wiki:
def set_domain(self, domain: Domain):
self.domain = domain
# def find_middle_next(ids: List[str], pageid: int) -> set: # TODO Properly re-implement for RcGcDb
# """To address #235 RcGcDw should now remove diffs in next revs relative to redacted revs to protect information in revs that revert revdeleted information.
#
# :arg ids - list
# :arg pageid - int
#
# :return list"""
# ids = [int(x) for x in ids]
# result = set()
# ids.sort() # Just to be sure, sort the list to make sure it's always sorted
# messages = db_cursor.execute("SELECT revid FROM event WHERE pageid = ? AND revid >= ? ORDER BY revid",
# (pageid, ids[0],))
# all_in_page = [x[0] for x in messages.fetchall()]
# for id in ids:
# try:
# result.add(all_in_page[all_in_page.index(id) + 1])
# except (KeyError, ValueError):
# logger.debug(f"Value {id} not in {all_in_page} or no value after that.")
# return result - set(ids)
def search_message_history(self, params: dict) -> list[tuple[StackedDiscordMessage, list[int]]]:
"""Search self.message_history for messages which match all properties in params and return them in a list"""
output = []
for message in self.message_history:
returned_matches_for_stacked = message.filter(params)
if returned_matches_for_stacked:
output.append((message, [x[0] for x in returned_matches_for_stacked]))
return output
def delete_messages(self, params: dict):
"""Delete certain messages from message_history which DiscordMessageMetadata matches all properties in params"""
# Delete all messages with given IDs
for stacked_message, ids in self.search_message_history(params):
stacked_message.delete_message_by_id(ids)
# If all messages were removed, send a DELETE to Discord
if len(stacked_message.message_list) == 0:
messagequeue.add_message(QueueEntry(stacked_message, [stacked_message.webhook], self, method="DELETE"))
else:
messagequeue.add_message(QueueEntry(stacked_message, [stacked_message.webhook], self, method="PATCH"))
def redact_messages(self, ids: list[int], mode: str, censored_properties: dict):
# ids can refer to multiple events, and search does not support additive mode, so we have to loop it for all ids
for revlogid in ids:
for stacked_message, ids in self.search_message_history({mode: revlogid}): # This might not work depending on how Python handles it, but hey, learning experience
for message in [message for num, message in enumerate(stacked_message.message_list) if num in ids]:
if "user" in censored_properties and "url" in message["author"]:
message["author"]["name"] = _("hidden")
message["author"].pop("url")
if "action" in censored_properties and "url" in message:
message["title"] = _("~~hidden~~")
message["embed"].pop("url")
if "content" in censored_properties and "fields" in message:
message["embed"].pop("fields")
if "comment" in censored_properties:
message["description"] = _("~~hidden~~")
messagequeue.add_message(QueueEntry(stacked_message, [stacked_message.webhook], self, method="PATCH"))
# async def downtime_controller(self, down, reason=None):
# if down:
# self.fail_times += 1
@ -341,21 +397,21 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ
else:
raise
if identification_string in ("delete/delete", "delete/delete_redir"): # TODO Move it into a hook?
delete_messages(dict(pageid=change.get("pageid")))
wiki.delete_messages(dict(pageid=change.get("pageid")))
elif identification_string == "delete/event":
logparams = change.get('logparams', {"ids": []})
if settings["appearance"]["mode"] == "embed":
redact_messages(logparams.get("ids", []), 1, logparams.get("new", {}))
wiki.redact_messages(logparams.get("ids", []), "rev_id", logparams.get("new", {}))
else:
for logid in logparams.get("ids", []):
delete_messages(dict(logid=logid))
wiki.delete_messages(dict(logid=logid))
elif identification_string == "delete/revision":
logparams = change.get('logparams', {"ids": []})
if settings["appearance"]["mode"] == "embed":
redact_messages(logparams.get("ids", []), 0, logparams.get("new", {}))
wiki.redact_messages(logparams.get("ids", []), "log_id", logparams.get("new", {}))
else:
for revid in logparams.get("ids", []):
delete_messages(dict(revid=revid))
wiki.delete_messages(dict(revid=revid))
discord_message.finish_embed()
if discord_message:
discord_message.metadata = metadata
@ -418,7 +474,7 @@ async def process_cats(event: dict, local_wiki: Wiki, categorize_events: dict):
# mw_msgs[key] = msgs # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one
# local_wiki.mw_messages = key
async def essential_feeds(change: dict, comment_pages: dict, db_wiki, target: tuple) -> discord.DiscordMessage:
async def essential_feeds(change: dict, comment_pages: dict, db_wiki, target: tuple) -> DiscordMessage:
"""Prepares essential information for both embed and compact message format."""
appearance_mode = feeds_embed_formatter if target[0][1] > 0 else feeds_compact_formatter
identification_string = change["_embedded"]["thread"][0]["containerType"]