From adb3b4207425c5224fa9a449daed6c2ab19e0d66 Mon Sep 17 00:00:00 2001
From: Frisk
Date: Sun, 2 Jan 2022 15:31:11 +0100
Subject: [PATCH] Fixed #235

---
 src/discord/redaction.py | 31 +++++++++++++++++++++++++++++--
 src/rcgcdw.py            |  4 +++-
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/src/discord/redaction.py b/src/discord/redaction.py
index b7aac13..fd57440 100644
--- a/src/discord/redaction.py
+++ b/src/discord/redaction.py
@@ -15,6 +15,8 @@
 import logging
 import json
 
+from typing import List, Set, Union
+
 from src.configloader import settings
 from src.discord.message import DiscordMessageMetadata, DiscordMessage, DiscordMessageRaw
 from src.discord.queue import send_to_discord, messagequeue
@@ -48,14 +50,14 @@ def delete_messages(matching_data: dict):
     db_connection.commit()
 
 
-def redact_messages(ids: list, entry_type: int, to_censor: dict):
+def redact_messages(ids: Union[List[Union[str, int]], Set[Union[int, str]]], entry_type: int, to_censor: dict):
     """Redact past Discord messages
 
     ids: list of ints
     entry_type: int - 0 for revdel, 1 for logdel
     to_censor: dict - logparams of message parts to censor"""
     for event_id in ids:
-        if entry_type == 0:  # TODO check if queries are proper
+        if entry_type == 0:
             message = db_cursor.execute("SELECT content, message_id FROM messages INNER JOIN event ON event.msg_id = messages.message_id WHERE event.revid = ?;", (event_id, ))
         else:
             message = db_cursor.execute(
@@ -90,3 +92,28 @@ def redact_messages(ids: list, entry_type: int, to_censor: dict):
             send_to_discord(DiscordMessageRaw(message, settings["webhookURL"]+"/messages/"+str(row[1])), DiscordMessageMetadata("PATCH"))
         else:
             logger.debug("Could not find message in the database.")
+
+
+def find_middle_next(ids: List[str], pageid: int) -> set:
+    """Find the revisions that directly follow the redacted revisions of a page.
+
+    To address #235 RcGcDw also removes diffs of the revisions that come right after redacted revisions,
+    to protect information in revisions that revert revdeleted information.
+    This does not fix leaky diffs entirely (no diff comparison is done), but it should cover most cases.
+
+    :arg ids - list of redacted revision IDs (anything int() accepts)
+    :arg pageid - int, ID of the page the revisions belong to
+
+    :return set of revision IDs stored right after each redacted revision (empty when none are found)"""
+    ids = sorted(int(x) for x in ids)  # Sorted so that ids[0] is the lowest redacted revision
+    result = set()
+    if not ids:  # Nothing was redacted, and ids[0] below would raise IndexError
+        return result
+    messages = db_cursor.execute("SELECT revid FROM event WHERE pageid = ? AND revid >= ? ORDER BY revid", (pageid, ids[0],))
+    all_in_page = [x[0] for x in messages.fetchall()]
+    for rev_id in ids:
+        try:
+            result.add(all_in_page[all_in_page.index(rev_id)+1])
+        except (IndexError, ValueError):  # ValueError: revision is not stored; IndexError: nothing is stored after it
+            logger.debug(f"Value {rev_id} not in {all_in_page} or no value after that.")
+    return result
diff --git a/src/rcgcdw.py b/src/rcgcdw.py
index f5cd6d9..b95ab41 100644
--- a/src/rcgcdw.py
+++ b/src/rcgcdw.py
@@ -45,7 +45,7 @@ TESTING = command_args.test  # debug mode, pipeline testing
 AUTO_SUPPRESSION_ENABLED = settings.get("auto_suppression", {"enabled": False}).get("enabled")
 
 if AUTO_SUPPRESSION_ENABLED:
-    from src.discord.redaction import delete_messages, redact_messages
+    from src.discord.redaction import delete_messages, redact_messages, find_middle_next
 
 # Prepare logging
 logging.config.dictConfig(settings["logging"])
@@ -262,6 +262,8 @@ def rc_processor(change, changed_categories):
     logparams = change.get('logparams', {"ids": []})
     if settings["appearance"]["mode"] == "embed":
         redact_messages(logparams.get("ids", []), 0, logparams.get("new", {}))
+        if "content" in logparams.get("new", {}):  # Also redact revisions in the middle and next ones in case of content (diffs leak)
+            redact_messages(find_middle_next(logparams.get("ids", []), change.get("pageid", -1)), 0, {"content": ""})
     else:
         for revid in logparams.get("ids", []):
             delete_messages(dict(revid=revid))