From c96488153ce7d45828dfd6a32c4042fd81effd59 Mon Sep 17 00:00:00 2001 From: Frisk Date: Thu, 11 Jul 2024 18:35:54 +0200 Subject: [PATCH] Borrowed code from RcGcDw to fix diff parser --- src/bot.py | 2 +- src/misc.py | 29 +++++++++++++++++++++-------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/src/bot.py b/src/bot.py index 05a3acf..8679a15 100644 --- a/src/bot.py +++ b/src/bot.py @@ -210,7 +210,7 @@ def shutdown(loop, signal=None): loop.run_until_complete(main_tasks["message_sender"]) loop.run_until_complete(main_tasks["database_updates"]) for task in asyncio.all_tasks(loop): - logger.debug("Killing task") + logger.debug("Killing task {}".format(task.get_name())) task.cancel() try: loop.run_until_complete(asyncio.gather(*asyncio.all_tasks(loop))) diff --git a/src/misc.py b/src/misc.py index 6303787..0f7ea08 100644 --- a/src/misc.py +++ b/src/misc.py @@ -128,6 +128,17 @@ def profile_field_name(name, embed, _): return _("unknown") +def class_searcher(attribs: list) -> str: + """Function to return classes of given element in HTMLParser on handle_starttag + + :returns a string with all of the classes of element + """ + for attr in attribs: + if attr[0] == "class": + return attr[1] + return "" + + class ContentParser(HTMLParser): current_tag = "" last_ins = None @@ -145,14 +156,16 @@ class ContentParser(HTMLParser): def handle_starttag(self, tagname, attribs): if tagname == "ins" or tagname == "del": self.current_tag = tagname - if tagname == "td" and "diff-addedline" in attribs[0] and self.ins_length <= 1000: - self.current_tag = "tda" - self.last_ins = "" - if tagname == "td" and "diff-deletedline" in attribs[0] and self.del_length <= 1000: - self.current_tag = "tdd" - self.last_del = "" - if tagname == "td" and "diff-empty" in attribs[0]: - self.empty = True + if tagname == "td": + classes = class_searcher(attribs).split(' ') + if "diff-addedline" in classes and self.ins_length <= 1000: + self.current_tag = "tda" + self.last_ins = "" + if "diff-deletedline" in classes and self.del_length <= 1000: + self.current_tag = "tdd" + self.last_del = "" + if "diff-empty" in classes: + self.empty = True def handle_data(self, data): data = escape_formatting(data)