Borrowed code from RcGcDw to fix diff parser

This commit is contained in:
Frisk 2024-07-11 18:35:54 +02:00
parent 5963e2cd59
commit c96488153c
2 changed files with 22 additions and 9 deletions

View file

@ -210,7 +210,7 @@ def shutdown(loop, signal=None):
loop.run_until_complete(main_tasks["message_sender"])
loop.run_until_complete(main_tasks["database_updates"])
for task in asyncio.all_tasks(loop):
logger.debug("Killing task")
logger.debug("Killing task {}".format(task.get_name()))
task.cancel()
try:
loop.run_until_complete(asyncio.gather(*asyncio.all_tasks(loop)))

View file

@ -128,6 +128,17 @@ def profile_field_name(name, embed, _):
return _("unknown")
def class_searcher(attribs: list) -> str:
    """Return the classes of an element seen in HTMLParser's handle_starttag.

    :param attribs: list of ``(name, value)`` attribute pairs for the element
    :returns: the value of the first ``class`` attribute, or an empty string
        when the element has no ``class`` attribute or the attribute carries
        no value
    """
    for attr in attribs:
        if attr[0] == "class":
            # HTMLParser reports a valueless attribute (e.g. ``<td class>``)
            # as ``("class", None)``; normalise that to "" so the declared
            # ``str`` return type holds and callers can safely ``.split()``.
            return attr[1] or ""
    return ""
class ContentParser(HTMLParser):
current_tag = ""
last_ins = None
@ -145,13 +156,15 @@ class ContentParser(HTMLParser):
def handle_starttag(self, tagname, attribs):
if tagname == "ins" or tagname == "del":
self.current_tag = tagname
if tagname == "td" and "diff-addedline" in attribs[0] and self.ins_length <= 1000:
if tagname == "td":
classes = class_searcher(attribs).split(' ')
if "diff-addedline" in classes and self.ins_length <= 1000:
self.current_tag = "tda"
self.last_ins = ""
if tagname == "td" and "diff-deletedline" in attribs[0] and self.del_length <= 1000:
if "diff-deletedline" in classes and self.del_length <= 1000:
self.current_tag = "tdd"
self.last_del = ""
if tagname == "td" and "diff-empty" in attribs[0]:
if "diff-empty" in classes:
self.empty = True
def handle_data(self, data):