From 011f23c35aec210eab0687f1a04227b39a3daab5 Mon Sep 17 00:00:00 2001 From: Frisk Date: Sat, 13 Jul 2024 13:19:54 +0200 Subject: [PATCH] Fixed improper domain name extraction + added dependency --- requirements.txt | 3 ++- src/domain_manager.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9d89520..733093c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ nest-asyncio >= 1.4.0 irc >= 19.0.1 beautifulsoup4>=4.9.3 asyncpg>=0.22.0 -requests>=2.31.0 \ No newline at end of file +requests>=2.31.0 +tldextract>=5.1.2 \ No newline at end of file diff --git a/src/domain_manager.py b/src/domain_manager.py index 76e8ee7..be96404 100644 --- a/src/domain_manager.py +++ b/src/domain_manager.py @@ -11,6 +11,7 @@ from src.irc_feed import AioIRCCat from io import StringIO from contextlib import redirect_stdout from src.wiki import Wiki +import tldextract logger = logging.getLogger("rcgcdb.domain_manager") @@ -102,8 +103,7 @@ class DomainManager: @staticmethod def get_domain(url: str) -> str: """Returns a domain for given URL (for example fandom.com, wikipedia.org)""" - parsed_url = urlparse(url) - return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:]) + return tldextract.extract(url).registered_domain def check_for_domain(self, domain: str): return domain in self.domains