Fixed improper domain name extraction + added dependency

This commit is contained in:
Frisk 2024-07-13 13:19:54 +02:00
parent 1e970337a8
commit 011f23c35a
2 changed files with 4 additions and 3 deletions

View file

@@ -6,3 +6,4 @@ irc >= 19.0.1
beautifulsoup4>=4.9.3 beautifulsoup4>=4.9.3
asyncpg>=0.22.0 asyncpg>=0.22.0
requests>=2.31.0 requests>=2.31.0
tldextract>=5.1.2

View file

@@ -11,6 +11,7 @@ from src.irc_feed import AioIRCCat
from io import StringIO from io import StringIO
from contextlib import redirect_stdout from contextlib import redirect_stdout
from src.wiki import Wiki from src.wiki import Wiki
import tldextract
logger = logging.getLogger("rcgcdb.domain_manager") logger = logging.getLogger("rcgcdb.domain_manager")
@@ -102,8 +103,7 @@ class DomainManager:
@staticmethod @staticmethod
def get_domain(url: str) -> str: def get_domain(url: str) -> str:
"""Returns a domain for given URL (for example fandom.com, wikipedia.org)""" """Returns a domain for given URL (for example fandom.com, wikipedia.org)"""
parsed_url = urlparse(url) return tldextract.extract(url).registered_domain
return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])
def check_for_domain(self, domain: str): def check_for_domain(self, domain: str):
return domain in self.domains return domain in self.domains