Additional work done on the ratelimiting
commit a4462369bb
parent c1831b992b

src/bot.py | 43
--- a/src/bot.py
+++ b/src/bot.py
@@ -12,7 +12,7 @@ from src.argparser import command_line_args
 from src.config import settings
 from src.database import db_cursor
 from src.exceptions import *
-from src.misc import get_paths
+from src.misc import get_paths, get_domain
 from src.msgqueue import messagequeue
 from src.queue_handler import DBHandler
 from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info, essential_feeds
@@ -41,11 +41,11 @@ for wiki in db_cursor.execute('SELECT DISTINCT wiki FROM rcgcdw'):
 # Start queueing logic


-def calculate_delay() -> float:
+def calculate_delay_for_group(group_length: int) -> float:
     """Calculate the delay between fetching each wiki to avoid rate limits"""
     min_delay = 60 / settings["max_requests_per_minute"]
-    if (len(all_wikis) * min_delay) < settings["minimal_cooldown_per_wiki_in_sec"]:
-        return settings["minimal_cooldown_per_wiki_in_sec"] / len(all_wikis)
+    if (group_length * min_delay) < settings["minimal_cooldown_per_wiki_in_sec"]:
+        return settings["minimal_cooldown_per_wiki_in_sec"] / group_length
     else:
         return min_delay

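To see what the new per-group delay does in practice, here is a small standalone sketch of the same formula; the settings values are illustrative assumptions, not the repository's defaults.

# Minimal sketch of the per-group delay logic; the settings values are assumed examples.
settings = {"max_requests_per_minute": 30, "minimal_cooldown_per_wiki_in_sec": 60}

def calculate_delay_for_group(group_length: int) -> float:
    """Calculate the delay between fetching each wiki to avoid rate limits"""
    min_delay = 60 / settings["max_requests_per_minute"]  # 2.0 s between requests
    if (group_length * min_delay) < settings["minimal_cooldown_per_wiki_in_sec"]:
        # Small group: stretch the pass so one sweep over the group takes at least the minimal cooldown.
        return settings["minimal_cooldown_per_wiki_in_sec"] / group_length
    else:
        return min_delay

print(calculate_delay_for_group(10))   # 6.0  -> a pass over 10 wikis takes 60 s
print(calculate_delay_for_group(100))  # 2.0  -> capped by max_requests_per_minute

In other words, small groups are slowed down so each wiki on a domain is polled at most once per minimal_cooldown_per_wiki_in_sec, while large groups fall back to the global max_requests_per_minute pacing.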
@@ -62,15 +62,18 @@ def generate_targets(wiki_url: str) -> defaultdict:
     return combinations


-async def wiki_scanner():
-    """Wiki scanner is spawned as a task which purpose is to continuously run over wikis in the DB, fetching recent changes
-    to add messages based on the changes to message queue later handled by message_sender coroutine."""
-    try:
-        while True:
-            calc_delay = calculate_delay()
-            fetch_all = db_cursor.execute(
-                'SELECT webhook, wiki, lang, display, wikiid, rcid, postid FROM rcgcdw GROUP BY wiki')
+async def generate_domain_groups():  # oh boy, I cannot wait to learn about async generators
+    combinations = defaultdict(list)
+    fetch_all = db_cursor.execute('SELECT webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw GROUP BY wiki')
     for db_wiki in fetch_all.fetchall():
+        combinations[get_domain(db_wiki["wiki"])].append(db_wiki)
+    for item in combinations.values():
+        yield item
+
+
+async def scan_group(group: list):
+    calc_delay = calculate_delay_for_group(len(group))
+    for db_wiki in group:
         logger.debug("Wiki {}".format(db_wiki["wiki"]))
         if db_wiki["wiki"] not in all_wikis:
             logger.info("Registering new wiki locally: {}".format(db_wiki["wiki"]))
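A rough, self-contained sketch of the grouping step introduced above: an async generator that yields one list of wikis per domain. The rows and the domain extraction below are simplified stand-ins; the real code reads the rcgcdw table and uses get_domain() from src/misc.py.

import asyncio
from collections import defaultdict

# Hypothetical stand-ins for rows of the rcgcdw table.
ROWS = [
    {"wiki": "https://minecraft.gamepedia.com/"},
    {"wiki": "https://terraria.gamepedia.com/"},
    {"wiki": "https://some.wiki.fandom.com/"},
]

def get_domain(url: str) -> str:
    # Simplified: keep only the last two labels of the host part.
    return ".".join(url.split("/")[2].split(".")[-2:])

async def generate_domain_groups():
    combinations = defaultdict(list)
    for row in ROWS:
        combinations[get_domain(row["wiki"])].append(row)
    for group in combinations.values():
        yield group  # one list per domain, e.g. all gamepedia.com wikis together

async def main():
    async for group in generate_domain_groups():
        print([row["wiki"] for row in group])

asyncio.run(main())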
@@ -133,7 +136,21 @@ async def wiki_scanner():
                     await formatter_exception_logger(db_wiki["wiki"], change, traceback.format_exc())
         if recent_changes:
             DBHandler.add(db_wiki["wiki"], change["rcid"])
-        await asyncio.sleep(delay=2.0)  # temporary measure until rate limiting is not implemented
+        await asyncio.sleep(delay=calc_delay)
+
+
+async def wiki_scanner():
+    """Wiki scanner is spawned as a task which purpose is to continuously run over wikis in the DB, fetching recent changes
+    to add messages based on the changes to message queue later handled by message_sender coroutine."""
+    try:
+        while True:
+            async for group in generate_domain_groups():
+                asyncio.create_task(scan_group(group))
+
+            fetch_all = db_cursor.execute(
+                'SELECT webhook, wiki, lang, display, wikiid, rcid, postid FROM rcgcdw GROUP BY wiki')
+            for db_wiki in fetch_all.fetchall():
+
                 if db_wiki["wikiid"] is not None:
                     header = settings["header"]
                     header["Accept"] = "application/hal+json"
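A minimal sketch of the scheduling effect of the new control flow: each domain group runs as its own task, so different domains are polled concurrently, while wikis within one group are spaced calc_delay apart. The group contents, the delay value and the simplified signatures below are assumptions for illustration only.

import asyncio
import time

START = time.monotonic()

async def scan_group(domain: str, wikis: list, calc_delay: float):
    # Wikis sharing a domain are fetched one after another, calc_delay apart.
    for wiki in wikis:
        print(f"{time.monotonic() - START:4.1f}s  {domain}: fetch {wiki}")
        await asyncio.sleep(calc_delay)

async def wiki_scanner():
    groups = {  # hypothetical domain groups; the bot builds these from the DB
        "gamepedia.com": ["minecraft", "terraria"],
        "fandom.com": ["villains"],
    }
    # Different domains run concurrently, each as its own task.
    tasks = [asyncio.create_task(scan_group(d, w, calc_delay=1.0)) for d, w in groups.items()]
    await asyncio.gather(*tasks)

asyncio.run(wiki_scanner())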
--- a/src/misc.py
+++ b/src/misc.py
@@ -17,6 +17,12 @@ def get_paths(wiki: str, request) -> tuple:
     return WIKI_API_PATH, WIKI_SCRIPT_PATH, WIKI_ARTICLE_PATH, WIKI_JUST_DOMAIN


+def get_domain(url: str) -> str:
+    """Get domain of given URL"""
+    parsed_url = urlparse(url)
+    return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])  # something like gamepedia.com, fandom.com
+
+
 class LinkParser(HTMLParser):

     new_string = ""
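For reference, the new helper keeps only the last two labels of the wiki's host; a quick standalone check of the expected output (the example URLs are illustrative):

from urllib.parse import urlparse, urlunparse

def get_domain(url: str) -> str:
    """Get domain of given URL"""
    parsed_url = urlparse(url)
    return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])

# urlunparse((scheme, netloc, "", "", "", "")) rebuilds e.g. "https://minecraft.gamepedia.com",
# and splitting on "." keeps the last two labels of that string.
print(get_domain("https://minecraft.gamepedia.com/Creeper"))       # gamepedia.com
print(get_domain("https://leagueoflegends.fandom.com/wiki/Ahri"))  # fandom.com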