mirror of
https://gitlab.com/chicken-riders/RcGcDb.git
synced 2025-02-23 00:54:09 +00:00
Created basic handling of wikis
This commit is contained in:
parent
d7f341d081
commit
1ab0eaa24f
72
src/bot.py
72
src/bot.py
|
@ -9,6 +9,7 @@ from collections import defaultdict
|
|||
import functools
|
||||
import requests
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from src.argparser import command_line_args
|
||||
from src.config import settings
|
||||
from src.database import db_cursor
|
||||
|
@ -38,15 +39,28 @@ mw_msgs: dict = {} # will have the type of id: tuple
|
|||
# Seed the in-memory wiki registry from every distinct wiki in the DB.
# NOTE(review): the cursor yields row objects, so all_wikis appears to be
# keyed by rows rather than plain wiki URL strings here — verify against
# the callers that later look wikis up by db_wiki["wiki"].
for wiki in db_cursor.execute('SELECT DISTINCT wiki FROM rcgcdw'):
	all_wikis[wiki] = Wiki()

# Maximum number of wikis queued per domain group (configurable, default 30).
queue_limit = settings.get("queue_limit", 30)
|
||||
|
||||
class LimitedList(list):
	"""A list that refuses to grow beyond the module-level queue_limit."""

	def __init__(self, *args):
		list.__init__(self, *args)

	def append(self, obj) -> None:
		"""Append obj unless the list is already at queue_limit items.

		Raises:
			ListFull: when the list already holds queue_limit items.
		"""
		if len(self) < queue_limit:
			# Must call list.append directly — the original `self.append(object)`
			# recursed into this very method forever instead of appending.
			list.append(self, obj)
			return
		raise ListFull
|
||||
|
||||
|
||||
class RcQueue:
|
||||
def __init__(self):
	# Domains queued for removal during the next update pass.
	self.to_remove = []
	# Maps domain name -> {"task": asyncio task, "last_rowid": int, "query": queue of DB rows}.
	self.domain_list = {}
|
||||
|
||||
async def start_group(self, group, initial_wikis):
	"""Starts a task for given domain group.

	Args:
		group: domain name (e.g. "fandom.com") used as the key in domain_list.
		initial_wikis: iterable of DB rows used to seed the group's queue.

	Raises:
		KeyError: if a task for this group is already running.
	"""
	if group not in self.domain_list:
		self.domain_list[group] = {"task": asyncio.create_task(scan_group(group)),
		                           "last_rowid": 0,
		                           "query": LimitedList(initial_wikis)}
	else:
		# Informative message instead of a bare KeyError; callers catching
		# KeyError are unaffected.
		raise KeyError("Domain group {} is already being handled".format(group))
|
||||
|
||||
|
@ -54,6 +68,39 @@ class RcQueue:
|
|||
"""Removes a wiki from query of given domain group"""
|
||||
self[group]["query"] # there can be multiple webhooks with
|
||||
|
||||
@asynccontextmanager
async def retrieve_next_queued(self, group):
	"""Yield the first queued DB row of the given domain group, popping it afterwards.

	Yields None (and pops nothing) when the group's queue is empty.
	"""
	try:
		yield self.domain_list[group]["query"][0]
	except IndexError:
		logger.warning("Queue for {} domain group is empty.".format(group))
		yield None
	finally:  # TODO: handle exceptions raised by the consumer of the yielded row
		query = self.domain_list[group]["query"]
		# Guard the pop: the original popped unconditionally, which raised a
		# second IndexError whenever the queue was empty and None was yielded.
		if query:
			query.pop(0)
|
||||
|
||||
async def update_queues(self):
	"""Synchronize per-domain queues with the rcgcdw table.

	Rows not older than a group's last seen ROWID are appended to that
	group's queue; domains without a running task get one via start_group.
	Groups whose queues did not fill up this pass get last_rowid reset to 0
	so the next pass starts from the beginning of the table.
	"""
	fetch_all = db_cursor.execute(
		'SELECT ROWID, webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw WHERE NOT rcid = -1 GROUP BY wiki ORDER BY ROWID')
	self.to_remove = list(all_wikis.keys())  # every wiki is assumed stale until seen below
	full = []
	for db_wiki in fetch_all.fetchall():
		domain = get_domain(db_wiki["wiki"])
		try:
			# The lookup must be INSIDE the try — originally it sat above it,
			# so a missing domain group was never caught by the KeyError handler.
			current_domain = self[domain]
			if not db_wiki["ROWID"] < current_domain["last_rowid"]:
				current_domain["query"].append(db_wiki)
			# to_remove is keyed by wiki (from all_wikis), not by domain —
			# the original removed `domain` here, which always raised ValueError.
			if db_wiki["wiki"] in self.to_remove:
				self.to_remove.remove(db_wiki["wiki"])
		except KeyError:
			# Wrap the single row in a list — start_group seeds its queue from
			# an iterable of rows, not from one row's columns.
			await self.start_group(domain, [db_wiki])
			logger.info("A new domain group has been added since last time, adding it to the domain_list and starting a task...")
		except ListFull:
			full.append(domain)
			current_domain["last_rowid"] = db_wiki["ROWID"]  # resume from this row next pass
			continue
	# .items() is required — iterating the bare dict yields keys only, so the
	# original two-name unpacking raised ValueError (or mis-split the key).
	for group, data in self.domain_list.items():
		if group not in full:
			# Use the loop's own entry; the original indexed the literal
			# string "domain" instead of the current group.
			data["last_rowid"] = 0  # iter reached the end without being stuck on full list
|
||||
|
||||
def __getitem__(self, item):
|
||||
"""Returns the query of given domain group"""
|
||||
|
@ -90,24 +137,24 @@ def generate_targets(wiki_url: str) -> defaultdict:
|
|||
return combinations
|
||||
|
||||
|
||||
async def generate_domain_groups():
	"""Generate a list of wikis per domain (fandom.com, wikipedia.org etc.)

	Yields:
		(domain, rows) pairs, where rows is the list of active DB rows
		(rcid != -1) whose wiki belongs to that domain.

	Side effect: registers every seen wiki in the module-level all_wikis.
	"""
	combinations = defaultdict(list)
	fetch_all = db_cursor.execute('SELECT ROWID, webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw WHERE NOT rcid = -1 GROUP BY wiki ORDER BY ROWID ASC')
	for db_wiki in fetch_all.fetchall():
		combinations[get_domain(db_wiki["wiki"])].append(db_wiki)
		all_wikis[db_wiki["wiki"]] = Wiki()  # populate all_wikis
	for group, db_wikis in combinations.items():
		yield group, db_wikis
|
||||
|
||||
|
||||
async def scan_group(group: list):
|
||||
async def scan_group(group: str):
|
||||
while True:
|
||||
calc_delay = calculate_delay_for_group(len(rcqueue[group]))
|
||||
rcqueue[group]
|
||||
async with rcqueue.retrieve_next_queued(group) as db_wiki: # acquire next wiki in queue
|
||||
if db_wiki is None:
|
||||
raise QueueEmpty
|
||||
logger.debug("Wiki {}".format(db_wiki["wiki"]))
|
||||
if db_wiki["wiki"] not in all_wikis:
|
||||
logger.info("Registering new wiki locally: {}".format(db_wiki["wiki"]))
|
||||
all_wikis[db_wiki["wiki"]] = Wiki()
|
||||
local_wiki = all_wikis[db_wiki["wiki"]] # set a reference to a wiki object from memory
|
||||
if db_wiki["rcid"] != -1:
|
||||
extended = False
|
||||
|
@ -174,12 +221,11 @@ async def wiki_scanner():
|
|||
"""Wiki scanner is spawned as a task which purpose is to continuously run over wikis in the DB, fetching recent changes
|
||||
to add messages based on the changes to message queue later handled by message_sender coroutine."""
|
||||
try:
|
||||
async for group, db_wikis in generate_domain_groups():
|
||||
await rcqueue.start_group(group)
|
||||
rcqueue[group]["query"] = db_wikis # __SETITEM__ MIGHT BE BAD FOR NESTED, SEE IF CRASHES
|
||||
async for group, db_wikis in generate_domain_groups(): # First scan
|
||||
await rcqueue.start_group(group, db_wikis)
|
||||
while True:
|
||||
await asyncio.sleep(20.0)
|
||||
|
||||
await rcqueue.update_queues()
|
||||
|
||||
|
||||
if db_wiki["wikiid"] is not None:
|
||||
|
|
|
@ -15,3 +15,9 @@ class WikiUnauthorizedError(Exception):
|
|||
|
||||
class OtherWikiError(Exception):
	"""Raised for wiki errors not covered by a more specific exception type.

	NOTE(review): no raise site is visible in this chunk — confirm usage.
	"""
	pass
|
||||
|
||||
class QueueEmpty(Exception):
	"""Raised when a domain group's queue has no wiki rows left to scan."""
	pass
|
||||
|
||||
class ListFull(Exception):
	"""Raised by LimitedList.append when the list already holds queue_limit items."""
	pass
|
Loading…
Reference in a new issue