Mirror of https://gitlab.com/chicken-riders/RcGcDb.git, synced 2025-02-23 00:54:09 +00:00
Added removal handling, should be now mostly ok, with exception to exception handling
This commit is contained in:
parent 1ab0eaa24f
commit 293947c510
src/bot.py (134 lines changed)
@@ -36,9 +36,6 @@ mw_msgs: dict = {} # will have the type of id: tuple
 # Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
 # 2. Easier to code
 
-for wiki in db_cursor.execute('SELECT DISTINCT wiki FROM rcgcdw'):
-	all_wikis[wiki] = Wiki()
-
 queue_limit = settings.get("queue_limit", 30)
 
 class LimitedList(list):
@@ -64,12 +61,17 @@ class RcQueue:
 		else:
 			raise KeyError
 
-	async def remove_wiki_from_group(self, group, wiki):
+	async def remove_wiki_from_group(self, wiki):
 		"""Removes a wiki from query of given domain group"""
-		self[group]["query"] # there can be multiple webhooks with
+		group = get_domain(wiki)
+		self[group]["query"] = [x for x in self[group]["query"] if x["wiki"] == wiki]
+		if not self[group]["query"]: # if there is no wiki left in the queue, get rid of the task
+			self[group]["task"].cancel()
+			del self.domain_list[group]
 
 	@asynccontextmanager
 	async def retrieve_next_queued(self, group):
+		"""Retrives next wiki in the queue for given domain"""
 		try:
 			yield self.domain_list[group]["query"][0]
 		except IndexError:
@@ -79,9 +81,10 @@ class RcQueue:
 			self.domain_list[group]["query"].pop(0)
 
 	async def update_queues(self):
+		"""Makes a round on rcgcdw DB and looks for updates to the queues in self.domain_list"""
 		fetch_all = db_cursor.execute(
 			'SELECT ROWID, webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw WHERE NOT rcid = -1 GROUP BY wiki ORDER BY ROWID')
-		self.to_remove = list(all_wikis.keys())
+		self.to_remove = list(all_wikis.keys()) # first populate this list and remove wikis that are still in the db, clean up the rest
 		full = []
 		for db_wiki in fetch_all.fetchall():
 			domain = get_domain(db_wiki["wiki"])
@@ -89,7 +92,7 @@ class RcQueue:
 			try:
 				if not db_wiki["ROWID"] < current_domain["last_rowid"]:
 					current_domain["query"].append(db_wiki)
-				self.to_remove.remove(domain)
+				self.to_remove.remove(db_wiki["wiki"])
 			except KeyError:
 				await self.start_group(domain, db_wiki)
 				logger.info("A new domain group has been added since last time, adding it to the domain_list and starting a task...")
@@ -97,11 +100,13 @@ class RcQueue:
 					full.append(domain)
 				current_domain["last_rowid"] = db_wiki["ROWID"]
 				continue
+		for wiki in self.to_remove:
+			del all_wikis[wiki]
+			await self.remove_wiki_from_group(wiki)
 		for group, data in self.domain_list:
 			if group not in full:
 				self["domain"]["last_rowid"] = 0 # iter reached the end without being stuck on full list
-
 
 	def __getitem__(self, item):
 		"""Returns the query of given domain group"""
 		return self.domain_list[item]
@@ -139,18 +144,17 @@ def generate_targets(wiki_url: str) -> defaultdict:
 
 async def generate_domain_groups():
 	"""Generate a list of wikis per domain (fandom.com, wikipedia.org etc.)"""
-	combinations = defaultdict(list)
+	domain_wikis = defaultdict(list)
 	fetch_all = db_cursor.execute('SELECT ROWID, webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw WHERE NOT rcid = -1 GROUP BY wiki ORDER BY ROWID ASC')
 	for db_wiki in fetch_all.fetchall():
-		combinations[get_domain(db_wiki["wiki"])].append(db_wiki)
+		domain_wikis[get_domain(db_wiki["wiki"])].append(db_wiki)
 		all_wikis[db_wiki["wiki"]] = Wiki() # populate all_wikis
-	for group, db_wikis in combinations.items():
+	for group, db_wikis in domain_wikis.items():
 		yield group, db_wikis
 
 
 async def scan_group(group: str):
 	while True:
-		calc_delay = calculate_delay_for_group(len(rcqueue[group]))
 		async with rcqueue.retrieve_next_queued(group) as db_wiki: # acquire next wiki in queue
 			if db_wiki is None:
 				raise QueueEmpty
@@ -226,59 +230,59 @@ async def wiki_scanner():
 		while True:
 			await asyncio.sleep(20.0)
 			await rcqueue.update_queues()
-
-
-			if db_wiki["wikiid"] is not None:
-				header = settings["header"]
-				header["Accept"] = "application/hal+json"
-				async with aiohttp.ClientSession(headers=header,
-						timeout=aiohttp.ClientTimeout(3.0)) as session:
-					try:
-						feeds_response = await local_wiki.fetch_feeds(db_wiki["wikiid"], session)
-					except (WikiServerError, WikiError):
-						logger.error("Exeption when fetching the wiki")
-						continue # ignore this wiki if it throws errors
-				try:
-					discussion_feed_resp = await feeds_response.json(encoding="UTF-8")
-					if "title" in discussion_feed_resp:
-						error = discussion_feed_resp["error"]
-						if error == "site doesn't exists":
-							db_cursor.execute("UPDATE rcgcdw SET wikiid = ? WHERE wiki = ?",
-									(None, db_wiki["wiki"],))
-							DBHandler.update_db()
-							continue
-						raise WikiError
-					discussion_feed = discussion_feed_resp["_embedded"]["doc:posts"]
-					discussion_feed.reverse()
-				except aiohttp.ContentTypeError:
-					logger.exception("Wiki seems to be resulting in non-json content.")
-					continue
-				except:
-					logger.exception("On loading json of response.")
-					continue
-				if db_wiki["postid"] is None: # new wiki, just get the last post to not spam the channel
-					if len(discussion_feed) > 0:
-						DBHandler.add(db_wiki["wiki"], discussion_feed[-1]["id"], True)
-					else:
-						DBHandler.add(db_wiki["wiki"], "0", True)
-					DBHandler.update_db()
-					continue
-				targets = generate_targets(db_wiki["wiki"])
-				for post in discussion_feed:
-					if post["id"] > db_wiki["postid"]:
-						for target in targets.items():
-							try:
-								await essential_feeds(post, db_wiki, target)
-							except:
-								if command_line_args.debug:
-									raise # reraise the issue
-								else:
-									logger.exception("Exception on Feeds formatter")
-									await formatter_exception_logger(db_wiki["wiki"], post, traceback.format_exc())
-				if discussion_feed:
-					DBHandler.add(db_wiki["wiki"], post["id"], True)
-			await asyncio.sleep(delay=calc_delay)
-			DBHandler.update_db()
+			#
+			#
+			# if db_wiki["wikiid"] is not None:
+			# 	header = settings["header"]
+			# 	header["Accept"] = "application/hal+json"
+			# 	async with aiohttp.ClientSession(headers=header,
+			# 			timeout=aiohttp.ClientTimeout(3.0)) as session:
+			# 		try:
+			# 			feeds_response = await local_wiki.fetch_feeds(db_wiki["wikiid"], session)
+			# 		except (WikiServerError, WikiError):
+			# 			logger.error("Exeption when fetching the wiki")
+			# 			continue # ignore this wiki if it throws errors
+			# 	try:
+			# 		discussion_feed_resp = await feeds_response.json(encoding="UTF-8")
+			# 		if "title" in discussion_feed_resp:
+			# 			error = discussion_feed_resp["error"]
+			# 			if error == "site doesn't exists":
+			# 				db_cursor.execute("UPDATE rcgcdw SET wikiid = ? WHERE wiki = ?",
+			# 						(None, db_wiki["wiki"],))
+			# 				DBHandler.update_db()
+			# 				continue
+			# 			raise WikiError
+			# 		discussion_feed = discussion_feed_resp["_embedded"]["doc:posts"]
+			# 		discussion_feed.reverse()
+			# 	except aiohttp.ContentTypeError:
+			# 		logger.exception("Wiki seems to be resulting in non-json content.")
+			# 		continue
+			# 	except:
+			# 		logger.exception("On loading json of response.")
+			# 		continue
+			# 	if db_wiki["postid"] is None: # new wiki, just get the last post to not spam the channel
+			# 		if len(discussion_feed) > 0:
+			# 			DBHandler.add(db_wiki["wiki"], discussion_feed[-1]["id"], True)
+			# 		else:
+			# 			DBHandler.add(db_wiki["wiki"], "0", True)
+			# 		DBHandler.update_db()
+			# 		continue
+			# 	targets = generate_targets(db_wiki["wiki"])
+			# 	for post in discussion_feed:
+			# 		if post["id"] > db_wiki["postid"]:
+			# 			for target in targets.items():
+			# 				try:
+			# 					await essential_feeds(post, db_wiki, target)
+			# 				except:
+			# 					if command_line_args.debug:
+			# 						raise # reraise the issue
+			# 					else:
+			# 						logger.exception("Exception on Feeds formatter")
+			# 						await formatter_exception_logger(db_wiki["wiki"], post, traceback.format_exc())
+			# 	if discussion_feed:
+			# 		DBHandler.add(db_wiki["wiki"], post["id"], True)
+			# await asyncio.sleep(delay=calc_delay)
+			# DBHandler.update_db()
 	except asyncio.CancelledError:
 		raise
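
For context on what the new code above does, here is a minimal, self-contained sketch of the removal flow this commit introduces. It is not the bot's actual implementation: SimpleRcQueue, its poll() placeholder, the hostname-based get_domain() and the example URLs are simplified stand-ins, and the database query is replaced by a plain list of wiki URLs passed into update_queues().

import asyncio
from urllib.parse import urlparse


def get_domain(url):
	# stand-in for the bot's get_domain(); here a "domain" is just the hostname
	return urlparse(url).netloc


class SimpleRcQueue:
	"""Toy model of RcQueue: one query list and one polling task per domain."""

	def __init__(self):
		self.domain_list = {}  # domain -> {"query": [wiki url, ...], "task": asyncio.Task}

	def add_wiki(self, wiki):
		domain = get_domain(wiki)
		if domain not in self.domain_list:
			self.domain_list[domain] = {"query": [], "task": asyncio.create_task(self.poll(domain))}
		self.domain_list[domain]["query"].append(wiki)

	async def poll(self, domain):
		while True:  # placeholder for the real per-domain scan loop
			await asyncio.sleep(3600)

	async def remove_wiki_from_group(self, wiki):
		# drop the wiki from its domain queue; when the queue runs dry,
		# cancel the domain task and forget the group entirely
		domain = get_domain(wiki)
		group = self.domain_list[domain]
		group["query"] = [w for w in group["query"] if w != wiki]
		if not group["query"]:
			group["task"].cancel()
			del self.domain_list[domain]

	async def update_queues(self, wikis_in_db, all_wikis):
		# start by assuming every known wiki is gone, un-mark the ones the
		# database still lists, then clean up whatever is left over
		to_remove = list(all_wikis.keys())
		for wiki in wikis_in_db:
			if wiki in to_remove:
				to_remove.remove(wiki)
		for wiki in to_remove:
			del all_wikis[wiki]
			await self.remove_wiki_from_group(wiki)


async def main():
	queue = SimpleRcQueue()
	all_wikis = {}
	for wiki in ("https://a.example.com/", "https://b.example.com/"):
		queue.add_wiki(wiki)
		all_wikis[wiki] = object()  # stand-in for Wiki()
	# pretend b.example.com was deleted from the rcgcdw table since the last pass
	await queue.update_queues(["https://a.example.com/"], all_wikis)
	print(sorted(all_wikis))          # ['https://a.example.com/']
	print(sorted(queue.domain_list))  # ['a.example.com']


asyncio.run(main())

The behaviour mirrored from the diff is that update_queues() starts from the full set of known wikis, keeps only the ones the database no longer lists, and remove_wiki_from_group() cancels a domain's task once its queue is empty, so domains with no remaining wikis stop being polled.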