Added removal handling; should now be mostly OK, except for exception handling

Frisk 2020-08-05 19:20:38 +02:00
parent 1ab0eaa24f
commit 293947c510
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC


@@ -36,9 +36,6 @@ mw_msgs: dict = {}  # will have the type of id: tuple
 # Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
 # 2. Easier to code
-for wiki in db_cursor.execute('SELECT DISTINCT wiki FROM rcgcdw'):
-    all_wikis[wiki] = Wiki()
 queue_limit = settings.get("queue_limit", 30)
 class LimitedList(list):
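queue_limit and LimitedList are untouched here; only the eager pre-population of all_wikis at import time is dropped (it now happens while the domain groups are generated, see the later hunks). For context, a minimal sketch of how such a capped list can behave, assuming it simply refuses to grow past queue_limit and signals that with an exception; the ListFull name and this exact behaviour are assumptions, not taken from this diff:

settings = {"queue_limit": 30}              # stand-in for the loaded settings file
queue_limit = settings.get("queue_limit", 30)

class ListFull(Exception):                  # assumed name for the "queue is full" signal
    pass

class LimitedList(list):
    def append(self, obj) -> None:
        if len(self) < queue_limit:
            return super().append(obj)      # normal append while under the cap
        raise ListFull                      # refuse to queue more wikis for this domain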
@@ -64,12 +61,17 @@ class RcQueue:
         else:
             raise KeyError
-    async def remove_wiki_from_group(self, group, wiki):
+    async def remove_wiki_from_group(self, wiki):
         """Removes a wiki from query of given domain group"""
-        self[group]["query"] # there can be multiple webhooks with
+        group = get_domain(wiki)
+        self[group]["query"] = [x for x in self[group]["query"] if x["wiki"] == wiki]
+        if not self[group]["query"]:  # if there is no wiki left in the queue, get rid of the task
+            self[group]["task"].cancel()
+            del self.domain_list[group]
     @asynccontextmanager
     async def retrieve_next_queued(self, group):
+        """Retrives next wiki in the queue for given domain"""
         try:
             yield self.domain_list[group]["query"][0]
         except IndexError:
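remove_wiki_from_group now derives the domain group from the wiki URL, filters that group's query, and tears the whole group down once nothing is left in it. A minimal standalone sketch of that removal pattern, assuming a plain dict keyed by domain with a "query" list and a "task" per group (get_domain is simplified here); note that a filter which drops a wiki keeps the rows whose wiki differs from the one being removed:

from urllib.parse import urlparse

domain_list: dict = {}            # domain -> {"query": [...], "task": asyncio.Task}

def get_domain(url: str) -> str:  # simplified stand-in for the real helper
    return urlparse(url).netloc

async def remove_wiki_from_group(wiki: str) -> None:
    group = get_domain(wiki)
    entry = domain_list[group]
    # keep every queued row that belongs to a different wiki
    entry["query"] = [row for row in entry["query"] if row["wiki"] != wiki]
    if not entry["query"]:        # no wiki left for this domain
        entry["task"].cancel()    # stop the group's scanning task
        del domain_list[group]    # and forget the group entirely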
@@ -79,9 +81,10 @@ class RcQueue:
             self.domain_list[group]["query"].pop(0)
     async def update_queues(self):
+        """Makes a round on rcgcdw DB and looks for updates to the queues in self.domain_list"""
         fetch_all = db_cursor.execute(
             'SELECT ROWID, webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw WHERE NOT rcid = -1 GROUP BY wiki ORDER BY ROWID')
-        self.to_remove = list(all_wikis.keys())
+        self.to_remove = list(all_wikis.keys())  # first populate this list and remove wikis that are still in the db, clean up the rest
         full = []
         for db_wiki in fetch_all.fetchall():
             domain = get_domain(db_wiki["wiki"])
@@ -89,7 +92,7 @@ class RcQueue:
             try:
                 if not db_wiki["ROWID"] < current_domain["last_rowid"]:
                     current_domain["query"].append(db_wiki)
-                self.to_remove.remove(domain)
+                self.to_remove.remove(db_wiki["wiki"])
             except KeyError:
                 await self.start_group(domain, db_wiki)
                 logger.info("A new domain group has been added since last time, adding it to the domain_list and starting a task...")
@@ -97,11 +100,13 @@ class RcQueue:
                 full.append(domain)
                 current_domain["last_rowid"] = db_wiki["ROWID"]
                 continue
+        for wiki in self.to_remove:
+            del all_wikis[wiki]
+            await self.remove_wiki_from_group(wiki)
         for group, data in self.domain_list:
             if group not in full:
                 self["domain"]["last_rowid"] = 0  # iter reached the end without being stuck on full list
     def __getitem__(self, item):
         """Returns the query of given domain group"""
         return self.domain_list[item]
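update_queues now does a simple mark-and-sweep: to_remove starts out as every wiki currently known, each row still present in the rcgcdw table unmarks its wiki, and whatever is left afterwards gets dropped from all_wikis and from its domain group. A condensed, self-contained sketch of that pattern (the registry and the removal coroutine are passed in explicitly here so the snippet stands on its own):

from typing import Awaitable, Callable, Dict, Iterable

async def sweep_removed_wikis(all_wikis: Dict[str, object],
                              wikis_in_db: Iterable[str],
                              remove_wiki_from_group: Callable[[str], Awaitable[None]]) -> None:
    to_remove = list(all_wikis.keys())        # mark: assume every known wiki is gone
    for wiki in wikis_in_db:
        if wiki in to_remove:
            to_remove.remove(wiki)            # unmark wikis the database still lists
    for wiki in to_remove:                    # sweep: the rest were deleted from the DB
        del all_wikis[wiki]
        await remove_wiki_from_group(wiki)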
@@ -139,18 +144,17 @@ def generate_targets(wiki_url: str) -> defaultdict:
 async def generate_domain_groups():
     """Generate a list of wikis per domain (fandom.com, wikipedia.org etc.)"""
-    combinations = defaultdict(list)
+    domain_wikis = defaultdict(list)
     fetch_all = db_cursor.execute('SELECT ROWID, webhook, wiki, lang, display, wikiid, rcid FROM rcgcdw WHERE NOT rcid = -1 GROUP BY wiki ORDER BY ROWID ASC')
     for db_wiki in fetch_all.fetchall():
-        combinations[get_domain(db_wiki["wiki"])].append(db_wiki)
+        domain_wikis[get_domain(db_wiki["wiki"])].append(db_wiki)
         all_wikis[db_wiki["wiki"]] = Wiki()  # populate all_wikis
-    for group, db_wikis in combinations.items():
+    for group, db_wikis in domain_wikis.items():
         yield group, db_wikis
 async def scan_group(group: str):
     while True:
-        calc_delay = calculate_delay_for_group(len(rcqueue[group]))
         async with rcqueue.retrieve_next_queued(group) as db_wiki:  # acquire next wiki in queue
             if db_wiki is None:
                 raise QueueEmpty
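generate_domain_groups buckets every subscription row by its domain with a defaultdict and yields one (domain, rows) pair per group, registering each wiki in all_wikis along the way; scan_group then paces its requests within a group. A small sketch of the grouping step with get_domain simplified and rows reduced to plain dicts, plus a purely illustrative delay helper (the real calculate_delay_for_group formula is not shown in this diff):

from collections import defaultdict
from typing import AsyncGenerator, Dict, List, Tuple
from urllib.parse import urlparse

def get_domain(url: str) -> str:              # simplified stand-in for the real helper
    return urlparse(url).netloc

async def generate_domain_groups(rows: List[Dict]) -> AsyncGenerator[Tuple[str, List[Dict]], None]:
    domain_wikis: Dict[str, List[Dict]] = defaultdict(list)
    for row in rows:                          # one row per wiki subscription
        domain_wikis[get_domain(row["wiki"])].append(row)
    for group, db_wikis in domain_wikis.items():
        yield group, db_wikis                 # e.g. ("fandom.com", [row, row, ...])

def calculate_delay_for_group(group_size: int) -> float:
    # illustrative only: poll each wiki in a group no more often than once per
    # min_cooldown seconds, so the gap between requests shrinks as the group grows
    min_cooldown_per_wiki = 60.0              # assumed knob, not taken from this diff
    return min_cooldown_per_wiki / max(group_size, 1)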
@@ -226,59 +230,59 @@ async def wiki_scanner():
         while True:
             await asyncio.sleep(20.0)
             await rcqueue.update_queues()
+#
+#
-            if db_wiki["wikiid"] is not None:
+            # if db_wiki["wikiid"] is not None:
-                header = settings["header"]
+                # header = settings["header"]
-                header["Accept"] = "application/hal+json"
+                # header["Accept"] = "application/hal+json"
-                async with aiohttp.ClientSession(headers=header,
+                # async with aiohttp.ClientSession(headers=header,
-                    timeout=aiohttp.ClientTimeout(3.0)) as session:
+                    # timeout=aiohttp.ClientTimeout(3.0)) as session:
-                    try:
+                    # try:
-                        feeds_response = await local_wiki.fetch_feeds(db_wiki["wikiid"], session)
+                        # feeds_response = await local_wiki.fetch_feeds(db_wiki["wikiid"], session)
-                    except (WikiServerError, WikiError):
+                    # except (WikiServerError, WikiError):
-                        logger.error("Exeption when fetching the wiki")
+                        # logger.error("Exeption when fetching the wiki")
-                        continue  # ignore this wiki if it throws errors
+                        # continue  # ignore this wiki if it throws errors
-                    try:
+                    # try:
-                        discussion_feed_resp = await feeds_response.json(encoding="UTF-8")
+                        # discussion_feed_resp = await feeds_response.json(encoding="UTF-8")
-                        if "title" in discussion_feed_resp:
+                        # if "title" in discussion_feed_resp:
-                            error = discussion_feed_resp["error"]
+                            # error = discussion_feed_resp["error"]
-                            if error == "site doesn't exists":
+                            # if error == "site doesn't exists":
-                                db_cursor.execute("UPDATE rcgcdw SET wikiid = ? WHERE wiki = ?",
+                                # db_cursor.execute("UPDATE rcgcdw SET wikiid = ? WHERE wiki = ?",
-                                    (None, db_wiki["wiki"],))
+                                    # (None, db_wiki["wiki"],))
-                                DBHandler.update_db()
+                                # DBHandler.update_db()
-                                continue
+                                # continue
-                            raise WikiError
+                            # raise WikiError
-                        discussion_feed = discussion_feed_resp["_embedded"]["doc:posts"]
+                        # discussion_feed = discussion_feed_resp["_embedded"]["doc:posts"]
-                        discussion_feed.reverse()
+                        # discussion_feed.reverse()
-                    except aiohttp.ContentTypeError:
+                    # except aiohttp.ContentTypeError:
-                        logger.exception("Wiki seems to be resulting in non-json content.")
+                        # logger.exception("Wiki seems to be resulting in non-json content.")
-                        continue
+                        # continue
-                    except:
+                    # except:
-                        logger.exception("On loading json of response.")
+                        # logger.exception("On loading json of response.")
-                        continue
+                        # continue
-                    if db_wiki["postid"] is None:  # new wiki, just get the last post to not spam the channel
+                    # if db_wiki["postid"] is None:  # new wiki, just get the last post to not spam the channel
-                        if len(discussion_feed) > 0:
+                        # if len(discussion_feed) > 0:
-                            DBHandler.add(db_wiki["wiki"], discussion_feed[-1]["id"], True)
+                            # DBHandler.add(db_wiki["wiki"], discussion_feed[-1]["id"], True)
-                        else:
+                        # else:
-                            DBHandler.add(db_wiki["wiki"], "0", True)
+                            # DBHandler.add(db_wiki["wiki"], "0", True)
-                        DBHandler.update_db()
+                        # DBHandler.update_db()
-                        continue
+                        # continue
-                    targets = generate_targets(db_wiki["wiki"])
+                    # targets = generate_targets(db_wiki["wiki"])
-                    for post in discussion_feed:
+                    # for post in discussion_feed:
-                        if post["id"] > db_wiki["postid"]:
+                        # if post["id"] > db_wiki["postid"]:
-                            for target in targets.items():
+                            # for target in targets.items():
-                                try:
+                                # try:
-                                    await essential_feeds(post, db_wiki, target)
+                                    # await essential_feeds(post, db_wiki, target)
-                                except:
+                                # except:
-                                    if command_line_args.debug:
+                                    # if command_line_args.debug:
-                                        raise  # reraise the issue
+                                        # raise  # reraise the issue
-                                    else:
+                                    # else:
-                                        logger.exception("Exception on Feeds formatter")
+                                        # logger.exception("Exception on Feeds formatter")
-                                        await formatter_exception_logger(db_wiki["wiki"], post, traceback.format_exc())
+                                        # await formatter_exception_logger(db_wiki["wiki"], post, traceback.format_exc())
-                    if discussion_feed:
+                    # if discussion_feed:
-                        DBHandler.add(db_wiki["wiki"], post["id"], True)
+                        # DBHandler.add(db_wiki["wiki"], post["id"], True)
-            await asyncio.sleep(delay=calc_delay)
+            # await asyncio.sleep(delay=calc_delay)
-            DBHandler.update_db()
+            # DBHandler.update_db()
     except asyncio.CancelledError:
         raise
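With the discussions block disabled, what survives of this loop is the periodic maintenance pass: sleep, reconcile the queues against the database, and let cancellation bubble up so shutdown stays clean. A minimal sketch of that pattern, assuming an rcqueue object exposing update_queues like the RcQueue above:

import asyncio

async def queue_maintenance(rcqueue) -> None:
    try:
        while True:
            await asyncio.sleep(20.0)          # same cadence as wiki_scanner
            await rcqueue.update_queues()      # re-read rcgcdw and add/remove wikis
    except asyncio.CancelledError:
        raise                                  # propagate so the task shuts down cleanly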