continued work

This commit is contained in:
Frisk 2021-05-30 19:15:37 +02:00
parent 5caed4d438
commit 04f45c33e9
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
4 changed files with 66 additions and 24 deletions

View file

@ -41,7 +41,7 @@ main_tasks: dict = {}
# Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests # Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
# 2. Easier to code # 2. Easier to code
async def populate_allwikis(): async def populate_wikis():
async with db.pool().acquire() as connection: async with db.pool().acquire() as connection:
async with connection.transaction(): async with connection.transaction():
async for db_wiki in connection.cursor('SELECT DISTINCT wiki, rcid, postid FROM rcgcdw'): async for db_wiki in connection.cursor('SELECT DISTINCT wiki, rcid, postid FROM rcgcdw'):
@ -554,7 +554,7 @@ async def main_loop():
nest_asyncio.apply(loop) nest_asyncio.apply(loop)
await db.setup_connection() await db.setup_connection()
logger.debug("Connection type: {}".format(db.connection)) logger.debug("Connection type: {}".format(db.connection))
await populate_allwikis() await populate_wikis()
try: try:
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT) signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
for s in signals: for s in signals:

View file

@ -1,22 +1,24 @@
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import logging
from collections import OrderedDict from collections import OrderedDict
from src.config import settings
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
logger = logging.getLogger("rcgcdb.domain")
if TYPE_CHECKING: if TYPE_CHECKING:
import src.wiki import src.wiki
import src.wiki_ratelimiter import src.wiki_ratelimiter
import irc.client_aio import src.irc_feed
class Domain: class Domain:
def __init__(self, name: str): def __init__(self, name: str):
self.name = name # This should be always in format of topname.extension for example fandom.com self.name = name # This should be always in format of topname.extension for example fandom.com
self.task: asyncio.Task = self.create_task() self.task: Optional[asyncio.Task] = None
self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict() self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter() self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter()
self.irc = None self.irc: Optional[src.irc_feed.AioIRCCat] = None
def __iter__(self): def __iter__(self):
return iter(self.wikis) return iter(self.wikis)
@ -24,9 +26,18 @@ class Domain:
def __getitem__(self, item): def __getitem__(self, item):
return return
def set_irc(self, irc_client: irc.client_aio.AioSimpleIRCClient): def __len__(self):
return len(self.wikis)
def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]:
return self.wikis.get(item, default)
def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
self.irc = irc_client self.irc = irc_client
def run_domain(self):
self.task = asyncio.create_task(self.run_wiki_check())
def add_wiki(self, wiki: src.wiki.Wiki, first=False): def add_wiki(self, wiki: src.wiki.Wiki, first=False):
"""Adds a wiki to domain list. """Adds a wiki to domain list.
@ -36,8 +47,39 @@ class Domain:
if first: if first:
self.wikis.move_to_end(wiki.script_url, last=False) self.wikis.move_to_end(wiki.script_url, last=False)
def create_task(self) -> asyncio.Task: async def run_wiki_scan(self, wiki: src.wiki.Wiki):
return asyncio.create_task(self.run_wiki_check()) await self.rate_limiter.timeout_wait()
await wiki.scan()
self.wikis.move_to_end(wiki.script_url)
self.rate_limiter.timeout_add(1.0)
async def irc_scheduler(self):
while 1:
try:
wiki_url = self.irc.updated_wikis.pop()
except KeyError:
break
try:
wiki = self.wikis[wiki_url]
except KeyError:
logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
continue
await self.run_wiki_scan(wiki)
for wiki in self.wikis.values():
if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600):
await self.run_wiki_scan(wiki)
else:
return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
async def regular_scheduler(self):
while 1:
additional_time = max((-25*len(self))+150, 0)
async def run_wiki_check(self): async def run_wiki_check(self):
raise NotImplementedError if self.irc:
while:
await self.irc_scheduler()
else:
await self.regular_scheduler()

View file

@ -31,7 +31,6 @@ class DomainManager:
return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:]) return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])
async def new_domain(self, name: str) -> Domain: async def new_domain(self, name: str) -> Domain:
irc = None
domain_object = Domain(name) domain_object = Domain(name)
for irc_server in settings["irc_servers"].keys(): for irc_server in settings["irc_servers"].keys():
if name in settings["irc_servers"][irc_server]["domains"]: if name in settings["irc_servers"][irc_server]["domains"]:

View file

@ -1,8 +1,12 @@
from __future__ import annotations from __future__ import annotations
import asyncio
import types
import irc.client_aio import irc.client_aio
import json import json
import logging import logging
from typing import TYPE_CHECKING from typing import TYPE_CHECKING, Callable
from urllib.parse import urlparse, quote from urllib.parse import urlparse, quote
logger = logging.getLogger("rcgcdw.irc_feed") logger = logging.getLogger("rcgcdw.irc_feed")
@ -15,11 +19,11 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
super().connect(*args, **kwargs) super().connect(*args, **kwargs)
self.connection_details = (args, kwargs) self.connection_details = (args, kwargs)
def __init__(self, targets: dict[str, str], domain_object: Domain): def __init__(self, targets: dict[str, str], domain_object: Domain, rc_callback: Callable, discussion_callback: Callable):
irc.client_aio.SimpleIRCClient.__init__(self) irc.client_aio.SimpleIRCClient.__init__(self)
self.targets = targets self.targets = targets
self.updated = set() # Storage for edited wikis self.updated_wikis: set[str] = set()
self.updated_discussions = set() self.discussion_callback = discussion_callback
self.domain = domain_object self.domain = domain_object
self.connection.buffer_class.errors = "replace" # Ignore encoding errors self.connection.buffer_class.errors = "replace" # Ignore encoding errors
self.connection_details = None self.connection_details = None
@ -50,12 +54,10 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
# print(message) # print(message)
url = urlparse(message) url = urlparse(message)
full_url = "https://"+url.netloc + recognize_langs(url.path) full_url = "https://"+url.netloc + recognize_langs(url.path)
try: wiki = self.domain.get_wiki(full_url)
if self.domain[full_url].rc_id != -1: if wiki and wiki.rc_id != -1:
self.updated.add(full_url) self.updated_wikis.add(full_url)
logger.debug("New website appended to the list! {}".format(full_url)) logger.debug("New website appended to the list! {}".format(full_url))
except KeyError:
pass
def parse_fandom_discussion(self, message: str): def parse_fandom_discussion(self, message: str):
@ -67,9 +69,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
if post.get('action', 'unknown') != "deleted": # ignore deletion events if post.get('action', 'unknown') != "deleted": # ignore deletion events
url = urlparse(post.get('url')) url = urlparse(post.get('url'))
full_url ="https://"+ url.netloc + recognize_langs(url.path) full_url ="https://"+ url.netloc + recognize_langs(url.path)
if full_url in self.domain: # POSSIBLE MEMORY LEAK AS WE DON'T HAVE A WAY TO CHECK IF WIKI IS LOOKING FOR DISCUSSIONS OR NOT if full_url in self.domain:
self.updated_discussions.add("https://"+full_url) self.discussion_callback(full_url)
logger.debug("New website appended to the list (discussions)! {}".format(full_url))
def recognize_langs(path): def recognize_langs(path):