mirror of
https://gitlab.com/chicken-riders/RcGcDb.git
synced 2025-02-23 00:54:09 +00:00
continued work
This commit is contained in:
parent
5caed4d438
commit
04f45c33e9
|
@ -41,7 +41,7 @@ main_tasks: dict = {}
|
|||
# Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
|
||||
# 2. Easier to code
|
||||
|
||||
async def populate_allwikis():
|
||||
async def populate_wikis():
|
||||
async with db.pool().acquire() as connection:
|
||||
async with connection.transaction():
|
||||
async for db_wiki in connection.cursor('SELECT DISTINCT wiki, rcid, postid FROM rcgcdw'):
|
||||
|
@ -554,7 +554,7 @@ async def main_loop():
|
|||
nest_asyncio.apply(loop)
|
||||
await db.setup_connection()
|
||||
logger.debug("Connection type: {}".format(db.connection))
|
||||
await populate_allwikis()
|
||||
await populate_wikis()
|
||||
try:
|
||||
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
|
||||
for s in signals:
|
||||
|
|
|
@ -1,22 +1,24 @@
|
|||
from __future__ import annotations
|
||||
import asyncio
|
||||
import logging
|
||||
from collections import OrderedDict
|
||||
from src.config import settings
|
||||
from typing import TYPE_CHECKING, Optional
|
||||
|
||||
logger = logging.getLogger("rcgcdb.domain")
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import src.wiki
|
||||
import src.wiki_ratelimiter
|
||||
import irc.client_aio
|
||||
import src.irc_feed
|
||||
|
||||
|
||||
class Domain:
|
||||
def __init__(self, name: str):
|
||||
self.name = name # This should be always in format of topname.extension for example fandom.com
|
||||
self.task: asyncio.Task = self.create_task()
|
||||
self.task: Optional[asyncio.Task] = None
|
||||
self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
|
||||
self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter()
|
||||
self.irc = None
|
||||
self.irc: Optional[src.irc_feed.AioIRCCat] = None
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.wikis)
|
||||
|
@ -24,9 +26,18 @@ class Domain:
|
|||
def __getitem__(self, item):
|
||||
return
|
||||
|
||||
def set_irc(self, irc_client: irc.client_aio.AioSimpleIRCClient):
|
||||
def __len__(self):
|
||||
return len(self.wikis)
|
||||
|
||||
def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]:
|
||||
return self.wikis.get(item, default)
|
||||
|
||||
def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
|
||||
self.irc = irc_client
|
||||
|
||||
def run_domain(self):
|
||||
self.task = asyncio.create_task(self.run_wiki_check())
|
||||
|
||||
def add_wiki(self, wiki: src.wiki.Wiki, first=False):
|
||||
"""Adds a wiki to domain list.
|
||||
|
||||
|
@ -36,8 +47,39 @@ class Domain:
|
|||
if first:
|
||||
self.wikis.move_to_end(wiki.script_url, last=False)
|
||||
|
||||
def create_task(self) -> asyncio.Task:
|
||||
return asyncio.create_task(self.run_wiki_check())
|
||||
async def run_wiki_scan(self, wiki: src.wiki.Wiki):
|
||||
await self.rate_limiter.timeout_wait()
|
||||
await wiki.scan()
|
||||
self.wikis.move_to_end(wiki.script_url)
|
||||
self.rate_limiter.timeout_add(1.0)
|
||||
|
||||
async def irc_scheduler(self):
|
||||
while 1:
|
||||
try:
|
||||
wiki_url = self.irc.updated_wikis.pop()
|
||||
except KeyError:
|
||||
break
|
||||
try:
|
||||
wiki = self.wikis[wiki_url]
|
||||
except KeyError:
|
||||
logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
|
||||
continue
|
||||
await self.run_wiki_scan(wiki)
|
||||
for wiki in self.wikis.values():
|
||||
if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600):
|
||||
await self.run_wiki_scan(wiki)
|
||||
else:
|
||||
return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
|
||||
|
||||
async def regular_scheduler(self):
|
||||
while 1:
|
||||
additional_time = max((-25*len(self))+150, 0)
|
||||
|
||||
|
||||
|
||||
async def run_wiki_check(self):
|
||||
raise NotImplementedError
|
||||
if self.irc:
|
||||
while:
|
||||
await self.irc_scheduler()
|
||||
else:
|
||||
await self.regular_scheduler()
|
||||
|
|
|
@ -31,7 +31,6 @@ class DomainManager:
|
|||
return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])
|
||||
|
||||
async def new_domain(self, name: str) -> Domain:
|
||||
irc = None
|
||||
domain_object = Domain(name)
|
||||
for irc_server in settings["irc_servers"].keys():
|
||||
if name in settings["irc_servers"][irc_server]["domains"]:
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import types
|
||||
|
||||
import irc.client_aio
|
||||
import json
|
||||
import logging
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import TYPE_CHECKING, Callable
|
||||
from urllib.parse import urlparse, quote
|
||||
|
||||
logger = logging.getLogger("rcgcdw.irc_feed")
|
||||
|
@ -15,11 +19,11 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
|
|||
super().connect(*args, **kwargs)
|
||||
self.connection_details = (args, kwargs)
|
||||
|
||||
def __init__(self, targets: dict[str, str], domain_object: Domain):
|
||||
def __init__(self, targets: dict[str, str], domain_object: Domain, rc_callback: Callable, discussion_callback: Callable):
|
||||
irc.client_aio.SimpleIRCClient.__init__(self)
|
||||
self.targets = targets
|
||||
self.updated = set() # Storage for edited wikis
|
||||
self.updated_discussions = set()
|
||||
self.updated_wikis: set[str] = set()
|
||||
self.discussion_callback = discussion_callback
|
||||
self.domain = domain_object
|
||||
self.connection.buffer_class.errors = "replace" # Ignore encoding errors
|
||||
self.connection_details = None
|
||||
|
@ -50,12 +54,10 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
|
|||
# print(message)
|
||||
url = urlparse(message)
|
||||
full_url = "https://"+url.netloc + recognize_langs(url.path)
|
||||
try:
|
||||
if self.domain[full_url].rc_id != -1:
|
||||
self.updated.add(full_url)
|
||||
logger.debug("New website appended to the list! {}".format(full_url))
|
||||
except KeyError:
|
||||
pass
|
||||
wiki = self.domain.get_wiki(full_url)
|
||||
if wiki and wiki.rc_id != -1:
|
||||
self.updated_wikis.add(full_url)
|
||||
logger.debug("New website appended to the list! {}".format(full_url))
|
||||
|
||||
|
||||
def parse_fandom_discussion(self, message: str):
|
||||
|
@ -67,9 +69,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
|
|||
if post.get('action', 'unknown') != "deleted": # ignore deletion events
|
||||
url = urlparse(post.get('url'))
|
||||
full_url ="https://"+ url.netloc + recognize_langs(url.path)
|
||||
if full_url in self.domain: # POSSIBLE MEMORY LEAK AS WE DON'T HAVE A WAY TO CHECK IF WIKI IS LOOKING FOR DISCUSSIONS OR NOT
|
||||
self.updated_discussions.add("https://"+full_url)
|
||||
logger.debug("New website appended to the list (discussions)! {}".format(full_url))
|
||||
if full_url in self.domain:
|
||||
self.discussion_callback(full_url)
|
||||
|
||||
|
||||
def recognize_langs(path):
|
||||
|
|
Loading…
Reference in a new issue