mirror of
https://gitlab.com/chicken-riders/RcGcDb.git
synced 2025-02-23 00:54:09 +00:00
continued work
This commit is contained in:
parent
5caed4d438
commit
04f45c33e9
|
@ -41,7 +41,7 @@ main_tasks: dict = {}
|
||||||
# Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
|
# Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
|
||||||
# 2. Easier to code
|
# 2. Easier to code
|
||||||
|
|
||||||
async def populate_allwikis():
|
async def populate_wikis():
|
||||||
async with db.pool().acquire() as connection:
|
async with db.pool().acquire() as connection:
|
||||||
async with connection.transaction():
|
async with connection.transaction():
|
||||||
async for db_wiki in connection.cursor('SELECT DISTINCT wiki, rcid, postid FROM rcgcdw'):
|
async for db_wiki in connection.cursor('SELECT DISTINCT wiki, rcid, postid FROM rcgcdw'):
|
||||||
|
@ -554,7 +554,7 @@ async def main_loop():
|
||||||
nest_asyncio.apply(loop)
|
nest_asyncio.apply(loop)
|
||||||
await db.setup_connection()
|
await db.setup_connection()
|
||||||
logger.debug("Connection type: {}".format(db.connection))
|
logger.debug("Connection type: {}".format(db.connection))
|
||||||
await populate_allwikis()
|
await populate_wikis()
|
||||||
try:
|
try:
|
||||||
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
|
signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT)
|
||||||
for s in signals:
|
for s in signals:
|
||||||
|
|
|
@ -1,22 +1,24 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import logging
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
from src.config import settings
|
||||||
from typing import TYPE_CHECKING, Optional
|
from typing import TYPE_CHECKING, Optional
|
||||||
|
logger = logging.getLogger("rcgcdb.domain")
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
import src.wiki
|
import src.wiki
|
||||||
import src.wiki_ratelimiter
|
import src.wiki_ratelimiter
|
||||||
import irc.client_aio
|
import src.irc_feed
|
||||||
|
|
||||||
|
|
||||||
class Domain:
|
class Domain:
|
||||||
def __init__(self, name: str):
|
def __init__(self, name: str):
|
||||||
self.name = name # This should be always in format of topname.extension for example fandom.com
|
self.name = name # This should be always in format of topname.extension for example fandom.com
|
||||||
self.task: asyncio.Task = self.create_task()
|
self.task: Optional[asyncio.Task] = None
|
||||||
self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
|
self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
|
||||||
self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter()
|
self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter()
|
||||||
self.irc = None
|
self.irc: Optional[src.irc_feed.AioIRCCat] = None
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
return iter(self.wikis)
|
return iter(self.wikis)
|
||||||
|
@ -24,9 +26,18 @@ class Domain:
|
||||||
def __getitem__(self, item):
|
def __getitem__(self, item):
|
||||||
return
|
return
|
||||||
|
|
||||||
def set_irc(self, irc_client: irc.client_aio.AioSimpleIRCClient):
|
def __len__(self):
|
||||||
|
return len(self.wikis)
|
||||||
|
|
||||||
|
def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]:
|
||||||
|
return self.wikis.get(item, default)
|
||||||
|
|
||||||
|
def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
|
||||||
self.irc = irc_client
|
self.irc = irc_client
|
||||||
|
|
||||||
|
def run_domain(self):
|
||||||
|
self.task = asyncio.create_task(self.run_wiki_check())
|
||||||
|
|
||||||
def add_wiki(self, wiki: src.wiki.Wiki, first=False):
|
def add_wiki(self, wiki: src.wiki.Wiki, first=False):
|
||||||
"""Adds a wiki to domain list.
|
"""Adds a wiki to domain list.
|
||||||
|
|
||||||
|
@ -36,8 +47,39 @@ class Domain:
|
||||||
if first:
|
if first:
|
||||||
self.wikis.move_to_end(wiki.script_url, last=False)
|
self.wikis.move_to_end(wiki.script_url, last=False)
|
||||||
|
|
||||||
def create_task(self) -> asyncio.Task:
|
async def run_wiki_scan(self, wiki: src.wiki.Wiki):
|
||||||
return asyncio.create_task(self.run_wiki_check())
|
await self.rate_limiter.timeout_wait()
|
||||||
|
await wiki.scan()
|
||||||
|
self.wikis.move_to_end(wiki.script_url)
|
||||||
|
self.rate_limiter.timeout_add(1.0)
|
||||||
|
|
||||||
|
async def irc_scheduler(self):
|
||||||
|
while 1:
|
||||||
|
try:
|
||||||
|
wiki_url = self.irc.updated_wikis.pop()
|
||||||
|
except KeyError:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
wiki = self.wikis[wiki_url]
|
||||||
|
except KeyError:
|
||||||
|
logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
|
||||||
|
continue
|
||||||
|
await self.run_wiki_scan(wiki)
|
||||||
|
for wiki in self.wikis.values():
|
||||||
|
if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600):
|
||||||
|
await self.run_wiki_scan(wiki)
|
||||||
|
else:
|
||||||
|
return # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
|
||||||
|
|
||||||
|
async def regular_scheduler(self):
|
||||||
|
while 1:
|
||||||
|
additional_time = max((-25*len(self))+150, 0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def run_wiki_check(self):
|
async def run_wiki_check(self):
|
||||||
raise NotImplementedError
|
if self.irc:
|
||||||
|
while:
|
||||||
|
await self.irc_scheduler()
|
||||||
|
else:
|
||||||
|
await self.regular_scheduler()
|
||||||
|
|
|
@ -31,7 +31,6 @@ class DomainManager:
|
||||||
return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])
|
return ".".join(urlunparse((*parsed_url[0:2], "", "", "", "")).split(".")[-2:])
|
||||||
|
|
||||||
async def new_domain(self, name: str) -> Domain:
|
async def new_domain(self, name: str) -> Domain:
|
||||||
irc = None
|
|
||||||
domain_object = Domain(name)
|
domain_object = Domain(name)
|
||||||
for irc_server in settings["irc_servers"].keys():
|
for irc_server in settings["irc_servers"].keys():
|
||||||
if name in settings["irc_servers"][irc_server]["domains"]:
|
if name in settings["irc_servers"][irc_server]["domains"]:
|
||||||
|
|
|
@ -1,8 +1,12 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import types
|
||||||
|
|
||||||
import irc.client_aio
|
import irc.client_aio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING, Callable
|
||||||
from urllib.parse import urlparse, quote
|
from urllib.parse import urlparse, quote
|
||||||
|
|
||||||
logger = logging.getLogger("rcgcdw.irc_feed")
|
logger = logging.getLogger("rcgcdw.irc_feed")
|
||||||
|
@ -15,11 +19,11 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
|
||||||
super().connect(*args, **kwargs)
|
super().connect(*args, **kwargs)
|
||||||
self.connection_details = (args, kwargs)
|
self.connection_details = (args, kwargs)
|
||||||
|
|
||||||
def __init__(self, targets: dict[str, str], domain_object: Domain):
|
def __init__(self, targets: dict[str, str], domain_object: Domain, rc_callback: Callable, discussion_callback: Callable):
|
||||||
irc.client_aio.SimpleIRCClient.__init__(self)
|
irc.client_aio.SimpleIRCClient.__init__(self)
|
||||||
self.targets = targets
|
self.targets = targets
|
||||||
self.updated = set() # Storage for edited wikis
|
self.updated_wikis: set[str] = set()
|
||||||
self.updated_discussions = set()
|
self.discussion_callback = discussion_callback
|
||||||
self.domain = domain_object
|
self.domain = domain_object
|
||||||
self.connection.buffer_class.errors = "replace" # Ignore encoding errors
|
self.connection.buffer_class.errors = "replace" # Ignore encoding errors
|
||||||
self.connection_details = None
|
self.connection_details = None
|
||||||
|
@ -50,12 +54,10 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
|
||||||
# print(message)
|
# print(message)
|
||||||
url = urlparse(message)
|
url = urlparse(message)
|
||||||
full_url = "https://"+url.netloc + recognize_langs(url.path)
|
full_url = "https://"+url.netloc + recognize_langs(url.path)
|
||||||
try:
|
wiki = self.domain.get_wiki(full_url)
|
||||||
if self.domain[full_url].rc_id != -1:
|
if wiki and wiki.rc_id != -1:
|
||||||
self.updated.add(full_url)
|
self.updated_wikis.add(full_url)
|
||||||
logger.debug("New website appended to the list! {}".format(full_url))
|
logger.debug("New website appended to the list! {}".format(full_url))
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def parse_fandom_discussion(self, message: str):
|
def parse_fandom_discussion(self, message: str):
|
||||||
|
@ -67,9 +69,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
|
||||||
if post.get('action', 'unknown') != "deleted": # ignore deletion events
|
if post.get('action', 'unknown') != "deleted": # ignore deletion events
|
||||||
url = urlparse(post.get('url'))
|
url = urlparse(post.get('url'))
|
||||||
full_url ="https://"+ url.netloc + recognize_langs(url.path)
|
full_url ="https://"+ url.netloc + recognize_langs(url.path)
|
||||||
if full_url in self.domain: # POSSIBLE MEMORY LEAK AS WE DON'T HAVE A WAY TO CHECK IF WIKI IS LOOKING FOR DISCUSSIONS OR NOT
|
if full_url in self.domain:
|
||||||
self.updated_discussions.add("https://"+full_url)
|
self.discussion_callback(full_url)
|
||||||
logger.debug("New website appended to the list (discussions)! {}".format(full_url))
|
|
||||||
|
|
||||||
|
|
||||||
def recognize_langs(path):
|
def recognize_langs(path):
|
||||||
|
|
Loading…
Reference in a new issue