2021-05-28 17:16:14 +00:00
from __future__ import annotations
import asyncio
2021-05-30 17:15:37 +00:00
import logging
2021-05-30 13:31:51 +00:00
from collections import OrderedDict
2021-05-30 17:15:37 +00:00
from src . config import settings
2021-05-28 17:16:14 +00:00
from typing import TYPE_CHECKING , Optional
2021-06-22 19:42:32 +00:00
from functools import cache
2021-05-30 17:15:37 +00:00
# Module-level logger; the Domain class below reports scheduling errors through it.
logger = logging . getLogger ( " rcgcdb.domain " )
2021-05-28 17:16:14 +00:00
if TYPE_CHECKING :
import src . wiki
import src . wiki_ratelimiter
2021-05-30 17:15:37 +00:00
import src . irc_feed
2021-05-28 17:16:14 +00:00
class Domain:
    """A group of wikis that share one name, one rate limiter and one scheduler task.

    Wikis live in ``self.wikis``, an ``OrderedDict`` keyed by each wiki's
    ``script_url``. ``run_wiki_scan`` moves a wiki to the end of that dict after
    scanning it, so the front of the dict holds the wikis that have gone the
    longest without a scan.
    """

    def __init__(self, name: str):
        """Create an empty domain.

        :param name: domain name, expected in ``topname.extension`` form.
        """
        # Imported here instead of at module level: the module-level import of
        # src.wiki_ratelimiter sits behind TYPE_CHECKING, so the name ``src`` is
        # not bound at runtime and ``src.wiki_ratelimiter.RateLimiter()`` would
        # raise NameError.
        from src.wiki_ratelimiter import RateLimiter

        self.name = name  # This should be always in format of topname.extension for example fandom.com
        self.task: Optional[asyncio.Task] = None
        self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
        self.rate_limiter: src.wiki_ratelimiter.RateLimiter = RateLimiter()
        self.irc: Optional[src.irc_feed.AioIRCCat] = None

    def __iter__(self):
        """Iterate over the keys (script URLs) of the wikis in this domain."""
        return iter(self.wikis)

    def __getitem__(self, item):
        """Return the wiki stored under key ``item``.

        :raises KeyError: if no wiki with that key belongs to this domain.
        """
        return self.wikis[item]

    def __len__(self):
        """Return the number of wikis in this domain."""
        return len(self.wikis)

    def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]:
        """Return the wiki stored under key ``item``, or ``default`` if there is none."""
        return self.wikis.get(item, default)

    def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
        """Attach an IRC client; ``run_wiki_check`` then uses the IRC scheduler."""
        self.irc = irc_client

    def stop_task(self):
        """Request cancellation of the scheduler task, if one was started."""
        if self.task is not None:
            self.task.cancel()  # Be aware that cancelling the task may take time

    def run_domain(self):
        """Start the scheduler task for this domain.

        A new task is created when there is none yet or when the previous one
        has finished (cancelled or ended with an exception). If a task is still
        running, an error is logged and nothing else happens.
        """
        # done() is also True for a cancelled task, and additionally lets a
        # task that died with an exception be replaced.
        if self.task is None or self.task.done():
            self.task = asyncio.create_task(self.run_wiki_check())
        else:
            logger.error(f" Tried to start a task for domain { self . name } however the task already exists! ")

    def add_wiki(self, wiki: src.wiki.Wiki, first=False):
        """Add a wiki to the domain, keyed by its ``script_url``.

        :param wiki: the wiki object to add.
        :param first: when true, place the wiki at the front of the ordered
            dict instead of at the end.
        """
        self.wikis[wiki.script_url] = wiki
        if first:
            self.wikis.move_to_end(wiki.script_url, last=False)

    def _next_wiki(self) -> Optional[src.wiki.Wiki]:
        """Return the wiki at the front of ``self.wikis`` without removing it, or ``None``."""
        return next(iter(self.wikis.values()), None)

    async def run_wiki_scan(self, wiki: src.wiki.Wiki):
        """Scan one wiki under the domain's rate limiter.

        Waits on the rate limiter, runs ``wiki.scan``, moves the wiki to the end
        of ``self.wikis`` and then adds 1.0 to the rate limiter's timeout.
        """
        await self.rate_limiter.timeout_wait()
        await wiki.scan(self.rate_limiter)
        self.wikis.move_to_end(wiki.script_url)
        self.rate_limiter.timeout_add(1.0)

    async def irc_scheduler(self):
        """Scan every wiki reported by the IRC client, then the wikis at the front of the queue.

        The first loop drains ``self.irc.updated_wikis``; URLs that do not
        belong to this domain are logged and skipped. The second loop walks the
        wikis in queue order and stops at the first one that fails the
        ``irc_overtime`` check.
        """
        while True:
            try:
                wiki_url = self.irc.updated_wikis.pop()
            except KeyError:
                # pop() found nothing left to take.
                break
            try:
                wiki = self.wikis[wiki_url]
            except KeyError:
                logger.error(f" Could not find a wiki with URL { wiki_url } in the domain group! ")
                continue
            await self.run_wiki_scan(wiki)
        # Walk a snapshot: run_wiki_scan() calls self.wikis.move_to_end(), and
        # reordering an OrderedDict while iterating over it raises RuntimeError.
        for wiki in list(self.wikis.values()):
            # NOTE(review): last_checked_rc is compared directly with the
            # irc_overtime setting - confirm whether it holds an age or an
            # absolute timestamp. The settings key is reproduced exactly as it
            # appears in this file, padding spaces included - confirm it too.
            if wiki.statistics.last_checked_rc < settings.get(" irc_overtime ", 3600):
                await self.run_wiki_scan(wiki)
            else:
                return  # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while

    async def regular_scheduler(self):
        """Forever: sleep, then scan the wiki at the front of the queue."""
        while True:
            # To make sure that we don't spam domains with one wiki every second
            # we calculate a sane timeout for domains with few wikis
            await asyncio.sleep(self.calculate_sleep_time(len(self)))
            # Peek rather than pop: run_wiki_scan() needs the wiki to still be
            # in self.wikis so it can move it to the end afterwards.
            wiki = self._next_wiki()
            if wiki is None:
                continue  # No wikis in this domain yet; sleep again.
            await self.run_wiki_scan(wiki)

    def calculate_sleep_time(self, queue_length: int) -> int:
        """Return the delay between scans for a domain with ``queue_length`` wikis.

        The delay is 150 for an empty queue and drops by 25 per wiki, never
        going below 1. It is not memoised: a cache on an instance method would
        keep every Domain instance alive, and the arithmetic is trivial.
        """
        return max((-25 * queue_length) + 150, 1)

    async def run_wiki_check(self):
        """Run the IRC scheduler every 10 seconds if an IRC client is set, else the regular scheduler."""
        if self.irc:
            while True:
                await self.irc_scheduler()
                await asyncio.sleep(10.0)
        else:
            await self.regular_scheduler()