2021-05-28 17:16:14 +00:00
from __future__ import annotations
import asyncio
2021-05-30 17:15:37 +00:00
import logging
2021-05-30 13:31:51 +00:00
from collections import OrderedDict
2021-05-30 17:15:37 +00:00
from src . config import settings
2021-05-28 17:16:14 +00:00
from typing import TYPE_CHECKING , Optional
2021-06-22 19:42:32 +00:00
from functools import cache
2022-08-09 10:57:40 +00:00
from src . discussions import Discussions
2022-08-09 14:08:30 +00:00
from statistics import Log , LogType
2021-05-30 17:15:37 +00:00
logger = logging . getLogger ( " rcgcdb.domain " )
2021-05-28 17:16:14 +00:00
if TYPE_CHECKING :
import src . wiki
import src . wiki_ratelimiter
2021-05-30 17:15:37 +00:00
import src . irc_feed
2021-05-28 17:16:14 +00:00
class Domain:
    """A group of wikis under one domain name, scanned by a single asyncio task.

    Wikis are stored in an OrderedDict keyed by their script URL. Every scan moves
    the scanned wiki to the end, so entries near the front are assumed to be the
    ones that have not been checked for the longest time.
    """

    def __init__(self, name: str):
        """Creates an empty domain.

        :parameter name - domain name; a discussions handler is created only when it equals "fandom.com"
        """
        # Imported here because at module level src.wiki_ratelimiter is only imported
        # under TYPE_CHECKING, which leaves the name `src` unbound at runtime.
        import src.wiki_ratelimiter

        self.name = name  # This should be always in format of topname.extension for example fandom.com
        self.task: Optional[asyncio.Task] = None
        self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
        self.rate_limiter: src.wiki_ratelimiter.RateLimiter = src.wiki_ratelimiter.RateLimiter()
        self.irc: Optional[src.irc_feed.AioIRCCat] = None
        self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None

    def __iter__(self):
        """Iterates over the keys (script URLs) of the stored wikis."""
        return iter(self.wikis)

    def __getitem__(self, item):
        """Returns the wiki stored under the given script URL.

        :raises KeyError - when no wiki is stored under that key (use get_wiki for a default instead)
        """
        return self.wikis[item]

    def __len__(self):
        """Returns the number of wikis in the domain."""
        return len(self.wikis)

    def destroy(self):
        """Disconnects IRC, closes the discussions handler and cancels the task, each only if set."""
        if self.irc:
            self.irc.connection.disconnect("Leaving")
        if self.discussions_handler:
            self.discussions_handler.close()
        if self.task:
            self.task.cancel()

    def get_wiki(self, item, default=None) -> Optional[src.wiki.Wiki]:
        """Returns the wiki stored under the given key, or default when there is none."""
        return self.wikis.get(item, default)

    def set_irc(self, irc_client: src.irc_feed.AioIRCCat):
        """Stores the IRC client; when one is set run_wiki_check uses the IRC scheduler."""
        self.irc = irc_client

    def stop_task(self):
        """Cancels the task, does nothing when no task has been started."""
        if self.task is not None:
            self.task.cancel()  # Be aware that cancelling the task may take time

    def run_domain(self):
        """Starts the scanning task for the domain unless one is still running."""
        # done() also covers cancelled tasks and tasks that ended with an exception,
        # so a crashed task does not block the domain from being started again.
        if not self.task or self.task.done():
            self.task = asyncio.create_task(self.run_wiki_check(), name=self.name)
        else:
            logger.error(f"Tried to start a task for domain {self.name} however the task already exists!")

    def remove_wiki(self, script_url: str):
        """Removes a wiki from the domain.

        :parameter script_url - key of the wiki to remove
        :raises KeyError - when no wiki is stored under that key
        """
        self.wikis.pop(script_url)

    def add_wiki(self, wiki: src.wiki.Wiki, first=False):
        """Adds a wiki to domain list.

        When a wiki with the same script URL is already stored, the stored object is
        kept and only asked to update its targets.

        :parameter wiki - Wiki object
        :parameter first (optional) - bool indicating if wikis should be added as first or last in the ordered dict
        """
        wiki.set_domain(self)
        if wiki.script_url in self.wikis:
            self.wikis[wiki.script_url].update_targets()
        else:
            self.wikis[wiki.script_url] = wiki
        if first:
            self.wikis.move_to_end(wiki.script_url, last=False)

    async def run_wiki_scan(self, wiki: src.wiki.Wiki, reason: Optional[int] = None):
        """Scans a single wiki and moves it to the end of the ordered dict.

        :parameter wiki - Wiki object to scan
        :parameter reason (optional) - stored (as a string) in a SCAN_REASON log entry of the wiki statistics
        """
        await self.rate_limiter.timeout_wait()
        await wiki.scan()
        wiki.statistics.update(Log(type=LogType.SCAN_REASON, title=str(reason)))
        self.wikis.move_to_end(wiki.script_url)
        self.rate_limiter.timeout_add(1.0)

    async def irc_scheduler(self):
        """Scans wikis reported by the IRC feed, then does one pass over wikis considered overdue."""
        while True:
            try:
                # KeyError on an empty container means there is nothing more to process
                wiki_url = self.irc.updated_wikis.pop()
            except KeyError:
                break
            try:
                wiki = self.wikis[wiki_url]
            except KeyError:
                logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
                continue
            await self.run_wiki_scan(wiki)
        # Iterate over a snapshot: run_wiki_scan() calls move_to_end() on self.wikis and
        # mutating an OrderedDict while iterating over it raises RuntimeError.
        for wiki in list(self.wikis.values()):
            if wiki.script_url not in self.wikis:
                continue  # The wiki has been removed while we were awaiting a previous scan
            # NOTE(review): last_checked_rc is compared directly with the "irc_overtime" setting
            # (default 3600); if it holds an absolute timestamp this probably should be an
            # elapsed-time check - confirm against the statistics module.
            if wiki.statistics.last_checked_rc < settings.get("irc_overtime", 3600):
                await self.run_wiki_scan(wiki)
            else:
                return  # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while

    async def regular_scheduler(self):
        """Endlessly scans the first wiki of the ordered dict, sleeping between scans."""
        while True:
            await asyncio.sleep(self.calculate_sleep_time(len(self)))  # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
            wiki = next(iter(self.wikis.values()), None)
            if wiki is None:
                continue  # No wikis in the domain (yet), a bare next() would raise StopIteration here
            await self.run_wiki_scan(wiki)

    def calculate_sleep_time(self, queue_length: int):
        """Returns the delay (passed to asyncio.sleep) between scans for a given amount of wikis.

        Starts at 150 for an empty queue, drops by 25 per wiki and never goes below 1.
        Deliberately not memoized: caching an instance method keys on self and would keep
        every Domain alive, while the arithmetic itself is trivial.

        :parameter queue_length - number of wikis in the domain
        """
        return max((-25 * queue_length) + 150, 1)

    async def run_wiki_check(self):
        """Entry point of the domain task, picks the scheduler depending on whether IRC is set."""
        if self.irc:
            while True:
                await self.irc_scheduler()
                await asyncio.sleep(10.0)
        else:
            await self.regular_scheduler()