2021-05-28 17:16:14 +00:00
from __future__ import annotations
import asyncio
2024-07-04 17:03:51 +00:00
import datetime
2021-05-30 17:15:37 +00:00
import logging
2022-11-10 14:16:35 +00:00
import time
2023-08-14 15:23:32 +00:00
import traceback
2021-05-30 13:31:51 +00:00
from collections import OrderedDict
2021-05-28 17:16:14 +00:00
from typing import TYPE_CHECKING , Optional
2021-06-22 19:42:32 +00:00
from functools import cache
2024-03-03 09:07:25 +00:00
import sys
2022-11-07 14:46:15 +00:00
import aiohttp
2024-07-04 17:03:51 +00:00
from misc import LimitedList
2023-08-13 14:31:07 +00:00
from src . discord . message import DiscordMessage
2022-11-07 14:46:15 +00:00
from src . config import settings
from src . argparser import command_line_args
2022-10-03 13:34:36 +00:00
# from src.discussions import Discussions
2022-11-07 14:46:15 +00:00
from src . statistics import Log , LogType
2022-08-09 14:08:30 +00:00
2021-05-30 17:15:37 +00:00
# Module-level logger used by the Domain class below for debug/error reporting.
logger = logging . getLogger ( " rcgcdb.domain " )
2021-05-28 17:16:14 +00:00
if TYPE_CHECKING :
import src . wiki
2021-05-30 17:15:37 +00:00
import src . irc_feed
2021-05-28 17:16:14 +00:00
class Domain:
    """A group of wikis that share one domain and are scanned by one asyncio task.

    Wikis live in an ``OrderedDict`` keyed by their script URL. Every scan moves
    the scanned wiki to the end of that mapping, so the first entry is always the
    wiki that has waited the longest for a check.

    When an IRC client is attached (``set_irc``) scans are driven by
    ``irc_scheduler``; otherwise ``regular_scheduler`` polls wikis one by one.
    """

    # Minimum number of seconds between two failure reports for one domain (2 days).
    FAILURE_REPORT_COOLDOWN = 172800

    def __init__(self, name: str):
        self.name = name  # This should be always in format of topname.extension for example fandom.com
        self.task: Optional[asyncio.Task] = None
        self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
        self.irc: Optional[src.irc_feed.AioIRCCat] = None
        # time.time() of the last report sent to monitoring; 0 means "never reported"
        self.last_failure_report = 0
        # Delays (in seconds) between an edit being made and its message being sent
        self.message_timings: LimitedList = LimitedList(limit=100)
        # self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None

    def __iter__(self):
        """Iterate over the script URLs of the wikis in this domain."""
        return iter(self.wikis)

    def __str__(self) -> str:
        """Return a one-line status report of the domain for debugging."""
        if len(self.message_timings) > 0:  # min throws exception when used on empty iterable
            tmin = self.convert_seconds_to_readable(min(self.message_timings))
            avg = self.convert_seconds_to_readable(int(sum(self.message_timings) / len(self.message_timings)))
            tmax = self.convert_seconds_to_readable(max(self.message_timings))
        else:
            tmin, avg, tmax = 0, 0, 0
        irc_connected = self.irc.connection.connected if self.irc else False
        delay = self.calculate_sleep_time(len(self)) if not self.irc else 'handled by IRC scheduler'
        return (f"<Domain name='{self.name}' task='{self.task}' wikis='{self.wikis}' "
                f"irc='{irc_connected}' "
                f"calculated_delay={delay} "
                f"msgdelays=(min={tmin}, avg={avg}, max={tmax})>")

    def __repr__(self):
        return self.__str__()

    def __getitem__(self, item):
        """Return the wiki stored under script URL ``item``.

        Previously this was a stub that always returned ``None``.

        :raises KeyError: when no wiki with that script URL is in the domain;
            use ``get_wiki`` for a lookup with a default.
        """
        return self.wikis[item]

    def __len__(self):
        """Number of wikis in this domain."""
        return len(self.wikis)

    @staticmethod
    def convert_seconds_to_readable(seconds: int) -> str:
        """Helper function to prepare human readable times for domain report"""
        return f"{int(seconds / 60)}m{seconds % 60}s"

    def destroy(self):
        """Destroy the domain – do all of the tasks that should make sure there is no leftovers before being collected by GC"""
        if self.irc:
            logger.debug("Leaving IRC due to destroy() for domain {}".format(self.name))
            self.irc.connection.die("Leaving")
        # if self.discussions_handler:
        #     self.discussions_handler.close()
        if self.task:
            self.task.cancel()

    def get_wiki(self, item: str, default=None) -> Optional["src.wiki.Wiki"]:
        """Return the wiki stored under script URL ``item``, or ``default`` when absent."""
        return self.wikis.get(item, default)

    def set_irc(self, irc_client: "src.irc_feed.AioIRCCat"):
        """Attach the IRC client used by this domain."""
        self.irc = irc_client

    def stop_task(self):
        """Cancel the domain task, if one was ever started.

        Be aware that cancelling the task may take time. Calling this before
        ``run_domain`` used to raise ``AttributeError``; it is now a no-op.
        """
        if self.task:
            self.task.cancel()

    def run_domain(self):
        """Start the asyncio task for the domain unless one is still running.

        A task that has already finished – cancelled *or* terminated by an
        exception – is replaced; the original check only allowed a restart after
        cancellation, leaving a crashed domain permanently stopped.
        """
        if not self.task or self.task.done():
            self.task = asyncio.create_task(self.run_wiki_check(), name=self.name)
        else:
            logger.error(f"Tried to start a task for domain {self.name} however the task already exists!")

    def remove_wiki(self, script_url: str):
        """Remove a wiki from the domain.

        :raises KeyError: when the wiki is not part of this domain
        """
        self.wikis.pop(script_url)

    async def add_wiki(self, wiki: "src.wiki.Wiki", first=False):
        """Adds a wiki to domain list.

        When a wiki with the same script URL is already registered, the existing
        object is kept and only its targets are refreshed.

        :parameter wiki - Wiki object
        :parameter first (optional) - bool indicating if wikis should be added as first or last in the ordered dict"""
        wiki.set_domain(self)
        already_known = wiki.script_url in self.wikis
        if already_known:
            await self.wikis[wiki.script_url].update_targets()
        else:
            self.wikis[wiki.script_url] = wiki
            await wiki.update_targets()
        if first:
            self.wikis.move_to_end(wiki.script_url, last=False)
        if already_known:
            logger.debug(f"Updated targets of existing wiki {wiki.script_url} in domain {self.name}")
        else:
            logger.debug(f"Added new wiki {wiki.script_url} to domain {self.name}")

    async def run_wiki_scan(self, wiki: "src.wiki.Wiki", reason: Optional[str] = None):
        """Scan a single wiki, log why it was scanned and move it to the end of the queue."""
        await wiki.scan()
        wiki.statistics.update(Log(type=LogType.SCAN_REASON, title=str(reason)))
        self.wikis.move_to_end(wiki.script_url)

    def failure_rate_investigation(self) -> Optional[set]:
        """Determine if a notification should be sent regarding a wiki/domain not working properly.

        Cases considered worthy of notification:
            An entire farm (20% of wikis when 15+ wikis from domain) is responding with errors for the past 10 minutes
            A single wiki returning connection errors either for full queue_length or for an hour

        :return: set of script URLs of the affected wikis, or None when nothing is worth reporting
        """
        affected = set()
        if len(self) > 15:
            for wiki_url, wiki_obj in self.wikis.items():
                logs_last_10 = wiki_obj.statistics.filter_by_time(10 * 60)
                failures = sum(1 for log in logs_last_10 if log.type == LogType.CONNECTION_ERROR)
                # Require at least one failure: without it a wiki with an empty
                # log window satisfied "0 <= 0" and was counted as broken.
                if failures and len(logs_last_10) / 2 <= failures:
                    affected.add(wiki_url)
            # Only report when more than a fifth of the farm is affected
            if len(affected) > len(self) / 5:
                return affected
            return None
        error_types = (LogType.CONNECTION_ERROR, LogType.HTTP_ERROR)
        tolerated_types = error_types + (LogType.SCAN_REASON,)
        for wiki_url, wiki_obj in self.wikis.items():
            logs_last_hour = list(wiki_obj.statistics.filter_by_time(60 * 60))
            # Affected = at least one error logged and nothing but errors/scan markers
            # in the last hour. The original all([...]) tested truthiness of the
            # filtered log objects, which also held for wikis with no errors at all.
            if (any(log.type in error_types for log in logs_last_hour)
                    and all(log.type in tolerated_types for log in logs_last_hour)):
                affected.add(wiki_url)
        return affected or None

    def register_message_timing_report(self, initial_time: datetime.datetime, send_time: Optional[datetime.datetime] = None) -> None:
        """Register the time between an edit being made and its message being sent on Discord.

        For metrics and debugging.

        :param initial_time: when the edit was made
        :param send_time: when the message was sent; defaults to the current UTC time
        """
        if send_time is None:
            send_time = datetime.datetime.now(tz=datetime.timezone.utc)
        # total_seconds() keeps the days component that timedelta.seconds drops;
        # a negative delta (clock skew) is recorded as 0 instead of wrapping to ~86399.
        self.message_timings.append(max(0, int((send_time - initial_time).total_seconds())))

    async def _handle_scheduler_failure(self, ex: Exception, scheduler_name: str) -> None:
        """Shared failure handling of both schedulers.

        In debug mode the exception is only logged. In production the traceback is
        printed and, when ``failure_rate_investigation`` finds affected wikis, a
        report is sent to monitoring – at most once per ``FAILURE_REPORT_COOLDOWN``.
        """
        if command_line_args.debug:
            logger.error("{} task for domain {} failed!".format(scheduler_name, self.name), exc_info=ex)
            return
        # production
        if time.time() - self.last_failure_report <= self.FAILURE_REPORT_COOLDOWN:
            return  # Already reported within the cooldown window
        traceback.print_exception(type(ex), ex, ex.__traceback__)
        wikis = self.failure_rate_investigation()
        if wikis:
            await self.send_exception_to_monitoring(ex, wikis)
            self.last_failure_report = time.time()

    async def irc_scheduler(self):
        """Run one pass of the IRC driven scheduler.

        First scans every wiki the IRC feed marked as updated, then scans wikis
        from the front of the queue that have not been checked for longer than the
        ``irc_overtime`` setting (default 3600 seconds).
        """
        try:
            while True:
                try:
                    wiki_url = self.irc.updated_wikis.pop()
                except KeyError:  # Nothing left to process
                    break
                try:
                    wiki = self.wikis[wiki_url]
                except KeyError:
                    logger.error(f"Could not find a wiki with URL {wiki_url} in the domain group!")
                    continue
                await self.run_wiki_scan(wiki, "IRC feed event")
            # Scanned wikis are sent to the end, so the first wiki is always the one
            # checked least recently. One pass over the queue is the upper bound, so
            # a wiki whose last_checked_rc does not advance cannot spin this loop forever.
            for _ in range(len(self.wikis)):
                if not self.wikis:  # Wikis may get removed while we await a scan
                    return
                wiki: src.wiki.Wiki = next(iter(self.wikis.values()))
                if (int(time.time()) - (wiki.statistics.last_checked_rc or 0)) > settings.get("irc_overtime", 3600):
                    await self.run_wiki_scan(wiki, "IRC backup check")
                else:
                    return  # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
        except Exception as e:
            await self._handle_scheduler_failure(e, "IRC scheduler")

    async def regular_scheduler(self):
        """Poll wikis one at a time, sleeping between scans.

        Returns only after an exception has been handled; the caller is expected
        to call it again.
        """
        try:
            while True:
                await asyncio.sleep(self.calculate_sleep_time(len(self)))  # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
                if not self.wikis:
                    # next() on an empty iterator would raise StopIteration, which
                    # turns into RuntimeError inside a coroutine
                    continue
                await self.run_wiki_scan(next(iter(self.wikis.values())), "regular check")
        except Exception as e:
            await self._handle_scheduler_failure(e, "Regular scheduler")

    def calculate_sleep_time(self, queue_length: int):
        """Return the delay in seconds between scans for a queue of the given length.

        150 seconds for an empty queue, 25 seconds less for every wiki, never less
        than 1 second. Not memoized: caching an instance method keys on ``self`` and
        would keep every Domain alive for the lifetime of the cache.
        """
        return max((-25 * queue_length) + 150, 1)

    async def run_wiki_check(self):
        """Runs appropriate scheduler depending on existence of IRC.

        Runs until cancelled; on cancellation the sessions of all wikis are closed
        and, in IRC mode, the IRC connection is disconnected before the
        cancellation is propagated.
        """
        uses_irc = bool(self.irc)  # Decided once, when the task starts
        try:
            while True:
                if uses_irc:
                    await self.irc_scheduler()
                    await asyncio.sleep(10.0)
                else:
                    await self.regular_scheduler()
        except asyncio.CancelledError:
            for wiki in self.wikis.values():
                await wiki.session.close()
            if uses_irc:
                self.irc.connection.disconnect()
            raise

    async def send_exception_to_monitoring(self, ex: Exception, wikis: set):
        """Send an embed describing a scheduler exception to the monitoring webhook.

        Failure to reach Discord is logged and swallowed so that reporting a problem
        can never take down the domain task itself.

        :param ex: exception that interrupted the scheduler
        :param wikis: script URLs of the wikis considered affected
        """
        discord_message = DiscordMessage("embed", "generic", [""])
        discord_message["title"] = "Domain scheduler exception for {} (recovered)".format(self.name)
        description = "Affected wikis: {}".format(", ".join(sorted(wikis))) + "\n" + str(ex)
        discord_message["description"] = description[0:2000]  # Keep the description within 2000 characters
        # discord_message.add_field("Failure count", str(self.failures))
        discord_message.finish_embed_message()
        # Work on a copy – the original added these keys to the shared settings["header"] dict
        header = dict(settings["header"])
        header['Content-Type'] = 'application/json'
        header['X-RateLimit-Precision'] = "millisecond"
        try:
            async with aiohttp.ClientSession(headers=header, timeout=aiohttp.ClientTimeout(total=6)) as session:
                async with session.post("https://discord.com/api/webhooks/{}".format(settings["monitoring_webhook"]),
                                        data=repr(discord_message)):
                    pass
        except (aiohttp.ClientError, asyncio.TimeoutError):
            # ClientError is the base of the Server*Error classes caught before; the
            # total timeout surfaces as asyncio.TimeoutError
            logger.exception("Couldn't communicate with Discord as a result of Server Error when trying to signal domain task issue!")