from __future__ import annotations

import asyncio
import json
import logging
import re
import time
from collections import OrderedDict, defaultdict, namedtuple
from dataclasses import dataclass
from functools import cache
from typing import Union, Optional, TYPE_CHECKING

import aiohttp

from src.api.util import default_message
from src.mw_messages import MWMessages
from src.exceptions import *
from src.database import db
from src.queue_handler import DBHandler
from src.formatters.rc import embed_formatter, compact_formatter
from src.formatters.discussions import feeds_embed_formatter, feeds_compact_formatter
from src.api.hooks import formatter_hooks
from src.api.client import Client
from src.api.context import Context
from src.misc import parse_link
from src.i18n import langs
from src.wiki_ratelimiter import RateLimiter
from src.statistics import Statistics, Log, LogType
from src.config import settings
from src import discord  # assumed layout: the src.discord package exposes a discord submodule with DiscordMessage/DiscordMessageMetadata, used below
# TODO delete_messages and redact_messages are called in rc_processor below, but their import is missing in this snapshot
# noinspection PyPackageRequirements
from bs4 import BeautifulSoup

Settings = namedtuple("Settings", ["lang", "display"])

logger = logging.getLogger("rcgcdb.wiki")

wiki_removal_reasons = {410: _("wiki deleted"), 404: _("wiki deleted"), 401: _("wiki inaccessible"),
                        402: _("wiki inaccessible"), 403: _("wiki inaccessible"), 1000: _("discussions disabled")}
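# Note: 1000 is not a real HTTP status; it is an internal marker meaning the wiki's
# discussions feature is disabled rather than the wiki being unreachable. The _() calls
# assume a gettext translation function has been installed globally before import.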

if TYPE_CHECKING:
    from src.domain import Domain


class Wiki:
    def __init__(self, script_url: str, rc_id: Optional[int], discussion_id: Optional[int]):
        self.script_url: str = script_url
        self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0))
        self.statistics: Statistics = Statistics(rc_id, discussion_id)
        self.mw_messages: Optional[MWMessages] = None
        self.tags: dict[str, Optional[str]] = {}  # Tag can be None if hidden
        self.first_fetch_done: bool = False
        self.domain: Optional[Domain] = None
        self.targets: Optional[defaultdict[Settings, list[str]]] = None
        self.client: Client = Client(formatter_hooks, self)
        # update_targets() is a coroutine; schedule it rather than calling it bare
        # (assumes Wiki objects are constructed inside a running event loop)
        asyncio.create_task(self.update_targets())

    @property
    def rc_id(self):
        return self.statistics.last_action

    @property
    def last_request(self):
        return self.statistics.last_request

    @last_request.setter
    def last_request(self, value):
        self.statistics.last_request = value

    # async def remove(self, reason):
    #     logger.info("Removing a wiki {}".format(self.script_url))
    #     await src.discord.wiki_removal(self.script_url, reason)
    #     await src.discord.wiki_removal_monitor(self.script_url, reason)
    #     async with db.pool().acquire() as connection:
    #         result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', self.script_url)
    #         logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, self.script_url))

    def set_domain(self, domain: Domain):
        self.domain = domain

    # async def downtime_controller(self, down, reason=None):
    #     if down:
    #         self.fail_times += 1
    #         if self.fail_times > 20:
    #             await self.remove(reason)
    #     else:
    #         self.fail_times -= 1

    async def update_targets(self) -> None:
        """Populates self.targets with all possible variations of outputs that we need to generate messages for.

        The result is a defaultdict[Settings, list[str]] mapping a (lang, display) settings tuple
        to the list of webhooks using those settings."""
        target_settings: defaultdict[Settings, list[str]] = defaultdict(list)
        async for webhook in DBHandler.fetch_rows("SELECT webhook, lang, display FROM rcgcdw WHERE wiki = $1 AND (rcid != -1 OR rcid IS NULL)", self.script_url):
            target_settings[Settings(webhook["lang"], webhook["display"])].append(webhook["webhook"])
        self.targets = target_settings
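        # Illustrative shape of self.targets after a refresh (hypothetical values):
        #     defaultdict(list, {Settings(lang="en", display=1): ["<webhook_id/token>"],
        #                        Settings(lang="de", display=0): ["<webhook_id/token>"]})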

    def parse_mw_request_info(self, request_data: dict, url: str):
        """A function parsing a request JSON message from MediaWiki, logging all warnings and raising on MediaWiki errors"""
        # any([True for k in request_data.keys() if k in ("error", "errors")])
        errors: list = request_data.get("errors", [])  # Is it ugly? I don't know tbh
        if errors:
            raise MediaWikiError(str(errors))
        warnings: list = request_data.get("warnings", [])
        if warnings:
            for warning in warnings:
                logger.warning("MediaWiki returned the following warning: {code} - {text} on {url}.".format(
                    code=warning["code"], text=warning.get("text", warning.get("*", "")), url=url
                ))
        return request_data
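    # With errorformat=raw (appended by api_request below), MediaWiki returns errors and
    # warnings as lists of objects, typically {"code": ..., "module": ..., "*"/"text": ...};
    # errors raise MediaWikiError while warnings are only logged.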

    async def api_request(self, params: Union[str, OrderedDict], *json_path: str, timeout: int = 10,
                          allow_redirects: bool = False) -> dict:
        """Method to GET request data from the wiki's API with error handling, including recognition of MediaWiki errors.

        Parameters:
            params (str, OrderedDict): a string or collections.OrderedDict object containing query parameters
            json_path (str): *args taking strings as values. After the request is parsed as JSON, data will be extracted following the given JSON path
            timeout (int, float) (default=10): int or float limiting the time allowed for receiving a full response from the server before raising TimeoutError
            allow_redirects (bool) (default=False): switches whether the request should follow redirects or not

        Returns:
            request_content (dict): a dict resulting from JSON extraction of the HTTP GET request with the given json_path

        Raises:
            ServerError: when the connection with the wiki failed due to a server error
            ClientError: when the connection with the wiki failed due to a client error
            KeyError: when json_path contained keys that weren't found in the response JSON
            BadRequest: when the params argument is of the wrong type
            MediaWikiError: when MediaWiki returns an error
        """
        # Making request
        try:
            if isinstance(params, str):  # TODO Make it so there are some default arguments like warning/error format appended
                request = await self.session.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=timeout,
                                                 allow_redirects=allow_redirects)
            elif isinstance(params, OrderedDict):
                params["errorformat"] = "raw"
                request = await self.session.get(self.script_url + "api.php", params=params, timeout=timeout,
                                                 allow_redirects=allow_redirects)
            else:
                raise BadRequest(params)
        except (aiohttp.ServerConnectionError, aiohttp.ServerTimeoutError) as exc:
            logger.warning("Reached {error} error for request on link {url}".format(error=repr(exc),
                                                                                    url=self.script_url + str(params)))
            raise ServerError
        # Catching HTTP errors
        if 499 < request.status < 600:
            raise ServerError
        elif request.status == 302:
            logger.critical(
                "Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect, the wiki got removed, or it is returning a false value. Please provide the real URL to the wiki; the current URL redirects to {}".format(
                    request.url))
        elif 399 < request.status < 500:
            logger.error("Request returned ClientError status code on {url}".format(url=request.url))
            if request.status in wiki_removal_reasons:
                self.statistics.update(Log(type=LogType.HTTP_ERROR, title="{} error".format(request.status), details=str(request.headers) + "\n" + str(request.url)))
            raise ClientError(request)
        else:
            # JSON Extraction
            try:
                request_json = self.parse_mw_request_info(await request.json(encoding="UTF-8"), str(request.url))
                for item in json_path:
                    request_json = request_json[item]
            except ValueError:
                logger.warning("ValueError when extracting JSON data on {url}".format(url=request.url))
                raise ServerError
            except MediaWikiError:
                logger.exception("MediaWiki error on request: {}".format(request.url))
                raise
            except KeyError:
                logger.exception("KeyError while iterating over json_path, full response: {}".format(await request.json()))
                raise
            return request_json
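    # Illustrative usage (hypothetical values):
    #     general = await wiki.api_request(OrderedDict(action="query", meta="siteinfo", format="json"), "query", "general")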

    async def fetch_wiki(self, amount=10) -> dict:
        if self.mw_messages is None:
            params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                  "meta": "allmessages|siteinfo",
                                  "utf8": 1, "tglimit": "max", "tgprop": "displayname",
                                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                                  "rclimit": amount, "rcshow": "!bot", "rctype": "edit|new|log|categorize",
                                  "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                                  "amenableparser": 1, "amincludelocal": 1, "siprop": "namespaces|general"})
        else:
            params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                                  "meta": "siteinfo", "utf8": 1,
                                  "tglimit": "max", "rcshow": "!bot", "tgprop": "displayname",
                                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                                  "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"})
        try:
            response = await self.api_request(params=params)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.error("A connection error occurred while requesting {}".format(params))
            raise WikiServerError
        return response
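    # The first fetch (mw_messages is None) additionally requests meta=allmessages so that
    # category-change comments can later be matched against the wiki's localized MW messages.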

    async def scan(self, amount=10):
        while True:  # Trap event in case there are more changes needing to be fetched
            try:
                request = await self.fetch_wiki(amount=amount)
                self.client.last_request = request
            except WikiServerError as e:
                # TODO We need more details in the WikiServerError exception; for now log its string form
                self.statistics.update(Log(type=LogType.CONNECTION_ERROR, title=str(e)))
                return  # Bail out of this scan; assumption: the scheduler will queue another scan later
            if not self.mw_messages:
                # TODO Split into another function
                mw_messages = request.get("query", {}).get("allmessages", [])
                final_mw_messages = dict()
                for msg in mw_messages:
                    if "missing" not in msg:  # ignore missing strings
                        final_mw_messages[msg["name"]] = re.sub(r'\[\[.*?]]', '', msg["*"])
                    else:
                        logger.warning("Could not fetch the MW message translation for: {}".format(msg["name"]))
                self.mw_messages = MWMessages(final_mw_messages)
            # TODO Split into another function
            try:
                recent_changes = request["query"]["recentchanges"]
                recent_changes.reverse()
            except KeyError:
                raise WikiError
            if self.rc_id in (0, None, -1):
                if len(recent_changes) > 0:
                    self.statistics.last_action = recent_changes[-1]["rcid"]
                    DBHandler.add(("UPDATE rcgcdw SET rcid = $1 WHERE wiki = $2 AND (rcid != -1 OR rcid IS NULL)",
                                   (recent_changes[-1]["rcid"], self.script_url)))
                else:
                    self.statistics.last_action = 0
                    DBHandler.add(("UPDATE rcgcdw SET rcid = 0 WHERE wiki = $1 AND (rcid != -1 OR rcid IS NULL)", (self.script_url,)))
                return  # TODO Add a log entry?
            categorize_events = {}
            new_events = 0
            self.statistics.last_checked_rc = int(time.time())
            highest_id = self.rc_id  # Pretty sure that will be faster
            for change in recent_changes:
                if change["rcid"] > highest_id and amount != 450:
                    new_events += 1
                    if new_events == 10:
                        # call the function again with the max limit for more results, ignoring the ones in this request
                        logger.debug("There were too many new events, queuing wiki with 450 limit.")
                        amount = 450
                        break
                await process_cats(change, self, categorize_events)
            else:  # adequate amount of changes
                for tag in request["query"]["tags"]:
                    try:
                        self.tags[tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
                    except KeyError:
                        self.tags[tag["name"]] = None  # Tags with no displayname are hidden
                message_list = defaultdict(list)
                for change in recent_changes:  # Yeah, second loop since the categories require being all loaded up
                    if change["rcid"] > self.rc_id:
                        if highest_id is None or change["rcid"] > highest_id:  # make sure highest_id is really the highest rcid, but allow entries with potentially lesser rcids to come after without breaking the cycle
                            highest_id = change["rcid"]
                        for combination, webhooks in self.targets.items():
                            message, metadata = await rc_processor(self, change, categorize_events, combination, webhooks)
                break  # Processed the whole batch; leave the fetch loop


@cache
def prepare_settings(display_mode: int) -> dict:
    """Prepares a dict of RcGcDw-compatible settings based on a template and the display mode of the given call"""
    with open("src/api/template_settings.json", "r") as template_json:
        template = json.load(template_json)
    template["appearance"]["embed"]["embed_images"] = display_mode > 1
    template["appearance"]["embed"]["show_edit_changes"] = display_mode > 2
    return template
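

# Illustrative: prepare_settings(3) returns the template with both embed_images and
# show_edit_changes enabled; @cache memoizes one template dict per display mode.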


async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, display_options: Settings, webhooks: list) -> tuple[
        discord.discord.DiscordMessage, discord.discord.DiscordMessageMetadata]:
    from src.misc import LinkParser
    LinkParser = LinkParser()  # instantiate; the local name deliberately shadows the imported class
    metadata = discord.discord.DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None),
                                                      page_id=change.get("pageid", None))
    context = Context("embed" if display_options.display > 0 else "compact", "recentchanges", webhooks, wiki.client, langs[display_options.lang]["rc_formatters"], prepare_settings(display_options.display))
    if ("actionhidden" in change or "suppressed" in change) and "suppressed" not in settings["ignored"]:  # if the event is hidden using suppression
        context.event = "suppressed"
        try:
            discord_message: Optional[discord.discord.DiscordMessage] = default_message("suppressed", display_options.display, formatter_hooks)(context, change)
        except NoFormatter:
            return
        except:
            if settings.get("error_tolerance", 1) > 0:
                discord_message: Optional[discord.discord.DiscordMessage] = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
    else:
        if "commenthidden" not in change:
            LinkParser.feed(change.get("parsedcomment", ""))
            parsed_comment = LinkParser.new_string
        else:
            parsed_comment = _("~~hidden~~")
        if not parsed_comment and context.message_type == "embed" and settings["appearance"].get("embed", {}).get(
                "show_no_description_provided", True):
            parsed_comment = _("No description provided")
        context.set_parsedcomment(parsed_comment)
        if "userhidden" in change:
            change["user"] = _("hidden")
        if change.get("ns", -1) in settings.get("ignored_namespaces", ()):
            return
        if change["type"] in ["edit", "new"]:
            logger.debug("List of categories in essential_info: {}".format(changed_categories))
            identification_string = change["type"]
            context.set_categories(changed_categories)
        elif change["type"] == "categorize":
            return
        elif change["type"] == "log":
            identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"],
                                                                   logaction=change["logaction"])
        else:
            identification_string = change.get("type", "unknown")  # If the event doesn't have a type
        if identification_string in settings["ignored"]:
            return
        context.event = identification_string
        try:
            discord_message: Optional[discord.discord.DiscordMessage] = default_message(identification_string, formatter_hooks)(context,
                                                                                                                                change)
        except:
            if settings.get("error_tolerance", 1) > 0:
                discord_message: Optional[
                    discord.discord.DiscordMessage] = None  # It's handled by send_to_discord, we still want other code to run
            else:
                raise
        if identification_string in ("delete/delete", "delete/delete_redir"):  # TODO Move it into a hook?
            delete_messages(dict(pageid=change.get("pageid")))
        elif identification_string == "delete/event":
            logparams = change.get('logparams', {"ids": []})
            if settings["appearance"]["mode"] == "embed":
                redact_messages(logparams.get("ids", []), 1, logparams.get("new", {}))
            else:
                for logid in logparams.get("ids", []):
                    delete_messages(dict(logid=logid))
        elif identification_string == "delete/revision":
            logparams = change.get('logparams', {"ids": []})
            if settings["appearance"]["mode"] == "embed":
                redact_messages(logparams.get("ids", []), 0, logparams.get("new", {}))
            else:
                for revid in logparams.get("ids", []):
                    delete_messages(dict(revid=revid))
    if discord_message is not None:  # may be None when error_tolerance swallowed a formatter failure
        discord_message.finish_embed()
    return discord_message, metadata
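# rc_processor returns (None, metadata) when a formatter failed and error_tolerance let the
# event be skipped, and returns None outright for ignored or unformattable events; callers
# are expected to handle both cases themselves.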


@dataclass
class Wiki_old:
    mw_messages: int = None
    fail_times: int = 0  # corresponds to the number of times the connection with the wiki failed for client reasons (400-499)
    session: aiohttp.ClientSession = None
    rc_active: int = 0
    last_check: float = 0.0
    last_discussion_check: float = 0.0

    @staticmethod
    async def fetch_wiki(extended, script_path, session: aiohttp.ClientSession, ratelimiter: RateLimiter, amount=20) -> aiohttp.ClientResponse:
        await ratelimiter.timeout_wait()
        url_path = script_path + "api.php"
        if extended:
            params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                      "meta": "allmessages|siteinfo",
                      "utf8": 1, "tglimit": "max", "tgprop": "displayname",
                      "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                      "rclimit": amount, "rcshow": "!bot", "rctype": "edit|new|log|categorize",
                      "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                      "amenableparser": 1, "amincludelocal": 1, "siprop": "namespaces|general"}
        else:
            params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                      "meta": "siteinfo", "utf8": 1,
                      "tglimit": "max", "rcshow": "!bot", "tgprop": "displayname",
                      "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                      "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"}
        try:
            response = await session.get(url_path, params=params)
            ratelimiter.timeout_add(1.0)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.error("A connection error occurred while requesting {}".format(url_path))
            raise WikiServerError
        return response

    @staticmethod
    async def fetch_feeds(wiki, session: aiohttp.ClientSession) -> aiohttp.ClientResponse:
        url_path = "{wiki}wikia.php".format(wiki=wiki)
        params = {"controller": "DiscussionPost", "method": "getPosts", "includeCounters": "false", "sortDirection": "descending", "sortKey": "creation_date", "limit": 20}
        try:
            response = await session.get(url_path, params=params)
            response.raise_for_status()
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError, aiohttp.ClientResponseError):
            logger.error("A connection error occurred while requesting {}".format(url_path))
            raise WikiServerError
        return response
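    # fetch_feeds hits Fandom's wikia.php endpoint (DiscussionPost controller) and pulls
    # the 20 newest discussion posts for the wiki, sorted by creation date, newest first.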

    @staticmethod
    async def safe_request(url, ratelimiter, *keys):
        await ratelimiter.timeout_wait()
        try:
            async with aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) as session:
                request = await session.get(url)
                ratelimiter.timeout_add(1.0)
                request.raise_for_status()
                json_request = await request.json(encoding="UTF-8")
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.error("Reached connection error for request on link {url}".format(url=url))
        else:
            try:
                for item in keys:
                    json_request = json_request[item]
            except KeyError:
                logger.warning(
                    "Failure while extracting data from request on key {key} in {change}".format(key=item, change=json_request))
                return None
            return json_request

    async def fail_add(self, wiki_url, status):
        logger.debug("Increasing fail_times to {}".format(self.fail_times + 3))
        self.fail_times += 3
        if self.fail_times > 9:
            await self.remove(wiki_url, status)

    async def check_status(self, wiki_url, status):
        if 199 < status < 300:
            self.fail_times -= 1
        elif 400 < status < 500:  # ignore 400 error since this might be our fault
            await self.fail_add(wiki_url, status)
            logger.warning("Wiki {} responded with HTTP code {}, increased fail_times to {}, skipping...".format(wiki_url, status, self.fail_times))
            raise WikiError
        elif 499 < status < 600:
            logger.warning("Wiki {} responded with HTTP code {}, skipping...".format(wiki_url, status))
            raise WikiServerError
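    # Failure accounting: each client-side error adds 3 to fail_times and the wiki is
    # removed once the counter exceeds 9 (roughly three consecutive failures), while
    # every successful response decays the counter by 1.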

    @staticmethod
    async def remove(wiki_url, reason):
        logger.info("Removing a wiki {}".format(wiki_url))
        await discord.discord.wiki_removal(wiki_url, reason)
        await discord.discord.wiki_removal_monitor(wiki_url, reason)
        async with db.pool().acquire() as connection:
            result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', wiki_url)
            logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, wiki_url))

    async def pull_comment(self, comment_id, WIKI_API_PATH, rate_limiter):
        try:
            comment = await self.safe_request(
                "{wiki}?action=comment&do=getRaw&comment_id={comment}&format=json".format(wiki=WIKI_API_PATH,
                                                                                          comment=comment_id), rate_limiter, "text")
            logger.debug("Got the following comment from the API: {}".format(comment))
            if comment is None:
                raise TypeError
        except (TypeError, AttributeError):
            logger.exception("Could not resolve the comment text.")
        except KeyError:
            logger.exception("CurseProfile extension API did not respond with a valid comment content.")
        else:
            if len(comment) > 1000:
                comment = comment[0:1000] + "…"
            return comment
        return ""


async def process_cats(event: dict, local_wiki: Wiki, categorize_events: dict):
    """Process categories based on local MW messages."""
    if event["type"] == "categorize":
        if "commenthidden" not in event:
            if local_wiki.mw_messages is not None:
                cat_title = event["title"].split(':', 1)[1]
                # I so much hate this, blame Markus for making me do this
                if event["revid"] not in categorize_events:
                    categorize_events[event["revid"]] = {"new": set(), "removed": set()}
                comment_to_match = re.sub(r'<.*?a>', '', event["parsedcomment"])
                if local_wiki.mw_messages["recentchanges-page-added-to-category"] in comment_to_match or local_wiki.mw_messages["recentchanges-page-added-to-category-bundled"] in comment_to_match:  # Added to category
                    categorize_events[event["revid"]]["new"].add(cat_title)
                    # logger.debug("Matched {} to added category for {}".format(cat_title, event["revid"]))
                elif local_wiki.mw_messages["recentchanges-page-removed-from-category"] in comment_to_match or local_wiki.mw_messages["recentchanges-page-removed-from-category-bundled"] in comment_to_match:  # Removed from category
                    categorize_events[event["revid"]]["removed"].add(cat_title)
                    # logger.debug("Matched {} to removed category for {}".format(cat_title, event["revid"]))
                else:
                    logger.debug(
                        "Unknown match for category change with messages {} and comment_to_match {}".format(local_wiki.mw_messages, comment_to_match))
            else:
                logger.warning(
                    "Init information not available, could not read category information. Please restart the bot.")
        else:
            logger.debug("Log entry got suppressed, ignoring entry.")


async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict):
    """
    This function is made to parse the initial wiki extended information to update local_wiki.mw_messages, which stores the key
    into mw_msgs, a dict storing id: tuple where the tuple is a set of MW messages for categories.
    The reason it's constructed this way is to prevent duplication of data in memory so Markus doesn't complain about
    high RAM usage. It does however affect CPU performance, as every wiki requires checking the list for the matching
    tuples of MW messages.
    :param wiki_response:
    :param local_wiki:
    :param mw_msgs:
    :return:
    """
    msgs = []
    for message in wiki_response["query"]["allmessages"]:
        if "missing" not in message:  # ignore missing strings
            msgs.append((message["name"], re.sub(r'\[\[.*?\]\]', '', message["*"])))
        else:
            logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
    msgs = tuple(msgs)
    for key, stored_msgs in mw_msgs.items():  # renamed from "set" to avoid shadowing the builtin
        if msgs == stored_msgs:
            local_wiki.mw_messages = key
            return
    # if the same entry is not in mw_msgs
    key = len(mw_msgs)
    mw_msgs[key] = msgs  # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries one by one
    local_wiki.mw_messages = key
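# Illustrative shape of mw_msgs (shared across wikis to deduplicate identical message sets):
#     {0: (("recentchanges-page-added-to-category", "added to category"), ...), 1: (...)}
# with Wiki_old.mw_messages holding the key whose tuple matches the wiki's own messages.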


# db_wiki: webhook, wiki, lang, display, rcid, postid
async def essential_info(change: dict, changed_categories, local_wiki: Wiki, target: tuple, paths: tuple, request: dict,
                         rate_limiter: RateLimiter) -> discord.discord.DiscordMessage:
    """Prepares essential information for both embed and compact message format."""
    _ = langs[target[0][0]]["wiki"].gettext
    changed_categories = changed_categories.get(change["revid"], None)
    # logger.debug("List of categories in essential_info: {}".format(changed_categories))
    appearance_mode = embed_formatter if target[0][1] > 0 else compact_formatter
    if "actionhidden" in change or "suppressed" in change:  # if the event is hidden using suppression
        await appearance_mode("suppressed", change, "", changed_categories, local_wiki, target, paths, rate_limiter)
        return
    if "commenthidden" not in change:
        parsed_comment = parse_link(paths[3], change["parsedcomment"])
    else:
        parsed_comment = _("~~hidden~~")
    if not parsed_comment:
        parsed_comment = None
    if change["type"] in ["edit", "new"]:
        if "userhidden" in change:
            change["user"] = _("hidden")
        identification_string = change["type"]
    elif change["type"] == "log":
        identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"])
    elif change["type"] == "categorize":
        return
    else:
        identification_string = change["type"]
    additional_data = {"namespaces": request["query"]["namespaces"], "tags": {}}
    for tag in request["query"]["tags"]:
        try:
            additional_data["tags"][tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
        except KeyError:
            additional_data["tags"][tag["name"]] = None  # Tags with no displayname are hidden
    return await appearance_mode(identification_string, change, parsed_comment, changed_categories, local_wiki, target, paths, rate_limiter, additional_data=additional_data)


async def essential_feeds(change: dict, comment_pages: dict, db_wiki, target: tuple) -> discord.discord.DiscordMessage:
    """Prepares essential information for both embed and compact message format."""
    appearance_mode = feeds_embed_formatter if target[0][1] > 0 else feeds_compact_formatter
    identification_string = change["_embedded"]["thread"][0]["containerType"]
    comment_page = None
    if identification_string == "ARTICLE_COMMENT" and comment_pages is not None:
        comment_page = comment_pages.get(change["forumId"], None)
        if comment_page is not None:
            comment_page["fullUrl"] = "/".join(db_wiki["wiki"].split("/", 3)[:3]) + comment_page["relativeUrl"]
    return await appearance_mode(identification_string, change, target, db_wiki["wiki"], article_page=comment_page)