2020-07-09 23:58:25 +00:00
from dataclasses import dataclass
2020-07-10 13:38:36 +00:00
from src . session import session
2020-07-11 15:54:08 +00:00
import re
2020-07-10 14:11:45 +00:00
import logging , aiohttp
from src . exceptions import *
2020-07-11 15:54:08 +00:00
from src . database import db_cursor , db_connection
2020-07-18 12:12:00 +00:00
from src . formatters . rc import embed_formatter , compact_formatter
from i18n import langs
2020-07-10 20:07:33 +00:00
import src . discord
2020-07-10 13:38:36 +00:00
2020-07-10 14:11:45 +00:00
logger = logging . getLogger ( " rcgcdb.wiki " )
2020-07-09 23:58:25 +00:00
@dataclass
class Wiki:
    """Per-wiki state plus the request, status-check and removal operations.

    Instances carry the key of the wiki's category messages and a counter of
    consecutive client-side (4xx) failures.
    """

    # Key into the shared MW-message store; stays None until process_mwmsgs
    # assigns one.
    mw_messages: int = None
    # corresponding to amount of times connection with wiki failed for client reasons (400-499)
    fail_times: int = 0

    async def fetch_wiki(self, extended, script_path, api_path) -> "aiohttp.ClientResponse":
        """Request tags and the latest recent changes from a wiki's API.

        :param extended: when truthy, also request the category-related
            MediaWiki messages and the site info (``meta`` query).
        :param script_path: first part of the request URL.
        :param api_path: second part of the request URL, appended to ``script_path``.
        :return: whatever ``session.get`` returns for the request.
        :raises WikiServerError: when the request fails with a connection
            error or a server timeout.
        """
        url_path = script_path + api_path
        amount = 20
        # Parameters shared by the regular and the extended request.
        params = {
            "action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
            "utf8": 1, "tglimit": "max", "tgprop": "displayname",
            "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
            "rclimit": amount, "rctype": "edit|new|log|external", "siprop": "namespaces",
        }
        if extended:
            params.update({
                "meta": "allmessages|siteinfo",
                "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                "amenableparser": 1, "amincludelocal": 1,
            })
        try:
            response = await session.get(url_path, params=params)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError):
            logger.exception("A connection error occurred while requesting {}".format(url_path))
            raise WikiServerError
        return response

    async def check_status(self, wiki_id, status, name):
        """React to the HTTP status code a wiki responded with.

        2xx resets ``fail_times``. 401-499 increments it, removes the wiki
        once the counter exceeds 3 and raises. 5xx raises without touching
        the counter. Any other code (including 400) is left alone.

        :param wiki_id: identifier passed on to :meth:`remove`.
        :param status: HTTP status code of the response.
        :param name: wiki name used in log messages.
        :raises WikiError: for status codes 401-499.
        :raises WikiServerError: for status codes 500-599.
        """
        if 199 < status < 300:
            self.fail_times = 0
        elif 400 < status < 500:  # ignore 400 error since this might be our fault
            self.fail_times += 1
            logger.warning("Wiki {} responded with HTTP code {}, increased fail_times to {}, skipping...".format(name, status, self.fail_times))
            if self.fail_times > 3:
                await self.remove(wiki_id, status)
            raise WikiError
        elif 499 < status < 600:
            logger.warning("Wiki {} responded with HTTP code {}, skipping...".format(name, status))
            raise WikiServerError

    async def remove(self, wiki_id, reason):
        """Announce the removal of a wiki and delete it from the database.

        :param wiki_id: value matched against ``observers.wiki_id`` and ``wikis.ROWID``.
        :param reason: passed to the removal notifications (the HTTP status
            code when called from :meth:`check_status`).
        """
        # NOTE(review): these calls are not awaited; if the src.discord
        # helpers are coroutine functions they would never run - confirm.
        src.discord.wiki_removal(wiki_id, reason)
        src.discord.wiki_removal_monitor(wiki_id, reason)
        # Query parameters have to be passed as a sequence, hence the 1-tuples.
        db_cursor.execute("DELETE FROM observers WHERE wiki_id = ?", (wiki_id,))
        db_cursor.execute("DELETE FROM wikis WHERE ROWID = ?", (wiki_id,))
        db_connection.commit()
2020-07-18 12:12:00 +00:00
# Names of the MediaWiki messages used for category tracking, in the
# positional order the former index-based lookup relied on
# (0/2 = added, 1/3 = removed).
_CATEGORY_MESSAGE_NAMES = (
    "recentchanges-page-added-to-category",
    "recentchanges-page-removed-from-category",
    "recentchanges-page-added-to-category-bundled",
    "recentchanges-page-removed-from-category-bundled",
)


def _category_messages_by_name(stored_messages):
    """Return a name -> text mapping for one stored set of category messages."""
    if isinstance(stored_messages, dict):
        return stored_messages
    entries = tuple(stored_messages)
    if all(isinstance(entry, str) for entry in entries):
        # Plain strings carry no names, so fall back to the positional layout.
        return dict(zip(_CATEGORY_MESSAGE_NAMES, entries))
    # (name, text) pairs as stored by process_mwmsgs.
    return dict(entries)


async def process_cats(event: dict, local_wiki: "Wiki", category_msgs: dict, categorize_events: dict):
    """Record a category addition/removal described by a "categorize" event.

    Events of any other type are ignored. For a categorize event the parsed
    comment (with its links stripped) is compared against the wiki's
    category messages and the category title is added to the "new" or
    "removed" set stored under the event's revision id.

    :param event: recent changes entry; reads "type", "title", "revid",
        "parsedcomment" and checks for "commenthidden".
    :param local_wiki: object whose ``mw_messages`` is the key into
        ``category_msgs`` (``None`` when no messages were loaded yet).
    :param category_msgs: message store as filled by process_mwmsgs.
    :param categorize_events: dict updated in place, mapping revid to
        ``{"new": set(), "removed": set()}``.
    """
    if event["type"] != "categorize":
        return
    if "commenthidden" in event:
        logger.debug("Log entry got suppressed, ignoring entry.")
        return
    # Compare against None explicitly: 0 is a valid key (the first one
    # process_mwmsgs hands out) and must not count as "not initialised".
    if local_wiki.mw_messages is None:
        logger.warning(
            "Init information not available, could not read category information. Please restart the bot.")
        return
    cat_title = event["title"].split(':', 1)[1]
    # I so much hate this, blame Markus for making me do this
    revision_changes = categorize_events.setdefault(event["revid"], {"new": set(), "removed": set()})
    comment_to_match = re.sub(r'<.*?a>', '', event["parsedcomment"])
    messages = _category_messages_by_name(category_msgs[local_wiki.mw_messages])
    added, removed, added_bundled, removed_bundled = _CATEGORY_MESSAGE_NAMES

    def mentioned(*names):
        # A missing or empty text must not match: "" is a substring of any comment.
        return any(messages.get(name) and messages[name] in comment_to_match for name in names)

    if mentioned(added, added_bundled):  # Added to category
        revision_changes["new"].add(cat_title)
        logger.debug("Matched {} to added category for {}".format(cat_title, event["revid"]))
    elif mentioned(removed, removed_bundled):  # Removed from category
        revision_changes["removed"].add(cat_title)
        logger.debug("Matched {} to removed category for {}".format(cat_title, event["revid"]))
    else:
        logger.debug(
            "Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format(
                messages.get(added), messages.get(removed), messages.get(added_bundled),
                messages.get(removed_bundled), comment_to_match))
async def process_mwmsgs(wiki_response: dict, local_wiki: "Wiki", mw_msgs: dict):
    """
    This function is made to parse the initial wiki extended information to update local_wiki.mw_messages that stores
    the key to mw_msgs, a dict storing id: tuple where tuple is the collection of MW messages for categories.
    The reason it's constructed this way is to prevent duplication of data in memory so Markus doesn't complain about
    high RAM usage. It does however affect CPU performance as every wiki requires to check the list for the matching
    tuples of MW messages.

    :param wiki_response: dict whose "allmessages" list holds message dicts with "name" and "*" (the text);
        entries containing "missing" are skipped with a warning
    :param local_wiki: object whose mw_messages attribute is set to the key of the matching entry
    :param mw_msgs: shared store, updated in place when no identical tuple is present yet
    :return: None
    """
    msgs = []
    for message in wiki_response["allmessages"]:
        if "missing" in message:  # ignore missing strings
            logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
            continue
        # Wikilinks are stripped so only the static part of the message is kept.
        msgs.append((message["name"], re.sub(r'\[\[.*?\]\]', '', message["*"])))
    msgs = tuple(msgs)
    # Reuse an identical tuple if some other wiki already stored one.
    for key, stored_msgs in mw_msgs.items():
        if msgs == stored_msgs:
            local_wiki.mw_messages = key
            return
    key = len(mw_msgs)
    mw_msgs[key] = msgs  # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one
    local_wiki.mw_messages = key
2020-07-18 12:12:00 +00:00
def essential_info(change, changed_categories, local_wiki, db_wiki):
    """Prepares essential information for both embed and compact message format.

    Works out the identification string of a recent changes entry and its
    sanitised comment, then hands both to the formatter.

    :param change: recent changes entry; may be modified in place ("user" is
        overwritten when the entry carries "userhidden").
    :param changed_categories: passed through to the formatter unchanged.
    :param local_wiki: not used by this function.
    :param db_wiki: sequence whose item 1 is used as the key into ``langs``.
    :return: None in every case; the formatter's result is not returned.

    NOTE(review): ``recent_changes``, ``LinkParser``, ``supported_logs``,
    ``settings`` and ``_`` are used below but are not defined or imported in
    the visible part of this file - confirm where they are meant to come from.
    """
    logger.debug(change)
    # NOTE(review): the looked-up value is never used afterwards; presumably it
    # is meant to provide the translation function - confirm. The lookup is
    # kept because it fails early for an unknown language key.
    lang = langs[db_wiki[1]]
    appearance_mode = embed_formatter
    if "actionhidden" in change or "suppressed" in change:  # if event is hidden using suppression
        appearance_mode("suppressed", change, "", changed_categories, recent_changes)
        return
    if "commenthidden" not in change:
        LinkParser.feed(change["parsedcomment"])
        parsed_comment = LinkParser.new_string
        LinkParser.new_string = ""
        # Prefix each matched markup character with a backslash. The count
        # argument is left at its default (0 = replace all) rather than passed
        # positionally, which is deprecated.
        parsed_comment = re.sub(r"(`|_|\*|~|{|}|\|\|)", "\\\\\\1", parsed_comment)
    else:
        parsed_comment = _("~~hidden~~")
    if not parsed_comment:
        parsed_comment = None
    change_type = change["type"]
    if change_type in ["edit", "new"]:
        logger.debug("List of categories in essential_info: {}".format(changed_categories))
        if "userhidden" in change:
            change["user"] = _("hidden")
        identification_string = change_type
    elif change_type == "log":
        identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"])
        if identification_string not in supported_logs:
            logger.warning(
                "This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(
                    change))
            return
    elif change_type == "categorize":
        # Categorize entries are not formatted here.
        return
    else:
        logger.warning("This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(change))
        return
    if identification_string in settings["ignored"]:
        return
    appearance_mode(identification_string, change, parsed_comment, changed_categories, recent_changes)