2020-07-09 23:58:25 +00:00
import asyncio
import logging
import re
from dataclasses import dataclass
from typing import Optional

import aiohttp
from bs4 import BeautifulSoup

from src.exceptions import *
from src.database import db_cursor, db_connection
from src.formatters.rc import embed_formatter, compact_formatter
from src.misc import LinkParser
from src.i18n import langs
import src.discord
from src.config import settings
2020-07-10 13:38:36 +00:00
2020-07-10 14:11:45 +00:00
logger = logging.getLogger("rcgcdb.wiki")

# Log events the formatters know how to render, written as "<logtype>/<logaction>".
# essential_info() builds the same "<logtype>/<logaction>" identifier from a change
# and skips anything that is not listed here, so entries must match it exactly
# (no surrounding whitespace).
supported_logs = [
    "protect/protect", "protect/modify", "protect/unprotect",
    "upload/overwrite", "upload/upload",
    "delete/delete", "delete/delete_redir", "delete/restore", "delete/revision", "delete/event",
    "import/upload", "import/interwiki",
    "merge/merge",
    "move/move", "move/move_redir",
    "protect/move_prot",
    "block/block", "block/unblock", "block/reblock",
    "rights/rights", "rights/autopromote",
    "abusefilter/modify", "abusefilter/create",
    "interwiki/iw_add", "interwiki/iw_edit", "interwiki/iw_delete",
    "curseprofile/comment-created", "curseprofile/comment-edited", "curseprofile/comment-deleted",
    "curseprofile/comment-purged", "curseprofile/profile-edited", "curseprofile/comment-replied",
    "contentmodel/change",
    "sprite/sprite", "sprite/sheet", "sprite/slice",
    "managetags/create", "managetags/delete", "managetags/activate", "managetags/deactivate",
    "tag/update",
    "cargo/createtable", "cargo/deletetable", "cargo/recreatetable", "cargo/replacetable",
    "upload/revert",
]
2020-07-20 12:03:55 +00:00
2020-07-09 23:58:25 +00:00
@dataclass
class Wiki:
    """State kept for one wiki plus the HTTP helpers used to poll it."""

    # Key into the shared MediaWiki-message registry (see process_mwmsgs);
    # None until the messages have been processed for this wiki.
    mw_messages: Optional[int] = None
    fail_times: int = 0  # corresponding to amount of times connection with wiki failed for client reasons (400-499)
    # Created lazily by create_session() on first request.
    session: Optional[aiohttp.ClientSession] = None

    async def create_session(self):
        """Create the aiohttp session (project headers, 5 second total timeout) and store it on ``self.session``."""
        self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(total=5.0))

    async def fetch_wiki(self, extended, script_path) -> aiohttp.ClientResponse:
        """Query ``api.php`` for the latest recent changes, tags and site info.

        :param extended: when truthy, additionally request the four
            category-change system messages (``meta=allmessages``).
        :param script_path: URL prefix that ``api.php`` is appended to.
        :return: the response object; its body is not read here.
        :raises WikiServerError: on a connection error or a timeout.
        """
        if self.session is None:
            await self.create_session()
        url_path = script_path + "api.php"
        amount = 20
        params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
                  "meta": "siteinfo", "utf8": 1,
                  "tglimit": "max", "tgprop": "displayname",
                  "rcprop": "title|redirect|timestamp|ids|loginfo|parsedcomment|sizes|flags|tags|user",
                  "rclimit": amount, "rctype": "edit|new|log|external", "siprop": "namespaces|general"}
        if extended:
            # Same query, plus the localized category messages.
            params.update({
                "meta": "allmessages|siteinfo",
                "ammessages": "recentchanges-page-added-to-category|recentchanges-page-removed-from-category|recentchanges-page-added-to-category-bundled|recentchanges-page-removed-from-category-bundled",
                "amenableparser": 1, "amincludelocal": 1,
            })
        try:
            response = await self.session.get(url_path, params=params)
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError) as error:
            # asyncio.TimeoutError is what the session's total timeout raises;
            # it is not an aiohttp.ServerTimeoutError.
            logger.exception("A connection error occurred while requesting {}".format(url_path))
            raise WikiServerError from error
        return response

    async def safe_request(self, url):
        """GET ``url`` without following redirects.

        :return: the response, or ``None`` on a connection error or timeout.
        :raises aiohttp.ClientResponseError: for HTTP status codes of 400 and
            above (from ``raise_for_status``); this is not caught here.
        """
        if self.session is None:
            await self.create_session()
        try:
            request = await self.session.get(url, timeout=aiohttp.ClientTimeout(total=5), allow_redirects=False)
            request.raise_for_status()
        except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
            logger.exception("Reached connection error for request on link {url}".format(url=url))
            return None
        return request

    async def check_status(self, wiki_url, status):
        """React to the HTTP status code a wiki answered with.

        2xx resets ``fail_times``. 401-499 increments it, removes the wiki once
        it exceeds 3, and raises. 5xx raises. Any other code (including 400)
        is ignored.

        :raises WikiError: for status codes 401-499.
        :raises WikiServerError: for status codes 500-599.
        """
        if 199 < status < 300:
            self.fail_times = 0
        elif 400 < status < 500:  # ignore 400 error since this might be our fault
            self.fail_times += 1
            logger.warning("Wiki {} responded with HTTP code {}, increased fail_times to {}, skipping...".format(wiki_url, status, self.fail_times))
            if self.fail_times > 3:
                await self.remove(wiki_url, status)
            raise WikiError
        elif 499 < status < 600:
            logger.warning("Wiki {} responded with HTTP code {}, skipping...".format(wiki_url, status))
            raise WikiServerError

    async def remove(self, wiki_id, reason):
        """Send the removal notifications and delete every ``rcgcdw`` row for ``wiki_id``."""
        # NOTE(review): if these src.discord helpers are coroutines they need
        # to be awaited - confirm against src/discord.py.
        src.discord.wiki_removal(wiki_id, reason)
        src.discord.wiki_removal_monitor(wiki_id, reason)
        db_cursor.execute("DELETE FROM rcgcdw WHERE wiki = ?", (wiki_id,))
        logger.warning("{} rows affected by DELETE FROM rcgcdw WHERE wiki = {}".format(db_cursor.rowcount, wiki_id))
        db_connection.commit()

    async def pull_comment(self, comment_id, WIKI_API_PATH):
        """Fetch the raw text of a CurseProfile comment.

        :return: the comment text, cut to 1000 characters plus an ellipsis when
            longer, or ``""`` when it could not be retrieved.
        :raises aiohttp.ClientResponseError: propagated from ``safe_request``
            for HTTP error statuses.
        """
        url = "{wiki}?action=comment&do=getRaw&comment_id={comment}&format=json".format(wiki=WIKI_API_PATH,
                                                                                        comment=comment_id)
        try:
            # Await the request first and only then read the body:
            # ``await self.safe_request(url).json()`` would call .json() on the
            # coroutine object itself instead of on the response.
            response = await self.safe_request(url)
            payload = await response.json()  # response is None on connection errors -> AttributeError
            comment = payload["text"]
            logger.debug("Got the following comment from the API: {}".format(comment))
        except (TypeError, AttributeError):
            logger.exception("Could not resolve the comment text.")
        except (KeyError, ValueError, aiohttp.ContentTypeError):
            # Missing "text" key, or a body that is not valid JSON.
            logger.exception("CurseProfile extension API did not respond with a valid comment content.")
        else:
            if len(comment) > 1000:
                comment = comment[0:1000] + "…"
            return comment
        return ""
2020-07-11 15:54:08 +00:00
2020-07-18 12:12:00 +00:00
async def process_cats(event: dict, local_wiki: Wiki, category_msgs: dict, categorize_events: dict):
    """Record a "categorize" recent-changes event as an added or removed category.

    The event's parsed comment (with anchor tags stripped) is compared against
    the wiki's localized category messages; on a match the category title is
    added to ``categorize_events[revid]["new"]`` or ``["removed"]``.
    Events of any other type are ignored.

    :param event: a single recent-changes entry.
    :param local_wiki: wiki state; ``mw_messages`` selects the message set.
    :param category_msgs: registry mapping ``mw_messages`` keys to message
        tuples. Entries may be ``(name, text)`` pairs (as stored by
        ``process_mwmsgs``) or plain text strings.
    :param categorize_events: dict mutated in place, keyed by revision id.
    """
    if event["type"] != "categorize":
        return
    if "commenthidden" in event:
        logger.debug("Log entry got suppressed, ignoring entry.")
        return
    # Compare against None: 0 is a valid registry key (the first one handed out).
    if local_wiki.mw_messages is None:
        logger.warning(
            "Init information not available, could not read category information. Please restart the bot.")
        return
    cat_title = event["title"].split(':', 1)[1]
    # I so much hate this, blame Markus for making me do this
    revid = event["revid"]
    changes = categorize_events.setdefault(revid, {"new": set(), "removed": set()})
    comment_to_match = re.sub(r'<.*?a>', '', event["parsedcomment"])
    wiki_cat_mw_messages = category_msgs[local_wiki.mw_messages]
    # Only the message text can be searched for inside the comment.
    texts = [entry if isinstance(entry, str) else entry[1] for entry in wiki_cat_mw_messages]
    if len(texts) < 4:
        # Positions 0-3 are relied on below; with messages missing they are not trustworthy.
        logger.warning("Incomplete category messages for this wiki, could not read category information.")
        return
    if texts[0] in comment_to_match or texts[2] in comment_to_match:  # Added to category
        changes["new"].add(cat_title)
        logger.debug("Matched {} to added category for {}".format(cat_title, revid))
    elif texts[1] in comment_to_match or texts[3] in comment_to_match:  # Removed from category
        changes["removed"].add(cat_title)
        logger.debug("Matched {} to removed category for {}".format(cat_title, revid))
    else:
        logger.debug(
            "Unknown match for category change with messages {}, {}, {}, {} and comment_to_match {}".format(
                texts[0], texts[1], texts[2], texts[3], comment_to_match))
async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict):
    """
    This function is made to parse the initial wiki extended information to update local_wiki.mw_messages that stores the key
    to mw_msgs that is a dict storing id: tuple where tuple is a set of MW messages for categories.
    The reason it's constructed this way is to prevent duplication of data in memory so Markus doesn't complain about
    high RAM usage. It does however affect CPU performance as every wiki requires to check the list for the matching
    tuples of MW messages.

    :param wiki_response: decoded API response; ``["query"]["allmessages"]`` is read
    :param local_wiki: wiki whose ``mw_messages`` key gets set
    :param mw_msgs: shared registry, extended in place when this wiki's messages are new
    :return: None
    """
    collected = []
    for message in wiki_response["query"]["allmessages"]:
        if "missing" not in message:  # ignore missing strings
            # Strip [[...]] wiki links so only the surrounding message text is kept.
            collected.append((message["name"], re.sub(r'\[\[.*?\]\]', '', message["*"])))
        else:
            logger.warning("Could not fetch the MW message translation for: {}".format(message["name"]))
    msgs = tuple(collected)
    for key, known_msgs in mw_msgs.items():
        if msgs == known_msgs:
            local_wiki.mw_messages = key
            return
    # if same entry is not in mw_msgs
    key = len(mw_msgs)
    mw_msgs[key] = msgs  # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one
    local_wiki.mw_messages = key
2020-07-18 12:12:00 +00:00
2020-07-21 12:15:40 +00:00
async def essential_info(change: dict, changed_categories, local_wiki: Wiki, db_wiki: tuple, target: tuple, paths: tuple, request: dict):
    """Prepares essential information for both embed and compact message format.

    Picks the formatter and language from ``target``, turns the change's parsed
    comment into escaped text, works out the event identifier and awaits the
    formatter. Unsupported and "categorize" events are dropped without output.

    :param change: a single recent-changes entry.
    :param changed_categories: passed through to the formatter.
    :param local_wiki: passed through to the formatter.
    :param db_wiki: not used by this function.
    :param target: ``target[0][0]`` is the key into ``langs``; ``target[0][1] > 0``
        selects the embed formatter, anything else the compact one.
    :param paths: ``paths[3]`` is handed to ``LinkParser``; the whole tuple is
        passed on to the formatter.
    :param request: decoded API response; ``["query"]["namespaces"]`` and
        ``["query"]["tags"]`` are read.
    """
    def _(string: str) -> str:
        """Our own translation string to make it compatible with async"""
        return lang.gettext(string)

    lang = langs[target[0][0]]
    ngettext = lang.ngettext
    # recent_changes = RecentChangesClass()  # TODO Look into replacing RecentChangesClass with local_wiki
    # Use a local parser instance; rebinding the module-level LinkParser name
    # to an instance would make every later call fail.
    link_parser = LinkParser(paths[3])
    logger.debug(change)
    appearance_mode = embed_formatter if target[0][1] > 0 else compact_formatter
    if "actionhidden" in change or "suppressed" in change:  # if event is hidden using suppression
        await appearance_mode("suppressed", change, "", changed_categories, local_wiki, target, _, ngettext, paths)
        return
    if "commenthidden" not in change:
        link_parser.feed(change["parsedcomment"])
        parsed_comment = link_parser.new_string
        # Backslash-escape characters that would otherwise be read as markup.
        parsed_comment = re.sub(r"(`|_|\*|~|{|}|\|\|)", "\\\\\\1", parsed_comment)
    else:
        parsed_comment = _("~~hidden~~")
    if not parsed_comment:
        parsed_comment = None
    not_implemented = "This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}"
    if change["type"] in ["edit", "new"]:
        logger.debug("List of categories in essential_info: {}".format(changed_categories))
        if "userhidden" in change:
            change["user"] = _("hidden")
        identification_string = change["type"]
    elif change["type"] == "log":
        identification_string = "{logtype}/{logaction}".format(logtype=change["logtype"], logaction=change["logaction"])
        if identification_string not in supported_logs:
            logger.warning(not_implemented.format(change))
            return
    elif change["type"] == "categorize":
        return
    else:
        logger.warning(not_implemented.format(change))
        return
    additional_data = {"namespaces": request["query"]["namespaces"], "tags": {}}
    for tag in request["query"]["tags"]:
        try:
            additional_data["tags"][tag["name"]] = (BeautifulSoup(tag["displayname"], "lxml")).get_text()
        except KeyError:
            additional_data["tags"][tag["name"]] = None  # Tags with no displayname
    await appearance_mode(identification_string, change, parsed_comment, changed_categories, local_wiki, target, _, ngettext, paths, additional_data=additional_data)