2018-06-15 13:56:35 +00:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
2021-04-18 23:21:38 +00:00
# This file is part of Recent changes Goat compatible Discord webhook (RcGcDw).
2018-06-21 23:56:04 +00:00
2021-04-18 23:21:38 +00:00
# RcGcDw is free software: you can redistribute it and/or modify
2019-02-13 08:14:08 +00:00
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
2018-09-30 16:14:44 +00:00
# (at your option) any later version.
2018-06-21 23:56:04 +00:00
2021-04-18 23:21:38 +00:00
# RcGcDw is distributed in the hope that it will be useful,
2018-09-30 16:14:44 +00:00
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2019-02-13 08:14:08 +00:00
# GNU General Public License for more details.
2018-06-21 23:56:04 +00:00
2019-02-13 08:14:08 +00:00
# You should have received a copy of the GNU General Public License
2021-04-18 23:21:38 +00:00
# along with RcGcDw. If not, see <http://www.gnu.org/licenses/>.
2018-06-21 23:56:04 +00:00
2018-09-30 16:14:44 +00:00
# WARNING! SHITTY CODE AHEAD. ENTER ONLY IF YOU ARE SURE YOU CAN TAKE IT
# You have been warned
2018-06-22 10:18:22 +00:00
2020-07-16 12:46:23 +00:00
import time , logging . config , requests , datetime , gettext , math , os . path , schedule , sys
2020-03-15 17:32:53 +00:00
2020-07-07 11:21:49 +00:00
import src . misc
2018-06-21 23:35:24 +00:00
from collections import defaultdict , Counter
2020-07-07 11:21:49 +00:00
from src . configloader import settings
2020-07-16 12:46:23 +00:00
from src . misc import add_to_dict , datafile , \
2020-11-08 21:29:15 +00:00
WIKI_API_PATH , create_article_path
from src . discord . queue import send_to_discord
from src . discord . message import DiscordMessage , DiscordMessageMetadata
2021-04-24 12:32:23 +00:00
from src . rc import wiki
2020-07-16 12:46:23 +00:00
from src . exceptions import MWError
2020-08-08 01:23:01 +00:00
from src . i18n import rcgcdw
2020-07-16 12:46:23 +00:00
2020-08-08 01:23:01 +00:00
# Translation helpers bound to the "rcgcdw" gettext domain
_ = rcgcdw.gettext
ngettext = rcgcdw.ngettext

# The discussions module is only imported when the feature is switched on in settings
if settings["fandom_discussions"]["enabled"]:
    import src.discussions

TESTING = "--test" in sys.argv  # debug mode, pipeline testing
formatter_hooks = dict()

# Prepare logging
logging.config.dictConfig(settings["logging"])
logger = logging.getLogger("rcgcdw")
logger.debug("Current settings: {settings}".format(settings=settings))

from src.migrations import *  # migrations after logging
2021-04-25 00:32:50 +00:00
def load_extensions():
    """Import the ``extensions`` package so that its contents can register themselves.

    The import is local because only its side effects are wanted; nothing from
    the package is referenced here. Any import failure is fatal: it is logged
    at CRITICAL level and the process exits with status 1.
    """
    try:
        import extensions  # noqa: F401 - imported only for its side effects
    except ImportError as exc:
        if isinstance(exc, ModuleNotFoundError) and exc.name == "extensions":
            # The package itself could not be located.
            logger.critical("No extensions module found. What's going on?")
        else:
            # The package was found but something it imports failed. Report the
            # real cause instead of claiming that the whole package is missing.
            logger.critical("Extensions module could not be imported: %s", exc, exc_info=True)
        sys.exit(1)
2020-10-18 09:39:16 +00:00
storage = datafile

# One-time migration of the legacy lastchange.txt file into the data file.
# Only done when limitrefetch is not -1 and the legacy file is present.
if settings["limitrefetch"] != -1 and os.path.exists("lastchange.txt"):
    with open("lastchange.txt", 'r') as sfile:
        logger.info("Converting old lastchange.txt file into new data storage data.json...")
        storage["rcid"] = int(sfile.read().strip())
    # The handle is closed before the file is deleted: removing a file that is
    # still open raises an error on Windows.
    datafile.save_datafile()
    os.remove("lastchange.txt")
2018-09-30 16:14:44 +00:00
2018-06-21 23:35:24 +00:00
def day_overview_request():
    """Collect the recent changes made during the last 24 hours for the daily overview.

    Pages through the wiki's recentchanges list, following ``rccontinue`` for
    at most 10 continuation passes and sleeping 3 seconds between pages.

    Returns:
        A ``(changes, status)`` tuple. ``changes`` is the list of recent-change
        entries gathered so far. ``status`` is ``1`` when every page was
        fetched, ``2`` when a request or its parsing failed, and ``False`` when
        the loop stopped because the pass limit was reached.
    """
    logger.info("Fetching daily overview... This may take up to 30 seconds!")
    day_ago = datetime.datetime.utcnow() - datetime.timedelta(hours=24)
    timestamp = day_ago.isoformat(timespec='milliseconds')
    logger.debug("timestamp is {}".format(timestamp))
    query = (
        "{wiki}?action=query&format=json&list=recentchanges&rcend={timestamp}Z"
        "&rcprop=title%7Ctimestamp%7Csizes%7Cloginfo%7Cuser&rcshow=!bot&rclimit=500"
        "&rctype=edit%7Cnew%7Clog{continuearg}"
    )
    changes = []
    status = False
    passes = 0
    continuearg = ""
    while passes < 10:
        response = wiki._safe_request(
            query.format(wiki=WIKI_API_PATH, timestamp=timestamp, continuearg=continuearg))
        if not response:
            status = 2
            break
        try:
            # The same name is rebound on purpose so the KeyError handler logs
            # whatever stage of the response had been reached.
            response = response.json()
            response = wiki.handle_mw_errors(response)
            page = response['query']['recentchanges']
            continuearg = response["continue"]["rccontinue"] if "continue" in response else None
        except ValueError:
            logger.warning("ValueError in fetching changes")
            wiki.downtime_controller(True)
            status = 2
            break
        except KeyError:
            logger.warning("Wiki returned %s" % response)
            status = 2
            break
        except MWError:
            status = 2
            break
        changes.extend(page)
        if not continuearg:
            # No continuation token: this was the last page
            status = 1
            break
        continuearg = "&rccontinue={}".format(continuearg)
        passes += 1
        logger.debug(
            "continuing requesting next pages of recent changes with {} passes and continuearg being {}".format(
                passes, continuearg))
        time.sleep(3.0)
    if passes == 10:
        logger.debug("quit the loop because there been too many passes")
    return changes, status
2018-06-21 23:35:24 +00:00
2018-09-30 16:14:44 +00:00
2019-05-19 16:25:20 +00:00
def daily_overview_sync(edits, files, admin, changed_bytes, new_articles, unique_contributors, day_score):
    """Fold today's overview numbers into the stored statistics and return display strings.

    On the first tracked day (``days_tracked`` is 0) the raw values are stored
    and returned as plain strings. On later days each value is combined with
    the stored one through ``src.misc.weighted_average``; the result replaces
    the stored value and the returned text has the form "value (avg. average)".
    In both cases ``days_tracked`` is incremented and the data file is saved.

    Returns:
        A 7-tuple of strings in the same order as the parameters.
    """
    weight = storage["daily_overview"]["days_tracked"]
    logger.debug(_)  # NOTE(review): logs the gettext callable itself; looks like a debugging leftover
    # (storage key, today's value) pairs, in parameter order
    todays = (
        ("edits", edits),
        ("new_files", files),
        ("admin_actions", admin),
        ("bytes_changed", changed_bytes),
        ("new_articles", new_articles),
        ("unique_editors", unique_contributors),
        ("day_score", day_score),
    )
    if weight == 0:
        storage["daily_overview"].update(dict(todays))
        rendered = [str(value) for key, value in todays]
    else:
        averages = {}
        rendered = []
        for key, value in todays:
            average = src.misc.weighted_average(storage["daily_overview"][key], weight, value)
            rendered.append(_("{value} (avg. {avg})").format(value=value, avg=average))
            averages[key] = average
        storage["daily_overview"].update(averages)
    storage["daily_overview"]["days_tracked"] += 1
    datafile.save_datafile()
    return tuple(rendered)
2018-09-30 16:14:44 +00:00
2021-04-18 23:21:38 +00:00
2019-05-20 10:32:26 +00:00
def _is_content_namespace(ns):
    """Tell whether namespace id *ns* counts as article space for the overview.

    Namespace 0 always counts. Any other namespace counts when the wiki's
    namespace information is available and its entry carries a "content" key.
    """
    if ns == 0:
        return True
    if wiki.namespaces is None:
        return False
    return "content" in wiki.namespaces.get(str(ns), {})


def day_overview():
    """Build the daily overview embed from the last day's recent changes and send it to Discord.

    Nothing is sent when the recent-changes request did not finish with
    status 1, or when there were no changes and ``send_empty_overview`` is
    disabled. With no changes and that setting enabled, the embed only carries
    a "No activity" description. Otherwise the changes are tallied, merged
    with the stored averages through ``daily_overview_sync`` and added to the
    embed as inline fields.
    """
    result = day_overview_request()
    if result[1] != 1:
        logger.debug("function requesting changes for day overview returned with error code")
        return
    activity = defaultdict(dict)
    hours = defaultdict(dict)
    articles = defaultdict(dict)
    edits = 0
    files = 0
    admin = 0
    changed_bytes = 0
    new_articles = 0
    active_articles = []
    embed = DiscordMessage("embed", "daily_overview", settings["webhookURL"])
    embed["title"] = _("Daily overview")
    embed["url"] = create_article_path("Special:Statistics")
    embed.set_author(settings["wikiname"], create_article_path(""))
    if not result[0]:
        if not settings["send_empty_overview"]:
            return  # no changes in this day
        embed["description"] = _("No activity")
    else:
        for item in result[0]:
            if "actionhidden" in item or "suppressed" in item or "userhidden" in item:
                # Such entries still have a type (edit/new/log), but many other
                # values are hidden and would cause a KeyError, so skip them.
                continue
            activity = add_to_dict(activity, item["user"])
            hours = add_to_dict(hours, datetime.datetime.strptime(item["timestamp"], "%Y-%m-%dT%H:%M:%SZ").hour)
            if item["type"] == "edit":
                edits += 1
                changed_bytes += item["newlen"] - item["oldlen"]
                if _is_content_namespace(item["ns"]):
                    articles = add_to_dict(articles, item["title"])
            elif item["type"] == "new":
                # Same namespace check as for edits. The previous expression
                # evaluated `"content" in False` and raised TypeError whenever
                # wiki.namespaces was None.
                if _is_content_namespace(item["ns"]):
                    new_articles += 1
                changed_bytes += item["newlen"]
            elif item["type"] == "log":
                if item["logtype"] == item["logaction"] == "upload":
                    files += 1
                if item["logtype"] in ["delete", "merge", "block", "protect", "import", "rights",
                                       "abusefilter", "interwiki", "managetags"]:
                    admin += 1
        overall = round(new_articles + edits * 0.1 + files * 0.3 + admin * 0.1 + math.fabs(changed_bytes * 0.001), 2)
        if activity:
            active_users = []
            for user, numberu in Counter(activity).most_common(3):  # find most active users
                active_users.append(user + ngettext(" ({} action)", " ({} actions)", numberu).format(numberu))
            for article, numbere in Counter(articles).most_common(3):  # find most edited articles
                active_articles.append(article + ngettext(" ({} edit)", " ({} edits)", numbere).format(numbere))
            # Every hour tied for the highest action count is listed
            hour_counts = list(hours.values())
            peak = max(hour_counts)
            active_hours = [str(hour) for hour, count in Counter(hours).most_common(hour_counts.count(peak))]
            houramount = ngettext(" UTC ({} action)", " UTC ({} actions)", peak).format(peak)
        else:
            active_users = [_("But nobody came")]  # a reference to my favorite game of all the time, sorry ^_^
            active_hours = [_("But nobody came")]
            houramount = ""
        if not active_articles:
            active_articles = [_("But nobody came")]
        edits, files, admin, changed_bytes, new_articles, unique_contributors, overall = daily_overview_sync(
            edits, files, admin, changed_bytes, new_articles, len(activity), overall)
        fields = (
            (ngettext("Most active user", "Most active users", len(active_users)), ', '.join(active_users)),
            (ngettext("Most edited article", "Most edited articles", len(active_articles)), ', '.join(active_articles)),
            (_("Edits made"), edits), (_("New files"), files), (_("Admin actions"), admin),
            (_("Bytes changed"), changed_bytes), (_("New articles"), new_articles),
            (_("Unique contributors"), unique_contributors),
            (ngettext("Most active hour", "Most active hours", len(active_hours)), ', '.join(active_hours) + houramount),
            (_("Day score"), overall))
        for name, value in fields:
            embed.add_field(name, value, inline=True)
    embed.finish_embed()
    send_to_discord(embed, meta=DiscordMessageMetadata("POST"))
2018-06-21 23:35:24 +00:00
2018-09-30 16:14:44 +00:00
2019-04-16 11:54:01 +00:00
# Log in (only when both bot credentials are configured) and download wiki information
try:
    if settings["wiki_bot_login"] and settings["wiki_bot_password"]:
        wiki.log_in()
        time.sleep(2.0)
    wiki.init_info()
except requests.exceptions.ConnectionError:
    logger.critical("A connection can't be established with the wiki. Exiting...")
    sys.exit(1)
# Pause before the next requests: Fandom does not seem to like ~2-3 requests within one second
time.sleep(3.0)
2020-04-08 10:45:09 +00:00
if not settings["rc_enabled"]:
    logger.info("Script started! RC is disabled however, this means no recent changes will be sent :c")
else:
    logger.info("Script started! Fetching newest changes...")
    # The first fetch uses limitrefetch unless it is -1, in which case the regular limit applies
    wiki.fetch(amount=settings["limit"] if settings["limitrefetch"] == -1 else settings["limitrefetch"])
    schedule.every(settings["cooldown"]).seconds.do(wiki.fetch)
    if settings["overview"]:
        try:
            overview_time = time.strptime(settings["overview_time"], '%H:%M')
            # schedule expects a zero-padded HH:MM string
            schedule.every().day.at(
                "{:02d}:{:02d}".format(overview_time.tm_hour, overview_time.tm_min)
            ).do(day_overview)
            del overview_time
        except schedule.ScheduleValueError:
            logger.error("Invalid time format! Currently: {}:{}".format(
                time.strptime(settings["overview_time"], '%H:%M').tm_hour,
                time.strptime(settings["overview_time"], '%H:%M').tm_min))
        except ValueError:
            logger.error("Invalid time format! Currentely: {}. Note: It needs to be in HH:MM format.".format(
                settings["overview_time"]))
    schedule.every().day.at("00:00").do(wiki.clear_cache)
2018-09-30 16:14:44 +00:00
2021-04-18 23:21:38 +00:00
if 1 == 2:  # additional translation strings in unreachable code
    # noinspection PyUnreachableCode
    print(
        _("director"), _("bot"), _("editor"), _("directors"), _("sysop"),
        _("bureaucrat"), _("reviewer"), _("autoreview"), _("autopatrol"),
        _("wiki_guardian"),
        ngettext("second", "seconds", 1),
        ngettext("minute", "minutes", 1),
        ngettext("hour", "hours", 1),
        ngettext("day", "days", 1),
        ngettext("week", "weeks", 1),
        ngettext("month", "months", 1),
        ngettext("year", "years", 1),
        ngettext("millennium", "millennia", 1),
        ngettext("decade", "decades", 1),
        ngettext("century", "centuries", 1),
    )
2018-06-18 15:26:03 +00:00
2021-04-25 00:32:50 +00:00
load_extensions()

if TESTING:
    # Pipeline test run: exercise each feature once, then exit successfully
    logger.debug("DEBUGGING")
    storage["rcid"] = 1
    wiki.fetch(amount=5)
    day_overview()
    import src.discussions
    src.discussions.fetch_discussions()
    sys.exit(0)

# Main loop: wake up once per second and run whichever scheduled jobs are due
while True:
    time.sleep(1.0)
    schedule.run_pending()