Added code

This commit is contained in:
Frisk 2020-07-19 15:32:54 +02:00
parent 6c9dd2245d
commit 19730dfcdb
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
5 changed files with 333 additions and 39 deletions

View file

@ -1,7 +1,7 @@
import logging.config
from src.config import settings
import sqlite3
from src.wiki import Wiki, process_cats, process_mwmsgs
from src.wiki import Wiki, process_cats, process_mwmsgs, essential_info
import asyncio, aiohttp
from src.exceptions import *
from src.database import db_cursor
@ -19,7 +19,7 @@ mw_msgs: dict = {} # will have the type of id: tuple
# Reasons for this: 1. we require amount of wikis to calculate the cooldown between requests
# 2. Easier to code
for wiki in db_cursor.execute('SELECT ROWID, * FROM wikis'):
for wiki in db_cursor.execute('SELECT ROWID, * FROM rcgcdw'):
all_wikis[wiki[0]] = Wiki()
# Start queueing logic
@ -31,10 +31,10 @@ def calculate_delay() -> float:
else:
return min_delay
async def main_loop():
async def wiki_scanner():
calc_delay = calculate_delay()
for db_wiki in db_cursor.execute('SELECT ROWID, * FROM wikis'):
for db_wiki in db_cursor.execute('SELECT ROWID, * FROM rcgcdw'):
extended = False
if wiki[0] not in all_wikis:
logger.debug("New wiki: {}".format(wiki[1]))
@ -43,7 +43,7 @@ async def main_loop():
if local_wiki.mw_messages is None:
extended = True
try:
wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[3], db_wiki[4])
wiki_response = await local_wiki.fetch_wiki(extended, db_wiki[4])
await local_wiki.check_status(wiki[0], wiki_response.status, db_wiki[1])
except (WikiServerError, WikiError):
continue # ignore this wiki if it throws errors
@ -56,7 +56,7 @@ async def main_loop():
if extended:
await process_mwmsgs(recent_changes_resp, local_wiki, mw_msgs)
categorize_events = {}
if db_wiki[6] is None: # new wiki, just get the last rc to not spam the channel
if db_wiki[7] is None: # new wiki, just get the last rc to not spam the channel
if len(recent_changes) > 0:
DBHandler.add(db_wiki[0], recent_changes[-1]["rcid"])
continue
@ -66,8 +66,17 @@ async def main_loop():
for change in recent_changes:
await process_cats(change, local_wiki, mw_msgs, categorize_events)
for change in recent_changes: # Yeah, second loop since the categories require to be all loaded up
if change["rcid"] < db_wiki[6]:
if change["rcid"] < db_wiki[7]:
await essential_info(change, categorize_events, local_wiki, db_wiki)
await asyncio.sleep(delay=calc_delay)
async def message_sender():
pass
async def main_loop():
task1 = asyncio.create_task(wiki_scanner())
task2 = asyncio.create_task(message_sender())
asyncio.run(main_loop())

View file

@ -3,23 +3,25 @@ import math
import re
import time
import logging
import base64
from config import settings
from src.misc import link_formatter, create_article_path, LinkParser, profile_field_name, ContentParser, DiscordMessage
from urllib.parse import quote_plus
# from html.parser import HTMLParser
from bs4 import BeautifulSoup
#from src.configloader import settings
#from src.misc import link_formatter, create_article_path, WIKI_SCRIPT_PATH, send_to_discord, DiscordMessage, safe_read, \
# WIKI_API_PATH, ContentParser, profile_field_name, LinkParser
from src.i18n import lang
from src.i18n import langs
#from src.rc import recent_changes, pull_comment
ngettext = lang.ngettext
logger = logging.getLogger("rcgcdw.rc_formatters")
#from src.rcgcdw import recent_changes, ngettext, logger, profile_field_name, LinkParser, pull_comment
LinkParser = LinkParser()
def compact_formatter(action, change, parsed_comment, categories, recent_changes):
LinkParser = LinkParser("domain")
if action != "suppressed":
author_url = link_formatter(create_article_path("User:{user}".format(user=change["user"])))
author = change["user"]
@ -308,32 +310,14 @@ def compact_formatter(action, change, parsed_comment, categories, recent_changes
def embed_formatter(action, change, parsed_comment, categories, recent_changes):
LinkParser = LinkParser()
embed = DiscordMessage("embed", action, settings["webhookURL"])
WIKI_API_PATH =
if parsed_comment is None:
parsed_comment = _("No description provided")
if action != "suppressed":
if "anon" in change:
author_url = create_article_path("Special:Contributions/{user}".format(user=change["user"].replace(" ", "_"))) # Replace here needed in case of #75
logger.debug("current user: {} with cache of IPs: {}".format(change["user"], recent_changes.map_ips.keys()))
if change["user"] not in list(recent_changes.map_ips.keys()):
contibs = safe_read(recent_changes.safe_request(
"{wiki}?action=query&format=json&list=usercontribs&uclimit=max&ucuser={user}&ucstart={timestamp}&ucprop=".format(
wiki=WIKI_API_PATH, user=change["user"], timestamp=change["timestamp"])), "query", "usercontribs")
if contibs is None:
logger.warning(
"WARNING: Something went wrong when checking amount of contributions for given IP address")
change["user"] = change["user"] + "(?)"
else:
recent_changes.map_ips[change["user"]] = len(contibs)
logger.debug("Current params user {} and state of map_ips {}".format(change["user"], recent_changes.map_ips))
change["user"] = "{author} ({contribs})".format(author=change["user"], contribs=len(contibs))
else:
logger.debug(
"Current params user {} and state of map_ips {}".format(change["user"], recent_changes.map_ips))
if action in ("edit", "new"):
recent_changes.map_ips[change["user"]] += 1
change["user"] = "{author} ({amount})".format(author=change["user"],
amount=recent_changes.map_ips[change["user"]])
author_url = create_article_path("Special:Contributions/{user}".format(user=change["user"].replace(" ", "_")))
else:
author_url = create_article_path("User:{}".format(change["user"].replace(" ", "_")))
embed.set_author(change["user"], author_url)

247
src/misc.py Normal file
View file

@ -0,0 +1,247 @@
from html.parser import HTMLParser
import base64, re
from src.config import settings
import json
import logging
from collections import defaultdict
import random
import math
# Mapping of CurseProfile field identifiers to localized, human-readable labels.
# NOTE(review): `_` (gettext) is not imported or installed anywhere in this new
# module — unless a translation function is injected before import time, this
# line raises NameError. Confirm how i18n is wired up for this file.
profile_fields = {"profile-location": _("Location"), "profile-aboutme": _("About me"), "profile-link-google": _("Google link"), "profile-link-facebook":_("Facebook link"), "profile-link-twitter": _("Twitter link"), "profile-link-reddit": _("Reddit link"), "profile-link-twitch": _("Twitch link"), "profile-link-psn": _("PSN link"), "profile-link-vk": _("VK link"), "profile-link-xbl": _("XBL link"), "profile-link-steam": _("Steam link"), "profile-link-discord": _("Discord handle"), "profile-link-battlenet": _("Battle.net handle")}
# NOTE(review): logger name says "rcgcdw" while other modules here use "rcgcdb" — confirm intended.
logger = logging.getLogger("rcgcdw.misc")
class DiscordMessage():
    """A class defining a typical Discord JSON representation of webhook payload.

    Depending on message_type ("embed" or "compact") the payload is prepared
    either as a Discord embed or as plain text content.
    """

    def __init__(self, message_type: str, event_type: str, webhook_url: str, content=None):
        # Base payload; allowed_mentions is emptied so the message can never ping anyone.
        self.webhook_object = dict(allowed_mentions={"parse": []}, avatar_url=settings["avatars"].get(message_type, ""))
        self.webhook_url = webhook_url

        if message_type == "embed":
            self.__setup_embed()
        elif message_type == "compact":
            self.webhook_object["content"] = content

        self.event_type = event_type

    def __setitem__(self, key, value):
        """Set an embed key; only valid for embed-type messages.

        Raises TypeError when the message was constructed as a plain message.
        """
        try:
            self.embed[key] = value
        # BUGFIX: was `except NameError` — a missing `self.embed` attribute
        # raises AttributeError, so the intended TypeError was never produced.
        except AttributeError:
            raise TypeError("Tried to assign a value when message type is plain message!")

    def __getitem__(self, item):
        return self.embed[item]

    def __repr__(self):
        """Return the Discord webhook object ready to be sent."""
        return json.dumps(self.webhook_object)

    def __setup_embed(self):
        # defaultdict(dict) lets nested keys like embed["author"]["name"] be
        # assigned without pre-creating the inner dict.
        self.embed = defaultdict(dict)
        if "embeds" not in self.webhook_object:
            self.webhook_object["embeds"] = [self.embed]
        else:
            self.webhook_object["embeds"].append(self.embed)
        self.embed["color"] = None

    def add_embed(self):
        """Finalize the current embed and start a fresh one in the same payload."""
        self.finish_embed()
        self.__setup_embed()

    def finish_embed(self):
        # Resolve the embed color: explicit value > per-event-type setting > random.
        if self.embed["color"] is None:
            if settings["appearance"]["embed"].get(self.event_type, {"color": None})["color"] is None:
                self.embed["color"] = random.randrange(1, 16777215)
            else:
                self.embed["color"] = settings["appearance"]["embed"][self.event_type]["color"]
        else:
            # Colors configured as floats must be sent as integers.
            self.embed["color"] = math.floor(self.embed["color"])

    def set_author(self, name, url, icon_url=""):
        self.embed["author"]["name"] = name
        self.embed["author"]["url"] = url
        self.embed["author"]["icon_url"] = icon_url

    def add_field(self, name, value, inline=False):
        if "fields" not in self.embed:
            self.embed["fields"] = []
        self.embed["fields"].append(dict(name=name, value=value, inline=inline))

    def set_avatar(self, url):
        self.webhook_object["avatar_url"] = url

    def set_name(self, name):
        self.webhook_object["username"] = name
class LinkParser(HTMLParser):
    """HTML parser that converts anchor tags into Discord markdown links.

    Feed it HTML (e.g. a parsed comment) and read the converted text from
    ``new_string``. Relative hrefs are anchored to the wiki domain passed to
    the constructor.
    """
    new_string = ""
    recent_href = ""

    def __init__(self, domain):
        super().__init__()
        self.WIKI_JUST_DOMAIN = domain

    def handle_starttag(self, tag, attrs):
        for attr_name, attr_value in attrs:
            if attr_name == 'href':
                href = attr_value
                if href.startswith("//"):
                    # Protocol-relative URL — assume https.
                    href = "https:{rest}".format(rest=href)
                elif not href.startswith("http"):
                    # Relative URL — prefix with the wiki domain.
                    href = self.WIKI_JUST_DOMAIN + href
                # Escape closing parens so they cannot terminate the markdown link.
                self.recent_href = href.replace(")", "\\)")
            elif attr_name == 'data-uncrawlable-url':
                # Real target is base64-encoded in this attribute.
                decoded = base64.b64decode(attr_value.encode('ascii'))
                self.recent_href = self.WIKI_JUST_DOMAIN + decoded.decode('ascii')

    def handle_data(self, data):
        if self.recent_href:
            # Wrap the text of the last seen link target as [text](<url>).
            self.new_string += "[{}](<{}>)".format(data, self.recent_href)
            self.recent_href = ""
        else:
            self.new_string += data

    def handle_comment(self, data):
        self.new_string += data

    def handle_endtag(self, tag):
        # Nothing to do on closing tags.
        pass
def link_formatter(link: str) -> str:
    """Wrap *link* in angle brackets so Discord does not embed it.

    Closing parentheses are backslash-escaped and spaces turned into
    underscores so the URL survives inside markdown.
    """
    escaped = re.sub(r"([)])", "\\\\\\1", link)
    return "<{}>".format(escaped.replace(" ", "_"))
def escape_formatting(data: str) -> str:
    """Backslash-escape every character Discord treats as markdown formatting."""
    return re.sub(r"([`_*~<>{}@/|\\])", r"\\\1", data)
def create_article_path(article: str, WIKI_ARTICLE_PATH: str) -> str:
    """Build an article URL by substituting ``$1`` in the wiki's article path."""
    return WIKI_ARTICLE_PATH.replace("$1", article)
def profile_field_name(name, embed):
    """Translate a CurseProfile field identifier into a human-readable label.

    Falls back to a generic "Unknown"/"unknown" (embed vs. compact casing)
    when the field is not present in the profile_fields table.
    """
    if name in profile_fields:
        return profile_fields[name]
    return _("Unknown") if embed else _("unknown")
class ContentParser(HTMLParser):
    """Parses a MediaWiki diff table into short added/removed text previews.

    ``small_prev_ins`` accumulates inserted text (bold) and ``small_prev_del``
    accumulates deleted text (strikethrough). Both previews are capped near
    1000 characters and terminated with a localized "And more" marker once the
    cap is reached.
    """
    more = _("\n__And more__")
    current_tag = ""
    small_prev_ins = ""
    small_prev_del = ""
    # Lengths start at len(more) to reserve room for the truncation marker.
    ins_length = len(more)
    del_length = len(more)
    added = False

    def handle_starttag(self, tagname, attribs):
        # NOTE(review): `x in attribs[0]` tests membership in the first
        # (name, value) attribute tuple, so it matches only when the class
        # attribute comes first and equals the diff class exactly — confirm
        # this holds for the diff HTML being fed in.
        if tagname == "ins" or tagname == "del":
            self.current_tag = tagname
        if tagname == "td" and 'diff-addedline' in attribs[0]:
            self.current_tag = tagname + "a"
        if tagname == "td" and 'diff-deletedline' in attribs[0]:
            self.current_tag = tagname + "d"
        if tagname == "td" and 'diff-marker' in attribs[0]:
            # Next data chunk will be the +/− marker cell.
            self.added = True

    def handle_data(self, data):
        # Escape Discord formatting characters inside the diff text itself.
        data = re.sub(r"([`_*~<>{}@/|\\])", "\\\\\\1", data, 0)
        if self.current_tag == "ins" and self.ins_length <= 1000:
            self.ins_length += len("**" + data + '**')
            if self.ins_length <= 1000:
                self.small_prev_ins = self.small_prev_ins + "**" + data + '**'
            else:
                self.small_prev_ins = self.small_prev_ins + self.more
        if self.current_tag == "del" and self.del_length <= 1000:
            self.del_length += len("~~" + data + '~~')
            if self.del_length <= 1000:
                self.small_prev_del = self.small_prev_del + "~~" + data + '~~'
            else:
                self.small_prev_del = self.small_prev_del + self.more
        if (self.current_tag == "afterins" or self.current_tag == "tda") and self.ins_length <= 1000:
            self.ins_length += len(data)
            if self.ins_length <= 1000:
                self.small_prev_ins = self.small_prev_ins + data
            else:
                self.small_prev_ins = self.small_prev_ins + self.more
        if (self.current_tag == "afterdel" or self.current_tag == "tdd") and self.del_length <= 1000:
            self.del_length += len(data)
            if self.del_length <= 1000:
                self.small_prev_del = self.small_prev_del + data
            else:
                self.small_prev_del = self.small_prev_del + self.more
        if self.added:
            if data == '+' and self.ins_length <= 1000:
                self.ins_length += 1
                if self.ins_length <= 1000:
                    self.small_prev_ins = self.small_prev_ins + '\n'
                else:
                    self.small_prev_ins = self.small_prev_ins + self.more
            # BUGFIX: the comparison read `data == ''`, a condition html.parser
            # can never deliver. The deleted-row marker in MediaWiki diff HTML
            # is U+2212 MINUS SIGN ('−'); the character was evidently lost to a
            # text-encoding issue, restored here.
            if data == '−' and self.del_length <= 1000:
                self.del_length += 1
                if self.del_length <= 1000:
                    self.small_prev_del = self.small_prev_del + '\n'
                else:
                    self.small_prev_del = self.small_prev_del + self.more
            self.added = False

    def handle_endtag(self, tagname):
        if tagname == "ins":
            self.current_tag = "afterins"
        elif tagname == "del":
            self.current_tag = "afterdel"
        else:
            self.current_tag = ""
class RecentChangesClass():
    """Store various data and functions related to the wiki and fetching of Recent Changes."""

    def __init__(self):
        self.tags = {}          # tag name -> display text, filled elsewhere
        self.mw_messages = {}   # cached MediaWiki interface messages
        self.namespaces = None  # namespace data, populated later
        # NOTE(review): `session` is not defined anywhere in this module —
        # presumably a requests.Session created elsewhere; confirm before use,
        # otherwise this raises NameError on instantiation.
        self.session = session

    @staticmethod
    def handle_mw_errors(request):
        # Raise when the MediaWiki API response payload carries an "errors" key.
        # NOTE(review): MWError is not imported in this file — presumably comes
        # from src.exceptions; confirm.
        if "errors" in request:
            logger.error(request["errors"])
            raise MWError
        return request

    def safe_request(self, url):
        """GET *url*, returning the response or None on any transport failure.

        Failures (timeouts, connection errors, faulty responses, 5xx) are
        counted via downtime_controller.
        NOTE(review): neither `requests` (the module) nor `downtime_controller`
        (the method) is defined/imported in this file — confirm where they come
        from before relying on this method.
        """
        try:
            request = self.session.get(url, timeout=10, allow_redirects=False)
        except requests.exceptions.Timeout:
            logger.warning("Reached timeout error for request on link {url}".format(url=url))
            self.downtime_controller()
            return None
        except requests.exceptions.ConnectionError:
            logger.warning("Reached connection error for request on link {url}".format(url=url))
            self.downtime_controller()
            return None
        except requests.exceptions.ChunkedEncodingError:
            logger.warning("Detected faulty response from the web server for request on link {url}".format(url=url))
            self.downtime_controller()
            return None
        else:
            # 5xx — server-side problem, treated as wiki downtime.
            if 499 < request.status_code < 600:
                self.downtime_controller()
                return None
            elif request.status_code == 302:
                # Redirect is logged but the response is still returned below.
                logger.warning("Redirect detected! Either the wiki given in the script settings (wiki field) is incorrect/the wiki got removed or Gamepedia is giving us the false value. Please provide the real URL to the wiki, current URL redirects to {}".format(request.next.url))
            return request

    def init_info(self):
        # Placeholder — intentionally does nothing yet.
        return

49
src/msgqueue.py Normal file
View file

@ -0,0 +1,49 @@
import asyncio, logging
logger = logging.getLogger("rcgcdw.msgqueue")
class MessageQueue:
    """Message queue class for undelivered messages.

    Holds Discord payloads that could not be delivered and retries them in
    order, dropping from the queue only those that were successfully sent.
    """

    def __init__(self):
        self._queue = []

    def __repr__(self):
        # BUGFIX: previously returned the list object itself; __repr__ must
        # return a str or repr(queue) raises
        # "TypeError: __repr__ returned non-string".
        return repr(self._queue)

    def __len__(self):
        return len(self._queue)

    def __iter__(self):
        return iter(self._queue)

    def clear(self):
        self._queue.clear()

    def add_message(self, message):
        self._queue.append(message)

    def cut_messages(self, item_num):
        """Drop the first *item_num* messages (the ones already delivered)."""
        self._queue = self._queue[item_num:]

    async def resend_msgs(self):
        """Try to deliver queued messages in order; stop at the first failure.

        NOTE(review): send_to_discord_webhook is not defined or imported in
        this module — confirm where it comes from. Its return codes below 2
        are treated as success here.
        """
        if self._queue:
            logger.info(
                "{} messages waiting to be delivered to Discord due to Discord throwing errors/no connection to Discord servers.".format(
                    len(self._queue)))
            for num, item in enumerate(self._queue):
                logger.debug(
                    "Trying to send a message to Discord from the queue with id of {} and content {}".format(str(num),
                                                                                                             str(item)))
                if send_to_discord_webhook(item) < 2:
                    logger.debug("Sending message succeeded")
                    await asyncio.sleep(2.5)
                else:
                    logger.debug("Sending message failed")
                    break
            else:
                # Loop finished without break — everything was delivered.
                self.clear()
                logger.debug("Queue emptied, all messages delivered")
            # After a break, keep the failed message and everything after it.
            self.cut_messages(num)
            logger.debug(self._queue)


messagequeue = MessageQueue()

View file

@ -5,18 +5,23 @@ import logging, aiohttp
from src.exceptions import *
from src.database import db_cursor, db_connection
from src.formatters.rc import embed_formatter, compact_formatter
from src.misc import LinkParser, RecentChangesClass
from i18n import langs
import src.discord
logger = logging.getLogger("rcgcdb.wiki")
supported_logs = ["protect/protect", "protect/modify", "protect/unprotect", "upload/overwrite", "upload/upload", "delete/delete", "delete/delete_redir", "delete/restore", "delete/revision", "delete/event", "import/upload", "import/interwiki", "merge/merge", "move/move", "move/move_redir", "protect/move_prot", "block/block", "block/unblock", "block/reblock", "rights/rights", "rights/autopromote", "abusefilter/modify", "abusefilter/create", "interwiki/iw_add", "interwiki/iw_edit", "interwiki/iw_delete", "curseprofile/comment-created", "curseprofile/comment-edited", "curseprofile/comment-deleted", "curseprofile/comment-purged", "curseprofile/profile-edited", "curseprofile/comment-replied", "contentmodel/change", "sprite/sprite", "sprite/sheet", "sprite/slice", "managetags/create", "managetags/delete", "managetags/activate", "managetags/deactivate", "tag/update", "cargo/createtable", "cargo/deletetable", "cargo/recreatetable", "cargo/replacetable", "upload/revert"]
@dataclass
class Wiki:
mw_messages: int = None
fail_times: int = 0 # corresponding to amount of times connection with wiki failed for client reasons (400-499)
async def fetch_wiki(self, extended, script_path, api_path) -> aiohttp.ClientResponse:
url_path = script_path + api_path
async def fetch_wiki(self, extended, script_path) -> aiohttp.ClientResponse:
url_path = script_path + "api.php"
amount = 20
if extended:
params = {"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
@ -117,11 +122,13 @@ async def process_mwmsgs(wiki_response: dict, local_wiki: Wiki, mw_msgs: dict):
mw_msgs[key] = msgs # it may be a little bit messy for sure, however I don't expect any reason to remove mw_msgs entries by one
local_wiki.mw_messages = key
def essential_info(change, changed_categories, local_wiki, db_wiki):
async def essential_info(change, changed_categories, local_wiki, db_wiki):
"""Prepares essential information for both embed and compact message format."""
recent_changes = RecentChangesClass()
LinkParser = LinkParser("domain")
logger.debug(change)
lang = langs[db_wiki[1]]
appearance_mode = embed_formatter
appearance_mode = embed_formatter # TODO Add changing depending on the DB entry
if ("actionhidden" in change or "suppressed" in change): # if event is hidden using suppression
appearance_mode("suppressed", change, "", changed_categories, recent_changes)
return
@ -151,6 +158,4 @@ def essential_info(change, changed_categories, local_wiki, db_wiki):
else:
logger.warning("This event is not implemented in the script. Please make an issue on the tracker attaching the following info: wiki url, time, and this information: {}".format(change))
return
if identification_string in settings["ignored"]:
return
appearance_mode(identification_string, change, parsed_comment, changed_categories, recent_changes)