RcGcDw/src/misc.py

# -*- coding: utf-8 -*-

# Recent changes Goat compatible Discord webhook is a project for using a webhook as recent changes page from MediaWiki.
# Copyright (C) 2018 Frisk

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
import base64
import json, logging, sys, re
from html.parser import HTMLParser
from urllib.parse import urlparse, urlunparse, quote
import requests
from src.configloader import settings
from src.discord.message import DiscordMessage, DiscordMessageMetadata
from src.discord.queue import messagequeue, send_to_discord
from src.i18n import misc

# False when the "auto_suppression" section is missing from the settings,
# None when the section exists but has no "enabled" key.
AUTO_SUPPRESSION_ENABLED = settings.get("auto_suppression", {"enabled": False}).get("enabled")

# gettext function of the `misc` translation object imported from src.i18n
_ = misc.gettext

# Logger used by everything defined in this module
misc_logger = logging.getLogger("rcgcdw.misc")

# Contents written to data.json when the file has to be generated
data_template = {
	"rcid": None,
	"discussion_id": 0,
	"abuse_log_id": None,
	"daily_overview": {
		"edits": None,
		"new_files": None,
		"admin_actions": None,
		"bytes_changed": None,
		"new_articles": None,
		"unique_editors": None,
		"day_score": None,
		"days_tracked": 0,
	},
}

# Wiki URL pieces; they stay empty until prepare_paths() assigns them
WIKI_API_PATH: str = ""
WIKI_ARTICLE_PATH: str = ""
WIKI_SCRIPT_PATH: str = ""
WIKI_JUST_DOMAIN: str = ""

# Translated display names of the known profile fields, keyed by the field identifier
profile_fields = {
	"profile-location": _("Location"),
	"profile-aboutme": _("About me"),
	"profile-link-google": _("Google link"),
	"profile-link-facebook": _("Facebook link"),
	"profile-link-twitter": _("Twitter link"),
	"profile-link-reddit": _("Reddit link"),
	"profile-link-twitch": _("Twitch link"),
	"profile-link-psn": _("PSN link"),
	"profile-link-vk": _("VK link"),
	"profile-link-xbl": _("XBL link"),
	"profile-link-steam": _("Steam link"),
	"profile-link-discord": _("Discord handle"),
	"profile-link-battlenet": _("Battle.net handle"),
}

class DataFile:
	"""Data class which instance of is shared by multiple modules to remain consistent and do not cause too many IO operations.

	The contents of data.json (in the current working directory) are kept in `self.data`.
	Assigning through `instance[key] = value` marks the instance as changed; `save_datafile()`
	only touches the disk when that flag is set. Note that mutating a nested value obtained
	through `instance[key]` does not set the flag.
	"""
	def __init__(self):
		self.data = self.load_datafile()
		self.changed = False

	@staticmethod
	def generate_datafile():
		"""Generate a data.json file from a template.

		Exits the process with status 1 when the file cannot be written due to missing permissions."""
		try:
			with open("data.json", 'w') as data:
				data.write(json.dumps(data_template, indent=4))
		except PermissionError:
			misc_logger.critical("Could not create a data file (no permissions). No way to store last edit.")
			sys.exit(1)

	def load_datafile(self) -> dict:
		"""Read a data.json file and return a dictionary with contents

		When the file does not exist it is generated from the template and a fresh copy
		of the template is returned.
		:rtype: dict
		"""
		try:
			with open("data.json") as data:
				return json.loads(data.read())
		except FileNotFoundError:
			self.generate_datafile()
			misc_logger.info("The data file could not be found. Generating a new one...")
			# Return an independent copy: handing out data_template itself would let every later
			# assignment (including nested ones) silently modify the module-level template.
			# The JSON round trip yields exactly what reading the freshly generated file would.
			return json.loads(json.dumps(data_template))

	def save_datafile(self):
		"""Overwrites the data.json file with the current data, but only if something was changed

		Exits the process with status 1 when the file cannot be written due to missing permissions."""
		if self.changed is False:  # don't cause unnecessary write operations
			return
		try:
			with open("data.json", "w") as data_file:
				data_file.write(json.dumps(self.data, indent=4))
			self.changed = False
		except PermissionError:
			misc_logger.critical("Could not modify a data file (no permissions). No way to store last edit.")
			sys.exit(1)

	def __setitem__(self, instance, value):
		"""Store value under the given top-level key and remember that the data needs saving"""
		self.data[instance] = value
		self.changed = True

	def __getitem__(self, item):
		"""Return the value stored under the given top-level key"""
		return self.data[item]


# Module-level instance created at import time; constructing it reads data.json
# (or generates the file from data_template when it does not exist).
datafile = DataFile()


def weighted_average(value, weight, new_value):
	"""Return the mean of `value` (counted `weight` times) and `new_value` (counted once), rounded to 2 decimal places"""
	weighted_sum = (value * weight) + new_value
	total_weight = weight + 1
	return round(weighted_sum / total_weight, 2)


def link_formatter(link):
	"""Return the link wrapped in angle brackets so it is not embedded

	Spaces become underscores and everything except "/", ":" and "?" (plus the
	characters quote() never touches) is percent-encoded."""
	underscored = link.replace(" ", "_")
	encoded = quote(underscored, safe="/:?")
	return "<{}>".format(encoded)


def escape_formatting(data):
	"""Escape Discord formatting

	Every occurrence of one of the characters ` _ * ~ < > { } @ / | \\ in `data`
	is prefixed with a backslash; all other text is returned unchanged."""
	# `count` is left at its default (0 = replace every match): passing it
	# positionally is deprecated since Python 3.13.
	return re.sub(r"([`_*~<>{}@/|\\])", r"\\\1", data)


class ContentParser(HTMLParser):
	"""Builds short Discord-formatted previews of the added and removed lines of an HTML diff table.

	Feed the diff HTML with feed(); afterwards `small_prev_ins` holds the added lines and
	`small_prev_del` the removed lines, one per row, each preceded by a newline. Text inside
	<ins> is wrapped in ** and text inside <del> in ~~. A row that also contains a
	"diff-empty" cell (a line with no counterpart on the other side) is wrapped as a whole.
	Once a side grows past 1000 characters no more text is collected for it and the
	translated "And more" marker is appended.
	"""
	# Marker appended to a preview that had to be cut short
	more = _("\n__And more__")
	# What the parser is currently inside of: "ins", "del", "tda" (added cell), "tdd" (deleted cell) or ""
	current_tag = ""
	# Text of the added / deleted line of the row being parsed, None when the row has none (yet)
	last_ins = None
	last_del = None
	# True when the current row contains a "diff-empty" cell
	empty = False
	# The previews built so far
	small_prev_ins = ""
	small_prev_del = ""
	# Character counters; they start at len(more), presumably to reserve room for the marker
	ins_length = len(more)
	del_length = len(more)

	@staticmethod
	def _css_classes(attribs):
		"""Return the class names of a start tag as a list (empty when the tag has no class attribute)"""
		for attr_name, attr_value in attribs:
			if attr_name == "class" and attr_value:
				return attr_value.split()
		return []

	def handle_starttag(self, tagname, attribs):
		"""Track <ins>/<del> and recognise added, deleted and empty diff cells by their CSS class"""
		if tagname == "ins" or tagname == "del":
			self.current_tag = tagname
		if tagname != "td":
			return
		# Look the class attribute up by name instead of peeking at attribs[0]: a <td> may have no
		# attributes at all, another attribute (e.g. colspan) may come first, and the class value
		# may list several names.
		classes = self._css_classes(attribs)
		if "diff-addedline" in classes and self.ins_length <= 1000:
			self.current_tag = "tda"
			self.last_ins = ""
		if "diff-deletedline" in classes and self.del_length <= 1000:
			self.current_tag = "tdd"
			self.last_del = ""
		if "diff-empty" in classes:
			self.empty = True

	def handle_data(self, data):
		"""Append escaped text to the line being collected, as long as the side is within its budget"""
		data = escape_formatting(data)
		if self.current_tag == "ins" and self.ins_length <= 1000:
			self.ins_length += len("**" + data + "**")
			if self.ins_length <= 1000:
				self.last_ins = self.last_ins + "**" + data + "**"
		if self.current_tag == "del" and self.del_length <= 1000:
			self.del_length += len("~~" + data + "~~")
			if self.del_length <= 1000:
				self.last_del = self.last_del + "~~" + data + "~~"
		if self.current_tag == "tda" and self.ins_length <= 1000:
			self.ins_length += len(data)
			if self.ins_length <= 1000:
				self.last_ins = self.last_ins + data
		if self.current_tag == "tdd" and self.del_length <= 1000:
			self.del_length += len(data)
			if self.del_length <= 1000:
				self.last_del = self.last_del + data

	def handle_endtag(self, tagname):
		"""Leave the current tag; at the end of a row move the collected lines into the previews"""
		self.current_tag = ""
		if tagname == "ins":
			# Text following </ins> belongs to the surrounding added cell again
			self.current_tag = "tda"
		elif tagname == "del":
			# Text following </del> belongs to the surrounding deleted cell again
			self.current_tag = "tdd"
		elif tagname == "tr":
			if self.last_ins is not None:
				self.ins_length += 1  # the newline that separates rows
				if self.empty and not self.last_ins.isspace() and "**" not in self.last_ins:
					self.ins_length += 4
					self.last_ins = "**" + self.last_ins + "**"
				self.small_prev_ins = self.small_prev_ins + "\n" + self.last_ins
				if self.ins_length > 1000:
					self.small_prev_ins = self.small_prev_ins + self.more
				self.last_ins = None
			if self.last_del is not None:
				self.del_length += 1  # the newline that separates rows
				if self.empty and not self.last_del.isspace() and "~~" not in self.last_del:
					self.del_length += 4
					self.last_del = "~~" + self.last_del + "~~"
				self.small_prev_del = self.small_prev_del + "\n" + self.last_del
				if self.del_length > 1000:
					self.small_prev_del = self.small_prev_del + self.more
				self.last_del = None
			self.empty = False


def safe_read(request, *keys):
	"""Decode the JSON body of a response and walk down it along the given keys

	:param request: object with a json() method (or None)
	:param keys: dictionary keys / list indexes to follow one after another
	:return: the value found at the end of the path; None when request is None, when the
		body is not valid JSON or when the path does not exist in the decoded data
	"""
	if request is None:
		return None
	try:
		data = request.json()
	except ValueError:
		misc_logger.warning("Failure while extracting data from request in {change}".format(change=request))
		return None
	for item in keys:
		try:
			data = data[item]
		except (KeyError, IndexError, TypeError):
			# IndexError/TypeError: the level turned out to be a (too short) list or a scalar
			# where a mapping was expected - treat it like a missing key instead of crashing
			misc_logger.warning(
				"Failure while extracting data from request on key {key} in {change}".format(key=item, change=data))
			return None
	return data


def add_to_dict(dictionary, key):
	"""Increase the counter stored under key by one (starting at 1 for a new key) and return the same dictionary"""
	dictionary[key] = dictionary[key] + 1 if key in dictionary else 1
	return dictionary

def prepare_paths(path, dry=False):
	"""Set the URL paths for article namespace and script namespace
	WIKI_API_PATH will be: WIKI_DOMAIN + scriptpath + /api.php
	WIKI_ARTICLE_PATH will be: WIKI_DOMAIN + articlepath (contains $1 where the article name goes)
	WIKI_SCRIPT_PATH will be: WIKI_DOMAIN + scriptpath + /
	WIKI_JUST_DOMAIN will be: WIKI_DOMAIN

	Several URLs derived from `path` are probed for a working api.php; scriptpath and
	articlepath are taken from the siteinfo answer of the first one that responds.

	:param path: URL of the wiki
	:param dry: when True the module-level paths are left untouched
	:return: WIKI_DOMAIN when dry is True and a working API was found, None otherwise
	Exits the process with status 1 when none of the candidate URLs could be verified.
	"""
	global WIKI_API_PATH
	global WIKI_ARTICLE_PATH
	global WIKI_SCRIPT_PATH
	global WIKI_JUST_DOMAIN

	def quick_try_url(url):
		"""Quickly test if URL is the proper script path,
		False if it appears invalid
		the response object when it appears valid"""
		try:
			request = requests.get(url, timeout=5)
			if request.status_code == requests.codes.ok:
				if request.json()["query"]["general"] is not None:
					return request
			return False
		except (KeyError, TypeError, ValueError, requests.exceptions.RequestException):
			# ValueError: the body is not JSON (e.g. an HTML page answered with status 200)
			# TypeError: the JSON does not have the expected nesting
			# RequestException: connection problems, timeouts, malformed candidate URLs, ...
			# In all of these cases the candidate is simply not the API; try the next one.
			return False

	parsed_url = urlparse(path)
	domain = urlunparse((*parsed_url[0:2], "", "", "", ""))  # scheme://host only
	for url_scheme in (path, path.split("wiki")[0], domain):  # check different combinations, it's supposed to be idiot-proof
		tested = quick_try_url(url_scheme + "/api.php?action=query&format=json&meta=siteinfo")
		if tested:
			if not dry:
				general = tested.json()["query"]["general"]
				WIKI_API_PATH = domain + general["scriptpath"] + "/api.php"
				WIKI_SCRIPT_PATH = domain + general["scriptpath"] + "/"
				WIKI_ARTICLE_PATH = domain + general["articlepath"]
				WIKI_JUST_DOMAIN = domain
				break
			return domain

	else:
		misc_logger.critical("Could not verify wikis paths. Please make sure you have given the proper wiki URLs in settings.json ({path} should be script path to your wiki) and your Internet connection is working.".format(path=path))
		sys.exit(1)


# Runs at import time: probes the configured wiki over HTTP and fills in the WIKI_* paths.
# prepare_paths() exits the process when the wiki cannot be verified; a missing "wiki_url"
# setting raises KeyError right here.
prepare_paths(settings["wiki_url"])


def create_article_path(article: str) -> str:
	"""Return WIKI_ARTICLE_PATH with every "$1" placeholder replaced by the given article name"""
	placeholder = "$1"
	article_url = WIKI_ARTICLE_PATH.replace(placeholder, article)
	return article_url


def send_simple(msgtype, message, name, avatar):
	"""Send `message` as a compact message of type `msgtype` to the webhook from the settings,
	using the given display name and avatar"""
	webhook_message = DiscordMessage("compact", msgtype, settings["webhookURL"], content=message)
	webhook_message.set_avatar(avatar)
	webhook_message.set_name(name)
	# NOTE(review): presumably retries messages still waiting in the queue before the new one goes out - confirm
	messagequeue.resend_msgs()
	post_metadata = DiscordMessageMetadata("POST")
	send_to_discord(webhook_message, meta=post_metadata)


def profile_field_name(name, embed):
	"""Return the translated display name of a profile field

	Fields missing from profile_fields yield the translated "Unknown" when embed is
	truthy and the translated lowercase "unknown" otherwise."""
	if name in profile_fields:
		return profile_fields[name]
	return _("Unknown") if embed else _("unknown")


class LinkParser(HTMLParser):
	"""Converts HTML into text in which links are written as Markdown: [text](<url>)

	Feed the HTML with feed(); the result accumulates in `new_string`. The target of a link is
	taken from an `href` attribute or from a base64 encoded `data-uncrawlable-url` attribute
	and is attached to the next piece of text that follows the tag.
	"""
	# Output built so far
	new_string = ""
	# Target of the most recently opened link, "" when the next text is not a link
	recent_href = ""

	def handle_starttag(self, tag, attrs):
		"""Remember the link target carried by the tag, if it has one"""
		for attr in attrs:
			if attr[1] is None:
				# Attribute without a value (e.g. <a href>): there is no target to work with
				continue
			if attr[0] == 'href':
				self.recent_href = attr[1]
				if self.recent_href.startswith("//"):
					# protocol-relative URL
					self.recent_href = "https:{rest}".format(rest=self.recent_href)
				elif not self.recent_href.startswith("http"):
					# anything else is treated as a path on the wiki itself
					self.recent_href = WIKI_JUST_DOMAIN + self.recent_href
				# keep a ")" in the URL from closing the Markdown link early
				self.recent_href = self.recent_href.replace(")", "\\)")
			elif attr[0] == 'data-uncrawlable-url':
				try:
					decoded_path = base64.b64decode(attr[1].encode('ascii')).decode('ascii')
				except ValueError:
					# covers invalid base64 as well as non-ASCII input/output; skip the broken
					# attribute instead of letting the exception abort the whole feed() call
					misc_logger.warning("Could not decode data-uncrawlable-url attribute: {value}".format(value=attr[1]))
					continue
				self.recent_href = WIKI_JUST_DOMAIN + decoded_path

	def handle_data(self, data):
		"""Append text to the output, as a Markdown link when a link target is pending"""
		if self.recent_href:
			self.new_string = self.new_string + "[{}](<{}>)".format(data.replace("//", "/\\/"), self.recent_href)
			self.recent_href = ""
		else:
			self.new_string = self.new_string + data.replace("//", "/\\/")

	def handle_comment(self, data):
		"""Append the text of an HTML comment to the output like ordinary text"""
		self.new_string = self.new_string + data.replace("//", "/\\/")

	def handle_endtag(self, tag):
		"""Log the output built so far"""
		misc_logger.debug(self.new_string)
Added German and Polish translations 2019-05-20 15:28:55 +00:00			`# -- coding: utf-8 --`

Update copyright notices with new script name 2020-03-27 15:47:43 +00:00			`# Recent changes Goat compatible Discord webhook is a project for using a webhook as recent changes page from MediaWiki.`
Added copyright to misc.py 2019-05-20 13:32:23 +00:00			`# Copyright (C) 2018 Frisk`

			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`

			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`

			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`
Added #131, may cause issues as the split has been done to many files and it scares me, will require work on i18n structure 2020-07-16 12:46:23 +00:00			`import base64`
Structurize the code, fixed some issues (like not recognizing edit and new events) 2020-11-08 21:29:15 +00:00			`import json, logging, sys, re`
Added #71 2019-05-20 13:11:30 +00:00			`from html.parser import HTMLParser`
Fixed #145 and improvements in escaping 2020-10-18 10:18:27 +00:00			`from urllib.parse import urlparse, urlunparse, quote`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`import requests`
Cleaned the file structure (#111) 2020-07-07 11:21:49 +00:00			`from src.configloader import settings`
Structurize the code, fixed some issues (like not recognizing edit and new events) 2020-11-08 21:29:15 +00:00			`from src.discord.message import DiscordMessage, DiscordMessageMetadata`
			`from src.discord.queue import messagequeue, send_to_discord`
Fix #137 2020-08-03 13:27:24 +00:00			`from src.i18n import misc`
Fixed translation of misc module 2019-05-20 19:01:45 +00:00
Small improvements 2020-11-10 23:48:11 +00:00			`AUTO_SUPPRESSION_ENABLED = settings.get("auto_suppression", {"enabled": False}).get("enabled")`
Further work on the message redaction function 2020-11-05 21:20:35 +00:00
Fix #137 2020-08-03 13:27:24 +00:00			`_ = misc.gettext`
Magor logging system refactoring, added new misc module and added data.json as new data storage file 2019-05-19 15:03:05 +00:00
			`# Create a custom logger`
Moved link_formatter function to misc module 2019-05-20 10:41:40 +00:00
Magor logging system refactoring, added new misc module and added data.json as new data storage file 2019-05-19 15:03:05 +00:00			`misc_logger = logging.getLogger("rcgcdw.misc")`

Further restructuring Removed rcid cache (stability of this method to be tested) Added support for #142 Reorganized code to be more readable (?) 2020-10-17 23:45:06 +00:00			`data_template = {"rcid": None, "discussion_id": 0, "abuse_log_id": None,`
Magor logging system refactoring, added new misc module and added data.json as new data storage file 2019-05-19 15:03:05 +00:00			`"daily_overview": {"edits": None, "new_files": None, "admin_actions": None, "bytes_changed": None,`
Finished calculating and showing average values for daily overviews (also closes #22) 2019-05-19 16:25:20 +00:00			`"new_articles": None, "unique_editors": None, "day_score": None, "days_tracked": 0}}`
Magor logging system refactoring, added new misc module and added data.json as new data storage file 2019-05-19 15:03:05 +00:00
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`WIKI_API_PATH: str = ""`
			`WIKI_ARTICLE_PATH: str = ""`
			`WIKI_SCRIPT_PATH: str = ""`
			`WIKI_JUST_DOMAIN: str = ""`

Added #131, may cause issues as the split has been done to many files and it scares me, will require work on i18n structure 2020-07-16 12:46:23 +00:00			profile_fields = {"profile-location": _("Location"), "profile-aboutme": _("About me"), "profile-link-google": _("Google link"), "profile-link-facebook":_("Facebook link"), "profile-link-twitter": _("Twitter link"), "profile-link-reddit": _("Reddit link"), "profile-link-twitch": _("Twitch link"), "profile-link-psn": _("PSN link"), "profile-link-vk": _("VK link"), "profile-link-xbl": _("XBL link"), "profile-link-steam": _("Steam link"), "profile-link-discord": _("Discord handle"), "profile-link-battlenet": _("Battle.net handle")}

Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`class DataFile:`
			`"""Data class which instance of is shared by multiple modules to remain consistent and do not cause too many IO operations."""`
			`def __init__(self):`
			`self.data = self.load_datafile()`
Further restructuring Removed rcid cache (stability of this method to be tested) Added support for #142 Reorganized code to be more readable (?) 2020-10-17 23:45:06 +00:00			`self.changed = False`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00
			`@staticmethod`
			`def generate_datafile():`
			`"""Generate a data.json file from a template."""`
			`try:`
			`with open("data.json", 'w') as data:`
			`data.write(json.dumps(data_template, indent=4))`
			`except PermissionError:`
			`misc_logger.critical("Could not create a data file (no permissions). No way to store last edit.")`
			`sys.exit(1)`

			`def load_datafile(self) -> dict:`
			`"""Read a data.json file and return a dictionary with contents`
			`:rtype: dict`
			`"""`
			`try:`
			`with open("data.json") as data:`
			`return json.loads(data.read())`
			`except FileNotFoundError:`
			`self.generate_datafile()`
			`misc_logger.info("The data file could not be found. Generating a new one...")`
			`return data_template`

			`def save_datafile(self):`
			`"""Overwrites the data.json file with given dictionary"""`
Further restructuring Removed rcid cache (stability of this method to be tested) Added support for #142 Reorganized code to be more readable (?) 2020-10-17 23:45:06 +00:00			`if self.changed is False: # don't cause unnecessary write operations`
			`return`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`try:`
			`with open("data.json", "w") as data_file:`
			`data_file.write(json.dumps(self.data, indent=4))`
Further restructuring Removed rcid cache (stability of this method to be tested) Added support for #142 Reorganized code to be more readable (?) 2020-10-17 23:45:06 +00:00			`self.changed = False`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`except PermissionError:`
			`misc_logger.critical("Could not modify a data file (no permissions). No way to store last edit.")`
			`sys.exit(1)`

Further restructuring Removed rcid cache (stability of this method to be tested) Added support for #142 Reorganized code to be more readable (?) 2020-10-17 23:45:06 +00:00			`def __setitem__(self, instance, value):`
			`self.data[instance] = value`
			`self.changed = True`

			`def __getitem__(self, item):`
			`return self.data[item]`


Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`datafile = DataFile()`
Finished calculating and showing average values for daily overviews (also closes #22) 2019-05-19 16:25:20 +00:00
Added #147 with few exceptions, switched supported_logs variable to more performant set 2020-10-18 11:25:50 +00:00
Finished calculating and showing average values for daily overviews (also closes #22) 2019-05-19 16:25:20 +00:00			`def weighted_average(value, weight, new_value):`
			`"""Calculates weighted average of value number with weight weight and new_value with weight 1"""`
			`return round(((value * weight) + new_value) / (weight + 1), 2)`
Moved link_formatter function to misc module 2019-05-20 10:41:40 +00:00

			`def link_formatter(link):`
			`"""Formats a link to not embed it"""`
Fixed #145 and improvements in escaping 2020-10-18 10:18:27 +00:00			`return "<" + quote(link.replace(" ", "_"), "/:?") + ">"`
Added #71 2019-05-20 13:11:30 +00:00
Added #147 with few exceptions, switched supported_logs variable to more performant set 2020-10-18 11:25:50 +00:00
Works on formatting the Fandom discussions, fix to daily overview message handling 2020-04-26 12:40:38 +00:00			`def escape_formatting(data):`
			`"""Escape Discord formatting"""`
			return re.sub(r"([`_*~<>{}@/\|\\])", "\\\\\\1", data, 0)
Added #71 2019-05-20 13:11:30 +00:00
Added #147 with few exceptions, switched supported_logs variable to more performant set 2020-10-18 11:25:50 +00:00
Added #71 2019-05-20 13:11:30 +00:00			`class ContentParser(HTMLParser):`
			`more = _("\n__And more__")`
			`current_tag = ""`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`last_ins = None`
			`last_del = None`
			`empty = False`
Added #71 2019-05-20 13:11:30 +00:00			`small_prev_ins = ""`
			`small_prev_del = ""`
			`ins_length = len(more)`
			`del_length = len(more)`

			`def handle_starttag(self, tagname, attribs):`
			`if tagname == "ins" or tagname == "del":`
			`self.current_tag = tagname`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`if tagname == "td" and "diff-addedline" in attribs[0] and self.ins_length <= 1000:`
			`self.current_tag = "tda"`
			`self.last_ins = ""`
			`if tagname == "td" and "diff-deletedline" in attribs[0] and self.del_length <= 1000:`
			`self.current_tag = "tdd"`
			`self.last_del = ""`
			`if tagname == "td" and "diff-empty" in attribs[0]:`
			`self.empty = True`
Added #71 2019-05-20 13:11:30 +00:00
			`def handle_data(self, data):`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`data = escape_formatting(data)`
Added #71 2019-05-20 13:11:30 +00:00			`if self.current_tag == "ins" and self.ins_length <= 1000:`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.ins_length += len("" + data + "")`
Added #71 2019-05-20 13:11:30 +00:00			`if self.ins_length <= 1000:`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.last_ins = self.last_ins + "" + data + ""`
Added #71 2019-05-20 13:11:30 +00:00			`if self.current_tag == "del" and self.del_length <= 1000:`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.del_length += len("~~" + data + "~~")`
Added #71 2019-05-20 13:11:30 +00:00			`if self.del_length <= 1000:`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.last_del = self.last_del + "~~" + data + "~~"`
			`if self.current_tag == "tda" and self.ins_length <= 1000:`
Added #71 2019-05-20 13:11:30 +00:00			`self.ins_length += len(data)`
			`if self.ins_length <= 1000:`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.last_ins = self.last_ins + data`
			`if self.current_tag == "tdd" and self.del_length <= 1000:`
Added #71 2019-05-20 13:11:30 +00:00			`self.del_length += len(data)`
			`if self.del_length <= 1000:`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.last_del = self.last_del + data`
Added #71 2019-05-20 13:11:30 +00:00
			`def handle_endtag(self, tagname):`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.current_tag = ""`
Added #71 2019-05-20 13:11:30 +00:00			`if tagname == "ins":`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.current_tag = "tda"`
Added #71 2019-05-20 13:11:30 +00:00			`elif tagname == "del":`
Update diff parser Based on Wiki-Bot diff parser 2020-11-09 17:04:36 +00:00			`self.current_tag = "tdd"`
			`elif tagname == "tr":`
			`if self.last_ins is not None:`
			`self.ins_length += 1`
			`if self.empty and not self.last_ins.isspace() and "**" not in self.last_ins:`
			`self.ins_length += 4`
			`self.last_ins = "" + self.last_ins + ""`
			`self.small_prev_ins = self.small_prev_ins + "\n" + self.last_ins`
			`if self.ins_length > 1000:`
			`self.small_prev_ins = self.small_prev_ins + self.more`
			`self.last_ins = None`
			`if self.last_del is not None:`
			`self.del_length += 1`
			`if self.empty and not self.last_del.isspace() and "~~" not in self.last_del:`
			`self.del_length += 4`
			`self.last_del = "~~" + self.last_del + "~~"`
			`self.small_prev_del = self.small_prev_del + "\n" + self.last_del`
			`if self.del_length > 1000:`
			`self.small_prev_del = self.small_prev_del + self.more`
			`self.last_del = None`
			`self.empty = False`
Refactoring - moved a few functions to misc 2019-05-20 19:23:19 +00:00

			`def safe_read(request, *keys):`
			`if request is None:`
			`return None`
			`try:`
			`request = request.json()`
			`for item in keys:`
			`request = request[item]`
			`except KeyError:`
			`misc_logger.warning(`
			`"Failure while extracting data from request on key {key} in {change}".format(key=item, change=request))`
			`return None`
			`except ValueError:`
			`misc_logger.warning("Failure while extracting data from request in {change}".format(change=request))`
			`return None`
			`return request`


			`def add_to_dict(dictionary, key):`
			`if key in dictionary:`
			`dictionary[key] += 1`
			`else:`
			`dictionary[key] = 1`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`return dictionary`

Finished #139 2020-08-23 13:32:12 +00:00			`def prepare_paths(path, dry=False):`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`global WIKI_API_PATH`
			`global WIKI_ARTICLE_PATH`
			`global WIKI_SCRIPT_PATH`
			`global WIKI_JUST_DOMAIN`
			`"""Set the URL paths for article namespace and script namespace`
			`WIKI_API_PATH will be: WIKI_DOMAIN/api.php`
			`WIKI_ARTICLE_PATH will be: WIKI_DOMAIN/articlepath/$1 where $1 is the replaced string`
			`WIKI_SCRIPT_PATH will be: WIKI_DOMAIN/`
			`WIKI_JUST_DOMAIN will be: WIKI_DOMAIN"""`
			`def quick_try_url(url):`
			`"""Quickly test if URL is the proper script path,`
			`False if it appears invalid`
			`dictionary when it appears valid"""`
			`try:`
			`request = requests.get(url, timeout=5)`
			`if request.status_code == requests.codes.ok:`
			`if request.json()["query"]["general"] is not None:`
			`return request`
			`return False`
			`except (KeyError, requests.exceptions.ConnectionError):`
			`return False`
			`try:`
Finished #139 2020-08-23 13:32:12 +00:00			`parsed_url = urlparse(path)`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`except KeyError:`
			`misc_logger.critical("wiki_url is not specified in the settings. Please provide the wiki url in the settings and start the script again.")`
			`sys.exit(1)`
Finished #139 2020-08-23 13:32:12 +00:00			`for url_scheme in (path, path.split("wiki")[0], urlunparse((*parsed_url[0:2], "", "", "", ""))): # check different combinations, it's supposed to be idiot-proof`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`tested = quick_try_url(url_scheme + "/api.php?action=query&format=json&meta=siteinfo")`
			`if tested:`
Finished #139 2020-08-23 13:32:12 +00:00			`if not dry:`
			`WIKI_API_PATH = urlunparse((*parsed_url[0:2], "", "", "", "")) + tested.json()["query"]["general"]["scriptpath"] + "/api.php"`
			`WIKI_SCRIPT_PATH = urlunparse((*parsed_url[0:2], "", "", "", "")) + tested.json()["query"]["general"]["scriptpath"] + "/"`
			`WIKI_ARTICLE_PATH = urlunparse((*parsed_url[0:2], "", "", "", "")) + tested.json()["query"]["general"]["articlepath"]`
			`WIKI_JUST_DOMAIN = urlunparse((*parsed_url[0:2], "", "", "", ""))`
			`break`
			`return urlunparse((*parsed_url[0:2], "", "", "", ""))`

Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`else:`
Finished #139 2020-08-23 13:32:12 +00:00			`misc_logger.critical("Could not verify wikis paths. Please make sure you have given the proper wiki URLs in settings.json ({path} should be script path to your wiki) and your Internet connection is working.".format(path=path))`
Initial commit, discussions work in progress, split off most important functions from rcgcdw.py since we need them for modularization 2020-04-04 12:29:18 +00:00			`sys.exit(1)`

Further work on the discussions 2020-04-05 00:07:56 +00:00
Finished #139 2020-08-23 13:32:12 +00:00			`prepare_paths(settings["wiki_url"])`
Further work on the discussions 2020-04-05 00:07:56 +00:00

			`def create_article_path(article: str) -> str:`
			`"""Takes the string and creates an URL with it as the article name"""`
			`return WIKI_ARTICLE_PATH.replace("$1", article)`
Finalizing the discussions module 2020-04-05 21:50:36 +00:00

Fixed #120 2020-05-08 22:16:14 +00:00			`def send_simple(msgtype, message, name, avatar):`
Fixed #116 2020-05-22 15:30:58 +00:00			`discord_msg = DiscordMessage("compact", msgtype, settings["webhookURL"], content=message)`
Fixed #120 2020-05-08 22:16:14 +00:00			`discord_msg.set_avatar(avatar)`
			`discord_msg.set_name(name)`
			`messagequeue.resend_msgs()`
Further work on the message redaction function 2020-11-05 21:20:35 +00:00			`send_to_discord(discord_msg, meta=DiscordMessageMetadata("POST"))`
Added #131, may cause issues as the split has been done to many files and it scares me, will require work on i18n structure 2020-07-16 12:46:23 +00:00

			`def profile_field_name(name, embed):`
			`try:`
			`return profile_fields[name]`
			`except KeyError:`
			`if embed:`
			`return _("Unknown")`
			`else:`
			`return _("unknown")`


			`class LinkParser(HTMLParser):`
			`new_string = ""`
			`recent_href = ""`

			`def handle_starttag(self, tag, attrs):`
			`for attr in attrs:`
			`if attr[0] == 'href':`
			`self.recent_href = attr[1]`
			`if self.recent_href.startswith("//"):`
			`self.recent_href = "https:{rest}".format(rest=self.recent_href)`
			`elif not self.recent_href.startswith("http"):`
			`self.recent_href = WIKI_JUST_DOMAIN + self.recent_href`
			`self.recent_href = self.recent_href.replace(")", "\\)")`
			`elif attr[0] == 'data-uncrawlable-url':`
			`self.recent_href = attr[1].encode('ascii')`
			`self.recent_href = base64.b64decode(self.recent_href)`
			`self.recent_href = WIKI_JUST_DOMAIN + self.recent_href.decode('ascii')`

			`def handle_data(self, data):`
			`if self.recent_href:`
don't link non-link links 2020-08-01 21:22:49 +00:00			`self.new_string = self.new_string + "[{}](<{}>)".format(data.replace("//", "/\\/"), self.recent_href)`
Added #131, may cause issues as the split has been done to many files and it scares me, will require work on i18n structure 2020-07-16 12:46:23 +00:00			`self.recent_href = ""`
			`else:`
don't link non-link links 2020-08-01 21:22:49 +00:00			`self.new_string = self.new_string + data.replace("//", "/\\/")`
Added #131, may cause issues as the split has been done to many files and it scares me, will require work on i18n structure 2020-07-16 12:46:23 +00:00
			`def handle_comment(self, data):`
don't link non-link links 2020-08-01 21:22:49 +00:00			`self.new_string = self.new_string + data.replace("//", "/\\/")`
Added #131, may cause issues as the split has been done to many files and it scares me, will require work on i18n structure 2020-07-16 12:46:23 +00:00
			`def handle_endtag(self, tag):`
Fixed #154 and added more to message deletion code 2020-10-26 12:25:14 +00:00			`misc_logger.debug(self.new_string)`