Mirror of https://gitlab.com/chicken-riders/RcGcDb.git (synced 2025-02-23 00:54:09 +00:00)

Commit 090a14c6c4: Updates and fixes
Parent: ebaca40d8e
@@ -105,7 +105,7 @@ class Client:
 		return dict()

 	def parse_links(self, summary: str):
-		link_parser = self.LinkParser()
+		link_parser = self.LinkParser(self.WIKI_JUST_DOMAIN)
 		link_parser.feed(summary)
 		return link_parser.new_string

@@ -51,7 +51,7 @@ def sanitize_to_url(text: str) -> str:  # TODO ) replaces needed?

 def parse_mediawiki_changes(ctx: Context, content: str, embed: DiscordMessage) -> None:
 	"""Parses MediaWiki changes and adds them to embed as fields "Added" and "Removed" """
-	edit_diff = ctx.client.content_parser()
+	edit_diff = ctx.client.content_parser(ctx._)
 	edit_diff.feed(content)
 	if edit_diff.small_prev_del:
 		if edit_diff.small_prev_del.replace("~~", "").replace("__", "").isspace():
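The change above swaps a language key for the translation callable itself: content_parser now receives ctx._ (a gettext-style Callable[[str], str]) rather than looking translations up through a global langs table. A minimal sketch of that pattern, assuming a standard gettext catalog; the catalog name and locale path here are illustrative, not the project's actual layout:

import gettext

# Load a catalog and hand its gettext method around as a plain callable,
# the way ctx._ is passed into content_parser above. fallback=True returns
# the untranslated string when no catalog is installed, so this runs as-is.
translation = gettext.translation("misc", localedir="locale", languages=["de"], fallback=True)
_ = translation.gettext

def describe_diff(_):
	# A consumer needs only the callable, not a whole language registry.
	return _("\n__And more__")

print(describe_diff(_))

The small_prev_del check that follows strips Discord's ~~strikethrough~~ and __underline__ markers first, so a deletion consisting only of formatting characters and whitespace is not rendered as an empty field.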
@@ -16,7 +16,6 @@ from src.database import db
 from src.exceptions import *
 from src.queue_handler import dbmanager
 from src.wiki import Wiki, process_cats, essential_feeds
-from src.wiki_ratelimiter import RateLimiter
 from src.domain_manager import domains

@@ -34,17 +34,5 @@ class db_connection:
 	def pool(self) -> asyncpg.Pool:
 		return self.connection_pool

-	# Tried to make it a decorator but tbh won't probably work
-	# async def in_transaction(self, func):
-	# 	async def single_transaction():
-	# 		async with self.connection.acquire() as connection:
-	# 			async with connection.transaction():
-	# 				await func()
-	# 	return single_transaction
-
-	# async def query(self, string, *arg):
-	# 	async with self.connection.acquire() as connection:
-	# 		async with connection.transaction():
-	# 			return connection.cursor(string, *arg)

 db = db_connection()
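The deleted comments sketched an in_transaction decorator the author doubted would work. With asyncpg the idea is workable if the wrapper acquires a pooled connection and hands it to the wrapped coroutine; a minimal sketch, assuming the db.pool() accessor shown above (a hypothetical helper, not code from this repo):

import functools

def in_transaction(func):
	"""Run the decorated coroutine inside a pooled transaction.
	The connection is passed to the coroutine, because asyncpg cursors
	must be used inside the transaction that created them."""
	@functools.wraps(func)
	async def wrapper(*args, **kwargs):
		async with db.pool().acquire() as connection:
			async with connection.transaction():
				return await func(connection, *args, **kwargs)
	return wrapper

@in_transaction
async def count_webhooks(connection, wiki: str):
	return await connection.fetchval("SELECT count(*) FROM rcgcdw WHERE wiki = $1;", wiki)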
@@ -114,14 +114,12 @@ class MessageQueue:

 	async def pack_massages(self, messages: list[QueueEntry], current_pack=None) -> AsyncGenerator[tuple[StackedDiscordMessage, int, str], None]:
 		"""Pack messages into StackedDiscordMessage. It's an async generator"""
-		# TODO Rebuild to support DELETE and PATCH messages
 		for index, message in enumerate(messages):
 			if message.method == "POST":
 				if current_pack is None:
 					current_pack = StackedDiscordMessage(0 if message.discord_message.message_type == "compact" else 1, message.wiki)
 			else:
-				# message.discord_message.  # TODO Where do we store method?
 				yield message.discord_message, index, message.method
 			message = message.discord_message
 			try:
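The branching above stacks consecutive POST messages into one StackedDiscordMessage, while anything else (the DELETE and PATCH cases the removed TODO referred to) is yielded straight through with its index and method. A simplified model of that control flow, with (method, body) tuples standing in for the real queue entries; flushing of a full pack happens past the end of this hunk, so the sketch leaves it out:

from typing import AsyncGenerator

async def pack(messages: list[tuple[str, str]]) -> AsyncGenerator[tuple[str, int, str], None]:
	current_pack = []
	for index, (method, body) in enumerate(messages):
		if method == "POST":
			current_pack.append(body)  # stacked, sent as one webhook request later
		else:
			yield body, index, method  # DELETE/PATCH pass through individually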
@@ -152,7 +150,7 @@ class MessageQueue:
 				self.global_rate_limit = True
 				await asyncio.sleep(e.remaining / 1000)
 				return
-			for queue_message in messages[max(index-len(msg.message_list), 0):max(index, 1)]:  # mark messages as delivered
+			for queue_message in messages[max(index-len(msg.message_list), 0):index+1]:  # mark messages as delivered
 				queue_message.confirm_sent_status(webhook_url)
 			if client_error is False:
 				msg.webhook = webhook_url

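This is an off-by-one fix: Python slices exclude their upper bound, so max(index, 1) left out the entry at index itself for every index >= 1, and the message that had just been delivered was never confirmed. For example:

messages = ["a", "b", "c", "d"]
index, stack_len = 2, 2
print(messages[max(index - stack_len, 0):max(index, 1)])  # ['a', 'b'] - current message missing
print(messages[max(index - stack_len, 0):index + 1])      # ['a', 'b', 'c'] - current message included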
@@ -8,7 +8,6 @@ from src.argparser import command_line_args
 from functools import cache
 # from src.discussions import Discussions
 from statistics import Log, LogType
-import src.wiki_ratelimiter

 logger = logging.getLogger("rcgcdb.domain")

@@ -22,7 +21,6 @@ class Domain:
 		self.name = name  # This should be always in format of topname.extension for example fandom.com
 		self.task: Optional[asyncio.Task] = None
 		self.wikis: OrderedDict[str, src.wiki.Wiki] = OrderedDict()
-		self.rate_limiter: src.wiki_ratelimiter = src.wiki_ratelimiter.RateLimiter()
 		self.irc: Optional[src.irc_feed.AioIRCCat] = None
 		# self.discussions_handler: Optional[Discussions] = Discussions(self.wikis) if name == "fandom.com" else None

@@ -38,7 +36,7 @@ class Domain:
 	def destroy(self):
 		"""Destroy the domain – do all of the tasks that should make sure there is no leftovers before being collected by GC"""
 		if self.irc:
-			self.irc.connection.disconnect("Leaving")
+			self.irc.connection.die("Leaving")
 		if self.discussions_handler:
 			self.discussions_handler.close()
 		if self.task:
@@ -81,11 +79,9 @@ class Domain:
 		self.wikis.move_to_end(wiki.script_url, last=False)

 	async def run_wiki_scan(self, wiki: src.wiki.Wiki, reason: Optional[int] = None):
-		await self.rate_limiter.timeout_wait()
 		await wiki.scan()
 		wiki.statistics.update(Log(type=LogType.SCAN_REASON, title=str(reason)))
 		self.wikis.move_to_end(wiki.script_url)
-		self.rate_limiter.timeout_add(1.0)

 	async def irc_scheduler(self):
 		try:
@@ -108,7 +104,7 @@ class Domain:
 				return  # Recently scanned wikis will get at the end of the self.wikis, so we assume what is first hasn't been checked for a while
 		except:
 			if command_line_args.debug:
-				logger.exception("IRC task for domain {} failed!".format(self.name))
+				logger.exception("IRC scheduler task for domain {} failed!".format(self.name))
 			else:
 				# TODO Write
 				pass
@@ -140,7 +136,7 @@ class Domain:
 		except asyncio.exceptions.CancelledError:
 			for wiki in self.wikis.values():
 				await wiki.session.close()
-			await self.irc.connection.disconnect()
+			self.irc.connection.disconnect()
 		else:
 			try:
 				await self.regular_scheduler()

@@ -1,5 +1,5 @@
 from __future__ import annotations
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 from urllib.parse import urlparse, urlunparse
 import logging
 import asyncpg
@@ -9,8 +9,6 @@ from src.config import settings
 from src.domain import Domain
 from src.irc_feed import AioIRCCat
-
-
 if TYPE_CHECKING:
 	from src.wiki import Wiki

 logger = logging.getLogger("rcgcdb.domain_manager")
@@ -23,6 +21,7 @@ class DomainManager:
 		"""Callback for database listener. Used to update our domain cache on changes such as new wikis or removed wikis"""
 		# TODO Write a trigger for pub/sub in database/Wiki-Bot repo
 		split_payload = payload.split(" ")
+		logger.debug("Received pub/sub message: {}".format(payload))
 		if len(split_payload) < 2:
 			raise ValueError("Improper pub/sub message! Pub/sub payload: {}".format(payload))
 		if split_payload[0] == "ADD":
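The callback receives the raw payload of a PostgreSQL NOTIFY; with asyncpg the wiring looks roughly like this. A minimal sketch; the DSN and channel name are placeholders, since neither appears in this hunk:

import asyncio
import asyncpg

def webhook_update(connection, pid, channel, payload):
	# Payload format used above: "<ACTION> <wiki url>", e.g. "ADD https://example.fandom.com/"
	action, wiki = payload.split(" ", 1)
	print(action, wiki)

async def main():
	connection = await asyncpg.connect(dsn="postgres://user:pass@localhost/rcgcdb")  # placeholder DSN
	await connection.add_listener("webhookupdates", webhook_update)  # placeholder channel name
	await asyncio.sleep(float("inf"))  # keep the connection open to receive notifications

asyncio.run(main())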
@@ -30,8 +29,8 @@ class DomainManager:
 		elif split_payload[0] == "REMOVE":
 			try:
 				results = await connection.fetch("SELECT * FROM rcgcdw WHERE wiki = $1;", split_payload[1])
-				if len(results) > 0:
-					return
+				if len(results) > 0:  # If there are still webhooks for this wiki - just update its targets
+					await self.return_domain(self.get_domain(split_payload[1])).get_wiki(split_payload[1]).update_targets()
 			except asyncpg.IdleSessionTimeoutError:
 				logger.error("Couldn't check amount of webhooks with {} wiki!".format(split_payload[1]))
 				return
@@ -50,9 +49,9 @@ class DomainManager:
 			new_domain = await self.new_domain(wiki_domain)
 			await new_domain.add_wiki(wiki)

-	def remove_domain(self, domain):
-		domain.destoy()
-		del self.domains[domain]
+	def remove_domain(self, domain: Domain):
+		domain.destroy()
+		del self.domains[domain.name]

 	def remove_wiki(self, script_url: str):
 		wiki_domain = self.get_domain(script_url)
@@ -79,6 +78,8 @@ class DomainManager:
 		for irc_server in settings["irc_servers"].keys():
 			if name in settings["irc_servers"][irc_server]["domains"]:
 				domain_object.set_irc(AioIRCCat(settings["irc_servers"][irc_server]["irc_channel_mapping"], domain_object, None, None))
+				domain_object.irc.connect(settings["irc_servers"][irc_server]["irc_host"], settings["irc_servers"][irc_server]["irc_port"],
+										  settings["irc_servers"][irc_server]["irc_name"], ircname=settings["irc_servers"][irc_server]["irc_nickname"])
 				break  # Allow only one IRC for a domain
 		self.domains[name] = domain_object
 		return self.domains[name]

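The lookup assumes a per-server settings layout with the fields referenced above. A plausible shape reconstructed from just those keys; every value below is a placeholder:

settings = {
	"irc_servers": {
		"fandom": {  # the server label itself is arbitrary
			"domains": ["fandom.com"],
			"irc_channel_mapping": {"rc": "#recent-changes"},  # placeholder mapping
			"irc_host": "irc.example.org",
			"irc_port": 6667,
			"irc_name": "RcGcDb",
			"irc_nickname": "RcGcDb",
		}
	}
}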
@@ -44,7 +44,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
 		c.nick(c.get_nickname() + "_")

 	def on_disconnect(self, connection, event):
-		self.connect(*self.connection_details[0], **self.connection_details[1])  # attempt to reconnect
+		# self.connect(*self.connection_details[0], **self.connection_details[1])  # attempt to reconnect
+		pass

 	def parse_fandom_message(self, message: str):
 		message = message.split("\x035*\x03")
@@ -71,8 +72,8 @@ class AioIRCCat(irc.client_aio.AioSimpleIRCClient):
 		if post.get('action', 'unknown') != "deleted":  # ignore deletion events
 			url = urlparse(post.get('url'))
 			full_url ="https://"+ url.netloc + recognize_langs(url.path)
-			if full_url in self.domain:
-				self.discussion_callback(full_url)
+			# if full_url in self.domain:
+			# 	self.discussion_callback(full_url)


 def recognize_langs(path):
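urlparse splits the post URL so the wiki base can be rebuilt from the host plus whatever language prefix recognize_langs extracts from the path. With a hypothetical discussions URL:

from urllib.parse import urlparse

url = urlparse("https://community.fandom.com/de/f/p/4400000000000000000")  # hypothetical post URL
print(url.netloc)  # community.fandom.com
print(url.path)    # /de/f/p/4400000000000000000 - the leading /de/ is the language prefix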
src/misc.py (29 changes)
@@ -2,6 +2,7 @@ from html.parser import HTMLParser
 import base64, re

 import logging
+from typing import Callable
 from urllib.parse import urlparse, urlunparse
 from src.i18n import langs

@@ -28,7 +29,10 @@ class LinkParser(HTMLParser):

 	new_string = ""
 	recent_href = ""
 	WIKI_JUST_DOMAIN = ""

+	def __init__(self, DOMAIN_URL):
+		self.WIKI_JUST_DOMAIN = DOMAIN_URL
+		super().__init__()

 	def handle_starttag(self, tag, attrs):
 		for attr in attrs:
@@ -59,16 +63,17 @@
 			pass


-LinkParse = LinkParser()
+# LinkParse = LinkParser()

-def parse_link(domain: str, to_parse: str) -> str:
-	"""Because I have strange issues using the LinkParser class myself, this is a helper function
-	to utilize the LinkParser properly"""
-	LinkParse.WIKI_JUST_DOMAIN = domain
-	LinkParse.new_string = ""
-	LinkParse.feed(to_parse)
-	LinkParse.recent_href = ""
-	return LinkParse.new_string
+# def parse_link(domain: str, to_parse: str) -> str:
+# 	"""Because I have strange issues using the LinkParser class myself, this is a helper function
+# 	to utilize the LinkParser properly"""
+# 	LinkParse.WIKI_JUST_DOMAIN = domain
+# 	LinkParse.new_string = ""
+# 	LinkParse.feed(to_parse)
+# 	LinkParse.recent_href = ""
+# 	return LinkParse.new_string


 def link_formatter(link: str) -> str:
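The module-level LinkParse singleton and its parse_link helper carried mutable state (WIKI_JUST_DOMAIN, new_string, recent_href) across calls, which is why the helper had to reset fields by hand and why sharing one instance between wikis was fragile. Constructing a parser per use, as the new __init__ above supports, makes each parse independent. A usage sketch with a hypothetical domain:

link_parser = LinkParser("https://example.fandom.com")  # hypothetical wiki domain
link_parser.feed('<a href="/wiki/Foo">Foo</a> moved to <a href="/wiki/Bar">Bar</a>')
print(link_parser.new_string)  # summary text with links resolved against the domain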
@@ -117,9 +122,9 @@ class ContentParser(HTMLParser):
 	small_prev_ins = ""
 	small_prev_del = ""

-	def __init__(self, lang):
+	def __init__(self, lang: Callable):
 		super().__init__()
-		self.more = langs[lang]["misc"].gettext("\n__And more__")
+		self.more = lang("\n__And more__")
 		self.ins_length = len(self.more)
 		self.del_length = len(self.more)

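Typing lang as Callable makes the dependency explicit: any str -> str function satisfies it, so tests can stub translation without loading gettext catalogs. For example, assuming ContentParser as defined in this file:

parser = ContentParser(lambda s: s)  # identity "translation"; no catalogs needed
parser.feed("<ins>added text</ins><del>removed text</del>")  # MediaWiki-diff-style markup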
@@ -28,6 +28,7 @@ class UpdateDB:
 	async def update_db(self):
 		try:
 			while True:
+				logger.debug("Running DB check")
 				if self.updated:
 					async with db.pool().acquire() as connection:
 						async with connection.transaction():
@@ -44,7 +44,7 @@ class Statistics:
 		self.logs: LimitedList[Log] = LimitedList()

 	def update(self, *args: Log, **kwargs: dict[str, Union[float, int]]):
-		for key, value in kwargs:
+		for key, value in kwargs.items():
 			self.__setattr__(key, value)
 		for log in args:
 			self.logs.append(log)

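This fixes a classic Python slip: iterating a dict yields only its keys, so for key, value in kwargs: tries to unpack each key string into two values and raises ValueError for any key whose length is not exactly 2 (and silently mispairs characters when it is). Compare:

kwargs = {"last_action": 12345}
try:
	for key, value in kwargs:  # iterates keys only
		pass
except ValueError as e:
	print(e)  # too many values to unpack (expected 2)

for key, value in kwargs.items():  # yields (key, value) pairs
	print(key, value)  # last_action 12345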
@@ -242,7 +242,7 @@ class Wiki:
 		"""Synchronous function based on api_request created for compatibility reasons with RcGcDw API"""
 		try:
 			if isinstance(params, str):
-				request = self.session_requests.get(self.script_url + "api.php?" + params + "&errorformat=raw", timeout=10, allow_redirects=allow_redirects)
+				request = self.session_requests.get(self.script_url + "api.php" + params + "&errorformat=raw", timeout=10, allow_redirects=allow_redirects)
 			elif isinstance(params, OrderedDict):
 				request = self.session_requests.get(self.script_url + "api.php", params=params, timeout=10, allow_redirects=allow_redirects)
 			else:
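Dropping the hard-coded "?" implies the string form of params already carries its own separator, RcGcDw-style; with the old concatenation such a caller produced a double question mark. Illustration with placeholder values:

script_url = "https://example.fandom.com/"  # placeholder
params = "?action=query&format=json"        # params string already starts with "?"
print(script_url + "api.php?" + params)  # ...api.php??action=query&format=json (broken)
print(script_url + "api.php" + params)   # ...api.php?action=query&format=json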
@@ -357,6 +357,8 @@ class Wiki:
 			message.wiki = self
 			message_list.append(QueueEntry(message, webhooks, self))
 		messagequeue.add_messages(message_list)
+		self.statistics.update(last_action=highest_id)
+		dbmanager.add(("UPDATE rcgcdw SET rcid = $1 WHERE wiki = $2", (highest_id, self.script_url)))  # If this is not enough for the future, save rcid in message sending function to make sure we always send all of the changes
 		return

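The two added lines record scan progress twice: once in in-memory statistics and once as a queued SQL statement that dbmanager flushes later, so the rcid write rides along with the next batched transaction instead of costing a round trip per scan. A minimal model of that queue-then-flush pattern (an assumption about dbmanager's internals, which this diff does not show):

class DBManager:
	def __init__(self):
		self.pending: list[tuple[str, tuple]] = []

	def add(self, entry: tuple[str, tuple]):
		self.pending.append(entry)  # remember the statement and its arguments

	async def flush(self, connection):
		async with connection.transaction():
			for sql, args in self.pending:
				await connection.execute(sql, *args)
		self.pending.clear()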
@@ -394,7 +396,7 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ
 	"""This function takes more vital information, communicates with a formatter and constructs DiscordMessage with it.
 	It creates DiscordMessageMetadata object, LinkParser and Context. Prepares a comment"""
 	from src.misc import LinkParser
-	LinkParser = LinkParser()
+	LinkParser = LinkParser(wiki.client.WIKI_ARTICLE_PATH)
 	metadata = DiscordMessageMetadata("POST", rev_id=change.get("revid", None), log_id=change.get("logid", None),
 									  page_id=change.get("pageid", None))
 	context = Context("embed" if display_options.display > 0 else "compact", "recentchanges", webhooks, wiki.client, langs[display_options.lang]["rc_formatters"], prepare_settings(display_options.display))
@@ -446,7 +448,7 @@ async def rc_processor(wiki: Wiki, change: dict, changed_categories: dict, displ
 		else:
 			raise
 	if identification_string in ("delete/delete", "delete/delete_redir"):  # TODO Move it into a hook?
-		wiki.delete_messages(dict(pageid=change.get("pageid")))
+		wiki.delete_messages(dict(page_id=change.get("pageid")))
 	elif identification_string == "delete/event":
 		logparams = change.get('logparams', {"ids": []})
 		if settings["appearance"]["mode"] == "embed":

@@ -1,22 +0,0 @@
-import logging, time, asyncio
-
-logger = logging.getLogger("rcgcdw.ratelimiter")
-
-class RateLimiter:
-	def __init__(self):
-		self.timeout_until = 0
-
-	def timeout_add(self, timeout: float):
-		"""This function sets a new timeout"""
-		self.timeout_until = time.time() + timeout
-		#logger.debug("Added {} timeout".format(timeout))
-
-	async def timeout_wait(self):
-		"""This awaitable calculates the time to wait according to timeout_until, does not wait if it's past the timeout to not skip a cycle"""
-		calculated_timeout = self.timeout_until - time.time()
-		#logger.debug("Waiting {}".format(calculated_timeout))
-		if calculated_timeout > 0:
-			await asyncio.sleep(calculated_timeout)
-
-	def timeout_raw(self):
-		return self.timeout_until - time.time()
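The deleted limiter measured its deadline against time.time(), which can jump when the wall clock is adjusted; if the class is ever reintroduced, time.monotonic() is the safer base. A sketch with that single change, otherwise the same shape as the removed file:

import asyncio, time

class RateLimiter:
	def __init__(self):
		self.timeout_until = 0.0

	def timeout_add(self, timeout: float):
		"""Set a new deadline `timeout` seconds from now."""
		self.timeout_until = time.monotonic() + timeout

	async def timeout_wait(self):
		"""Sleep until the deadline; return immediately if it has already passed."""
		remaining = self.timeout_until - time.monotonic()
		if remaining > 0:
			await asyncio.sleep(remaining)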