This commit is contained in:
Frisk 2021-06-22 21:42:32 +02:00
parent d9ddd30b1b
commit d7add6b2a8
No known key found for this signature in database
GPG key ID: 213F7C15068AF8AC
3 changed files with 51 additions and 10 deletions

View file

@ -4,6 +4,7 @@ import logging
from collections import OrderedDict from collections import OrderedDict
from src.config import settings from src.config import settings
from typing import TYPE_CHECKING, Optional from typing import TYPE_CHECKING, Optional
from functools import cache
logger = logging.getLogger("rcgcdb.domain") logger = logging.getLogger("rcgcdb.domain")
if TYPE_CHECKING: if TYPE_CHECKING:
@ -56,7 +57,7 @@ class Domain:
async def run_wiki_scan(self, wiki: src.wiki.Wiki): async def run_wiki_scan(self, wiki: src.wiki.Wiki):
await self.rate_limiter.timeout_wait() await self.rate_limiter.timeout_wait()
await wiki.scan() await wiki.scan(self.rate_limiter)
self.wikis.move_to_end(wiki.script_url) self.wikis.move_to_end(wiki.script_url)
self.rate_limiter.timeout_add(1.0) self.rate_limiter.timeout_add(1.0)
@ -80,9 +81,12 @@ class Domain:
async def regular_scheduler(self): async def regular_scheduler(self):
while True: while True:
await asyncio.sleep(max((-25*len(self))+150, 1)) # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis await asyncio.sleep(self.calculate_sleep_time(len(self))) # To make sure that we don't spam domains with one wiki every second we calculate a sane timeout for domains with few wikis
await self.run_wiki_scan(self.wikis.pop()) await self.run_wiki_scan(self.wikis.pop())
@cache
def calculate_sleep_time(self, queue_length: int):
return max((-25 * queue_length) + 150, 1)
async def run_wiki_check(self): async def run_wiki_check(self):
if self.irc: if self.irc:

14
src/mw_messages.py Normal file
View file

@ -0,0 +1,14 @@
# Why overthink it? Module executes once, I can have one global
message_sets: dict[int, dict] = dict()


class MWMessages:
    """Interns MediaWiki message sets.

    Two wikis with an identical ``allmessages`` payload share one numeric id:
    constructing an instance either finds an already-registered equal dict in
    the module-level ``message_sets`` registry or registers the new one under
    the next free id. The resulting id is stored on ``self.mw_id``.
    """

    def __init__(self, mc_messages: dict):
        # Look for an already-registered message set equal to this one.
        existing_id = next(
            (known_id for known_id, known_set in message_sets.items() if known_set == mc_messages),
            None,
        )
        if existing_id is not None:
            self.mw_id = existing_id
        else:
            # Not seen before — register under the next sequential id.
            self.mw_id = len(message_sets)
            message_sets[self.mw_id] = mc_messages

View file

@ -1,6 +1,8 @@
from dataclasses import dataclass from dataclasses import dataclass
import re import re
import logging, aiohttp import logging, aiohttp
from mw_messages import MWMessages
from src.exceptions import * from src.exceptions import *
from src.database import db from src.database import db
from src.formatters.rc import embed_formatter, compact_formatter from src.formatters.rc import embed_formatter, compact_formatter
@ -16,25 +18,37 @@ from src.config import settings
# noinspection PyPackageRequirements # noinspection PyPackageRequirements
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from collections import OrderedDict from collections import OrderedDict
from typing import Union from typing import Union, Optional
logger = logging.getLogger("rcgcdb.wiki") logger = logging.getLogger("rcgcdb.wiki")
class Wiki: class Wiki:
def __init__(self, script_url: str, rc_id: int, discussion_id: int): def __init__(self, script_url: str, rc_id: int, discussion_id: int):
self.script_url: str = script_url self.script_url: str = script_url
self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0)) self.session = aiohttp.ClientSession(headers=settings["header"], timeout=aiohttp.ClientTimeout(6.0))
self.statistics: Statistics = Statistics(rc_id, discussion_id) self.statistics: Statistics = Statistics(rc_id, discussion_id)
self.fail_times: int = 0 self.fail_times: int = 0
self.mw_messages: MWMessagesHashmap = MWMessagesHashmap() self.mw_messages: Optional[MWMessages] = None # TODO Need to initialize MWMessages() somewhere
self.first_fetch_done: bool = False
@property @property
def rc_id(self): def rc_id(self):
return self.statistics.last_action return self.statistics.last_action
def downtime_controller(self, down): @staticmethod
async def remove(wiki_url, reason):
    """Remove a wiki from the service.

    Notifies Discord (presumably the wiki's channels plus a monitoring
    channel — confirm in src.discord) and then deletes every rcgcdw row
    registered for this wiki URL.

    :param wiki_url: script URL identifying the wiki to remove
    :param reason: reason forwarded to the Discord removal notifications
    """
    logger.info("Removing a wiki {}".format(wiki_url))
    # Notify before deleting, so the webhooks still exist when the messages go out.
    await src.discord.wiki_removal(wiki_url, reason)
    await src.discord.wiki_removal_monitor(wiki_url, reason)
    async with db.pool().acquire() as connection:
        # $1 placeholder keeps the wiki URL parameterized (asyncpg-style binding).
        result = await connection.execute('DELETE FROM rcgcdw WHERE wiki = $1', wiki_url)
        # `result` is the DB status string; logged so removals are auditable.
        logger.warning('{} rows affected by DELETE FROM rcgcdw WHERE wiki = "{}"'.format(result, wiki_url))
def downtime_controller(self, down): # TODO Finish this one
if down: if down:
self.fail_times += 1 self.fail_times += 1
else: else:
self.fail_times -= 1 self.fail_times -= 1
@ -118,12 +132,11 @@ class Wiki:
except KeyError: except KeyError:
logger.exception("KeyError while iterating over json_path, full response: {}".format(request.json())) logger.exception("KeyError while iterating over json_path, full response: {}".format(request.json()))
raise raise
self.first_fetch_done = True
return request_json return request_json
async def fetch_wiki(self, extended, script_path, session: aiohttp.ClientSession, ratelimiter: RateLimiter, async def fetch_wiki(self, amount=20) -> dict:
amount=20) -> aiohttp.ClientResponse: if self.first_fetch_done is False:
await ratelimiter.timeout_wait()
if extended:
params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges", params = OrderedDict({"action": "query", "format": "json", "uselang": "content", "list": "tags|recentchanges",
"meta": "allmessages|siteinfo", "meta": "allmessages|siteinfo",
"utf8": 1, "tglimit": "max", "tgprop": "displayname", "utf8": 1, "tglimit": "max", "tgprop": "displayname",
@ -139,12 +152,22 @@ class Wiki:
"rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"}) "rclimit": amount, "rctype": "edit|new|log|categorize", "siprop": "namespaces|general"})
try: try:
response = await self.api_request(params=params) response = await self.api_request(params=params)
ratelimiter.timeout_add(1.0)
except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError): except (aiohttp.ClientConnectionError, aiohttp.ServerTimeoutError, asyncio.TimeoutError):
logger.error("A connection error occurred while requesting {}".format(params)) logger.error("A connection error occurred while requesting {}".format(params))
raise WikiServerError raise WikiServerError
return response return response
async def scan(self, rate_limiter=None):
    """Perform one scan pass over this wiki's recent changes.

    Fixes two defects in the committed version: the method was declared
    ``def scan`` while its body uses ``await`` (a SyntaxError), and the
    caller (``Domain.run_wiki_scan`` does ``await wiki.scan(self.rate_limiter)``)
    passes a rate limiter positionally that the signature did not accept.

    :param rate_limiter: optional RateLimiter supplied by the owning Domain;
        accepted for caller compatibility, currently unused here — TODO confirm
        whether per-request throttling should move into this method.
    """
    try:
        request = await self.fetch_wiki()
    except WikiServerError:
        # Connection-level failure: count it towards downtime and skip this cycle.
        self.downtime_controller(True)
        return  # TODO Add a log entry?
    else:
        # Successful fetch counts towards recovery.
        self.downtime_controller(False)
    if not self.mw_messages:
        # First successful fetch carries the allmessages payload in "query".
        mw_messages = request.get("query")  # TODO unfinished in this commit — result is not yet used
@dataclass @dataclass
class Wiki_old: class Wiki_old:
mw_messages: int = None mw_messages: int = None