Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions bot/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,8 @@ class _CodeBlock(EnvConfig, env_prefix="code_block_"):
cooldown_seconds: int = 300
minimum_lines: int = 4

maximum_auto_formatted_characters: int = 1500


CodeBlock = _CodeBlock()

Expand Down
79 changes: 79 additions & 0 deletions bot/exts/info/codeblock/_auto_formatting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import itertools

import black

from bot import constants
from bot.exts.info.codeblock import _parsing
from bot.log import get_logger

# Module-level logger; the functions in this module call its `trace`, `debug` and `error` methods.
log = get_logger(__name__)


def _code_as_markdown(code: str) -> str:
    """Wrap `code` in a Markdown code block fenced by three `_parsing.BACKTICK`s and tagged as `py`."""
    fence = _parsing.BACKTICK * 3
    return f"{fence}py\n{code}\n{fence}"


def _try_format_with_black(code: str) -> str | None:
    """
    Run Black over `code` using a default `black.FileMode`.

    Return the formatted source, or None when Black raises `black.InvalidInput`.
    """
    try:
        formatted = black.format_str(code, mode=black.FileMode())
    except black.InvalidInput:
        log.debug("automatic formatting with Black failed")
        return None
    else:
        return formatted


def _attempt_formatting_whole_content(content: str) -> str | None:
    """
    Treat the entire `content` as a single piece of Python code and try to format it with Black.

    Return the formatted code, or None when `content` is not detected as Python code,
    when Black fails to format it, or when Black produces no output.
    """
    if not _parsing.is_python_code(content):
        return None

    result = _try_format_with_black(content)
    if result is None:
        log.trace("Code is detected as Python code but Black formatting failed.")
        return None

    if result:
        return result

    # Detected as Python and accepted by Black, yet nothing came back.
    log.error(
        "Code has been detected as Python code, Black formatting didn't fail, but no output was produced. "
        "This should never happen."
    )
    return None


def _merge_non_code_blocks_with_code_blocks(non_code_blocks: list[str], formatted_code_blocks: list[str]) -> str:
    """
    Interleave plain text pieces with formatted code blocks and join them into one string.

    The pieces alternate, starting with `non_code_blocks[0]`. Each non-empty entry of
    `formatted_code_blocks` is wrapped as a Markdown code block; empty entries contribute nothing.
    When one list is shorter than the other, the missing entries are treated as empty strings.
    """
    pieces = []
    for text, code in itertools.zip_longest(non_code_blocks, formatted_code_blocks, fillvalue=""):
        pieces.append(text)
        if code:
            pieces.append(_code_as_markdown(code))
    return "".join(pieces)


def _try_fix_embedded_code_blocks(content: str) -> str | None:
    """Format every code block found inside `content` and stitch the result back together, or return None."""
    code_blocks = _parsing.find_code_blocks(content)
    # `find_code_blocks` may return None (not only an empty sequence), so `len()` would be unsafe here.
    if not code_blocks:
        return None

    formatted_code_blocks = [_try_format_with_black(code_block.content) for code_block in code_blocks]
    if None in formatted_code_blocks:
        log.trace("Multiple code blocks detected but formatting failed for at least one code block.")
        return None

    non_code_blocks = _parsing.find_non_code_blocks(content)
    # N code blocks must be surrounded by exactly N + 1 pieces of text (some of which may be empty).
    if len(formatted_code_blocks) + 1 != len(non_code_blocks):
        log.trace("Code blocks detected, but there are inconsistencies in what code blocks are detected.")
        return None

    return _merge_non_code_blocks_with_code_blocks(non_code_blocks, formatted_code_blocks)


def try_fix_markdown(content: str) -> str | None:
    """
    Converts the user's content to a properly formatted Markdown message if it finds a non-formatted code block.

    The whole message is first tried as a single piece of Python code. If that does not work, each code
    block found by `_parsing.find_code_blocks` is formatted with Black and merged back with the text
    around it.

    Returns None if it encounters any problems: no code blocks are found, Black cannot format one of
    them, the code and non-code pieces do not line up, or the fixed message is longer than
    `constants.CodeBlock.maximum_auto_formatted_characters`.
    """
    log.trace("Try to automatically format code blocks.")

    formatted_content = _attempt_formatting_whole_content(content)
    if formatted_content is not None:
        fixed_markdown = _code_as_markdown(formatted_content)
    else:
        fixed_markdown = _try_fix_embedded_code_blocks(content)
        if fixed_markdown is None:
            return None

    # Apply the size limit to both paths, not only to the multi-block one.
    if len(fixed_markdown) > constants.CodeBlock.maximum_auto_formatted_characters:
        log.trace("Automatically formatted message would be too large to post")
        return None

    return fixed_markdown
32 changes: 32 additions & 0 deletions bot/exts/info/codeblock/_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from bot.exts.filtering._filters.unique.discord_token import DiscordTokenFilter
from bot.exts.filtering._filters.unique.webhook import WEBHOOK_URL_RE
from bot.exts.help_channels._channel import is_help_forum_post
from bot.exts.info.codeblock import _auto_formatting
from bot.exts.info.codeblock._instructions import get_instructions
from bot.log import get_logger
from bot.utils import has_lines
Expand Down Expand Up @@ -118,6 +119,32 @@ async def send_instructions(self, message: discord.Message, instructions: str) -
# Increase amount of codeblock correction in stats
self.bot.stats.incr("codeblock_corrections")

async def send_proper_markdown_message_and_delete_original(
    self,
    message: discord.Message,
    proper_markdown: str,
) -> None:
    """
    Post `proper_markdown` in the channel of `message`, then delete `message`.

    The posted message mentions the author of the original message, tells them that their code blocks
    were fixed automatically, and points them to the `!code` command.

    Fixing the formatting automatically is intended to be less noisy than posting instructions on how
    to fix the code.

    Addresses: https://github.com/python-discord/bot/issues/2328
    """
    log.info(f"Sending proper Markdown formatted message, thereby replacing message {message.id}.")

    notice = (
        f"Hey {message.author.mention}!\n"
        "We detected improperly formatted code blocks in your message and managed to automatically fix them.\n"
        "Type `!code` to learn how to properly format code.\n\n"
        "Your message was:\n"
    )
    await message.channel.send(notice + proper_markdown)
    await message.delete()

    # Record the replacement in the bot's stats.
    self.bot.stats.incr("codeblock_replacements")

def should_parse(self, message: discord.Message) -> bool:
"""
Return True if `message` should be parsed.
Expand Down Expand Up @@ -149,6 +176,11 @@ async def on_message(self, msg: Message) -> None:
log.trace(f"Skipping code block detection of {msg.id}: #{msg.channel} is on cooldown.")
return

auto_formatted_message = _auto_formatting.try_fix_markdown(msg.content)
if auto_formatted_message:
await self.send_proper_markdown_message_and_delete_original(msg, auto_formatted_message)
return

instructions = get_instructions(msg.content)
if instructions:
await self.send_instructions(msg, instructions)
Expand Down
33 changes: 27 additions & 6 deletions bot/exts/info/codeblock/_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@
fr"""
(?P<ticks>
(?P<tick>[{''.join(_TICKS)}]) # Put all ticks into a character class within a group.
\2{{2}} # Match previous group 2 more times to ensure the same char.
\2* # Match previous group up to N more times to ensure the same char.
)
(?P<lang>[A-Za-z0-9\+\-\.]+\n)? # Optionally match a language specifier followed by a newline.
(?P<lang>[A-Za-z0-9+\-.]+\s)? # Optionally match a language specifier followed by a whitespace.
(?P<code>.+?) # Match the actual code within the block.
\1 # Match the same 3 ticks used at the start of the block.
\1 # Match the same N ticks used at the start of the block.
""",
re.DOTALL | re.VERBOSE
)
Expand Down Expand Up @@ -86,9 +86,9 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
for match in _RE_CODE_BLOCK.finditer(message):
# Used to ensure non-matched groups have an empty string as the default value.
groups = match.groupdict("")
language = groups["lang"].strip() # Strip the newline cause it's included in the group.
language = groups["lang"].strip() # Strip the whitespace cause it's included in the group.

if groups["tick"] == BACKTICK and language:
if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language and ("\n" in groups["lang"]):
log.trace("Message has a valid code block with a language; returning None.")
return None
if has_lines(groups["code"], constants.CodeBlock.minimum_lines):
Expand All @@ -100,6 +100,27 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None:
return code_blocks


def find_non_code_blocks(message: str) -> list[str]:
    """
    Find and return all pieces of the `message` that are not Markdown code blocks.

    This can be used to extract the text surrounding code blocks.
    Analogous to the `find_code_blocks` function, a match with fewer than
    `constants.CodeBlock.minimum_lines` lines is not counted as a code block; its full text
    (ticks included) stays part of the surrounding non-code piece.

    The returned list always has exactly one more entry than the number of matches that were
    counted as code blocks; entries may be empty strings.
    """
    log.trace("Finding all non-code blocks in a message.")
    non_code_blocks = []
    pending = []  # Text collected since the last counted code block.
    cursor = 0  # Index in `message` just past the previous match.

    # Slice by match positions instead of splitting on a sentinel character, so that
    # skipping a short block can never shift the indices of later blocks.
    for i, match in enumerate(_RE_CODE_BLOCK.finditer(message)):
        if has_lines(match.group("code"), constants.CodeBlock.minimum_lines):
            pending.append(message[cursor:match.start()])
            non_code_blocks.append("".join(pending))
            pending = []
        else:
            # not a proper code block; keep it as part of the current non-code piece
            log.debug(f"Skipping non-code block {i}.")
            pending.append(message[cursor:match.end()])
        cursor = match.end()

    pending.append(message[cursor:])
    non_code_blocks.append("".join(pending))
    return non_code_blocks


def _is_python_code(content: str) -> bool:
"""Return True if `content` is valid Python consisting of more than just expressions."""
log.trace("Checking if content is Python code.")
Expand Down Expand Up @@ -181,7 +202,7 @@ def parse_bad_language(content: str) -> BadLanguage | None:
)


def _get_leading_spaces(content: str) -> int:
def _get_leading_spaces(content: str) -> int | None:
"""Return the number of spaces at the start of the first line in `content`."""
leading_spaces = 0
for char in content:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"tenacity==9.1.2",
"tldextract==5.3.0",
"yarl==1.22.0",
"black==26.5.1",
]
name = "bot"
version = "1.0.1"
Expand Down
Loading