From 80152092fb8b7577bd269c8957e5aa35dcbcd987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 01:00:47 +0200 Subject: [PATCH 01/11] add Black to project dependencies --- pyproject.toml | 2 ++ uv.lock | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index beb96859a7..f214bc183a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,8 @@ dependencies = [ "tenacity==9.1.2", "tldextract==5.3.0", "yarl==1.22.0", + "ruff==0.14.2", + "black==26.5.1", ] name = "bot" version = "1.0.1" diff --git a/uv.lock b/uv.lock index 7e47fa3c3d..9e2dfef64e 100644 --- a/uv.lock +++ b/uv.lock @@ -198,6 +198,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, ] +[[package]] +name = "black" +version = "26.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "mypy-extensions" }, + { name = "packaging" }, + { name = "pathspec" }, + { name = "platformdirs" }, + { name = "pytokens" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/37/5628dd55bf2b34257fc7603f0fe97c40e3aaf24265f416a9c85c95ca1436/black-26.5.1.tar.gz", hash = "sha256:dd321f668053961824bcc1be1cc1df748b2d7e4fa28086b08331e577b0100a73", size = 679439, upload-time = "2026-05-18T16:53:36.107Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/16/a8da8eb208c51c7f4ce74609a45d0dcc6d8a2141e45e81ee5289d1bb0d59/black-26.5.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e88976690a64b0af98312ca958415849cb42423423c5f2ee74af4b49a97a2168", size = 2004800, upload-time = "2026-05-18T17:05:38.182Z" }, + { url = "https://files.pythonhosted.org/packages/11/8a/a479296a19e383b70a725882a6cf3d786540601ff03cabbaaf1cce864c5a/black-26.5.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:32d5ea7f6c8bdfa6e648326ebca1f02b0764e2a029edc6f8dce2627e19d468c3", size = 1815576, upload-time = "2026-05-18T17:05:40.309Z" }, + { url = "https://files.pythonhosted.org/packages/81/6b/cfaf3d39f25132c156a068f6b805576c9103a84086019507c70e1911ee7d/black-26.5.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ea8d16dc41655aa113cd64665e7219446cd7e4ff2248d7178eaa905190c86b18", size = 1877927, upload-time = "2026-05-18T17:05:42.463Z" }, + { url = "https://files.pythonhosted.org/packages/66/76/302e313964bcff7e28df329d39f84f5270095730d85ff0acc260610a0d82/black-26.5.1-cp314-cp314-win_amd64.whl", hash = "sha256:577f21094ea469ef92ec1adaf2c9441a226d2144d01a5be2fa823cecf6543e50", size = 1511860, upload-time = "2026-05-18T17:05:43.943Z" }, + { url = "https://files.pythonhosted.org/packages/27/4e/a3827e35e0e567f9f9ee59e2a0ab979267dca98718f25547ca8c6733afd4/black-26.5.1-cp314-cp314-win_arm64.whl", hash = "sha256:ed1a20af114c301a0269bf01163d51dbef72737fd65f850001e7cbe7f3c7abae", size = 1316632, upload-time = "2026-05-18T17:05:45.521Z" }, + { url = "https://files.pythonhosted.org/packages/94/51/f975cae76d44274cc2868dc9040ac5d58d464784610234455b4e7b19c6ef/black-26.5.1-py3-none-any.whl", hash = "sha256:4ed7f7da04046d2e488437170797d3b4a4ad83906683bcb7dfc68b673bbce5e2", size = 213693, upload-time = "2026-05-18T16:53:33.964Z" }, +] + [[package]] name = "bot" version = "1.0.1" @@ -206,6 +228,7 @@ dependencies = [ { name = "aiohttp" }, { name = "arrow" }, { name = "beautifulsoup4" }, + { name = "black" }, { name = "deepdiff" }, { name = "emoji" }, { name = "feedparser" }, @@ -218,6 +241,7 @@ dependencies = [ { name = "python-frontmatter" }, { name = "rapidfuzz" }, { name = "regex" }, + { name = "ruff" }, { name = "sentry-sdk" }, { name = "tenacity" }, { name = "tldextract" }, @@ -242,6 +266,7 @@ requires-dist = [ { name = "aiohttp", specifier = "==3.13.4" }, { name = "arrow", specifier = "==1.4.0" }, { name = "beautifulsoup4", specifier = "==4.14.2" }, + { name = "black", specifier = "==26.5.1" }, { name = "deepdiff", specifier = "==9.0.0" }, { name = "emoji", specifier = "==2.15.0" }, { name = "feedparser", specifier = "==6.0.12" }, @@ -254,6 +279,7 @@ requires-dist = [ { name = "python-frontmatter", specifier = "==1.1.0" }, { name = "rapidfuzz", specifier = "==3.14.1" }, { name = "regex", specifier = "==2026.3.32" }, + { name = "ruff", specifier = "==0.14.2" }, { name = "sentry-sdk", specifier = "==2.43.0" }, { name = "tenacity", specifier = "==9.1.2" }, { name = "tldextract", specifier = "==5.3.0" }, @@ -349,6 +375,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, ] +[[package]] +name = "click" +version = "8.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9b/98/518d8e5081007684232226f475082b30087d0f585e8457db087298259f49/click-8.4.1.tar.gz", hash = "sha256:918b5633eddf6b41c32d4f454bf0de810065c74e3f7dbf8ee5452f8be88d3e96", size = 353007, upload-time = "2026-05-22T04:08:37.769Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/0d/67e5b4109ea4a837e80daa87c2c696711955e40449a97e8926672534def2/click-8.4.1-py3-none-any.whl", hash = "sha256:482be17c6991b8c19c5429a1e995d9b0efdbb63172824c41f99965dc0ade8ec2", size = 116639, upload-time = "2026-05-22T04:08:35.26Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -742,6 +780,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/da/7d22601b625e241d4f23ef1ebff8acfc60da633c9e7e7922e24d10f592b3/multidict-6.7.0-py3-none-any.whl", hash = "sha256:394fc5c42a333c9ffc3e421a4c85e08580d990e08b99f6bf35b4132114c5dcb3", size = 12317, upload-time = "2025-10-06T14:52:29.272Z" }, ] +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -769,6 +816,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pathspec" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/82/42f767fc1c1143d6fd36efb827202a2d997a375e160a71eb2888a925aac1/pathspec-1.1.1.tar.gz", hash = "sha256:17db5ecd524104a120e173814c90367a96a98d07c45b2e10c2f3919fff91bf5a", size = 135180, upload-time = "2026-04-27T01:46:08.907Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/d9/7fb5aa316bc299258e68c73ba3bddbc499654a07f151cba08f6153988714/pathspec-1.1.1-py3-none-any.whl", hash = "sha256:a00ce642f577bf7f473932318056212bc4f8bfdf53128c78bbd5af0b9b20b189", size = 57328, upload-time = "2026-04-27T01:46:07.06Z" }, +] + [[package]] name = "platformdirs" version = "4.5.0" @@ -1081,6 +1137,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/87/3c8da047b3ec5f99511d1b4d7a5bc72d4b98751c7e78492d14dc736319c5/python_frontmatter-1.1.0-py3-none-any.whl", hash = "sha256:335465556358d9d0e6c98bbeb69b1c969f2a4a21360587b9873bfc3b213407c1", size = 9834, upload-time = "2024-01-16T18:50:00.911Z" }, ] +[[package]] +name = "pytokens" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b6/34/b4e015b99031667a7b960f888889c5bd34ef585c85e1cb56a594b92836ac/pytokens-0.4.1.tar.gz", hash = "sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a", size = 23015, upload-time = "2026-01-30T01:03:45.924Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/a7/b470f672e6fc5fee0a01d9e75005a0e617e162381974213a945fcd274843/pytokens-0.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321", size = 160821, upload-time = "2026-01-30T01:03:19.684Z" }, + { url = "https://files.pythonhosted.org/packages/80/98/e83a36fe8d170c911f864bfded690d2542bfcfacb9c649d11a9e6eb9dc41/pytokens-0.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa", size = 254263, upload-time = "2026-01-30T01:03:20.834Z" }, + { url = "https://files.pythonhosted.org/packages/0f/95/70d7041273890f9f97a24234c00b746e8da86df462620194cef1d411ddeb/pytokens-0.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d", size = 268071, upload-time = "2026-01-30T01:03:21.888Z" }, + { url = "https://files.pythonhosted.org/packages/da/79/76e6d09ae19c99404656d7db9c35dfd20f2086f3eb6ecb496b5b31163bad/pytokens-0.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324", size = 271716, upload-time = "2026-01-30T01:03:23.633Z" }, + { url = "https://files.pythonhosted.org/packages/79/37/482e55fa1602e0a7ff012661d8c946bafdc05e480ea5a32f4f7e336d4aa9/pytokens-0.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9", size = 104539, upload-time = "2026-01-30T01:03:24.788Z" }, + { url = "https://files.pythonhosted.org/packages/30/e8/20e7db907c23f3d63b0be3b8a4fd1927f6da2395f5bcc7f72242bb963dfe/pytokens-0.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb", size = 168474, upload-time = "2026-01-30T01:03:26.428Z" }, + { url = "https://files.pythonhosted.org/packages/d6/81/88a95ee9fafdd8f5f3452107748fd04c24930d500b9aba9738f3ade642cc/pytokens-0.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3", size = 290473, upload-time = "2026-01-30T01:03:27.415Z" }, + { url = "https://files.pythonhosted.org/packages/cf/35/3aa899645e29b6375b4aed9f8d21df219e7c958c4c186b465e42ee0a06bf/pytokens-0.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975", size = 303485, upload-time = "2026-01-30T01:03:28.558Z" }, + { url = "https://files.pythonhosted.org/packages/52/a0/07907b6ff512674d9b201859f7d212298c44933633c946703a20c25e9d81/pytokens-0.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a", size = 306698, upload-time = "2026-01-30T01:03:29.653Z" }, + { url = "https://files.pythonhosted.org/packages/39/2a/cbbf9250020a4a8dd53ba83a46c097b69e5eb49dd14e708f496f548c6612/pytokens-0.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918", size = 116287, upload-time = "2026-01-30T01:03:30.912Z" }, + { url = "https://files.pythonhosted.org/packages/c6/78/397db326746f0a342855b81216ae1f0a32965deccfd7c830a2dbc66d2483/pytokens-0.4.1-py3-none-any.whl", hash = "sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de", size = 13729, upload-time = "2026-01-30T01:03:45.029Z" }, +] + [[package]] name = "pyyaml" version = "6.0.3" From d0d2a615d47052ebfa25f35eee8cb48bb72c9692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 01:07:12 +0200 Subject: [PATCH 02/11] integrate an auto-formatter to replace the format hints for easy to fix messages --- bot/exts/info/codeblock/_auto_formatting.py | 54 +++++++++++++++++++++ bot/exts/info/codeblock/_instructions.py | 9 +++- 2 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 bot/exts/info/codeblock/_auto_formatting.py diff --git a/bot/exts/info/codeblock/_auto_formatting.py b/bot/exts/info/codeblock/_auto_formatting.py new file mode 100644 index 0000000000..609556045a --- /dev/null +++ b/bot/exts/info/codeblock/_auto_formatting.py @@ -0,0 +1,54 @@ +from collections.abc import Sequence + +import black + +from bot.exts.info.codeblock import _parsing +from bot.exts.info.codeblock._parsing import CodeBlock +from bot.log import get_logger + +log = get_logger(__name__) + + +def _code_as_markdown(code: str) -> str: + return f"{_parsing.BACKTICK * 3}py\n{code}\n{_parsing.BACKTICK * 3}" + + +def _try_format_with_black(code: str) -> str | None: + try: + return black.format_str(code, mode=black.FileMode()) + except Exception: + log.trace("automatic formatting failed") + return None + + +def _attempt_formatting_whole_content(content: str) -> str | None: + if _parsing.is_python_code(content): + formatted_code = _try_format_with_black(content) + if formatted_code is None: + log.trace("Code is detected as Python code but Black formatting failed.") + return None + + if not formatted_code: + log.error( + "Code has been detected as Python code, Black formatting didn't fail, but no output was produced. " + "This should never happen.") + return None + + return formatted_code + return None + + +def attempt_formatting(content: str, code_blocks: Sequence[CodeBlock]) -> str | None: + formatted_content = _attempt_formatting_whole_content(content) + if formatted_content is not None: + return _code_as_markdown(formatted_content) + + formatted_code_blocks = [_try_format_with_black(code_block.content) for code_block in code_blocks] + if None in formatted_code_blocks: + log.trace("Multiple code blocks detected but formatting failed for at least one code block.") + return None + + return "\n".join( + f"Code {i}:\n{_code_as_markdown(formatted_code_block)}" + for i, formatted_code_block in enumerate(formatted_code_blocks, start=1) + ) diff --git a/bot/exts/info/codeblock/_instructions.py b/bot/exts/info/codeblock/_instructions.py index 210217ccaf..ec4351bf58 100644 --- a/bot/exts/info/codeblock/_instructions.py +++ b/bot/exts/info/codeblock/_instructions.py @@ -1,7 +1,6 @@ """This module generates and formats instructional messages about fixing Markdown code blocks.""" - -from bot.exts.info.codeblock import _parsing +from bot.exts.info.codeblock import _auto_formatting, _parsing from bot.log import get_logger log = get_logger(__name__) @@ -69,6 +68,7 @@ def _get_no_ticks_message(content: str) -> str | None: if _parsing.is_python_code(content): example_blocks = _get_example("py") return example_blocks + log.trace("Aborting missing code block instructions: content is not Python code.") return None @@ -143,6 +143,11 @@ def get_instructions(content: str) -> str | None: log.trace("At least one valid code block found; no instructions to return.") return None + log.trace("Try to automatically format code blocks.") + formatted_code = _auto_formatting.attempt_formatting(content, blocks) + if formatted_code is not None: + return formatted_code + if not blocks: log.trace("No code blocks were found in message.") instructions = _get_no_ticks_message(content) From c144654be3adad3140a89edb6db46b3c75ca0f18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 01:07:47 +0200 Subject: [PATCH 03/11] adapted the codeblock regex to recognize more malformed codeblocks --- bot/exts/info/codeblock/_parsing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py index abad09eef1..dc6dee9b7b 100644 --- a/bot/exts/info/codeblock/_parsing.py +++ b/bot/exts/info/codeblock/_parsing.py @@ -35,11 +35,11 @@ fr""" (?P (?P[{''.join(_TICKS)}]) # Put all ticks into a character class within a group. - \2{{2}} # Match previous group 2 more times to ensure the same char. + \2* # Match previous group up to N more times to ensure the same char. ) - (?P[A-Za-z0-9\+\-\.]+\n)? # Optionally match a language specifier followed by a newline. + (?P[A-Za-z0-9\+\-\.]+\n?)? # Optionally match a language specifier followed by a newline. (?P.+?) # Match the actual code within the block. - \1 # Match the same 3 ticks used at the start of the block. + \1 # Match the same N ticks used at the start of the block. """, re.DOTALL | re.VERBOSE ) @@ -88,7 +88,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None: groups = match.groupdict("") language = groups["lang"].strip() # Strip the newline cause it's included in the group. - if groups["tick"] == BACKTICK and language: + if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language: log.trace("Message has a valid code block with a language; returning None.") return None if has_lines(groups["code"], constants.CodeBlock.minimum_lines): From dfa23a7ff87b309edcd9027110fde883d2b04860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 02:36:10 +0200 Subject: [PATCH 04/11] separated auto-formatting from the instructions.py file --- bot/exts/info/codeblock/_auto_formatting.py | 26 ++++++++++++++------- bot/exts/info/codeblock/_cog.py | 7 ++++++ bot/exts/info/codeblock/_instructions.py | 7 +----- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/bot/exts/info/codeblock/_auto_formatting.py b/bot/exts/info/codeblock/_auto_formatting.py index 609556045a..0bcf71f971 100644 --- a/bot/exts/info/codeblock/_auto_formatting.py +++ b/bot/exts/info/codeblock/_auto_formatting.py @@ -1,9 +1,6 @@ -from collections.abc import Sequence - import black from bot.exts.info.codeblock import _parsing -from bot.exts.info.codeblock._parsing import CodeBlock from bot.log import get_logger log = get_logger(__name__) @@ -16,8 +13,8 @@ def _code_as_markdown(code: str) -> str: def _try_format_with_black(code: str) -> str | None: try: return black.format_str(code, mode=black.FileMode()) - except Exception: - log.trace("automatic formatting failed") + except black.InvalidInput: + log.debug("automatic formatting with Black failed") return None @@ -38,17 +35,30 @@ def _attempt_formatting_whole_content(content: str) -> str | None: return None -def attempt_formatting(content: str, code_blocks: Sequence[CodeBlock]) -> str | None: +def try_fix_markdown(content: str) -> str | None: + """ + Converts the user's content to a properly formatted Markdown message. + + Returns None if it encounters any problems. + """ + log.trace("Try to automatically format code blocks.") formatted_content = _attempt_formatting_whole_content(content) if formatted_content is not None: return _code_as_markdown(formatted_content) + code_blocks = _parsing.find_code_blocks(content) + if len(code_blocks) == 0: + return None + formatted_code_blocks = [_try_format_with_black(code_block.content) for code_block in code_blocks] if None in formatted_code_blocks: log.trace("Multiple code blocks detected but formatting failed for at least one code block.") return None - return "\n".join( - f"Code {i}:\n{_code_as_markdown(formatted_code_block)}" + if len(formatted_code_blocks) == 1: + return f"Your code correctly formatted:\n{_code_as_markdown(formatted_code_blocks[0])}" + + return "Your codes correctly formatted:\n" + "\n".join( + f"Codeblock {i}:\n{_code_as_markdown(formatted_code_block)}" for i, formatted_code_block in enumerate(formatted_code_blocks, start=1) ) diff --git a/bot/exts/info/codeblock/_cog.py b/bot/exts/info/codeblock/_cog.py index 4b0936fbcf..0b5683115c 100644 --- a/bot/exts/info/codeblock/_cog.py +++ b/bot/exts/info/codeblock/_cog.py @@ -10,6 +10,7 @@ from bot.exts.filtering._filters.unique.discord_token import DiscordTokenFilter from bot.exts.filtering._filters.unique.webhook import WEBHOOK_URL_RE from bot.exts.help_channels._channel import is_help_forum_post +from bot.exts.info.codeblock import _auto_formatting from bot.exts.info.codeblock._instructions import get_instructions from bot.log import get_logger from bot.utils import has_lines @@ -149,6 +150,12 @@ async def on_message(self, msg: Message) -> None: log.trace(f"Skipping code block detection of {msg.id}: #{msg.channel} is on cooldown.") return + auto_formatted_message = _auto_formatting.try_fix_markdown(msg.content) + if auto_formatted_message: + await self.send_instructions(msg, auto_formatted_message) + return + + instructions = get_instructions(msg.content) if instructions: await self.send_instructions(msg, instructions) diff --git a/bot/exts/info/codeblock/_instructions.py b/bot/exts/info/codeblock/_instructions.py index ec4351bf58..ad7dc2efbd 100644 --- a/bot/exts/info/codeblock/_instructions.py +++ b/bot/exts/info/codeblock/_instructions.py @@ -1,6 +1,6 @@ """This module generates and formats instructional messages about fixing Markdown code blocks.""" -from bot.exts.info.codeblock import _auto_formatting, _parsing +from bot.exts.info.codeblock import _parsing from bot.log import get_logger log = get_logger(__name__) @@ -143,11 +143,6 @@ def get_instructions(content: str) -> str | None: log.trace("At least one valid code block found; no instructions to return.") return None - log.trace("Try to automatically format code blocks.") - formatted_code = _auto_formatting.attempt_formatting(content, blocks) - if formatted_code is not None: - return formatted_code - if not blocks: log.trace("No code blocks were found in message.") instructions = _get_no_ticks_message(content) From eff3a3fd26aafe852477b547efb46dada776f683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 04:21:06 +0200 Subject: [PATCH 05/11] add new constant for maximum number of characters for auto-formatted response --- bot/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bot/constants.py b/bot/constants.py index 63fc156fef..7609634ce5 100644 --- a/bot/constants.py +++ b/bot/constants.py @@ -296,6 +296,8 @@ class _CodeBlock(EnvConfig, env_prefix="code_block_"): cooldown_seconds: int = 300 minimum_lines: int = 4 + maximum_auto_formatted_characters: int = 1500 + CodeBlock = _CodeBlock() From 212e7f942c428e73d4e280c8172ca43c276ecc24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 04:25:39 +0200 Subject: [PATCH 06/11] updated the code block regex to accept any whitespace; added parser functionality to parse for non-code-blocks --- bot/exts/info/codeblock/_parsing.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py index dc6dee9b7b..13d51f12ac 100644 --- a/bot/exts/info/codeblock/_parsing.py +++ b/bot/exts/info/codeblock/_parsing.py @@ -37,7 +37,7 @@ (?P[{''.join(_TICKS)}]) # Put all ticks into a character class within a group. \2* # Match previous group up to N more times to ensure the same char. ) - (?P[A-Za-z0-9\+\-\.]+\n?)? # Optionally match a language specifier followed by a newline. + (?P[A-Za-z0-9+\-.]+\s)? # Optionally match a language specifier followed by a whitespace. (?P.+?) # Match the actual code within the block. \1 # Match the same N ticks used at the start of the block. """, @@ -86,7 +86,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None: for match in _RE_CODE_BLOCK.finditer(message): # Used to ensure non-matched groups have an empty string as the default value. groups = match.groupdict("") - language = groups["lang"].strip() # Strip the newline cause it's included in the group. + language = groups["lang"].strip() # Strip the whitespace cause it's included in the group. if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language: log.trace("Message has a valid code block with a language; returning None.") @@ -100,6 +100,27 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None: return code_blocks +def find_non_code_blocks(message: str) -> list[str]: + """ + Find and return all pieces of the `message` that are not Markdown code blocks. + + This can be used to extract the text surrounding code blocks. + Analogue to the `find_code_blocks` function, code blocks with 3 or fewer lines are not counted as code blocks. + """ + log.trace("Finding all non-code blocks in a message.") + non_code_blocks = re.sub(_RE_CODE_BLOCK, "\x00", message).split("\x00") + + for i, match in enumerate(re.finditer(_RE_CODE_BLOCK, message)): + groups = match.groupdict("") + + if not has_lines(groups["code"], constants.CodeBlock.minimum_lines): + # not a proper code block; merge back into non_code_blocks + log.debug(f"Skipping non-code block {i}.") + non_code_blocks[i:i + 2] = [non_code_blocks[i] + match.group(0) + non_code_blocks[i + 1]] + + return non_code_blocks + + def _is_python_code(content: str) -> bool: """Return True if `content` is valid Python consisting of more than just expressions.""" log.trace("Checking if content is Python code.") @@ -181,7 +202,7 @@ def parse_bad_language(content: str) -> BadLanguage | None: ) -def _get_leading_spaces(content: str) -> int: +def _get_leading_spaces(content: str) -> int | None: """Return the number of spaces at the start of the first line in `content`.""" leading_spaces = 0 for char in content: From d1125e7559e4504d4baa0e3f90f65e75deb7b767 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 04:27:53 +0200 Subject: [PATCH 07/11] changed the response with the auto-formatted message to a regular message rather than an embed due to width-issues --- bot/exts/info/codeblock/_cog.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/bot/exts/info/codeblock/_cog.py b/bot/exts/info/codeblock/_cog.py index 0b5683115c..2ae3b10d35 100644 --- a/bot/exts/info/codeblock/_cog.py +++ b/bot/exts/info/codeblock/_cog.py @@ -119,6 +119,32 @@ async def send_instructions(self, message: discord.Message, instructions: str) - # Increase amount of codeblock correction in stats self.bot.stats.incr("codeblock_corrections") + async def send_proper_markdown_message_and_delete_original(self, message: discord.Message, + proper_markdown: str) -> None: + """ + Send an embed with `proper_formatting`, replacing the user-given message containing non-formatted code blocks. + + This embed will delete the original user's message immediately. + + The purpose of this function is for when automatically fixing the formatting of a message is easier than + spamming the chat with instructions on how to fix the code. + + Addresses: https://github.com/python-discord/bot/issues/2328 + """ + log.info(f"Sending proper Markdown formatted message, thereby replacing message {message.id}.") + + await message.channel.send( + f"Hey {message.author.mention}!\n" + "We detected improperly formatted code blocks in your message and managed to automatically fix them.\n" + "Type `!code` to learn how to properly format code.\n\n" + "Your message was:\n" + + proper_markdown + ) + await message.delete() + + # Increase amount of codeblock replacements in stats + self.bot.stats.incr("codeblock_replacements") + def should_parse(self, message: discord.Message) -> bool: """ Return True if `message` should be parsed. @@ -152,10 +178,9 @@ async def on_message(self, msg: Message) -> None: auto_formatted_message = _auto_formatting.try_fix_markdown(msg.content) if auto_formatted_message: - await self.send_instructions(msg, auto_formatted_message) + await self.send_proper_markdown_message_and_delete_original(msg, auto_formatted_message) return - instructions = get_instructions(msg.content) if instructions: await self.send_instructions(msg, instructions) From 0932b4016618dfa265c45f09dc3d91d81210c3c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 04:31:25 +0200 Subject: [PATCH 08/11] extended _auto_formatting.py to be able to merge non-code blocks and code-blocks, so that it can fix messages containing (multiple) code block(s) and text in some scenarios --- bot/exts/info/codeblock/_auto_formatting.py | 29 ++++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/bot/exts/info/codeblock/_auto_formatting.py b/bot/exts/info/codeblock/_auto_formatting.py index 0bcf71f971..ba526db5df 100644 --- a/bot/exts/info/codeblock/_auto_formatting.py +++ b/bot/exts/info/codeblock/_auto_formatting.py @@ -1,5 +1,8 @@ +import itertools + import black +from bot import constants from bot.exts.info.codeblock import _parsing from bot.log import get_logger @@ -35,9 +38,17 @@ def _attempt_formatting_whole_content(content: str) -> str | None: return None +def _merge_non_code_blocks_with_code_blocks(non_code_blocks: list[str], formatted_code_blocks: list[str]) -> str: + return "".join( + f"{non_code_block}{formatted_code_block and _code_as_markdown(formatted_code_block)}" + for non_code_block, formatted_code_block + in itertools.zip_longest(non_code_blocks, formatted_code_blocks, fillvalue="") + ) + + def try_fix_markdown(content: str) -> str | None: """ - Converts the user's content to a properly formatted Markdown message. + Converts the user's content to a properly formatted Markdown message if it finds a non-formatted code block. Returns None if it encounters any problems. """ @@ -55,10 +66,14 @@ def try_fix_markdown(content: str) -> str | None: log.trace("Multiple code blocks detected but formatting failed for at least one code block.") return None - if len(formatted_code_blocks) == 1: - return f"Your code correctly formatted:\n{_code_as_markdown(formatted_code_blocks[0])}" + non_code_blocks = _parsing.find_non_code_blocks(content) + if len(formatted_code_blocks) + 1 != len(non_code_blocks): + log.trace("Code blocks detected, but there are inconsistencies in what code blocks are detected.") + return None - return "Your codes correctly formatted:\n" + "\n".join( - f"Codeblock {i}:\n{_code_as_markdown(formatted_code_block)}" - for i, formatted_code_block in enumerate(formatted_code_blocks, start=1) - ) + fixed_markdown = _merge_non_code_blocks_with_code_blocks(non_code_blocks, formatted_code_blocks) + if len(fixed_markdown) > constants.CodeBlock.maximum_auto_formatted_characters: + log.trace("Automatically formatted message would be too large to post") + return None + + return fixed_markdown From b0a02eb1427ad8753ad3742d39eb4f570e355637 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 05:21:15 +0200 Subject: [PATCH 09/11] added requirement for whitespace to be a newline to be seen as a properly formatted code block --- bot/exts/info/codeblock/_parsing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/exts/info/codeblock/_parsing.py b/bot/exts/info/codeblock/_parsing.py index 13d51f12ac..ab75da7ba1 100644 --- a/bot/exts/info/codeblock/_parsing.py +++ b/bot/exts/info/codeblock/_parsing.py @@ -88,7 +88,7 @@ def find_code_blocks(message: str) -> Sequence[CodeBlock] | None: groups = match.groupdict("") language = groups["lang"].strip() # Strip the whitespace cause it's included in the group. - if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language: + if groups["tick"] == BACKTICK and len(groups["ticks"]) == 3 and language and ("\n" in groups["lang"]): log.trace("Message has a valid code block with a language; returning None.") return None if has_lines(groups["code"], constants.CodeBlock.minimum_lines): From fe3fe8541591e981406f065425c5ee073b77760a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 05:24:26 +0200 Subject: [PATCH 10/11] removed ruff from the dependencies again --- pyproject.toml | 1 - uv.lock | 2 -- 2 files changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f214bc183a..5c82fabb2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,6 @@ dependencies = [ "tenacity==9.1.2", "tldextract==5.3.0", "yarl==1.22.0", - "ruff==0.14.2", "black==26.5.1", ] name = "bot" diff --git a/uv.lock b/uv.lock index 9e2dfef64e..8de54f8e78 100644 --- a/uv.lock +++ b/uv.lock @@ -241,7 +241,6 @@ dependencies = [ { name = "python-frontmatter" }, { name = "rapidfuzz" }, { name = "regex" }, - { name = "ruff" }, { name = "sentry-sdk" }, { name = "tenacity" }, { name = "tldextract" }, @@ -279,7 +278,6 @@ requires-dist = [ { name = "python-frontmatter", specifier = "==1.1.0" }, { name = "rapidfuzz", specifier = "==3.14.1" }, { name = "regex", specifier = "==2026.3.32" }, - { name = "ruff", specifier = "==0.14.2" }, { name = "sentry-sdk", specifier = "==2.43.0" }, { name = "tenacity", specifier = "==9.1.2" }, { name = "tldextract", specifier = "==5.3.0" }, From 3e34fa19f8b2e8b0317520e2958b1394f04e9162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20R=C3=B6tttgermann?= Date: Mon, 1 Jun 2026 05:27:05 +0200 Subject: [PATCH 11/11] undo dead changes in _instructions.py --- bot/exts/info/codeblock/_instructions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bot/exts/info/codeblock/_instructions.py b/bot/exts/info/codeblock/_instructions.py index ad7dc2efbd..210217ccaf 100644 --- a/bot/exts/info/codeblock/_instructions.py +++ b/bot/exts/info/codeblock/_instructions.py @@ -1,5 +1,6 @@ """This module generates and formats instructional messages about fixing Markdown code blocks.""" + from bot.exts.info.codeblock import _parsing from bot.log import get_logger @@ -68,7 +69,6 @@ def _get_no_ticks_message(content: str) -> str | None: if _parsing.is_python_code(content): example_blocks = _get_example("py") return example_blocks - log.trace("Aborting missing code block instructions: content is not Python code.") return None