From 20465b34b6cb60cab865105c9587cc95eb160b83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EC=A4=80=EC=98=81?= Date: Tue, 21 Apr 2026 02:24:21 +0900 Subject: [PATCH] Fix translation pipeline corrupting HTML tags and bold headings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Unescape entity-encoded HTML tags (&lt;code&gt; → <code>) in translated content when the same tag appears as raw HTML in the English source - Remove bare code-fence wrapping from bold heading lines (**...**) that the translation pipeline incorrectly wraps in fenced code blocks --- .../lib/correct-translation-content.ts | 28 +++++++ .../tests/correct-translation-content.ts | 74 +++++++++++++++++++ 2 files changed, 102 insertions(+) diff --git a/src/languages/lib/correct-translation-content.ts b/src/languages/lib/correct-translation-content.ts index fca1adefd0f9..4941db55ba0b 100644 --- a/src/languages/lib/correct-translation-content.ts +++ b/src/languages/lib/correct-translation-content.ts @@ -884,6 +884,34 @@ export function correctTranslatedContentStrings( } } + // Unescape HTML entity-encoded tags (`&lt;tag&gt;` → `<tag>`) that Crowdin + // introduces when the English source uses inline raw HTML — e.g. + // `<code>label</code>` inside table `<td>` cells. + // Without this fix, those tags render as literal `<code>` text on translated + // pages rather than as styled code elements. + // Only unescape tag names present as raw HTML in the English source to avoid + // incorrectly expanding intentional `&lt;` entity sequences. + if (englishContent && content.includes('<')) { + const englishTagNames = new Set( + [...englishContent.matchAll(/<([a-z][a-z0-9]*)/gi)].map((m) => m[1].toLowerCase()), + ) + if (englishTagNames.size > 0) { + content = content.replace( + /<(\/?[a-z][a-z0-9]*)(\s[^<>]*?)?>/gi, + (match, tag: string, attrs = '') => { + const baseName = tag.replace(/^\//, '').toLowerCase() + return englishTagNames.has(baseName) ? 
`<${tag}${attrs}>` : match + }, + ) + } + } + + // Remove bare code-fence wrapping from bold heading lines. Translation pipelines + // sometimes wrap `**heading**` lines in bare (no-language) fenced code blocks, + // causing them to render as code instead of bold text. Strip the fences and + // restore the heading as plain Markdown. + content = content.replace(/^```\s*\n(\*\*[^\n]+\*\*)\s*\n```/gm, '$1') + // Collapsed Markdown table rows — restore linebreaks between `|` cells. content = content.replaceAll(' | | ', ' |\n| ') diff --git a/src/languages/tests/correct-translation-content.ts b/src/languages/tests/correct-translation-content.ts index 308391c5bb5b..59bfbab36c50 100644 --- a/src/languages/tests/correct-translation-content.ts +++ b/src/languages/tests/correct-translation-content.ts @@ -1355,6 +1355,80 @@ describe('correctTranslatedContentStrings', () => { expect(fix('{{%raw %}', 'es')).toBe('{% raw %}') expect(fix('{{% raw %}', 'es')).toBe('{% raw %}') }) + + test('unescapes entity-encoded HTML tags when English source has matching raw HTML', () => { + const english = + 'ubuntu-latest' + + expect(fix('<code>ubuntu-latest</code>', 'ko', english)).toBe( + 'ubuntu-latest', + ) + expect( + fix('<a href="https://example.com">ubuntu-latest</a>', 'ko', english), + ).toBe('ubuntu-latest') + expect( + fix( + '<code><a href="https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2404-Readme.md">ubuntu-latest</a></code>', + 'ko', + english, + ), + ).toBe( + 'ubuntu-latest', + ) + }) + + test('does not unescape entity-encoded tags absent from English source', () => { + const english = '

Simple paragraph without code elements

' + const input = '<code>text</code>' + expect(fix(input, 'ko', english)).toBe(input) + }) + + test('does not unescape entity-encoded tags when no English content provided', () => { + const input = '<code>ubuntu-latest</code>' + expect(fix(input, 'ko')).toBe(input) + }) + + test('removes bare code-fence wrapping from bold heading lines', () => { + const input = '```\n**다음은 작업을 다운로드하는 데 필요합니다.**\n```' + expect(fix(input, 'ko')).toBe('**다음은 작업을 다운로드하는 데 필요합니다.**') + }) + + test('removes bare code-fence wrapping from bold headings between real code blocks', () => { + const input = [ + '```shell copy', + 'github.com', + 'api.github.com', + '```', + '', + '```', + '**다음은 작업을 다운로드하는 데 필요합니다.**', + '```', + '', + '```shell copy', + 'codeload.github.com', + '```', + ].join('\n') + + const expected = [ + '```shell copy', + 'github.com', + 'api.github.com', + '```', + '', + '**다음은 작업을 다운로드하는 데 필요합니다.**', + '', + '```shell copy', + 'codeload.github.com', + '```', + ].join('\n') + + expect(fix(input, 'ko')).toBe(expected) + }) + + test('does not strip language-specified code fences with bold content', () => { + const input = '```shell\n**not a heading**\n```' + expect(fix(input, 'ko')).toBe(input) + }) }) // ─── EDGE CASES ────────────────────────────────────────────────────