-
Notifications
You must be signed in to change notification settings - Fork 3.9k
fix: preserve angle brackets inside fenced code blocks during sanitization #2308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,7 +12,12 @@ var policy *bluemonday.Policy | |
| var policyOnce sync.Once | ||
|
|
||
| func Sanitize(input string) string { | ||
| return FilterHTMLTags(FilterCodeFenceMetadata(FilterInvisibleCharacters(input))) | ||
| s := FilterInvisibleCharacters(input) | ||
| s = FilterCodeFenceMetadata(s) | ||
| s = protectCodeAngles(s) | ||
| s = FilterHTMLTags(s) | ||
| s = restoreCodeAngles(s) | ||
| return s | ||
| } | ||
|
|
||
| // FilterInvisibleCharacters removes invisible or control characters that should not appear | ||
|
|
@@ -207,3 +212,72 @@ func shouldRemoveRune(r rune) bool { | |
|
|
||
| return false | ||
| } | ||
|
|
||
| // codeLtPlaceholder and codeGtPlaceholder stand in for "<" and ">" inside code regions to shield them from | ||
| // the HTML sanitizer. Each is delimited by NUL bytes (\x00). They must not look like HTML tags themselves and | ||
| // must be unlikely to appear in real content; restoreCodeAngles maps every occurrence back to "<" or ">". | ||
| const ( | ||
| codeLtPlaceholder = "\x00CODELT\x00" | ||
| codeGtPlaceholder = "\x00CODEGT\x00" | ||
| ) | ||
|
|
||
| // protectCodeAngles replaces < and > with unique placeholders inside | ||
| // fenced code blocks so that bluemonday does not strip them as HTML tags. | ||
| // This must run after FilterCodeFenceMetadata (which cleans fence info | ||
| // strings) and before FilterHTMLTags. | ||
| func protectCodeAngles(input string) string { | ||
| if input == "" { | ||
| return input | ||
| } | ||
|
|
||
| lines := strings.Split(input, "\n") | ||
| insideFence := false | ||
| currentFenceLen := 0 | ||
|
|
||
| for i, line := range lines { | ||
| fenceIdx := strings.Index(line, "```") | ||
|
|
||
| if fenceIdx != -1 && !hasNonWhitespace(line[:fenceIdx]) { | ||
| fenceEnd := fenceIdx | ||
| for fenceEnd < len(line) && line[fenceEnd] == '`' { | ||
| fenceEnd++ | ||
| } | ||
| fenceLen := fenceEnd - fenceIdx | ||
|
|
||
| if fenceLen >= 3 { | ||
| if insideFence { | ||
| if currentFenceLen == 0 || fenceLen >= currentFenceLen { | ||
| // Valid closing fence (CommonMark: closing fence | ||
| // must be at least as long as the opening fence). | ||
| insideFence = false | ||
| currentFenceLen = 0 | ||
| continue | ||
| } | ||
| // Fence length too short — still inside code. | ||
| } else { | ||
| // Opening fence. | ||
| insideFence = true | ||
| currentFenceLen = fenceLen | ||
| continue | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if insideFence { | ||
| lines[i] = strings.ReplaceAll( | ||
| strings.ReplaceAll(line, "<", codeLtPlaceholder), | ||
| ">", codeGtPlaceholder, | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| return strings.Join(lines, "\n") | ||
| } | ||
|
|
||
| // restoreCodeAngles reverses the placeholder substitution performed by protectCodeAngles: every | ||
| // codeLtPlaceholder in input becomes "<" and every codeGtPlaceholder becomes ">", wherever it occurs. | ||
| func restoreCodeAngles(input string) string { | ||
| s := strings.ReplaceAll(input, codeLtPlaceholder, "<") | ||
| s = strings.ReplaceAll(s, codeGtPlaceholder, ">") | ||
| return s | ||
|
Comment on lines
+277
to
+282
|
||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`protectCodeAngles` re-implements fenced-code tracking instead of reusing the `sanitizeCodeFenceLine`/`FilterCodeFenceMetadata` logic, and it uses different closing-fence semantics (`>= currentFenceLen` here vs. exact-length matching in `sanitizeCodeFenceLine`). This divergence can cause the two passes to disagree about whether subsequent lines are inside a fence (e.g., with a longer closing fence), making future behavior brittle. Consider extracting a shared fence parser/toggler so both steps stay consistent (and so that only one place needs updating if the fence rules change).