From f89eb7e8678c44f0d1e6a82ee1b52fe9ed3bbc1e Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Fri, 26 Jun 2026 12:36:59 +1000 Subject: [PATCH 1/9] Render from_markdown via markdown-it-py Replace the hand-rolled Markdown parser in from_markdown() with markdown-it-py plus the standard footnote plugin, and a small renderer (mdrender.py) that maps the syntax tree to Substack's node schema. Node construction is centralised in a new nodes.py module so the schema lives in one place. Footnotes (including multi-paragraph definitions) come from the footnote plugin. Adds end-to-end from_markdown feature tests covering every documented feature. Two intentional, CommonMark-correct behaviour changes vs the old parser: consecutive '>' lines are one paragraph (blank '>' lines split them), and unreferenced footnote definitions are dropped rather than appended. --- README.md | 1 + pyproject.toml | 2 + substack/mdrender.py | 183 ++++++++ substack/nodes.py | 121 +++++ substack/post.py | 429 +----------------- tests/substack/test_footnotes.py | 11 +- tests/substack/test_from_markdown_features.py | 145 ++++++ tests/substack/test_post.py | 7 +- 8 files changed, 472 insertions(+), 427 deletions(-) create mode 100644 substack/mdrender.py create mode 100644 substack/nodes.py create mode 100644 tests/substack/test_from_markdown_features.py diff --git a/README.md b/README.md index cfe0611..8a42266 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ post.from_markdown(footnote_markdown, api=api) post.paragraph(content=[{"content": "Some claim."}]).footnote_anchor(1) post.footnote(1, "The note text, with **formatting** allowed.") + draft = api.post_draft(post.get_draft()) # set section (can only be done after first posting the draft) diff --git a/pyproject.toml b/pyproject.toml index 67f5a0d..68c91b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,8 @@ python = "<4.0,>=3.10" requests = "^2.32.0" python-dotenv = "^1.2.1" PyYAML = "^6.0" +markdown-it-py = "^3.0" 
+mdit-py-plugins = "^0.4" [tool.poetry.group.dev.dependencies] diff --git a/substack/mdrender.py b/substack/mdrender.py new file mode 100644 index 0000000..a3f5515 --- /dev/null +++ b/substack/mdrender.py @@ -0,0 +1,183 @@ +"""PROTOTYPE: Markdown -> Substack ProseMirror via markdown-it-py. + +This replaces the hand-rolled parser in Post.from_markdown() with a real +CommonMark parser (markdown-it-py) plus the standard footnote plugin, and a +small renderer that walks the syntax tree into Substack's node schema. + +Node construction goes through ``substack.nodes`` so the (undocumented) schema +lives in exactly one place. + +Not wired for production; imported by Post.from_markdown() when available so the +existing test-suite can be run against it for evaluation. +""" + +from __future__ import annotations + +from typing import Dict, List, Optional + +from markdown_it import MarkdownIt +from markdown_it.tree import SyntaxTreeNode +from mdit_py_plugins.footnote import footnote_plugin + +from substack import nodes +from substack.nodes import MarkType, NodeType + +_MARK_FOR = { + "strong": {"type": MarkType.STRONG}, + "em": {"type": MarkType.EM}, + "s": {"type": MarkType.STRIKETHROUGH}, +} + + +def _make_parser() -> MarkdownIt: + return MarkdownIt("commonmark").use(footnote_plugin).enable("strikethrough") + + +def _coalesce(out_nodes: List[Dict]) -> List[Dict]: + """Merge adjacent text nodes that carry identical marks (e.g. 
softbreaks).""" + merged: List[Dict] = [] + for node in out_nodes: + if ( + merged + and node.get("type") == NodeType.TEXT + and merged[-1].get("type") == NodeType.TEXT + and node.get("marks") == merged[-1].get("marks") + ): + merged[-1]["text"] += node["text"] + else: + merged.append(node) + return merged + + +def _render_inline(node: SyntaxTreeNode, marks: List[Dict]) -> List[Dict]: + """Render an inline subtree into a flat list of text / anchor nodes.""" + out: List[Dict] = [] + for child in node.children: + t = child.type + if t == "text": + if child.content: + out.append(nodes.text(child.content, marks)) + elif t == "code_inline": + out.append(nodes.text(child.content, marks + [nodes.code_mark()])) + elif t in _MARK_FOR: + out.extend(_render_inline(child, marks + [_MARK_FOR[t]])) + elif t == "link": + href = child.attrs.get("href", "") + out.extend(_render_inline(child, marks + [nodes.link_mark(href)])) + elif t in ("softbreak", "hardbreak"): + out.append(nodes.text(" ", marks)) + elif t == "footnote_ref": + out.append(nodes.footnote_anchor(child.meta["id"] + 1)) + elif t == "image": + # Inline images are rare in this schema; fall back to alt text. 
+ alt = child.attrs.get("alt") or "".join( + c.content for c in child.children if c.type == "text" + ) + if alt: + out.append(nodes.text(alt, marks)) + return _coalesce(out) + + +def _only_image(inline: SyntaxTreeNode) -> Optional[SyntaxTreeNode]: + """If an inline node is just an image (optionally wrapped in a link), return it.""" + kids = [c for c in inline.children if c.type != "softbreak"] + if len(kids) == 1 and kids[0].type == "image": + return kids[0] + if len(kids) == 1 and kids[0].type == "link": + inner = [c for c in kids[0].children if c.type != "softbreak"] + if len(inner) == 1 and inner[0].type == "image": + img = inner[0] + img._link_href = kids[0].attrs.get("href") # type: ignore[attr-defined] + return img + return None + + +def _captioned_image(img: SyntaxTreeNode, api) -> Dict: + src = img.attrs.get("src", "") + if src.startswith("/"): + src = src[1:] + if api is not None and not src.startswith("http"): + try: + src = api.get_image(src).get("url") + except Exception: + pass + # markdown-it stores the image alt text as the node's content, not in attrs. 
+ alt = img.content or img.attrs.get("alt") or None + return nodes.captioned_image( + src, + alt=alt, + href=getattr(img, "_link_href", None), + ) + + +def _render_block(node: SyntaxTreeNode, api) -> List[Dict]: + """Render a block-level node into zero or more Substack nodes.""" + t = node.type + + if t == "paragraph": + inline = node.children[0] + img = _only_image(inline) + if img is not None: + return [_captioned_image(img, api)] + return [nodes.paragraph(_render_inline(inline, []))] + + if t == "heading": + level = int(node.tag[1]) + return [nodes.heading(_render_inline(node.children[0], []), level=level)] + + if t == "hr": + return [nodes.horizontal_rule()] + + if t in ("fence", "code_block"): + return [ + nodes.code_block( + node.content.rstrip("\n"), language=node.info.strip() or None + ) + ] + + if t == "blockquote": + paras: List[Dict] = [] + for child in node.children: + paras.extend(_render_block(child, api)) + return [nodes.blockquote(paras)] + + if t == "bullet_list": + return [nodes.bullet_list(_render_list_items(node, api))] + + if t == "ordered_list": + return [nodes.ordered_list(_render_list_items(node, api))] + + if t == "footnote_block": + out = [] + for fn in node.children: + number = fn.meta["id"] + 1 + paras = [ + nodes.paragraph(_render_inline(child.children[0], [])) + for child in fn.children + if child.type == "paragraph" + ] + out.append(nodes.footnote(number, paras)) + return out + + return [] + + +def _render_list_items(list_node: SyntaxTreeNode, api) -> List[Dict]: + items = [] + for li in list_node.children: + # A list_item built by nodes.list_item wraps inline content in a single + # paragraph; here items may already contain block nodes, so build directly. 
+ content: List[Dict] = [] + for child in li.children: + content.extend(_render_block(child, api)) + items.append({"type": NodeType.LIST_ITEM, "content": content}) + return items + + +def markdown_to_doc(markdown_content: str, api=None) -> List[Dict]: + """Convert Markdown into a list of Substack ProseMirror block nodes.""" + tree = SyntaxTreeNode(_make_parser().parse(markdown_content)) + out: List[Dict] = [] + for node in tree.children: + out.extend(_render_block(node, api)) + return out diff --git a/substack/nodes.py b/substack/nodes.py new file mode 100644 index 0000000..a0e2cfe --- /dev/null +++ b/substack/nodes.py @@ -0,0 +1,121 @@ +"""ProseMirror node builders for Substack documents. + +PROTOTYPE: this module centralises the (undocumented) Substack ProseMirror +schema in one place. Today the node-type strings ("paragraph", "footnoteAnchor", +"image2", ...) and their shapes are scattered across post.py as inline dict +literals. Pulling them here gives: + + * one source of truth for node shapes (so a schema change is a one-line fix), + * discoverable, typed constructors instead of bare dict literals, + * a natural seam for validation. + +The builders intentionally return plain dicts so they stay 100% compatible with +the existing draft_body structure. 
+""" + +from __future__ import annotations + +from typing import Dict, List, Optional + + +class NodeType: + DOC = "doc" + PARAGRAPH = "paragraph" + HEADING = "heading" + TEXT = "text" + BLOCKQUOTE = "blockquote" + CODE_BLOCK = "codeBlock" + HORIZONTAL_RULE = "horizontal_rule" + BULLET_LIST = "bullet_list" + ORDERED_LIST = "ordered_list" + LIST_ITEM = "list_item" + FOOTNOTE = "footnote" + FOOTNOTE_ANCHOR = "footnoteAnchor" + CAPTIONED_IMAGE = "captionedImage" + + +class MarkType: + STRONG = "strong" + EM = "em" + CODE = "code" + STRIKETHROUGH = "strikethrough" + LINK = "link" + + +def code_mark() -> Dict: + return {"type": MarkType.CODE} + + +def text(value: str, marks: Optional[List[Dict]] = None) -> Dict: + node: Dict = {"type": NodeType.TEXT, "text": value} + if marks: + node["marks"] = marks + return node + + +def link_mark(href: str) -> Dict: + return {"type": MarkType.LINK, "attrs": {"href": href}} + + +def paragraph(content: Optional[List[Dict]] = None) -> Dict: + return {"type": NodeType.PARAGRAPH, "content": content or []} + + +def heading(content: List[Dict], level: int = 1) -> Dict: + return {"type": NodeType.HEADING, "content": content, "attrs": {"level": level}} + + +def horizontal_rule() -> Dict: + return {"type": NodeType.HORIZONTAL_RULE} + + +def blockquote(paragraphs: List[Dict]) -> Dict: + node: Dict = {"type": NodeType.BLOCKQUOTE} + if paragraphs: + node["content"] = paragraphs + return node + + +def list_item(content_nodes: List[Dict]) -> Dict: + return { + "type": NodeType.LIST_ITEM, + "content": [paragraph(content_nodes)], + } + + +def bullet_list(items: List[Dict]) -> Dict: + return {"type": NodeType.BULLET_LIST, "content": items} + + +def ordered_list(items: List[Dict]) -> Dict: + return {"type": NodeType.ORDERED_LIST, "content": items} + + +def code_block(code: str, language: Optional[str] = None) -> Dict: + node: Dict = {"type": NodeType.CODE_BLOCK, "content": [text(code)]} + if language: + node["attrs"] = {"language": language} + return 
node + + +def captioned_image( + src: str, alt: Optional[str] = None, href: Optional[str] = None +) -> Dict: + node: Dict = {"type": NodeType.CAPTIONED_IMAGE, "src": src} + if alt: + node["alt"] = alt + if href: + node["href"] = href + return node + + +def footnote_anchor(number: int) -> Dict: + return {"type": NodeType.FOOTNOTE_ANCHOR, "attrs": {"number": number}} + + +def footnote(number: int, paragraphs: List[Dict]) -> Dict: + return { + "type": NodeType.FOOTNOTE, + "attrs": {"number": number}, + "content": paragraphs or [paragraph()], + } diff --git a/substack/post.py b/substack/post.py index 690b402..f325048 100644 --- a/substack/post.py +++ b/substack/post.py @@ -11,10 +11,7 @@ __all__ = ["Post", "parse_inline", "tokens_to_text_nodes"] from substack.exceptions import SectionNotExistsException - -# Markdown footnotes: ``text.[^label]`` references and ``[^label]: definition`` lines. -FOOTNOTE_REFERENCE_PATTERN = re.compile(r"\[\^([^\]]+)\]") -FOOTNOTE_DEFINITION_PATTERN = re.compile(r"^\[\^([^\]]+)\]:\s?(.*)$") +from substack import nodes def tokens_to_text_nodes(tokens: List[Dict]) -> List[Dict]: @@ -560,7 +557,7 @@ def footnote_anchor(self, number: int): """ content = self.draft_body["content"][-1].get("content", []) - content += [{"type": "footnoteAnchor", "attrs": {"number": number}}] + content += [nodes.footnote_anchor(number)] self.draft_body["content"][-1]["content"] = content return self @@ -586,147 +583,19 @@ def footnote(self, number: int, content=None): for chunk in re.split(r"\n\s*\n", content): chunk = chunk.strip() if chunk: - paragraphs.append( - {"type": "paragraph", "content": tokens_to_text_nodes(parse_inline(chunk))} - ) + paragraphs.append(nodes.paragraph(tokens_to_text_nodes(parse_inline(chunk)))) elif isinstance(content, list): # Accept either parse_inline tokens ({"content": ...}) or text nodes. 
if content and content[0].get("type") == "text": text_nodes = content else: text_nodes = tokens_to_text_nodes(content) - paragraphs.append({"type": "paragraph", "content": text_nodes}) - - if not paragraphs: - paragraphs = [{"type": "paragraph", "content": []}] + paragraphs.append(nodes.paragraph(text_nodes)) - node: Dict = { - "type": "footnote", - "attrs": {"number": number}, - "content": paragraphs, - } + node: Dict = nodes.footnote(number, paragraphs) self.draft_body["content"] = self.draft_body.get("content", []) + [node] return self - @staticmethod - def _extract_footnote_definitions(markdown_content: str): - """ - - Pull ``[^label]: definition`` lines out of the Markdown. - - Definitions may wrap onto indented continuation lines and may contain - multiple paragraphs (blank line followed by an indented block). Returns - the body with definitions removed plus a {label: definition_text} mapping, - where paragraphs are separated by a blank line. - - """ - lines = markdown_content.split("\n") - body_lines: List[str] = [] - definitions: Dict[str, str] = {} - in_code_fence = False - i = 0 - while i < len(lines): - # Track fenced code blocks so footnote-like lines inside them are - # left untouched. - if lines[i].lstrip().startswith("```"): - in_code_fence = not in_code_fence - body_lines.append(lines[i]) - i += 1 - continue - match = None if in_code_fence else FOOTNOTE_DEFINITION_PATTERN.match(lines[i]) - if match: - label, first = match.group(1), match.group(2) - paragraphs: List[str] = [] - current = [first.strip()] if first.strip() else [] - i += 1 - while i < len(lines): - line = lines[i] - if line.strip() == "": - # A blank line stays in the footnote only if the next - # non-empty line is indented (a further paragraph). 
- nxt = i + 1 - if ( - nxt < len(lines) - and lines[nxt].strip() - and lines[nxt][:1] in (" ", "\t") - ): - if current: - paragraphs.append(" ".join(current)) - current = [] - i += 1 - continue - break - if line[:1] in (" ", "\t"): - current.append(line.strip()) - i += 1 - else: - break - if current: - paragraphs.append(" ".join(current)) - definitions[label] = "\n\n".join(paragraphs) - else: - body_lines.append(lines[i]) - i += 1 - return "\n".join(body_lines), definitions - - @staticmethod - def _number_footnotes(markdown_content: str, definitions: Dict[str, str]): - """Number footnotes by order of first inline reference in the body.""" - order: List[str] = [] - for match in FOOTNOTE_REFERENCE_PATTERN.finditer(markdown_content): - label = match.group(1) - if label in definitions and label not in order: - order.append(label) - # Defined-but-unreferenced footnotes go last, in definition order. - for label in definitions: - if label not in order: - order.append(label) - return {label: index + 1 for index, label in enumerate(order)} - - def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]): - """Recursively replace ``[^label]`` in text nodes with footnoteAnchor nodes.""" - # Never rewrite the contents of a code block. - if node.get("type") == "codeBlock": - return - content = node.get("content") - if not isinstance(content, list): - return - new_content: List[Dict] = [] - for child in content: - text = child.get("text", "") - has_code_mark = any( - mark.get("type") == "code" for mark in (child.get("marks") or []) - ) - if ( - child.get("type") == "text" - and not has_code_mark - and FOOTNOTE_REFERENCE_PATTERN.search(text) - ): - marks = child.get("marks") - last = 0 - for match in FOOTNOTE_REFERENCE_PATTERN.finditer(text): - label = match.group(1) - if label not in numbers_by_label: - continue # Unknown label: leave the literal text in place. 
- if match.start() > last: - segment = {"type": "text", "text": text[last:match.start()]} - if marks: - segment["marks"] = marks - new_content.append(segment) - new_content.append( - {"type": "footnoteAnchor", "attrs": {"number": numbers_by_label[label]}} - ) - last = match.end() - if last < len(text): - segment = {"type": "text", "text": text[last:]} - if marks: - segment["marks"] = marks - new_content.append(segment) - else: - self._inject_footnote_anchors(child, numbers_by_label) - new_content.append(child) - node["content"] = new_content - def from_markdown(self, markdown_content: str, api=None): """ Parse Markdown content and add it to the post. @@ -760,290 +629,8 @@ def from_markdown(self, markdown_content: str, api=None): >>> post = Post("Title", "Subtitle", user_id) >>> post.from_markdown("# Heading\\n\\nThis is **bold** text with [a link](https://example.com).") """ - # Footnotes: extract ``[^label]: ...`` definitions and number them by - # order of first reference before parsing the rest of the body. 
- markdown_content, footnote_definitions = self._extract_footnote_definitions( - markdown_content - ) - footnote_numbers = self._number_footnotes(markdown_content, footnote_definitions) - - lines = markdown_content.split("\n") - blocks = [] - current_block: List[str] = [] - in_code_block = False - code_block_language = None - - for line in lines: - # Check for fenced code block start/end - if line.strip().startswith("```"): - if in_code_block: - # End of code block - if current_block: - blocks.append({ - "type": "code", - "language": code_block_language, - "content": "\n".join(current_block) - }) - current_block = [] - in_code_block = False - code_block_language = None - else: - # Start of code block - if current_block: - blocks.append({"type": "text", "content": "\n".join(current_block)}) - current_block = [] - # Extract language if specified - language = line.strip()[3:].strip() - code_block_language = language if language else None - in_code_block = True - continue - - if in_code_block: - # Inside code block - collect lines as-is - current_block.append(line) - else: - # Regular content - if line.strip() == "": - # Empty line - end current block if it has content - if current_block: - blocks.append({"type": "text", "content": "\n".join(current_block)}) - current_block = [] - else: - current_block.append(line) - - # Add any remaining content - if current_block: - if in_code_block: - blocks.append({ - "type": "code", - "language": code_block_language, - "content": "\n".join(current_block) - }) - else: - blocks.append({"type": "text", "content": "\n".join(current_block)}) - - # Process blocks - for block in blocks: - if block["type"] == "code": - # Add code block - code_content = block.get("content", "").strip() - if code_content: - # Substack uses "codeBlock" type - code_attrs = {} - if block.get("language"): - code_attrs["language"] = block["language"] - self.add({ - "type": "codeBlock", - "content": code_content, # Pass as string, code_block method will handle it 
- "attrs": code_attrs - }) - else: - # Process text block - text_content = block.get("content", "").strip() - if not text_content: - continue - - # Check for horizontal rule: ---, ***, ___ - if re.match(r'^(\*{3,}|-{3,}|_{3,})\s*$', text_content): - self.horizontal_rule() - continue - - # Process headings (lines starting with '#' characters) - if text_content.startswith("#"): - level = len(text_content) - len(text_content.lstrip("#")) - heading_text = text_content.lstrip("#").strip() - if heading_text: # Only add if there's actual text - self.heading(content=heading_text, level=min(level, 6)) - - # Process images using Markdown image syntax: ![Alt](URL) - # Also handle linked images: [![Alt](image_url)](link_url) - elif text_content.startswith("!") or (text_content.startswith("[") and "![" in text_content): - # Check for linked image first: [![alt](img)](link) - linked_image_match = re.match(r'\[!\[([^\]]*)\]\(([^)]+)\)\]\(([^)]+)\)', text_content) - if linked_image_match: - # Linked image - create image with href - alt_text = linked_image_match.group(1) - image_url = linked_image_match.group(2) - link_url = linked_image_match.group(3) - - # Adjust image URL if it starts with a slash - image_url = image_url[1:] if image_url.startswith("/") else image_url - - # If api is provided and image_url is a local file, upload it - if api is not None: - try: - image = api.get_image(image_url) - image_url = image.get("url") - except Exception: - # If upload fails, use original URL - pass - - self.add({ - "type": "captionedImage", - "src": image_url, - "alt": alt_text, - "href": link_url - }) - else: - # Regular image: ![Alt](URL) - match = re.match(r"!\[.*?\]\((.*?)\)", text_content) - if match: - image_url = match.group(1) - # Adjust image URL if it starts with a slash - image_url = image_url[1:] if image_url.startswith("/") else image_url - - # If api is provided and image_url is a local file, upload it - if api is not None: - try: - image = api.get_image(image_url) - 
image_url = image.get("url") - except Exception: - # If upload fails, use original URL - pass - - self.add({"type": "captionedImage", "src": image_url}) - - # Process paragraphs, bullet lists, ordered lists, or blockquotes - else: - if "\n" in text_content: - # Process each line, grouping consecutive bullets/ordered items - # into list nodes and consecutive blockquote lines into a - # single blockquote node. - pending_bullets: List[List[Dict]] = [] - pending_quotes: List[str] = [] - pending_ordered: List[List[Dict]] = [] - - def flush_bullets(): - if not pending_bullets: - return - list_items = [] - for bullet_nodes in pending_bullets: - list_items.append({ - "type": "list_item", - "content": [{"type": "paragraph", "content": bullet_nodes}], - }) - self.draft_body["content"].append( - {"type": "bullet_list", "content": list_items} - ) - pending_bullets.clear() - - def flush_quotes(): - if not pending_quotes: - return - paragraphs: List[Dict] = [] - for quote_line in pending_quotes: - tokens = parse_inline(quote_line) - text_nodes = tokens_to_text_nodes(tokens) - if text_nodes: - paragraphs.append({"type": "paragraph", "content": text_nodes}) - node: Dict = {"type": "blockquote"} - if paragraphs: - node["content"] = paragraphs - self.draft_body["content"].append(node) - pending_quotes.clear() - - def flush_ordered(): - if not pending_ordered: - return - list_items = [] - for item_nodes in pending_ordered: - list_items.append({ - "type": "list_item", - "content": [{"type": "paragraph", "content": item_nodes}], - }) - self.draft_body["content"].append( - {"type": "ordered_list", "content": list_items} - ) - pending_ordered.clear() - - for line in text_content.split("\n"): - line = line.strip() - if not line: - flush_bullets() - flush_ordered() - flush_quotes() - continue - - # Check for blockquote marker - if line.startswith("> ") or line == ">": - flush_bullets() - flush_ordered() - quote_text = line[2:] if line.startswith("> ") else "" - 
pending_quotes.append(quote_text) - continue - - # Check for ordered list marker - ordered_match = re.match(r'^(\d+)\.\s+(.*)', line) - if ordered_match: - flush_bullets() - flush_quotes() - item_text = ordered_match.group(2).strip() - tokens = parse_inline(item_text) - text_nodes = tokens_to_text_nodes(tokens) - if text_nodes: - pending_ordered.append(text_nodes) - continue - - # Check for bullet marker - bullet_text = None - if line.startswith("* "): - bullet_text = line[2:].strip() - elif line.startswith("- "): - bullet_text = line[2:].strip() - elif line.startswith("*") and not line.startswith("**"): - bullet_text = line[1:].strip() - - if bullet_text is not None: - flush_ordered() - flush_quotes() - tokens = parse_inline(bullet_text) - text_nodes = tokens_to_text_nodes(tokens) - if text_nodes: - pending_bullets.append(text_nodes) - else: - flush_bullets() - flush_ordered() - flush_quotes() - tokens = parse_inline(line) - self.add({"type": "paragraph", "content": tokens}) - - flush_bullets() - flush_ordered() - flush_quotes() - else: - # Single line — blockquote, ordered list, or paragraph - if text_content.startswith("> ") or text_content == ">": - quote_text = text_content[2:] if text_content.startswith("> ") else "" - tokens = parse_inline(quote_text) - text_nodes = tokens_to_text_nodes(tokens) - para = {"type": "paragraph", "content": text_nodes} if text_nodes else {"type": "paragraph"} - self.draft_body["content"] = self.draft_body.get("content", []) + [ - {"type": "blockquote", "content": [para]} - ] - - elif re.match(r'^(\d+)\.\s+(.*)', text_content): - ordered_match = re.match(r'^(\d+)\.\s+(.*)', text_content) - item_text = ordered_match.group(2).strip() - tokens = parse_inline(item_text) - text_nodes = tokens_to_text_nodes(tokens) - if text_nodes: - list_item = { - "type": "list_item", - "content": [{"type": "paragraph", "content": text_nodes}], - } - self.draft_body["content"].append( - {"type": "ordered_list", "content": [list_item]} - ) - - else: - 
tokens = parse_inline(text_content) - self.add({"type": "paragraph", "content": tokens}) - - # Footnotes: turn ``[^label]`` references into inline anchors, then append - # the footnote blocks in numbered order. - if footnote_numbers: - self._inject_footnote_anchors(self.draft_body, footnote_numbers) - for label, number in sorted(footnote_numbers.items(), key=lambda item: item[1]): - self.footnote(number, footnote_definitions[label]) + from substack import mdrender + rendered = mdrender.markdown_to_doc(markdown_content, api=api) + self.draft_body["content"] = self.draft_body.get("content", []) + rendered return self diff --git a/tests/substack/test_footnotes.py b/tests/substack/test_footnotes.py index 172d152..8cde174 100644 --- a/tests/substack/test_footnotes.py +++ b/tests/substack/test_footnotes.py @@ -144,11 +144,18 @@ def test_multiline_definition(self): text = footnotes(post)[0]["content"][0]["content"][0]["text"] assert text == "First line continued on the next line." - def test_unreferenced_definition_still_appended(self): + def test_unreferenced_definition_is_dropped(self): + # CommonMark footnote semantics: a definition that is never referenced is + # not rendered, and must not leak into the body text. post = make_post() post.from_markdown("No references here.\n\n[^1]: Orphan note.") assert len(anchors(post)) == 0 - assert len(footnotes(post)) == 1 + assert len(footnotes(post)) == 0 + paragraphs = find_nodes(post.draft_body, "paragraph") + body_text = " ".join( + n.get("text", "") for para in paragraphs for n in para.get("content", []) + ) + assert "Orphan note" not in body_text def test_reference_without_definition_left_as_text(self): post = make_post() diff --git a/tests/substack/test_from_markdown_features.py b/tests/substack/test_from_markdown_features.py new file mode 100644 index 0000000..a52ed3f --- /dev/null +++ b/tests/substack/test_from_markdown_features.py @@ -0,0 +1,145 @@ +"""End-to-end coverage of every feature listed in Post.from_markdown(). 
+ +These exercise the renderer through from_markdown() (as opposed to the +parse_inline() unit tests), so they cover the actual Markdown -> Substack path. +""" + +from substack.post import Post + + +def make_post(): + return Post(title="T", subtitle="S", user_id=1) + + +def body(post): + return post.draft_body["content"] + + +def first_para_nodes(post): + return body(post)[0]["content"] + + +def marked(nodes, text): + """Return the marks on the text node with the given text.""" + node = next(n for n in nodes if n.get("text") == text) + return node.get("marks", []) + + +class TestInlineFormatting: + def test_bold(self): + post = make_post() + post.from_markdown("x **b** y") + assert {"type": "strong"} in marked(first_para_nodes(post), "b") + + def test_italic(self): + post = make_post() + post.from_markdown("x *i* y") + assert {"type": "em"} in marked(first_para_nodes(post), "i") + + def test_bold_italic(self): + post = make_post() + post.from_markdown("***bi***") + marks = marked(first_para_nodes(post), "bi") + assert {"type": "strong"} in marks + assert {"type": "em"} in marks + + def test_inline_code(self): + post = make_post() + post.from_markdown("use `code` now") + assert marked(first_para_nodes(post), "code") == [{"type": "code"}] + + def test_strikethrough(self): + post = make_post() + post.from_markdown("a ~~s~~ b") + assert marked(first_para_nodes(post), "s") == [{"type": "strikethrough"}] + + def test_link(self): + post = make_post() + post.from_markdown("[t](https://e.com)") + assert marked(first_para_nodes(post), "t") == [ + {"type": "link", "attrs": {"href": "https://e.com"}} + ] + + def test_multiple_marks_in_one_paragraph(self): + post = make_post() + post.from_markdown("**b** and *i* and `c` and [l](https://e.com)") + nodes = first_para_nodes(post) + assert {"type": "strong"} in marked(nodes, "b") + assert {"type": "em"} in marked(nodes, "i") + assert marked(nodes, "c") == [{"type": "code"}] + assert marked(nodes, "l")[0]["type"] == "link" + + 
+class TestBlocks: + def test_all_heading_levels(self): + for level in range(1, 7): + post = make_post() + post.from_markdown("#" * level + " Heading") + block = body(post)[0] + assert block["type"] == "heading" + assert block["attrs"]["level"] == level + + def test_paragraph(self): + post = make_post() + post.from_markdown("Just a plain paragraph.") + block = body(post)[0] + assert block["type"] == "paragraph" + assert block["content"][0]["text"] == "Just a plain paragraph." + + def test_bullet_list(self): + post = make_post() + post.from_markdown("- a\n- b") + block = body(post)[0] + assert block["type"] == "bullet_list" + assert len(block["content"]) == 2 + assert block["content"][0]["type"] == "list_item" + + def test_ordered_list(self): + post = make_post() + post.from_markdown("1. a\n2. b") + block = body(post)[0] + assert block["type"] == "ordered_list" + assert len(block["content"]) == 2 + + def test_code_block_with_language(self): + post = make_post() + post.from_markdown("```python\nprint('hi')\n```") + block = body(post)[0] + assert block["type"] == "codeBlock" + assert block["attrs"]["language"] == "python" + assert block["content"][0]["text"] == "print('hi')" + + def test_code_block_without_language(self): + post = make_post() + post.from_markdown("```\nplain\n```") + block = body(post)[0] + assert block["type"] == "codeBlock" + assert "attrs" not in block or "language" not in block.get("attrs", {}) + + def test_horizontal_rule(self): + post = make_post() + post.from_markdown("a\n\n---\n\nb") + assert [n["type"] for n in body(post)] == ["paragraph", "horizontal_rule", "paragraph"] + + def test_blockquote(self): + post = make_post() + post.from_markdown("> quote") + block = body(post)[0] + assert block["type"] == "blockquote" + assert block["content"][0]["type"] == "paragraph" + + def test_image(self): + post = make_post() + post.from_markdown("![alt](https://example.com/img.png)") + block = body(post)[0] + assert block["type"] == "captionedImage" + 
assert block["src"] == "https://example.com/img.png" + assert block["alt"] == "alt" + + def test_linked_image(self): + post = make_post() + post.from_markdown("[![alt](https://i/x.png)](https://link)") + block = body(post)[0] + assert block["type"] == "captionedImage" + assert block["src"] == "https://i/x.png" + assert block["href"] == "https://link" diff --git a/tests/substack/test_post.py b/tests/substack/test_post.py index 701c2a2..c619d88 100644 --- a/tests/substack/test_post.py +++ b/tests/substack/test_post.py @@ -97,15 +97,14 @@ def test_single_blockquote_line(self): assert bq["content"][0]["content"][0]["text"] == "This is a quote" def test_multiline_blockquote_grouped(self): - """Consecutive '>' lines become a single blockquote with multiple paragraphs.""" + """Consecutive '>' lines are one paragraph (CommonMark); blank '>' lines split them.""" post = Post(title="T", subtitle="S", user_id=1) post.from_markdown("> Line one\n> Line two\n> Line three") body = json.loads(post.get_draft()["draft_body"]) bq = body["content"][0] assert bq["type"] == "blockquote" - assert len(bq["content"]) == 3 - texts = [p["content"][0]["text"] for p in bq["content"]] - assert texts == ["Line one", "Line two", "Line three"] + assert len(bq["content"]) == 1 + assert bq["content"][0]["content"][0]["text"] == "Line one Line two Line three" def test_blockquote_separated_by_blank_line(self): """A blank line between '>' groups creates two separate blockquotes.""" From d5a438e0eede9b0eff215f6c9957f200104033c8 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Sat, 27 Jun 2026 20:27:13 +1000 Subject: [PATCH 2/9] Fix captionedImage to preserve nested image2.attrs structure Restore the schema used by post.py's captioned_image() method: captionedImage wraps an image2 node whose attrs dict carries src, alt, href and the remaining layout fields. Update mdrender tests to match. 
Co-Authored-By: Claude Sonnet 4.6 --- substack/nodes.py | 29 +++++++++++++++---- tests/substack/test_from_markdown_features.py | 12 +++++--- 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/substack/nodes.py b/substack/nodes.py index a0e2cfe..ad71f81 100644 --- a/substack/nodes.py +++ b/substack/nodes.py @@ -101,12 +101,29 @@ def code_block(code: str, language: Optional[str] = None) -> Dict: def captioned_image( src: str, alt: Optional[str] = None, href: Optional[str] = None ) -> Dict: - node: Dict = {"type": NodeType.CAPTIONED_IMAGE, "src": src} - if alt: - node["alt"] = alt - if href: - node["href"] = href - return node + return { + "type": NodeType.CAPTIONED_IMAGE, + "content": [ + { + "type": "image2", + "attrs": { + "src": src, + "fullscreen": False, + "imageSize": "normal", + "height": 819, + "width": 1456, + "resizeWidth": 728, + "bytes": None, + "alt": alt, + "title": None, + "type": None, + "href": href, + "belowTheFold": False, + "internalRedirect": None, + }, + } + ], + } def footnote_anchor(number: int) -> Dict: diff --git a/tests/substack/test_from_markdown_features.py b/tests/substack/test_from_markdown_features.py index a52ed3f..7fc345b 100644 --- a/tests/substack/test_from_markdown_features.py +++ b/tests/substack/test_from_markdown_features.py @@ -133,13 +133,17 @@ def test_image(self): post.from_markdown("![alt](https://example.com/img.png)") block = body(post)[0] assert block["type"] == "captionedImage" - assert block["src"] == "https://example.com/img.png" - assert block["alt"] == "alt" + assert block["content"][0]["type"] == "image2" + attrs = block["content"][0]["attrs"] + assert attrs["src"] == "https://example.com/img.png" + assert attrs["alt"] == "alt" def test_linked_image(self): post = make_post() post.from_markdown("[![alt](https://i/x.png)](https://link)") block = body(post)[0] assert block["type"] == "captionedImage" - assert block["src"] == "https://i/x.png" - assert block["href"] == "https://link" + assert 
block["content"][0]["type"] == "image2" + attrs = block["content"][0]["attrs"] + assert attrs["src"] == "https://i/x.png" + assert attrs["href"] == "https://link" From 0ec3bd7aad2bd579c16022ca475b33ed6c3297f0 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Sat, 27 Jun 2026 20:52:09 +1000 Subject: [PATCH 3/9] Regenerate poetry.lock with markdown-it-py and mdit-py-plugins Co-Authored-By: Claude Sonnet 4.6 --- poetry.lock | 57 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/poetry.lock b/poetry.lock index 787b983..c22db48 100644 --- a/poetry.lock +++ b/poetry.lock @@ -21,7 +21,7 @@ version = "0.7.0" description = "Reusable constraint types to use with typing.Annotated" optional = false python-versions = ">=3.8" -groups = ["dev", "mcp"] +groups = ["mcp"] files = [ {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, @@ -951,14 +951,14 @@ type = ["pygobject-stubs", "pytest-mypy (>=1.0.1)", "shtab", "types-pywin32"] [[package]] name = "markdown-it-py" -version = "4.0.0" +version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" 
optional = false -python-versions = ">=3.10" -groups = ["mcp"] +python-versions = ">=3.8" +groups = ["main", "mcp"] files = [ - {file = "markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147"}, - {file = "markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3"}, + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, ] [package.dependencies] @@ -966,12 +966,13 @@ mdurl = ">=0.1,<1.0" [package.extras] benchmarking = ["psutil", "pytest", "pytest-benchmark"] -compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "markdown-it-pyrs", "mistletoe (>=1.0,<2.0)", "mistune (>=3.0,<4.0)", "panflute (>=2.3,<3.0)"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] linkify = ["linkify-it-py (>=1,<3)"] -plugins = ["mdit-py-plugins (>=0.5.0)"] +plugins = ["mdit-py-plugins"] profiling = ["gprof2dot"] -rtd = ["ipykernel", "jupyter_sphinx", "mdit-py-plugins (>=0.5.0)", "myst-parser", "pyyaml", "sphinx", "sphinx-book-theme (>=1.0,<2.0)", "sphinx-copybutton", "sphinx-design"] -testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions", "requests"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] [[package]] name = "mcp" @@ -1006,13 +1007,33 @@ cli = ["python-dotenv (>=1.0.0)", "typer (>=0.16.0)"] rich = ["rich (>=13.9.4)"] ws = ["websockets (>=15.0.1)"] +[[package]] +name = "mdit-py-plugins" +version = "0.4.2" +description = "Collection of plugins for 
markdown-it-py" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"}, + {file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0.0,<4.0.0" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["myst-parser", "sphinx-book-theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" optional = false python-versions = ">=3.7" -groups = ["mcp"] +groups = ["main", "mcp"] files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, @@ -1160,7 +1181,7 @@ version = "2.12.5" description = "Data validation using Python type hints" optional = false python-versions = ">=3.9" -groups = ["dev", "mcp"] +groups = ["mcp"] files = [ {file = "pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d"}, {file = "pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49"}, @@ -1183,7 +1204,7 @@ version = "2.41.5" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.9" -groups = ["dev", "mcp"] +groups = ["mcp"] files = [ {file = "pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146"}, {file = "pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2"}, @@ -1317,7 +1338,7 @@ version = "2.14.2" 
description = "Settings management using Pydantic" optional = false python-versions = ">=3.10" -groups = ["dev", "mcp"] +groups = ["mcp"] files = [ {file = "pydantic_settings-2.14.2-py3-none-any.whl", hash = "sha256:a20c97b37910b6550d5ea50fbcc2d4187defe58cd57070b73863d069419c9440"}, {file = "pydantic_settings-2.14.2.tar.gz", hash = "sha256:c19dd64b19097f1de80184f0cc7b0272a13ae6e170cbf240a3e27e381ed14a5f"}, @@ -1387,7 +1408,7 @@ version = "1.2.2" description = "Read key-value pairs from a .env file and set them as environment variables" optional = false python-versions = ">=3.10" -groups = ["main", "dev", "mcp"] +groups = ["main", "mcp"] files = [ {file = "python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a"}, {file = "python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3"}, @@ -1860,7 +1881,7 @@ version = "4.15.0" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" -groups = ["dev", "mcp"] +groups = ["mcp"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, @@ -1872,7 +1893,7 @@ version = "0.4.2" description = "Runtime typing introspection tools" optional = false python-versions = ">=3.9" -groups = ["dev", "mcp"] +groups = ["mcp"] files = [ {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, @@ -2147,4 +2168,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "<4.0,>=3.10" -content-hash = 
"add336e7fc3c6fa9a72fb7e0b3e2950d49783e2eeb82e1783c61de0c6f09054f" +content-hash = "11a5871352eb39ede1d5316615d0d98fb5da8481a3b3a294ba6a772de59c4420" From 1feff2d52af7a2852dff9233d394ec64bb96db2c Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Sat, 27 Jun 2026 20:59:56 +1000 Subject: [PATCH 4/9] Remove prototype/hedging language from module docstrings Strip the PROTOTYPE: prefix and "not wired for production" disclaimer from nodes.py and mdrender.py now that the direction is approved. Co-Authored-By: Claude Sonnet 4.6 --- substack/mdrender.py | 11 ++++------- substack/nodes.py | 8 ++++---- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/substack/mdrender.py b/substack/mdrender.py index a3f5515..092541e 100644 --- a/substack/mdrender.py +++ b/substack/mdrender.py @@ -1,14 +1,11 @@ -"""PROTOTYPE: Markdown -> Substack ProseMirror via markdown-it-py. +"""Markdown -> Substack ProseMirror via markdown-it-py. -This replaces the hand-rolled parser in Post.from_markdown() with a real -CommonMark parser (markdown-it-py) plus the standard footnote plugin, and a -small renderer that walks the syntax tree into Substack's node schema. +Implements Post.from_markdown() using a real CommonMark parser (markdown-it-py) +plus the standard footnote plugin, with a small renderer that walks the syntax +tree into Substack's node schema. Node construction goes through ``substack.nodes`` so the (undocumented) schema lives in exactly one place. - -Not wired for production; imported by Post.from_markdown() when available so the -existing test-suite can be run against it for evaluation. """ from __future__ import annotations diff --git a/substack/nodes.py b/substack/nodes.py index ad71f81..51cf3a9 100644 --- a/substack/nodes.py +++ b/substack/nodes.py @@ -1,9 +1,9 @@ """ProseMirror node builders for Substack documents. -PROTOTYPE: this module centralises the (undocumented) Substack ProseMirror -schema in one place. 
Today the node-type strings ("paragraph", "footnoteAnchor", -"image2", ...) and their shapes are scattered across post.py as inline dict -literals. Pulling them here gives: +Centralises the (undocumented) Substack ProseMirror schema in one place. +The node-type strings ("paragraph", "footnoteAnchor", "image2", ...) and +their shapes live here rather than as inline dict literals scattered across +post.py, giving: * one source of truth for node shapes (so a schema change is a one-line fix), * discoverable, typed constructors instead of bare dict literals, From 890d31a26589351bcf9659a1c7e44d860e0d6634 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 29 Jun 2026 12:01:41 +1000 Subject: [PATCH 5/9] Add end-to-end Markdown round-trip test against the live API Reads a feature-complete Markdown fixture (headings, inline formatting, links, captioned + linked images, code blocks with footnote-like text, blockquotes, lists, horizontal rules, and numeric/named/multi-paragraph footnotes), posts it as a real draft, retrieves it, and compares the normalised stored document against a saved golden file. Auth comes from COOKIES_STRING/COOKIES_PATH or EMAIL+PASSWORD; the test is skipped when no credentials are configured. Image src values are normalised since they vary per upload. 
--- .../fixtures/full_features.expected.json | 457 ++++++++++++++++++ tests/substack/fixtures/full_features.md | 53 ++ tests/substack/test_end_to_end.py | 128 +++++ 3 files changed, 638 insertions(+) create mode 100644 tests/substack/fixtures/full_features.expected.json create mode 100644 tests/substack/fixtures/full_features.md create mode 100644 tests/substack/test_end_to_end.py diff --git a/tests/substack/fixtures/full_features.expected.json b/tests/substack/fixtures/full_features.expected.json new file mode 100644 index 0000000..1a964f0 --- /dev/null +++ b/tests/substack/fixtures/full_features.expected.json @@ -0,0 +1,457 @@ +[ + { + "type": "heading", + "content": [ + { + "type": "text", + "text": "Heading level 1" + } + ], + "attrs": { + "level": 1 + } + }, + { + "type": "heading", + "content": [ + { + "type": "text", + "text": "Heading level 2" + } + ], + "attrs": { + "level": 2 + } + }, + { + "type": "heading", + "content": [ + { + "type": "text", + "text": "Heading level 3" + } + ], + "attrs": { + "level": 3 + } + }, + { + "type": "heading", + "content": [ + { + "type": "text", + "text": "Heading level 4" + } + ], + "attrs": { + "level": 4 + } + }, + { + "type": "heading", + "content": [ + { + "type": "text", + "text": "Heading level 5" + } + ], + "attrs": { + "level": 5 + } + }, + { + "type": "heading", + "content": [ + { + "type": "text", + "text": "Heading level 6" + } + ], + "attrs": { + "level": 6 + } + }, + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "A paragraph with " + }, + { + "type": "text", + "text": "bold", + "marks": [ + { + "type": "strong" + } + ] + }, + { + "type": "text", + "text": ", " + }, + { + "type": "text", + "text": "italic", + "marks": [ + { + "type": "em" + } + ] + }, + { + "type": "text", + "text": ", " + }, + { + "type": "text", + "text": "bold italic", + "marks": [ + { + "type": "em" + }, + { + "type": "strong" + } + ] + }, + { + "type": "text", + "text": ", " + }, + { + "type": "text", + "text": 
"inline code", + "marks": [ + { + "type": "code" + } + ] + }, + { + "type": "text", + "text": ", " + }, + { + "type": "text", + "text": "strikethrough", + "marks": [ + { + "type": "strikethrough" + } + ] + }, + { + "type": "text", + "text": ", and a " + }, + { + "type": "text", + "text": "link", + "marks": [ + { + "type": "link", + "attrs": { + "href": "https://example.com" + } + } + ] + }, + { + "type": "text", + "text": "." + } + ] + }, + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "A sentence with a numeric footnote" + }, + { + "type": "footnoteAnchor", + "attrs": { + "number": 1 + } + }, + { + "type": "text", + "text": " and a named footnote" + }, + { + "type": "footnoteAnchor", + "attrs": { + "number": 2 + } + }, + { + "type": "text", + "text": ", plus a repeat of the first." + }, + { + "type": "footnoteAnchor", + "attrs": { + "number": 1 + } + } + ] + }, + { + "type": "bullet_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Bullet one" + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Bullet two with " + }, + { + "type": "text", + "text": "bold", + "marks": [ + { + "type": "strong" + } + ] + } + ] + } + ] + } + ] + }, + { + "type": "ordered_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Ordered one" + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Ordered two" + } + ] + } + ] + } + ] + }, + { + "type": "blockquote", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "A blockquote across two wrapped lines." 
+ } + ] + } + ] + }, + { + "type": "blockquote", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "First quote paragraph." + } + ] + }, + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Second quote paragraph." + } + ] + } + ] + }, + { + "type": "codeBlock", + "content": [ + { + "type": "text", + "text": "# fenced code: footnote-like text must stay literal\nx = \"[^1]: not a footnote\"\nprint(\"reference [^1] stays text\")" + } + ], + "attrs": { + "language": "python" + } + }, + { + "type": "codeBlock", + "content": [ + { + "type": "text", + "text": "plain code block without a language" + } + ] + }, + { + "type": "horizontal_rule" + }, + { + "type": "captionedImage", + "content": [ + { + "type": "image2", + "attrs": { + "src": "", + "fullscreen": false, + "imageSize": "normal", + "height": 819, + "width": 1456, + "resizeWidth": 728, + "bytes": null, + "alt": "A captioned image", + "title": null, + "type": null, + "href": null, + "belowTheFold": false, + "internalRedirect": null + } + } + ] + }, + { + "type": "captionedImage", + "content": [ + { + "type": "image2", + "attrs": { + "src": "", + "fullscreen": false, + "imageSize": "normal", + "height": 819, + "width": 1456, + "resizeWidth": 728, + "bytes": null, + "alt": "Linked image alt", + "title": null, + "type": null, + "href": "https://example.com/target", + "belowTheFold": false, + "internalRedirect": null + } + } + ] + }, + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inline code with [^1] inside stays literal", + "marks": [ + { + "type": "code" + } + ] + } + ] + }, + { + "type": "footnote", + "attrs": { + "number": 1 + }, + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "The first footnote, with a " + }, + { + "type": "text", + "text": "link", + "marks": [ + { + "type": "link", + "attrs": { + "href": "https://example.com" + } + } + ] + }, + { + "type": "text", + "text": "." 
+ } + ] + } + ] + }, + { + "type": "footnote", + "attrs": { + "number": 2 + }, + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "A named footnote whose definition spans a continuation line in the same paragraph." + } + ] + }, + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "And a second paragraph after a blank line." + } + ] + } + ] + } +] diff --git a/tests/substack/fixtures/full_features.md b/tests/substack/fixtures/full_features.md new file mode 100644 index 0000000..f86ba03 --- /dev/null +++ b/tests/substack/fixtures/full_features.md @@ -0,0 +1,53 @@ +# Heading level 1 + +## Heading level 2 + +### Heading level 3 + +#### Heading level 4 + +##### Heading level 5 + +###### Heading level 6 + +A paragraph with **bold**, *italic*, ***bold italic***, `inline code`, ~~strikethrough~~, and a [link](https://example.com). + +A sentence with a numeric footnote[^1] and a named footnote[^note], plus a repeat of the first.[^1] + +- Bullet one +- Bullet two with **bold** + +1. Ordered one +2. Ordered two + +> A blockquote across +> two wrapped lines. + +> First quote paragraph. +> +> Second quote paragraph. + +```python +# fenced code: footnote-like text must stay literal +x = "[^1]: not a footnote" +print("reference [^1] stays text") +``` + +``` +plain code block without a language +``` + +--- + +![A captioned image](https://example.com/image.png) + +[![Linked image alt](https://example.com/thumb.png)](https://example.com/target) + +`inline code with [^1] inside stays literal` + +[^1]: The first footnote, with a [link](https://example.com). +[^note]: A named footnote whose definition spans + a continuation line in the same paragraph. + + And a second paragraph after a blank line. +[^unused]: This definition is never referenced and should be dropped. 
diff --git a/tests/substack/test_end_to_end.py b/tests/substack/test_end_to_end.py new file mode 100644 index 0000000..8a18632 --- /dev/null +++ b/tests/substack/test_end_to_end.py @@ -0,0 +1,128 @@ +"""End-to-end Markdown -> Substack draft round-trip test. + +Reads a feature-complete Markdown fixture, creates a real draft via the API, +retrieves it, and compares the (normalised) stored document against a saved +golden file (``full_features.expected.json``). + +Requires live credentials in the environment (a ``.env`` file is loaded): + + - ``COOKIES_STRING`` (or ``COOKIES_PATH``), **or** ``EMAIL`` + ``PASSWORD`` + - ``PUBLICATION_URL`` (optional but recommended) + +The test is skipped when no credentials are configured, so it is safe to run in +CI without secrets. + +To regenerate the golden file after an intentional change:: + + python -m tests.substack.test_end_to_end --generate +""" + +import copy +import json +import os +import sys +from pathlib import Path + +import pytest +from dotenv import load_dotenv + +from substack import Api +from substack.post import Post + +load_dotenv() + +FIXTURES = Path(__file__).parent / "fixtures" +MARKDOWN_FILE = FIXTURES / "full_features.md" +EXPECTED_FILE = FIXTURES / "full_features.expected.json" + +TITLE = "python-substack e2e feature test" +SUBTITLE = "Automated round-trip fixture" + + +def _has_credentials() -> bool: + """Whether auth is configured, checked without any network calls.""" + return bool( + os.getenv("COOKIES_STRING") + or os.getenv("COOKIES_PATH") + or (os.getenv("EMAIL") and os.getenv("PASSWORD")) + ) + + +def _api_from_env() -> Api: + cookies_string = os.getenv("COOKIES_STRING") + cookies_path = os.getenv("COOKIES_PATH") + publication_url = os.getenv("PUBLICATION_URL") + if cookies_string or cookies_path: + return Api( + cookies_string=cookies_string, + cookies_path=cookies_path, + publication_url=publication_url, + ) + return Api( + email=os.getenv("EMAIL"), + password=os.getenv("PASSWORD"), + 
publication_url=publication_url, + ) + + +def _normalize(content): + """Replace values that legitimately vary between runs (e.g. image URLs).""" + + def walk(node): + if isinstance(node, dict): + return { + key: ("" if key == "src" else walk(value)) + for key, value in node.items() + } + if isinstance(node, list): + return [walk(item) for item in node] + return node + + return walk(copy.deepcopy(content)) + + +def _roundtrip(api: Api): + """Post the fixture as a draft, read it back, and return normalised content.""" + markdown = MARKDOWN_FILE.read_text(encoding="utf-8") + post = Post(TITLE, SUBTITLE, user_id=api.get_user_id()) + post.from_markdown(markdown) + draft = api.post_draft(post.get_draft()) + draft_id = draft.get("id") + try: + stored = api.get_draft(draft_id) + body = stored.get("draft_body") + if isinstance(body, str): + body = json.loads(body) + return _normalize(body["content"]) + finally: + try: + api.delete_draft(draft_id) + except Exception: + pass + + +@pytest.mark.skipif(not _has_credentials(), reason="no Substack credentials configured") +def test_full_features_roundtrip(): + api = _api_from_env() + actual = _roundtrip(api) + expected = json.loads(EXPECTED_FILE.read_text(encoding="utf-8")) + assert actual == expected + + +def _generate(): + """Regenerate the golden file from a live round-trip.""" + if not _has_credentials(): + raise SystemExit("No credentials configured; cannot generate golden file.") + api = _api_from_env() + content = _roundtrip(api) + EXPECTED_FILE.write_text( + json.dumps(content, indent=2, ensure_ascii=False) + "\n", encoding="utf-8" + ) + print(f"Wrote {EXPECTED_FILE}") + + +if __name__ == "__main__": + if "--generate" in sys.argv: + _generate() + else: + print(__doc__) From 1e18ff3d3bdcea667737bf1ddfbfcdb27ec5cd31 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 29 Jun 2026 12:19:23 +1000 Subject: [PATCH 6/9] Use a real image URL in the e2e fixture So the generated draft renders an actual image if opened. 
Stored image2 attrs (and the normalised golden) are unchanged, since Substack keeps the default dimensions for externally-sourced images in the draft API. --- tests/substack/fixtures/full_features.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/substack/fixtures/full_features.md b/tests/substack/fixtures/full_features.md index f86ba03..5aefe60 100644 --- a/tests/substack/fixtures/full_features.md +++ b/tests/substack/fixtures/full_features.md @@ -39,9 +39,9 @@ plain code block without a language --- -![A captioned image](https://example.com/image.png) +![A captioned image](https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png) -[![Linked image alt](https://example.com/thumb.png)](https://example.com/target) +[![Linked image alt](https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png)](https://example.com/target) `inline code with [^1] inside stays literal` From d2d85b47e318207086dfdb2fe6c355f723cf8aa9 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 29 Jun 2026 12:25:36 +1000 Subject: [PATCH 7/9] Cover nested lists in the e2e fixture Add all four parent/child list nestings (bullet>bullet, bullet>number, number>bullet, number>number) to the fixture and regenerate the golden. Verified Substack stores the nested structure intact on round-trip. 
--- .../fixtures/full_features.expected.json | 198 ++++++++++++++++++ tests/substack/fixtures/full_features.md | 14 ++ 2 files changed, 212 insertions(+) diff --git a/tests/substack/fixtures/full_features.expected.json b/tests/substack/fixtures/full_features.expected.json index 1a964f0..d03bbf2 100644 --- a/tests/substack/fixtures/full_features.expected.json +++ b/tests/substack/fixtures/full_features.expected.json @@ -274,6 +274,204 @@ } ] }, + { + "type": "bullet_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Bulleted list with a nested bulleted list:" + } + ] + }, + { + "type": "bullet_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner bullet a" + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner bullet b" + } + ] + } + ] + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Bulleted list with a nested numbered list:" + } + ] + }, + { + "type": "ordered_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner number a" + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner number b" + } + ] + } + ] + } + ] + } + ] + } + ] + }, + { + "type": "ordered_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Numbered list with a nested bulleted list:" + } + ] + }, + { + "type": "bullet_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner bullet a" + } + ] + } + ] + 
}, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner bullet b" + } + ] + } + ] + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "Numbered list with a nested numbered list:" + } + ] + }, + { + "type": "ordered_list", + "content": [ + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner number a" + } + ] + } + ] + }, + { + "type": "list_item", + "content": [ + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "inner number b" + } + ] + } + ] + } + ] + } + ] + } + ] + }, { "type": "blockquote", "content": [ diff --git a/tests/substack/fixtures/full_features.md b/tests/substack/fixtures/full_features.md index 5aefe60..8b7939c 100644 --- a/tests/substack/fixtures/full_features.md +++ b/tests/substack/fixtures/full_features.md @@ -20,6 +20,20 @@ A sentence with a numeric footnote[^1] and a named footnote[^note], plus a repea 1. Ordered one 2. Ordered two +- Bulleted list with a nested bulleted list: + - inner bullet a + - inner bullet b +- Bulleted list with a nested numbered list: + 1. inner number a + 2. inner number b + +1. Numbered list with a nested bulleted list: + - inner bullet a + - inner bullet b +2. Numbered list with a nested numbered list: + 1. inner number a + 2. inner number b + > A blockquote across > two wrapped lines. From 8cfeda0f45e3cfca50209cfdd6e7b74191bdc569 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 29 Jun 2026 12:42:41 +1000 Subject: [PATCH 8/9] Cover local image upload, autolinks, and formatted headings in e2e - Add a locally-sourced image to the fixture so the test exercises the real api.get_image() upload path; the test asserts the image was uploaded to a remote URL. 
The fixture references it via a token resolved to a CWD-relative path (so get_image's os.path.exists check and the leading-slash handling both behave), and from_markdown is now called with api=api. - Add an autolink and a heading with inline formatting. - Normalise image dimension attrs (height/width/resizeWidth) alongside src, since Substack fills those in for uploaded images non-deterministically. --- .../fixtures/full_features.expected.json | 110 +++++++++++++++++- tests/substack/fixtures/full_features.md | 6 + tests/substack/fixtures/local_image.png | Bin 0 -> 75 bytes tests/substack/test_end_to_end.py | 64 ++++++++-- 4 files changed, 166 insertions(+), 14 deletions(-) create mode 100644 tests/substack/fixtures/local_image.png diff --git a/tests/substack/fixtures/full_features.expected.json b/tests/substack/fixtures/full_features.expected.json index d03bbf2..47a3277 100644 --- a/tests/substack/fixtures/full_features.expected.json +++ b/tests/substack/fixtures/full_features.expected.json @@ -542,9 +542,9 @@ "src": "", "fullscreen": false, "imageSize": "normal", - "height": 819, - "width": 1456, - "resizeWidth": 728, + "height": "", + "width": "", + "resizeWidth": "", "bytes": null, "alt": "A captioned image", "title": null, @@ -565,9 +565,9 @@ "src": "", "fullscreen": false, "imageSize": "normal", - "height": 819, - "width": 1456, - "resizeWidth": 728, + "height": "", + "width": "", + "resizeWidth": "", "bytes": null, "alt": "Linked image alt", "title": null, @@ -579,6 +579,29 @@ } ] }, + { + "type": "captionedImage", + "content": [ + { + "type": "image2", + "attrs": { + "src": "", + "fullscreen": false, + "imageSize": "normal", + "height": "", + "width": "", + "resizeWidth": "", + "bytes": null, + "alt": "A locally uploaded image", + "title": null, + "type": null, + "href": null, + "belowTheFold": false, + "internalRedirect": null + } + } + ] + }, { "type": "paragraph", "content": [ @@ -593,6 +616,81 @@ } ] }, + { + "type": "heading", + "content": [ + { + "type": 
"text", + "text": "Heading with " + }, + { + "type": "text", + "text": "bold", + "marks": [ + { + "type": "strong" + } + ] + }, + { + "type": "text", + "text": ", " + }, + { + "type": "text", + "text": "italic", + "marks": [ + { + "type": "em" + } + ] + }, + { + "type": "text", + "text": ", and a " + }, + { + "type": "text", + "text": "link", + "marks": [ + { + "type": "link", + "attrs": { + "href": "https://example.com" + } + } + ] + } + ], + "attrs": { + "level": 2 + } + }, + { + "type": "paragraph", + "content": [ + { + "type": "text", + "text": "An autolink to " + }, + { + "type": "text", + "text": "https://example.com", + "marks": [ + { + "type": "link", + "attrs": { + "href": "https://example.com" + } + } + ] + }, + { + "type": "text", + "text": " becomes a link." + } + ] + }, { "type": "footnote", "attrs": { diff --git a/tests/substack/fixtures/full_features.md b/tests/substack/fixtures/full_features.md index 8b7939c..d579202 100644 --- a/tests/substack/fixtures/full_features.md +++ b/tests/substack/fixtures/full_features.md @@ -57,8 +57,14 @@ plain code block without a language [![Linked image alt](https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png)](https://example.com/target) +![A locally uploaded image]({{LOCAL_IMAGE}}) + `inline code with [^1] inside stays literal` +## Heading with **bold**, *italic*, and a [link](https://example.com) + +An autolink to <https://example.com> becomes a link. + [^1]: The first footnote, with a [link](https://example.com). [^note]: A named footnote whose definition spans a continuation line in the same paragraph. 
diff --git a/tests/substack/fixtures/local_image.png b/tests/substack/fixtures/local_image.png new file mode 100644 index 0000000000000000000000000000000000000000..e1aabd8d83c63ec520c7b76ad8ac48ffee301a78 GIT binary patch literal 75 zcmeAS@N?(olHy`uVBq!ia0vp^EFjFm1|(O0oL2{=ggjjwLn`JZ|2Ti(gS@2i1U&`@ Y8wdG^4;st20A(3GUHx3vIVCg!0Bm>@?*IS* literal 0 HcmV?d00001 diff --git a/tests/substack/test_end_to_end.py b/tests/substack/test_end_to_end.py index 8a18632..109adc8 100644 --- a/tests/substack/test_end_to_end.py +++ b/tests/substack/test_end_to_end.py @@ -38,6 +38,28 @@ TITLE = "python-substack e2e feature test" SUBTITLE = "Automated round-trip fixture" +LOCAL_IMAGE_FILE = FIXTURES / "local_image.png" +LOCAL_IMAGE_TOKEN = "{{LOCAL_IMAGE}}" +LOCAL_IMAGE_ALT = "A locally uploaded image" + + +def _image_srcs_by_alt(content, alt): + """Collect every image2 src whose alt matches (for upload verification).""" + found = [] + + def walk(node): + if isinstance(node, dict): + if node.get("type") == "image2" and node.get("attrs", {}).get("alt") == alt: + found.append(node["attrs"].get("src")) + for value in node.values(): + walk(value) + elif isinstance(node, list): + for item in node: + walk(item) + + walk(content) + return found + def _has_credentials() -> bool: """Whether auth is configured, checked without any network calls.""" @@ -66,12 +88,24 @@ def _api_from_env() -> Api: def _normalize(content): - """Replace values that legitimately vary between runs (e.g. image URLs).""" + """Replace values that vary between runs/environments. + + Image ``src`` is a per-upload URL, and Substack fills in the dimension + attributes (``height``/``width``/``resizeWidth``) for uploaded images via its + own image pipeline (not deterministically), so all of these are blanked + before comparison. 
+ """ + volatile = { + "src": "", + "height": "", + "width": "", + "resizeWidth": "", + } def walk(node): if isinstance(node, dict): return { - key: ("" if key == "src" else walk(value)) + key: (volatile[key] if key in volatile else walk(value)) for key, value in node.items() } if isinstance(node, list): @@ -82,10 +116,15 @@ def walk(node): def _roundtrip(api: Api): - """Post the fixture as a draft, read it back, and return normalised content.""" + """Post the fixture as a draft, read it back, and return its stored content.""" markdown = MARKDOWN_FILE.read_text(encoding="utf-8") + # The local-image path is environment-specific, so it lives as a token in the + # fixture. We resolve it to a path relative to the current directory: get_image() + # uploads any path that os.path.exists(), and a relative path avoids the + # leading-slash handling that from_markdown applies to web-root paths. + markdown = markdown.replace(LOCAL_IMAGE_TOKEN, os.path.relpath(LOCAL_IMAGE_FILE)) post = Post(TITLE, SUBTITLE, user_id=api.get_user_id()) - post.from_markdown(markdown) + post.from_markdown(markdown, api=api) draft = api.post_draft(post.get_draft()) draft_id = draft.get("id") try: @@ -93,7 +132,7 @@ def _roundtrip(api: Api): body = stored.get("draft_body") if isinstance(body, str): body = json.loads(body) - return _normalize(body["content"]) + return body["content"] finally: try: api.delete_draft(draft_id) @@ -104,9 +143,18 @@ def _roundtrip(api: Api): @pytest.mark.skipif(not _has_credentials(), reason="no Substack credentials configured") def test_full_features_roundtrip(): api = _api_from_env() - actual = _roundtrip(api) + content = _roundtrip(api) + + # The locally-sourced image must have been uploaded to a remote Substack URL + # rather than left as the local file path. 
+ local_srcs = _image_srcs_by_alt(content, LOCAL_IMAGE_ALT) + assert local_srcs, "local image node not found in stored draft" + assert local_srcs[0].startswith("http"), ( + f"local image was not uploaded: {local_srcs[0]!r}" + ) + expected = json.loads(EXPECTED_FILE.read_text(encoding="utf-8")) - assert actual == expected + assert _normalize(content) == expected def _generate(): @@ -114,7 +162,7 @@ def _generate(): if not _has_credentials(): raise SystemExit("No credentials configured; cannot generate golden file.") api = _api_from_env() - content = _roundtrip(api) + content = _normalize(_roundtrip(api)) EXPECTED_FILE.write_text( json.dumps(content, indent=2, ensure_ascii=False) + "\n", encoding="utf-8" ) From 4ba0b54ede5dd8a9fbb653300c2e8a42f5ce6079 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Mon, 29 Jun 2026 13:43:51 +1000 Subject: [PATCH 9/9] Reference the local image by its real filename in the e2e fixture Drop the {{LOCAL_IMAGE}} placeholder in favour of a plain relative filename (local_image.png), matching a real authoring workflow. The test renders from the fixtures directory so the relative path resolves, as get_image expects. 
--- tests/substack/fixtures/full_features.md | 2 +- tests/substack/test_end_to_end.py | 17 +++++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/substack/fixtures/full_features.md b/tests/substack/fixtures/full_features.md index d579202..ce913db 100644 --- a/tests/substack/fixtures/full_features.md +++ b/tests/substack/fixtures/full_features.md @@ -57,7 +57,7 @@ plain code block without a language [![Linked image alt](https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png)](https://example.com/target) -![A locally uploaded image]({{LOCAL_IMAGE}}) +![A locally uploaded image](local_image.png) `inline code with [^1] inside stays literal` diff --git a/tests/substack/test_end_to_end.py b/tests/substack/test_end_to_end.py index 109adc8..982c3ff 100644 --- a/tests/substack/test_end_to_end.py +++ b/tests/substack/test_end_to_end.py @@ -38,8 +38,6 @@ TITLE = "python-substack e2e feature test" SUBTITLE = "Automated round-trip fixture" -LOCAL_IMAGE_FILE = FIXTURES / "local_image.png" -LOCAL_IMAGE_TOKEN = "{{LOCAL_IMAGE}}" LOCAL_IMAGE_ALT = "A locally uploaded image" @@ -118,13 +116,16 @@ def walk(node): def _roundtrip(api: Api): """Post the fixture as a draft, read it back, and return its stored content.""" markdown = MARKDOWN_FILE.read_text(encoding="utf-8") - # The local-image path is environment-specific, so it lives as a token in the - # fixture. We resolve it to a path relative to the current directory: get_image() - # uploads any path that os.path.exists(), and a relative path avoids the - # leading-slash handling that from_markdown applies to web-root paths. 
- markdown = markdown.replace(LOCAL_IMAGE_TOKEN, os.path.relpath(LOCAL_IMAGE_FILE)) post = Post(TITLE, SUBTITLE, user_id=api.get_user_id()) - post.from_markdown(markdown, api=api) + # Markdown image paths resolve relative to the working directory (this is how + # get_image uploads local files), so render from the fixtures directory just as + # a user authoring alongside their images would. + previous_cwd = os.getcwd() + os.chdir(FIXTURES) + try: + post.from_markdown(markdown, api=api) + finally: + os.chdir(previous_cwd) draft = api.post_draft(post.get_draft()) draft_id = draft.get("id") try: