Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 25 additions & 51 deletions api/routers/seo.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""SEO endpoints (sitemap, bot-optimized pages)."""

import html
import re
from datetime import datetime

from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse, Response
from fastapi.responses import HTMLResponse, RedirectResponse, Response
from sqlalchemy.ext.asyncio import AsyncSession

from api.cache import cache_key, get_cache, get_or_set_cache, set_cache
Expand All @@ -16,6 +17,11 @@

router = APIRouter(tags=["seo"])

# Canonical spec-id shape — lowercase alphanumerics with hyphen separators.
# Same pattern enforced in automation/scripts/sync_to_postgres.py. Used here to
# constrain user-controlled path segments before they land in Location headers.
# The pattern admits no slashes, dots, whitespace, or leading/trailing/doubled
# hyphens, so a matching id cannot alter the structure of a URL it is
# interpolated into. Check it with fullmatch(): with match()/search(), `$`
# would also accept a single trailing newline.
_SPEC_ID_RE = re.compile(r"^[a-z0-9]+(-[a-z0-9]+)*$")


def _lastmod(dt: datetime | None) -> str:
"""Format datetime as <lastmod> XML element, or empty string if None."""
Expand All @@ -25,10 +31,13 @@
def _build_sitemap_xml(specs: list) -> str:
"""Build sitemap XML string from specs.

Emits three URL tiers per spec:
- /{spec_id} Cross-language hub
- /{spec_id}/{language} Language overview
Emits two URL tiers per spec:
- /{spec_id} Cross-language hub (canonical overview)
- /{spec_id}/{language}/{library} Implementation detail

The /{spec_id}/{language} tier is intentionally omitted: language filtering
is served as /{spec_id}?language={language} (filtered hub, same canonical),
so listing it would create duplicate-content entries for Google.
"""
xml_lines = [
'<?xml version="1.0" encoding="UTF-8"?>',
Expand All @@ -48,21 +57,7 @@
if not spec.impls:
continue
spec_id = html.escape(spec.id)
# Cross-language hub
xml_lines.append(f" <url><loc>https://anyplot.ai/{spec_id}</loc>{_lastmod(spec.updated)}</url>")
# Language overviews + implementation details, grouped per language
languages = sorted({impl.library.language for impl in spec.impls if impl.library})
for language in languages:
language_esc = html.escape(language)
language_updates = [
impl.updated
for impl in spec.impls
if impl.library and impl.library.language == language and impl.updated is not None
]
language_lastmod = max(language_updates) if language_updates else spec.updated
xml_lines.append(
f" <url><loc>https://anyplot.ai/{spec_id}/{language_esc}</loc>{_lastmod(language_lastmod)}</url>"
)
for impl in spec.impls:
if not impl.library:
continue
Expand Down Expand Up @@ -318,40 +313,19 @@


@router.get("/seo-proxy/{spec_id}/{language}")
async def seo_spec_language(spec_id: str, language: str, db: AsyncSession | None = Depends(optional_db)):
"""Bot-optimized language-specific spec overview."""
if db is None:
return HTMLResponse(
BOT_HTML_TEMPLATE.format(
title=f"{html.escape(spec_id)} - {html.escape(language)} | anyplot.ai",
description=DEFAULT_DESCRIPTION,
image=DEFAULT_HOME_IMAGE,
url=f"https://anyplot.ai/{html.escape(spec_id)}/{html.escape(language)}",
)
)

key = cache_key("seo", spec_id, language)
cached = get_cache(key)
if cached:
return HTMLResponse(cached)

repo = SpecRepository(db)
spec = await repo.get_by_id(spec_id)
if not spec:
async def seo_spec_language(spec_id: str, language: str):
"""Permanent redirect: language-overview URLs now live on the hub with ?language=.

The /{spec_id}/{language} tier was consolidated into /{spec_id} to eliminate
duplicate content. Bots following this endpoint get a 301 to the hub proxy;
humans get the SPA redirect configured in app/src/router.tsx. The `language`
path segment is deliberately not forwarded as a `?language=` query parameter,
because the hub's canonical tag does not include it — Google should
consolidate on the hub page, not on a filtered variant.
"""
del language # referenced for route matching only; deliberately not forwarded
if not _SPEC_ID_RE.fullmatch(spec_id):
raise HTTPException(status_code=404, detail="Spec not found")

lang_impls = [i for i in spec.impls if i.library and i.library.language == language]
has_previews = any(i.preview_url for i in lang_impls)
image = f"https://api.anyplot.ai/og/{spec_id}.png" if has_previews else DEFAULT_HOME_IMAGE

result = BOT_HTML_TEMPLATE.format(
title=f"{html.escape(spec.title)} - {html.escape(language)} | anyplot.ai",
description=html.escape(spec.description or DEFAULT_DESCRIPTION),
image=html.escape(image, quote=True),
url=f"https://anyplot.ai/{html.escape(spec_id)}/{html.escape(language)}",
)
set_cache(key, result)
return HTMLResponse(result)
return RedirectResponse(url=f"/seo-proxy/{spec_id}", status_code=301)

Check warning

Code scanning / CodeQL

URL redirection from remote source Medium

Untrusted URL redirection depends on a user-provided value.
Comment on lines 315 to +328
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This endpoint’s behavior changed from serving HTML to returning a permanent 301. There’s currently no unit test asserting the redirect status code and Location header (and that the language segment is intentionally dropped). Adding a test for GET /seo-proxy/{spec}/{language} would prevent regressions and ensure crawlers get the expected consolidation behavior.

Copilot uses AI. Check for mistakes.


@router.get("/seo-proxy/{spec_id}/{language}/{library}")
Expand Down
35 changes: 18 additions & 17 deletions app/nginx.conf
Original file line number Diff line number Diff line change
Expand Up @@ -124,20 +124,20 @@ server {

# python.anyplot.ai — language-filtered marketing subdomain.
#
# Strategy: serve identical content to anyplot.ai with a /python language
# segment injected into spec routes. The canonical tag in the rendered HTML
# always points back to anyplot.ai, so Google consolidates link equity on the
# main domain while the subdomain stays available for marketing campaigns.
# Strategy: serve identical content to anyplot.ai. The canonical tag always
# points back to the main-domain hub (anyplot.ai/{spec_id}) so Google
# consolidates link equity on a single URL while the subdomain stays available
# for marketing campaigns.
#
# Bot path (SEO crawlers): nginx rewrites the requested URI by injecting
# `/python` and proxies to /seo-proxy/{spec}/python[/{library}] on the API.
# The SEO HTML emits canonical=https://anyplot.ai/{spec}/python[/{library}].
# Bot path (SEO crawlers): nginx proxies the hub to /seo-proxy/{spec} (no
# language segment — the language-filtered hub is /{spec}?language=python on
# the main domain, with the same canonical as the unfiltered hub). Library
# detail URLs keep the /python segment because those are content-unique.
#
# Human path: serves index.html for any spec route. The SPA must detect
# `window.location.hostname === 'python.anyplot.ai'` and inject `python` as
# the language when resolving routes / building canonical tags. Without that
# detection the SPA would treat `library` as `language` and 404. Subdomain
# rollout is therefore gated on a follow-up SPA change.
# Human path: serves index.html for any spec route. The SPA may detect
# `window.location.hostname === 'python.anyplot.ai'` and append
# `?language=python` on spec routes so the grid renders filtered without
# changing the canonical.
server {
listen 8080;
server_name python.anyplot.ai;
Expand Down Expand Up @@ -165,9 +165,9 @@ server {
add_header Expires "0";
}

# Bot SEO proxy — rewrite to inject /python and proxy to backend.
# /scatter-basic -> /seo-proxy/scatter-basic/python
# /scatter-basic/matplotlib -> /seo-proxy/scatter-basic/python/matplotlib
# Bot SEO proxy — proxy to backend.
# /scatter-basic -> /seo-proxy/scatter-basic (hub, no language segment)
# /scatter-basic/matplotlib -> /seo-proxy/scatter-basic/python/matplotlib (detail, language in path)
location @seo_proxy_python {
set $seo_backend https://api.anyplot.ai;
proxy_pass $seo_backend$python_seo_uri;
Expand All @@ -186,14 +186,15 @@ server {
try_files $uri $uri/ /index.html;
}

# /:specId[/:library] -> internally /:specId/python[/:library]
# /:specId -> hub on main domain (no language segment; canonical is /{spec_id})
location ~ "^/(?<spec_id>[A-Za-z0-9][A-Za-z0-9-]*)/?$" {
set $python_seo_uri /seo-proxy/$spec_id/python;
set $python_seo_uri /seo-proxy/$spec_id;
error_page 418 = @seo_proxy_python;
if ($is_bot) { return 418; }
try_files /index.html =404;
}

# /:specId/:library -> detail on main domain (language stays in path)
location ~ "^/(?<spec_id>[A-Za-z0-9][A-Za-z0-9-]*)/(?<library>[A-Za-z0-9][A-Za-z0-9-]*)/?$" {
set $python_seo_uri /seo-proxy/$spec_id/python/$library;
error_page 418 = @seo_proxy_python;
Expand Down
6 changes: 5 additions & 1 deletion app/src/hooks/useAnalytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ function buildPlausibleUrl(): string {
const pathPrefix =
parts.length > 0 && !RESERVED_TOP_LEVEL.has(parts[0]) ? `/${parts.join("/")}` : "";

// Definierte Reihenfolge der Filter-Kategorien (inkl. impl-level tags)
// Defined order of the filter categories (incl. impl-level tags).
// `language` is included so the hub's ?language= filter is tracked as a
// distinct pageview path (/{spec}/language/python), matching the path-segment
// convention used for all other filter params.
const orderedKeys = [
"lib",
"spec",
Expand All @@ -45,6 +48,7 @@ function buildPlausibleUrl(): string {
"pat",
"prep",
"style",
"language",
];

for (const key of orderedKeys) {
Expand Down
63 changes: 37 additions & 26 deletions app/src/pages/SpecPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ interface SpecDetail {
implementations: Implementation[];
}

type Mode = 'hub' | 'language' | 'detail';
type Mode = 'hub' | 'detail';

export function SpecPage() {
const { specId, language: urlLanguage, library: urlLibrary } = useParams();
Expand All @@ -53,8 +53,9 @@ export function SpecPage() {
const [highlightedTags, setHighlightedTags] = useState<string[]>([]);
const { fetchCode, getCode } = useCodeFetch();

const mode: Mode = urlLibrary ? 'detail' : urlLanguage ? 'language' : 'hub';
const mode: Mode = urlLibrary ? 'detail' : 'hub';
const selectedLibrary = urlLibrary || null;
const languageFilter = mode === 'hub' ? searchParams.get('language') : null;

const getLibraryMeta = useCallback(
(libraryId: string) => librariesData.find((lib) => lib.id === libraryId),
Expand All @@ -81,19 +82,14 @@ export function SpecPage() {
const data: SpecDetail = await res.json();
setSpecData(data);

// Validate language matches at least one impl
if (urlLanguage && !data.implementations.some((i) => i.language === urlLanguage)) {
navigate(specPath(specId!), { replace: true });
return;
}

// Validate library matches an impl in the requested language
// Detail mode: validate library matches an impl in the requested language.
// If no match, fall back to the hub with a language filter to preserve intent.
if (urlLibrary && urlLanguage) {
const matched = data.implementations.find(
(i) => i.library_id === urlLibrary && i.language === urlLanguage,
);
if (!matched) {
navigate(specPath(specId!, urlLanguage), { replace: true });
navigate({ pathname: specPath(specId!), search: `?language=${encodeURIComponent(urlLanguage)}` }, { replace: true });
return;
}
}
Expand All @@ -108,7 +104,7 @@ export function SpecPage() {
fetchSpec();
}, [specId, urlLanguage, urlLibrary, navigate]);

// Implementations for the selected language (used in language + detail modes)
// Implementations for the selected language (used in detail mode for library pills)
const langImpls = useMemo(() => {
if (!specData || !urlLanguage) return specData?.implementations || [];
return specData.implementations.filter((i) => i.language === urlLanguage);
Expand All @@ -120,6 +116,15 @@ export function SpecPage() {
return Array.from(new Set(specData.implementations.map((i) => i.language))).sort();
}, [specData]);

// If ?language= points at a language that has no implementations, drop it.
useEffect(() => {
if (mode !== 'hub' || !specData || !languageFilter) return;
if (availableLanguages.includes(languageFilter)) return;
const params = new URLSearchParams(searchParams);
params.delete('language');
setSearchParams(params, { replace: true });
}, [mode, specData, languageFilter, availableLanguages, searchParams, setSearchParams]);

// Get current implementation (only in detail mode)
const currentImpl = useMemo(() => {
if (!specData || !selectedLibrary) return null;
Expand Down Expand Up @@ -199,7 +204,7 @@ export function SpecPage() {
trackEvent('download_image', {
spec: specId,
library: impl.library_id,
page: mode === 'detail' ? 'spec_detail' : mode === 'language' ? 'spec_language' : 'spec_hub',
page: mode === 'detail' ? 'spec_detail' : 'spec_hub',
});
},
[specId, trackEvent, mode],
Expand All @@ -216,7 +221,7 @@ export function SpecPage() {
spec: specId,
library: impl.library_id,
method: 'image',
page: mode === 'detail' ? 'spec_detail' : mode === 'language' ? 'spec_language' : 'spec_hub',
page: mode === 'detail' ? 'spec_detail' : 'spec_hub',
});
setTimeout(() => setCodeCopied(null), 2000);
} catch (err) {
Expand All @@ -234,13 +239,19 @@ export function SpecPage() {
return `${GITHUB_URL}/issues/new?${params.toString()}`;
}, [specId]);

// Track page view
// Track page view. Hub mode calls trackPageview() without an override so
// buildPlausibleUrl() picks up ?language= from window.location and converts
// it to the path-segment form (e.g. /{spec}/language/python). The URL
// override path used for detail mode cannot carry query strings — see
// useAnalytics.ts sendPageview() validation regex.
useEffect(() => {
if (!specData || !specId) return;
if (mode === 'hub') trackPageview(`/${specId}`);
else if (mode === 'language') trackPageview(`/${specId}/${urlLanguage}`);
else if (mode === 'detail' && selectedLibrary) trackPageview(`/${specId}/${urlLanguage}/${selectedLibrary}`);
}, [specData, mode, specId, urlLanguage, selectedLibrary, trackPageview]);
if (mode === 'hub') {
trackPageview();
} else if (mode === 'detail' && selectedLibrary) {
Comment on lines +249 to +251
Copy link

Copilot AI Apr 21, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In hub mode this calls trackPageview with a ?language= query string. useAnalytics().trackPageview currently validates urlOverride with /^\/([\w\-/])*$/ (no ?, =), so this override will be rejected and the pageview won’t be tracked when languageFilter is set. Consider either (a) encoding the filter into a path-only override that matches the allowed charset, or (b) not overriding and extending the analytics URL builder to incorporate language safely.

Copilot uses AI. Check for mistakes.
trackPageview(`/${specId}/${urlLanguage}/${selectedLibrary}`);
}
}, [specData, mode, specId, urlLanguage, selectedLibrary, languageFilter, trackPageview]);

// Keyboard shortcuts: left/right arrows switch libraries in detail mode
useEffect(() => {
Expand Down Expand Up @@ -303,15 +314,15 @@ export function SpecPage() {
const canonical =
mode === 'detail'
? `https://anyplot.ai/${specId}/${urlLanguage}/${selectedLibrary}`
: mode === 'language'
? `https://anyplot.ai/${specId}/${urlLanguage}`
: `https://anyplot.ai/${specId}`;
: `https://anyplot.ai/${specId}`;

const titleSuffix =
mode === 'detail' ? ` - ${selectedLibrary}` : mode === 'language' ? ` - ${urlLanguage}` : '';
const titleSuffix = mode === 'detail' ? ` - ${selectedLibrary}` : '';

// Implementations to render in the grid: language mode → only that lang; hub → all
const gridImpls = mode === 'hub' ? specData.implementations : langImpls;
// Implementations to render in the grid: hub mode optionally filtered by ?language=
const gridImpls =
languageFilter
? specData.implementations.filter((i) => i.language === languageFilter)
: specData.implementations;

return (
<>
Expand Down Expand Up @@ -445,7 +456,7 @@ export function SpecPage() {
/>

<Box sx={{ textAlign: 'center', mt: -0.5, mb: 1 }}>
<Box component={Link} to={specPath(specId!, urlLanguage)} sx={{
<Box component={Link} to={{ pathname: specPath(specId!), search: urlLanguage ? `?language=${encodeURIComponent(urlLanguage)}` : '' }} sx={{
fontFamily: typography.fontFamily,
fontSize: fontSize.sm,
color: semanticColors.mutedText,
Expand Down
10 changes: 8 additions & 2 deletions app/src/router.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { createBrowserRouter, RouterProvider } from 'react-router-dom';
import { createBrowserRouter, Navigate, RouterProvider, useParams } from 'react-router-dom';
import { HelmetProvider } from 'react-helmet-async';
import Box from '@mui/material/Box';
import CircularProgress from '@mui/material/CircularProgress';
Expand All @@ -16,6 +16,12 @@ const LazyFallback = () => (

const lazySpec = () => import('./pages/SpecPage').then(m => ({ Component: m.SpecPage, HydrateFallback: LazyFallback }));

/**
 * Client-side redirect for the `:specId/:language` route.
 *
 * Sends `/{specId}/{language}` to the hub route `/{specId}` with the language
 * carried as a `?language=` query parameter. Renders the not-found page when
 * either route param is missing.
 */
function SpecLanguageRedirect() {
const { specId, language } = useParams();
// Both params are required to build the target; without them there is nothing to redirect to.
if (!specId || !language) return <NotFoundPage />;
// The language value is URL-encoded before being placed in the query string.
// `replace` swaps the current history entry instead of pushing a new one.
return <Navigate to={{ pathname: `/${specId}`, search: `?language=${encodeURIComponent(language)}` }} replace />;
}

const router = createBrowserRouter([
{
element: <RootLayout />,
Expand All @@ -30,7 +36,7 @@ const router = createBrowserRouter([
{ path: 'mcp', lazy: () => import('./pages/McpPage').then(m => ({ Component: m.McpPage })) },
{ path: 'stats', lazy: () => import('./pages/StatsPage').then(m => ({ Component: m.StatsPage })) },
{ path: ':specId', lazy: lazySpec },
{ path: ':specId/:language', lazy: lazySpec },
{ path: ':specId/:language', element: <SpecLanguageRedirect /> },
{ path: ':specId/:language/:library', lazy: lazySpec },
{ path: '*', element: <NotFoundPage /> },
],
Expand Down
Loading
Loading