diff --git a/docs/adding-entities.md b/docs/adding-entities.md index d53a148b..b8f528d9 100644 --- a/docs/adding-entities.md +++ b/docs/adding-entities.md @@ -71,6 +71,25 @@ Present what you inferred and get confirmation + category in a single step. Incl Everything else (`MODEL_OPTIONS`, `DEFAULT_MODELS`, `EXPERIMENTAL_MODELS`, `DEPRECATED_MODELS`, `MODEL_PREFIX_MAPPING`, `getModelLabel()`) is derived automatically. +**`packages/app/src/lib/compare-slug.ts`** (easy to miss — the /compare and /compare-per-dollar pages do NOT derive from `MODEL_CONFIG`): + +- `COMPARE_MODEL_SLUGS` — add an entry with `{ slug, displayName, dbKeys, label }`. `displayName` must match the `Model` enum value; `dbKeys` lists the DB buckets to query. Place it per the ordering comment (Chinese-lab flagships first, newer family member leads). Without this entry the model is absent from /compare, /compare-per-dollar, the sitemap, and their OG images. +- `COMPARE_MODEL_ALIASES` — add an alias only if a family-level or older-version slug should 308-redirect (permanent redirect) to the new entry. + +**`packages/app/src/lib/compare-ssr.ts`**: + +- `KNOWN_MODELS` — add the display name so `?g_model=` URL overrides validate on compare pages. + +**`packages/app/src/app/compare/page.tsx`** and **`packages/app/src/app/compare-per-dollar/page.tsx`**: + +- `DESCRIPTION` — these SEO meta strings hardcode a sample model list ("…, Qwen 3.5 397B-A17B, and more"). Add the new model if it should appear in the catalog blurb. + +**`packages/app/src/lib/model-architectures.ts`** (optional — powers the per-model architecture diagram on the inference tab): + +- `MODEL_ARCHITECTURES` — add a `[Model.X]` entry with verified config.json values. Omitted models simply render no diagram (`getModelArchitecture` returns `undefined`), so this is non-blocking but expected for parity with other models. + +`/about` needs no change — its model list derives from `DB_MODEL_TO_DISPLAY` and includes the new key automatically once `models.ts` is updated. 
+ --- ## Featuring a Day-0 Model diff --git a/packages/app/cypress/e2e/model-architecture.cy.ts b/packages/app/cypress/e2e/model-architecture.cy.ts index f6e29592..e9834075 100644 --- a/packages/app/cypress/e2e/model-architecture.cy.ts +++ b/packages/app/cypress/e2e/model-architecture.cy.ts @@ -241,6 +241,43 @@ describe('Model Architecture Diagram', () => { }); }); + describe('Collapsible Transformer Blocks (MoE model - MiniMax M3)', () => { + before(() => { + // Clear any stale Radix scroll lock from prior Select interactions + cy.document().then((doc) => { + delete doc.body.dataset.scrollLocked; + doc.body.style.removeProperty('pointer-events'); + }); + cy.get('[role="combobox"]').filter(':visible').first().click(); + cy.get('[role="option"]').contains('MiniMax M3').click(); + + cy.get('[data-testid="model-architecture-toggle"]').should('be.visible'); + cy.get('body').then(($body) => { + if ($body.find('[data-testid="model-architecture-svg"]:visible').length === 0) { + cy.get('[data-testid="model-architecture-toggle"]').click(); + } + }); + cy.get('[data-testid="model-architecture-svg"]').should('be.visible'); + }); + + it('shows MoE and GQA badges for MiniMax M3', () => { + cy.get('[data-testid="model-architecture-toggle"]').should('contain.text', 'MoE'); + cy.get('[data-testid="model-architecture-toggle"]').should('contain.text', 'GQA'); + cy.get('[data-testid="model-architecture-toggle"]').should('contain.text', '428B'); + }); + + it('GQA attention is NOT expandable (sparse attention rendered as a static block)', () => { + cy.get('[data-testid="expand-transformer"]').click({ force: true }); + cy.get('[data-testid="expand-attention"]').should('not.exist'); + cy.get('[data-testid="expand-experts"]').should('exist'); + }); + + it('shows MiniMax M3 sparse-attention features', () => { + cy.contains('MiniMax Sparse Attention (MSA)').should('be.visible'); + cy.contains('GQA with QK Norm').should('be.visible'); + }); + }); + describe('Alternating Attention Blocks (MoE 
model - gpt-oss 120B)', () => { before(() => { // Clear any stale Radix scroll lock from prior Select interactions diff --git a/packages/app/src/app/compare-per-dollar/page.tsx b/packages/app/src/app/compare-per-dollar/page.tsx index e8e254c1..0d2701b6 100644 --- a/packages/app/src/app/compare-per-dollar/page.tsx +++ b/packages/app/src/app/compare-per-dollar/page.tsx @@ -12,7 +12,7 @@ import { bucketComparePairsByVendor, formatModelList } from '@/lib/compare-ssr'; export const dynamic = 'force-dynamic'; const DESCRIPTION = - 'GPU performance per dollar — head-to-head cost per million tokens across every model and hardware pair we benchmark. Performance normalized by owning-hyperscaler TCO for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, GLM 5/5.1, Qwen 3.5 397B-A17B, and more. Pick the cheapest SKU for your workload.'; + 'GPU performance per dollar — head-to-head cost per million tokens across every model and hardware pair we benchmark. Performance normalized by owning-hyperscaler TCO for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, MiniMax M3 428B, GLM 5/5.1, Qwen 3.5 397B-A17B, and more. Pick the cheapest SKU for your workload.'; export const metadata: Metadata = { title: 'GPU Performance per Dollar', diff --git a/packages/app/src/app/compare/page.tsx b/packages/app/src/app/compare/page.tsx index ba8b11ea..0762b974 100644 --- a/packages/app/src/app/compare/page.tsx +++ b/packages/app/src/app/compare/page.tsx @@ -13,7 +13,7 @@ import { bucketComparePairsByVendor, formatModelList } from '@/lib/compare-ssr'; export const dynamic = 'force-dynamic'; const DESCRIPTION = - 'Browse head-to-head GPU inference benchmark comparisons across every model and hardware pair we test. Latency, throughput, and cost for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, GLM 5/5.1, Qwen 3.5 397B-A17B, and more.'; + 'Browse head-to-head GPU inference benchmark comparisons across every model and hardware pair we test. 
Latency, throughput, and cost for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, MiniMax M3 428B, GLM 5/5.1, Qwen 3.5 397B-A17B, and more.'; export const metadata: Metadata = { title: 'GPU Comparisons', diff --git a/packages/app/src/lib/compare-slug.test.ts b/packages/app/src/lib/compare-slug.test.ts index ac37c81b..c984d826 100644 --- a/packages/app/src/lib/compare-slug.test.ts +++ b/packages/app/src/lib/compare-slug.test.ts @@ -43,6 +43,15 @@ describe('parseCompareSlug — new model-prefixed form', () => { expect(parsed?.b).toBe('gb200'); }); + it('parses the minimax-m3 slug as its own model, distinct from minimax-m27', () => { + const parsed = parseCompareSlug('minimax-m3-h100-vs-h200'); + expect(parsed?.model.slug).toBe('minimax-m3'); + expect(parsed?.model.dbKeys).toEqual(['minimaxm3']); + expect(parsed?.a).toBe('h100'); + expect(parsed?.b).toBe('h200'); + expect(parsed?.isAliasModel).toBe(false); + }); + it('preserves non-canonical GPU order so caller can redirect', () => { const parsed = parseCompareSlug('kimi-k26-h200-vs-h100'); expect(parsed?.a).toBe('h200'); @@ -259,6 +268,11 @@ describe('getCompareModelBySlug', () => { expect(getCompareModelBySlug('glm-5')).toBe(GLM_51); }); + it('keeps the bare minimax alias on the M2 series, with minimax-m3 canonical', () => { + expect(getCompareModelBySlug('minimax')?.slug).toBe('minimax-m27'); + expect(getCompareModelBySlug('minimax-m3')?.slug).toBe('minimax-m3'); + }); + it('returns null for unknown slugs', () => { expect(getCompareModelBySlug('nonexistent')).toBeNull(); }); diff --git a/packages/app/src/lib/compare-slug.ts b/packages/app/src/lib/compare-slug.ts index 6d49bb97..24624050 100644 --- a/packages/app/src/lib/compare-slug.ts +++ b/packages/app/src/lib/compare-slug.ts @@ -31,10 +31,12 @@ export interface CompareModelSlug { } // Order matches the master /compare and /compare-per-dollar index display: -// DeepSeek V4 Pro → R1 → Kimi → GLM → MiniMax → Qwen → gpt-oss → Llama 70B. 
-// Per product spec — flagship Chinese-developed models first, smaller open -// US-developed models at the bottom. Qwen sits between MiniMax and gpt-oss to -// keep the Chinese-lab cluster contiguous before the US transition. +// DeepSeek V4 Pro → R1 → Kimi → GLM → MiniMax M3 → MiniMax M2 → Qwen → +// gpt-oss → Llama 70B. Per product spec — flagship Chinese-developed models +// first, smaller open US-developed models at the bottom. Qwen sits between +// MiniMax and gpt-oss to keep the Chinese-lab cluster contiguous before the +// US transition. The two MiniMax entries stay adjacent with the newer M3 +// flagship leading the older M2 series. export const COMPARE_MODEL_SLUGS: CompareModelSlug[] = [ { slug: 'deepseek-v4', @@ -69,6 +71,15 @@ export const COMPARE_MODEL_SLUGS: CompareModelSlug[] = [ dbKeys: ['glm5.1', 'glm5'], label: 'GLM 5/5.1', }, + { + slug: 'minimax-m3', + displayName: 'MiniMax-M3', + // M3 is a new 428B architecture (MiniMax Sparse Attention), not a point + // release of the M2 series, so it gets its own slug and dbKey rather than + // joining the minimax-m27 group. 
+ dbKeys: ['minimaxm3'], + label: 'MiniMax M3 428B', + }, { slug: 'minimax-m27', displayName: 'MiniMax-M2.5', diff --git a/packages/app/src/lib/compare-ssr.ts b/packages/app/src/lib/compare-ssr.ts index e6e87dd7..1a2ce660 100644 --- a/packages/app/src/lib/compare-ssr.ts +++ b/packages/app/src/lib/compare-ssr.ts @@ -68,6 +68,7 @@ export const KNOWN_MODELS = new Set([ 'Qwen-3.5-397B-A17B', 'Kimi-K2.5', 'MiniMax-M2.5', + 'MiniMax-M3', 'GLM-5', 'DeepSeek-V4-Pro', ]); diff --git a/packages/app/src/lib/model-architectures.test.ts b/packages/app/src/lib/model-architectures.test.ts index e55ba84e..13162f91 100644 --- a/packages/app/src/lib/model-architectures.test.ts +++ b/packages/app/src/lib/model-architectures.test.ts @@ -25,6 +25,7 @@ describe('MODEL_ARCHITECTURES', () => { Model.GptOss, Model.Kimi_K2_5, Model.MiniMax_M2_5, + Model.MiniMax_M3, ]; for (const model of models) { @@ -276,6 +277,29 @@ describe('getModelArchitecture', () => { expect(arch?.denseFFNDim).toBeUndefined(); }); + it('returns architecture for MiniMax M3 with MoE, sparse attention, and shared expert', () => { + const arch = getModelArchitecture(Model.MiniMax_M3); + expect(arch).toBeDefined(); + expect(arch?.totalParams).toBe(428); + expect(arch?.activeParams).toBe(23); + expect(arch?.architectureType).toBe('moe'); + expect(arch?.attentionType).toBe('GQA'); + expect(arch?.attentionExpandable).toBe(false); + expect(arch?.numLayers).toBe(60); + expect(arch?.hiddenSize).toBe(6144); + expect(arch?.numHeads).toBe(64); + expect(arch?.numKVHeads).toBe(4); + expect(arch?.headDim).toBe(128); + expect(arch?.ffnDim).toBe(3072); + expect(arch?.numExperts).toBe(129); + expect(arch?.activeExperts).toBe(4); + expect(arch?.hasSharedExpert).toBe(true); + expect(arch?.contextWindow).toBe(1048576); + expect(arch?.vocabSize).toBe(200064); + expect(arch?.developer).toBe('MiniMax'); + expect(arch?.sourceUrl).toBe('https://huggingface.co/MiniMaxAI/MiniMax-M3'); + }); + it('returns architecture for gpt-oss 120B with MoE, 
alternating attention, and sink tokens', () => { const arch = getModelArchitecture(Model.GptOss); expect(arch).toBeDefined(); diff --git a/packages/app/src/lib/model-architectures.ts b/packages/app/src/lib/model-architectures.ts index 83e02044..8dea0a65 100644 --- a/packages/app/src/lib/model-architectures.ts +++ b/packages/app/src/lib/model-architectures.ts @@ -113,6 +113,7 @@ export interface ModelArchitecture { * - https://huggingface.co/moonshotai/Kimi-K2.5/blob/main/config.json * - https://huggingface.co/openai/gpt-oss-120b/blob/main/config.json * - https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/config.json + * - https://huggingface.co/MiniMaxAI/MiniMax-M3/blob/main/config.json */ export const MODEL_ARCHITECTURES: Partial> = { [Model.DeepSeek_R1]: { @@ -336,6 +337,39 @@ export const MODEL_ARCHITECTURES: Partial> = { developer: 'MiniMax', sourceUrl: 'https://huggingface.co/MiniMaxAI/MiniMax-M2', }, + [Model.MiniMax_M3]: { + model: Model.MiniMax_M3, + totalParams: 428, + activeParams: 23, + architectureType: 'moe', + // MiniMax Sparse Attention (MSA) is built on a GQA projection layout + // (64 Q / 4 KV heads) with sparse KV selection layered on top. Render it as + // a static attention block, not the standard GQA Q/K/V drill-down — same + // treatment as the M2.5 entry. 
+ attentionType: 'GQA', + attentionExpandable: false, + numLayers: 60, + hiddenSize: 6144, + numHeads: 64, + numKVHeads: 4, + headDim: 128, + vocabSize: 200064, + ffnDim: 3072, // moe_intermediate_size (per-expert FFN) + numExperts: 129, // 128 routed + 1 shared + activeExperts: 4, + hasSharedExpert: true, + contextWindow: 1048576, // 1M + features: [ + 'MiniMax Sparse Attention (MSA)', + 'GQA with QK Norm', + 'Partial RoPE (rotary factor 0.5)', + 'SwiGLU FFN', + 'Native Multimodality (text/image/video)', + 'MoE (128 routed + 1 shared experts, 4 active)', + ], + developer: 'MiniMax', + sourceUrl: 'https://huggingface.co/MiniMaxAI/MiniMax-M3', + }, }; /** diff --git a/packages/db/src/etl/normalizers.ts b/packages/db/src/etl/normalizers.ts index afa51b92..1d6a95c1 100644 --- a/packages/db/src/etl/normalizers.ts +++ b/packages/db/src/etl/normalizers.ts @@ -92,6 +92,8 @@ export const MODEL_TO_KEY: Record = { 'moonshotai/Kimi-K2.7-Code': 'kimik2.7-code', // MiniMax-M2.5 'MiniMaxAI/MiniMax-M2.5': 'minimaxm2.5', + // MiniMax-M3 (428B, distinct architecture from the M2 series) + 'MiniMaxAI/MiniMax-M3': 'minimaxm3', // GLM-5 'zai-org/GLM-5-FP8': 'glm5', 'amd/GLM-5.1-MXFP4': 'glm5.1',