Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions docs/adding-entities.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,25 @@ Present what you inferred and get confirmation + category in a single step. Incl

Everything else (`MODEL_OPTIONS`, `DEFAULT_MODELS`, `EXPERIMENTAL_MODELS`, `DEPRECATED_MODELS`, `MODEL_PREFIX_MAPPING`, `getModelLabel()`) is derived automatically.

**`packages/app/src/lib/compare-slug.ts`** (easy to miss — the /compare and /compare-per-dollar pages do NOT derive from `MODEL_CONFIG`):

- `COMPARE_MODEL_SLUGS` — add an entry with `{ slug, displayName, dbKeys, label }`. `displayName` must match the `Model` enum value; `dbKeys` lists the DB buckets to query. Place it per the ordering comment (Chinese-lab flagships first, newer family member leads). Without this entry the model is absent from /compare, /compare-per-dollar, the sitemap, and their OG images.
- `COMPARE_MODEL_ALIASES` — add an entry only if a family-level or older-version slug should 308-redirect to the new entry.

**`packages/app/src/lib/compare-ssr.ts`**:

- `KNOWN_MODELS` — add the display name so `?g_model=` URL overrides validate on compare pages.

**`packages/app/src/app/compare/page.tsx`** and **`packages/app/src/app/compare-per-dollar/page.tsx`**:

- `DESCRIPTION` — these SEO meta strings hardcode a sample model list ("…, Qwen 3.5 397B-A17B, and more"). Add the new model if it should appear in the catalog blurb.

**`packages/app/src/lib/model-architectures.ts`** (optional — powers the per-model architecture diagram on the inference tab):

- `MODEL_ARCHITECTURES` — add a `[Model.X]` entry with verified config.json values. Omitted models simply render no diagram (`getModelArchitecture` returns `undefined`), so this is non-blocking but expected for parity with other models.

`/about` needs no change — its model list derives from `DB_MODEL_TO_DISPLAY` and includes the new key automatically once `models.ts` is updated.

---

## Featuring a Day-0 Model
Expand Down
37 changes: 37 additions & 0 deletions packages/app/cypress/e2e/model-architecture.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,43 @@ describe('Model Architecture Diagram', () => {
});
});

describe('Collapsible Transformer Blocks (MoE model - MiniMax M3)', () => {
  // Selectors shared by the setup hook and the specs below.
  const TOGGLE = '[data-testid="model-architecture-toggle"]';
  const DIAGRAM = '[data-testid="model-architecture-svg"]';

  /**
   * One-time setup: pick MiniMax M3 from the first visible combobox and make
   * sure the architecture diagram is showing before any spec runs.
   */
  before(() => {
    // A prior Select interaction can leave a stale Radix scroll lock on <body>;
    // drop it so the clicks below are not blocked.
    cy.document().then(({ body }) => {
      delete body.dataset.scrollLocked;
      body.style.removeProperty('pointer-events');
    });

    cy.get('[role="combobox"]').filter(':visible').first().click();
    cy.get('[role="option"]').contains('MiniMax M3').click();

    cy.get(TOGGLE).should('be.visible');
    // Click the toggle only when the SVG is not already visible, so a diagram
    // that is already showing is not toggled again.
    cy.get('body').then(($body) => {
      const diagramShown = $body.find(`${DIAGRAM}:visible`).length > 0;
      if (!diagramShown) {
        cy.get(TOGGLE).click();
      }
    });
    cy.get(DIAGRAM).should('be.visible');
  });

  it('shows MoE and GQA badges for MiniMax M3', () => {
    // Each badge is asserted with its own query of the toggle element.
    for (const badge of ['MoE', 'GQA', '428B']) {
      cy.get(TOGGLE).should('contain.text', badge);
    }
  });

  it('GQA attention is NOT expandable (sparse attention rendered as a static block)', () => {
    // Forced click: expands the transformer block regardless of actionability checks.
    cy.get('[data-testid="expand-transformer"]').click({ force: true });

    // No attention drill-down control is rendered, but the experts control is.
    cy.get('[data-testid="expand-attention"]').should('not.exist');
    cy.get('[data-testid="expand-experts"]').should('exist');
  });

  it('shows MiniMax M3 sparse-attention features', () => {
    const featureLabels = ['MiniMax Sparse Attention (MSA)', 'GQA with QK Norm'];
    for (const label of featureLabels) {
      cy.contains(label).should('be.visible');
    }
  });
});

describe('Alternating Attention Blocks (MoE model - gpt-oss 120B)', () => {
before(() => {
// Clear any stale Radix scroll lock from prior Select interactions
Expand Down
2 changes: 1 addition & 1 deletion packages/app/src/app/compare-per-dollar/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import { bucketComparePairsByVendor, formatModelList } from '@/lib/compare-ssr';
export const dynamic = 'force-dynamic';

const DESCRIPTION =
'GPU performance per dollar — head-to-head cost per million tokens across every model and hardware pair we benchmark. Performance normalized by owning-hyperscaler TCO for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, GLM 5/5.1, Qwen 3.5 397B-A17B, and more. Pick the cheapest SKU for your workload.';
'GPU performance per dollar — head-to-head cost per million tokens across every model and hardware pair we benchmark. Performance normalized by owning-hyperscaler TCO for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, MiniMax M3 428B, GLM 5/5.1, Qwen 3.5 397B-A17B, and more. Pick the cheapest SKU for your workload.';

export const metadata: Metadata = {
title: 'GPU Performance per Dollar',
Expand Down
2 changes: 1 addition & 1 deletion packages/app/src/app/compare/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { bucketComparePairsByVendor, formatModelList } from '@/lib/compare-ssr';
export const dynamic = 'force-dynamic';

const DESCRIPTION =
'Browse head-to-head GPU inference benchmark comparisons across every model and hardware pair we test. Latency, throughput, and cost for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, GLM 5/5.1, Qwen 3.5 397B-A17B, and more.';
'Browse head-to-head GPU inference benchmark comparisons across every model and hardware pair we test. Latency, throughput, and cost for DeepSeek V4 Pro 1.6T, DeepSeek R1, Kimi K2.5/K2.6/K2.7-Code 1T, MiniMax M3 428B, GLM 5/5.1, Qwen 3.5 397B-A17B, and more.';

export const metadata: Metadata = {
title: 'GPU Comparisons',
Expand Down
14 changes: 14 additions & 0 deletions packages/app/src/lib/compare-slug.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ describe('parseCompareSlug — new model-prefixed form', () => {
expect(parsed?.b).toBe('gb200');
});

it('parses the minimax-m3 slug as its own model, distinct from minimax-m27', () => {
  const result = parseCompareSlug('minimax-m3-h100-vs-h200');

  // Model half of the slug: resolves to the dedicated M3 entry, not via an alias.
  expect(result?.model.slug).toBe('minimax-m3');
  expect(result?.model.dbKeys).toEqual(['minimaxm3']);
  expect(result?.isAliasModel).toBe(false);

  // GPU half of the slug: both sides come back in the order they were written.
  expect(result?.a).toBe('h100');
  expect(result?.b).toBe('h200');
});

it('preserves non-canonical GPU order so caller can redirect', () => {
const parsed = parseCompareSlug('kimi-k26-h200-vs-h100');
expect(parsed?.a).toBe('h200');
Expand Down Expand Up @@ -259,6 +268,11 @@ describe('getCompareModelBySlug', () => {
expect(getCompareModelBySlug('glm-5')).toBe(GLM_51);
});

it('keeps the bare minimax alias on the M2 series, with minimax-m3 canonical', () => {
  // The family-level slug resolves to the minimax-m27 entry.
  const bareAlias = getCompareModelBySlug('minimax');
  expect(bareAlias?.slug).toBe('minimax-m27');

  // The explicit M3 slug resolves to its own entry.
  const m3Entry = getCompareModelBySlug('minimax-m3');
  expect(m3Entry?.slug).toBe('minimax-m3');
});

it('returns null for unknown slugs', () => {
expect(getCompareModelBySlug('nonexistent')).toBeNull();
});
Expand Down
19 changes: 15 additions & 4 deletions packages/app/src/lib/compare-slug.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ export interface CompareModelSlug {
}

// Order matches the master /compare and /compare-per-dollar index display:
// DeepSeek V4 Pro → R1 → Kimi → GLM → MiniMax → Qwen → gpt-oss → Llama 70B.
// Per product spec — flagship Chinese-developed models first, smaller open
// US-developed models at the bottom. Qwen sits between MiniMax and gpt-oss to
// keep the Chinese-lab cluster contiguous before the US transition.
// DeepSeek V4 Pro → R1 → Kimi → GLM → MiniMax M3 → MiniMax M2 → Qwen →
// gpt-oss → Llama 70B. Per product spec — flagship Chinese-developed models
// first, smaller open US-developed models at the bottom. Qwen sits between
// MiniMax and gpt-oss to keep the Chinese-lab cluster contiguous before the
// US transition. The two MiniMax entries stay adjacent with the newer M3
// flagship leading the older M2 series.
export const COMPARE_MODEL_SLUGS: CompareModelSlug[] = [
{
slug: 'deepseek-v4',
Expand Down Expand Up @@ -69,6 +71,15 @@ export const COMPARE_MODEL_SLUGS: CompareModelSlug[] = [
dbKeys: ['glm5.1', 'glm5'],
label: 'GLM 5/5.1',
},
{
slug: 'minimax-m3',
displayName: 'MiniMax-M3',
// M3 is a new 428B architecture (MiniMax Sparse Attention), not a point
// release of the M2 series, so it gets its own slug and dbKey rather than
// joining the minimax-m27 group.
dbKeys: ['minimaxm3'],
label: 'MiniMax M3 428B',
},
{
slug: 'minimax-m27',
displayName: 'MiniMax-M2.5',
Expand Down
1 change: 1 addition & 0 deletions packages/app/src/lib/compare-ssr.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ export const KNOWN_MODELS = new Set([
'Qwen-3.5-397B-A17B',
'Kimi-K2.5',
'MiniMax-M2.5',
'MiniMax-M3',
'GLM-5',
'DeepSeek-V4-Pro',
]);
Expand Down
24 changes: 24 additions & 0 deletions packages/app/src/lib/model-architectures.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ describe('MODEL_ARCHITECTURES', () => {
Model.GptOss,
Model.Kimi_K2_5,
Model.MiniMax_M2_5,
Model.MiniMax_M3,
];

for (const model of models) {
Expand Down Expand Up @@ -276,6 +277,29 @@ describe('getModelArchitecture', () => {
expect(arch?.denseFFNDim).toBeUndefined();
});

it('returns architecture for MiniMax M3 with MoE, sparse attention, and shared expert', () => {
  const m3 = getModelArchitecture(Model.MiniMax_M3);
  expect(m3).toBeDefined();

  // Provenance
  expect(m3?.developer).toBe('MiniMax');
  expect(m3?.sourceUrl).toBe('https://huggingface.co/MiniMaxAI/MiniMax-M3');

  // Overall size and shape
  expect(m3?.architectureType).toBe('moe');
  expect(m3?.totalParams).toBe(428);
  expect(m3?.activeParams).toBe(23);
  expect(m3?.numLayers).toBe(60);
  expect(m3?.hiddenSize).toBe(6144);
  expect(m3?.vocabSize).toBe(200064);
  expect(m3?.contextWindow).toBe(1048576);

  // Attention: GQA head layout, flagged as non-expandable
  expect(m3?.attentionType).toBe('GQA');
  expect(m3?.attentionExpandable).toBe(false);
  expect(m3?.numHeads).toBe(64);
  expect(m3?.numKVHeads).toBe(4);
  expect(m3?.headDim).toBe(128);

  // Mixture-of-experts FFN
  expect(m3?.ffnDim).toBe(3072);
  expect(m3?.numExperts).toBe(129);
  expect(m3?.activeExperts).toBe(4);
  expect(m3?.hasSharedExpert).toBe(true);
});

it('returns architecture for gpt-oss 120B with MoE, alternating attention, and sink tokens', () => {
const arch = getModelArchitecture(Model.GptOss);
expect(arch).toBeDefined();
Expand Down
34 changes: 34 additions & 0 deletions packages/app/src/lib/model-architectures.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ export interface ModelArchitecture {
* - https://huggingface.co/moonshotai/Kimi-K2.5/blob/main/config.json
* - https://huggingface.co/openai/gpt-oss-120b/blob/main/config.json
* - https://huggingface.co/MiniMaxAI/MiniMax-M2/blob/main/config.json
* - https://huggingface.co/MiniMaxAI/MiniMax-M3/blob/main/config.json
*/
export const MODEL_ARCHITECTURES: Partial<Record<Model, ModelArchitecture>> = {
[Model.DeepSeek_R1]: {
Expand Down Expand Up @@ -336,6 +337,39 @@ export const MODEL_ARCHITECTURES: Partial<Record<Model, ModelArchitecture>> = {
developer: 'MiniMax',
sourceUrl: 'https://huggingface.co/MiniMaxAI/MiniMax-M2',
},
[Model.MiniMax_M3]: {
model: Model.MiniMax_M3,
totalParams: 428,
activeParams: 23,
architectureType: 'moe',
// MiniMax Sparse Attention (MSA) is built on a GQA projection layout
// (64 Q / 4 KV heads) with sparse KV selection layered on top. Render it as
// a static attention block, not the standard GQA Q/K/V drill-down — same
// treatment as the M2.5 entry.
attentionType: 'GQA',
attentionExpandable: false,
numLayers: 60,
hiddenSize: 6144,
numHeads: 64,
numKVHeads: 4,
headDim: 128,
vocabSize: 200064,
ffnDim: 3072, // moe_intermediate_size (per-expert FFN)
numExperts: 129, // 128 routed + 1 shared
activeExperts: 4,
hasSharedExpert: true,
contextWindow: 1048576, // 1M
features: [
'MiniMax Sparse Attention (MSA)',
'GQA with QK Norm',
'Partial RoPE (rotary factor 0.5)',
'SwiGLU FFN',
'Native Multimodality (text/image/video)',
'MoE (128 routed + 1 shared experts, 4 active)',
],
developer: 'MiniMax',
sourceUrl: 'https://huggingface.co/MiniMaxAI/MiniMax-M3',
},
};

/**
Expand Down
2 changes: 2 additions & 0 deletions packages/db/src/etl/normalizers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ export const MODEL_TO_KEY: Record<string, string> = {
'moonshotai/Kimi-K2.7-Code': 'kimik2.7-code',
// MiniMax-M2.5
'MiniMaxAI/MiniMax-M2.5': 'minimaxm2.5',
// MiniMax-M3 (428B, distinct architecture from the M2 series)
'MiniMaxAI/MiniMax-M3': 'minimaxm3',
// GLM-5
'zai-org/GLM-5-FP8': 'glm5',
'amd/GLM-5.1-MXFP4': 'glm5.1',
Expand Down
Loading