diff --git a/leaderboard/generate.py b/leaderboard/generate.py
index ead41d62..49d5838b 100644
--- a/leaderboard/generate.py
+++ b/leaderboard/generate.py
@@ -19,7 +19,7 @@
 _pricing_cache: dict = {}
 _pricing_path = Path("schema/cloud_pricing.json")
 if _pricing_path.exists():
-    with open(_pricing_path) as _f:
+    with open(_pricing_path, encoding='utf-8') as _f:
         _pricing_cache = json.load(_f)
 
 RESULTS_DIR = Path("results")
@@ -45,26 +45,14 @@ def _get_suite_precision_required(suite_id: str) -> str:
     """Read precision_required from suite.json. Returns 'BF16' if not found."""
     path = Path("suites") / suite_id / "suite.json"
     try:
-        with open(path) as f:
+        with open(path, encoding='utf-8') as f:
             return json.load(f).get("precision_required", "BF16")
     except Exception:
         return "BF16"
 
 
 def _collect_suite_specs() -> dict:
-    """Collect UI-relevant per-suite spec from suites/suite_*/suite.json.
-
-    Baked into the generated leaderboard.js as ``window.SUITE_SPECS`` so
-    the static leaderboard UI auto-syncs whenever a maintainer edits a
-    suite contract — model id, dataset, prompt distribution, scenarios
-    default/extra split, online SLA, etc.  Editorial UI content (titles,
-    taglines, descriptions) stays in assets/js/data.js since it isn't a
-    property of the suite contract.
-
-    Returns a ``{ suite_id: spec }`` mapping with only the fields the UI
-    consumes.  Missing fields are omitted (the JS-side merge keeps the
-    hardcoded fallback when a key is absent).
-    """
+    """Collect UI-relevant per-suite spec from suites/suite_*/suite.json."""
     out: dict = {}
     suites_dir = Path("suites")
     if not suites_dir.exists():
@@ -76,7 +64,7 @@ def _collect_suite_specs() -> dict:
         if not sf.exists():
             continue
         try:
-            with open(sf) as f:
+            with open(sf, encoding='utf-8') as f:
                 data = json.load(f)
         except Exception:
             continue
@@ -84,7 +72,6 @@ def _collect_suite_specs() -> dict:
         rd = data.get("request_distribution") or {}
         scn = data.get("scenarios") or {}
         spec: dict = {}
-        # Fields the UI displays in suite cards / specs / compare headers.
         for k in (
             "model_id",
             "model_revision",
@@ -110,6 +97,95 @@ def _collect_suite_specs() -> dict:
     return out
 
 
+# ── Scenario metric extraction ────────────────────────────────────────────────
+
+def _best_throughput_row(section):
+    """Return the highest-throughput non-OOM row of *section*, or None."""
+    if not section:
+        return None
+    # Prefer the concurrency sweep; fall back to the batch-size sweep.
+    rows = section.get("results_by_concurrency") or section.get("results_by_batch_size") or []
+    # Rows flagged OOM or without a truthy throughput are not candidates.
+    valid = [r for r in rows if not r.get("oom") and r.get("throughput_tokens_per_sec")]
+    return max(valid, key=lambda r: r["throughput_tokens_per_sec"]) if valid else None
+
+
+# Scenarios scored by their best sweep row: name -> (metrics key, metric label).
+# "interactive" uses the same inference path as offline, so it reuses that sweep.
+_SWEEP_SCENARIOS = {
+    "offline": ("offline", "tokens/sec"),
+    "interactive": ("offline", "tokens/sec"),
+    "speculative": ("speculative", "tok/s (speculative)"),
+}
+
+
+def _extract_scenario_metric(result: dict, scenario_name: str) -> dict:
+    """Extract the headline metric for a single scenario from a result.
+
+    Args:
+        result: Parsed result dict; only ``result["metrics"]`` is read.
+        scenario_name: "offline", "online", "interactive", "sustained",
+            "speculative" or "burst". Any other name yields the defaults.
+
+    Returns:
+        A dict with keys:
+            throughput, metric_label, concurrency, peak_memory_gb, is_valid
+        ``is_valid`` is True only when the scenario's metric was found;
+        otherwise the defaults (None / "" / False) are returned. For
+        "online" ``throughput`` holds max valid QPS, and for "burst" it
+        holds ``1 - burst_degradation_ratio``.
+    """
+    metrics = result.get("metrics") or {}
+    out = {
+        "throughput": None,
+        "metric_label": "",
+        "concurrency": None,
+        "peak_memory_gb": None,
+        "is_valid": False,
+    }
+
+    if scenario_name in _SWEEP_SCENARIOS:
+        metrics_key, label = _SWEEP_SCENARIOS[scenario_name]
+        best = _best_throughput_row(metrics.get(metrics_key))
+        if best is not None:
+            out["throughput"] = best["throughput_tokens_per_sec"]
+            out["metric_label"] = label
+            out["concurrency"] = best.get("client_concurrency") or best.get("concurrency")
+            out["peak_memory_gb"] = best.get("peak_memory_gb")
+            out["is_valid"] = True
+
+    elif scenario_name == "online":
+        online = metrics.get("online")
+        if online:
+            qps = online.get("max_valid_qps")
+            if qps is not None:
+                out["throughput"] = qps
+                out["metric_label"] = "max valid QPS"
+                out["is_valid"] = True
+
+    elif scenario_name == "sustained":
+        sustained = metrics.get("sustained")
+        if sustained:
+            thr = sustained.get("sustained_throughput_tokens_per_sec")
+            if thr is not None:
+                out["throughput"] = thr
+                out["metric_label"] = "tok/s (sustained mean)"
+                out["concurrency"] = sustained.get("sustained_concurrency")
+                out["is_valid"] = True
+
+    elif scenario_name == "burst":
+        burst = metrics.get("burst")
+        if burst:
+            ratio = burst.get("burst_degradation_ratio")
+            if ratio is not None:
+                # Invert: higher = better, same polarity as throughput
+                out["throughput"] = round(1.0 - ratio, 4) if ratio <= 1.0 else 0.0
+                out["metric_label"] = "1 − degradation_ratio"
+                out["is_valid"] = True
+
+    return out
+
+
 # ── Data loading ──────────────────────────────────────────────────────────────
 
 def load_results() -> list[dict]:
@@ -125,7 +201,7 @@ def load_results() -> list[dict]:
             if not result_path.exists():
                 continue
             try:
-                with open(result_path) as f:
+                with open(result_path, encoding='utf-8') as f:
                     data = json.load(f)
                 data["_tier"]            = tier
                 data["_submission_name"] = submission_dir.name
@@ -133,11 +209,10 @@ def load_results() -> list[dict]:
                     "scenarios_run"   in data.get("task", {}) or
                     "chip_counts_run" in data.get("task", {})
                 )
-                # Load env_info.json alongside result.json (optional, best-effort)
                 env_path = submission_dir / "env_info.json"
                 if env_path.exists():
                     try:
-                        with open(env_path) as ef:
+                        with open(env_path, encoding='utf-8') as ef:
                             data["_env_info"] = json.load(ef)
                     except Exception as ee:
                         print(f"Warning: could not load {env_path}: {ee}")
@@ -163,14 +238,12 @@ def extract_detail(result: dict) -> dict:
     parallelism = task.get("parallelism") or {}
     env         = result.get("_env_info") or {}
 
-    # CPU string
     cpu_info = env.get("cpu", {})
     cpu_str  = None
     if cpu_info.get("model"):
         cores   = cpu_info.get("physical_cores")
         cpu_str = cpu_info["model"] + (f", {cores} cores" if cores else "")
 
-    # NIC string
     nics    = env.get("network_interfaces", [])
     nic_str = None
     if nics:
@@ -180,7 +253,6 @@ def extract_detail(result: dict) -> dict:
         names_str = ", ".join(nic_names) if nic_names else ""
         nic_str   = f"{len(nics)}x {type_str}" + (f" ({names_str})" if names_str else "")
 
-    # Intra-node interconnect: prefer result.json, fall back to topology parse
     intra = chip.get("interconnect_intra_node")
     if not intra and env.get("accelerator_topology"):
         nv_matches = re.findall(r'NV(\d+)', env["accelerator_topology"])
@@ -188,7 +260,6 @@ def extract_detail(result: dict) -> dict:
             intra = f"NVLink {max(int(x) for x in nv_matches)} (full mesh)"
 
     return {
-        # Hardware
         "hw_chip":               chip.get("name"),
         "hw_vendor":             chip.get("vendor"),
         "hw_count":              chip.get("count"),
@@ -199,7 +270,6 @@ def extract_detail(result: dict) -> dict:
         "hw_system_memory_gb":   env.get("system_memory_gb"),
         "hw_pcie":               env.get("pcie_generation"),
         "hw_network":            nic_str,
-        # Software
         "sw_framework":         software.get("framework"),
         "sw_framework_version": software.get("framework_version"),
         "sw_driver":            software.get("driver_version"),
@@ -207,31 +277,27 @@ def extract_detail(result: dict) -> dict:
         "sw_os":                software.get("os"),
         "sw_python":            software.get("python_version"),
         "sw_pytorch":           env.get("pytorch_version"),
-        # Model
         "model_id":              model.get("model_id"),
         "model_revision":        model.get("model_revision"),
-        "model_name":            model.get("model_name"),        # ← new
-        "model_note":            model.get("model_note"),        # ← new
-        "model_source":          model.get("model_source"),      # ← new
+        "model_name":            model.get("model_name"),
+        "model_note":            model.get("model_note"),
+        "model_source":          model.get("model_source"),
         "model_arch":            model.get("architecture"),
         "model_params_b":        model.get("parameter_count_b"),
         "model_precision":       model.get("precision"),
         "model_effective_dtype": model.get("effective_dtype"),
         "model_quant_method":    model.get("quantization_method"),
         "model_format":          model.get("model_format"),
-        # Run settings
         "run_scenarios":   task.get("scenarios_run"),
         "run_chip_counts": task.get("chip_counts_run"),
         "run_num_runs":    task.get("num_runs"),
         "run_tp":          parallelism.get("tensor_parallel_size"),
         "run_pp":          parallelism.get("pipeline_parallel_size"),
         "run_dp":          parallelism.get("data_parallel_size"),
-        # Accuracy
         "acc_score":          accuracy.get("subset_score"),
         "acc_baseline_delta": accuracy.get("baseline_delta"),
         "acc_valid":          accuracy.get("valid"),
         "acc_notes":          accuracy.get("notes"),
-        # Metadata
         "meta_submitted_by":     meta.get("submitted_by"),
         "meta_submission_type":  meta.get("submission_type"),
         "meta_date":             meta.get("date"),
@@ -240,10 +306,6 @@ def extract_detail(result: dict) -> dict:
         "meta_model_load_sec":   meta.get("model_load_seconds"),
         "meta_start_time":       meta.get("benchmark_start_time"),
         "meta_notes":            meta.get("notes"),
-        # Vendor-specific environment fields collected by platforms/<vendor>.py
-        # (e.g. ROCm-SMI link health, NVML clock telemetry). The modal flattens
-        # this dict and shows only non-null entries — different vendors record
-        # different keys by design and no UI tries to unify them.
         "env_vendor_details":    env.get("vendor_details") or {},
     }
 
@@ -251,11 +313,6 @@ def extract_detail(result: dict) -> dict:
 # ── Implementation extraction (modal impl tab) ───────────────────────────────
 
 def extract_impl(result: dict) -> dict | None:
-    """
-    Load runner meta.json for the implementation_id referenced in result.json.
-    Returns None if implementation_id is absent or the runner folder is not found.
-    Fields returned match meta.json schema plus a GitHub link.
-    """
     impl_id = result.get("implementation_id")
     if not impl_id:
         return None
@@ -265,7 +322,8 @@ def extract_impl(result: dict) -> dict | None:
         return None
 
     try:
-        meta = json.loads(meta_path.read_text())
+        with open(meta_path, encoding='utf-8') as f:
+            meta = json.load(f)
     except Exception:
         return None
 
@@ -285,7 +343,7 @@ def extract_impl(result: dict) -> dict | None:
     }
 
 
-# ── Visualization data extraction (modal viz tab) ─────────────────────────────
+# ── Visualization data extraction ─────────────────────────────────────────────
 
 def extract_viz(result: dict, metrics: dict) -> dict:
     """Chart-ready data for the per-suite visualization panel."""
@@ -304,9 +362,6 @@ def _concurrency_labels(rows):
     def _online_block():
         online   = metrics.get("online", {})
         qps_rows = online.get("results_by_qps", [])
-        # Per-QPS reliability blocks. Emitted as a parallel array so the
-        # frontend can render a badge next to each QPS row without joining
-        # by index from a separate object.
         return {
             "labels":        [str(r.get("target_qps", "")) for r in qps_rows],
             "ttft_p50":      [r.get("ttft_ms_p50") for r in qps_rows],
@@ -388,8 +443,6 @@ def _speculative_block():
             "mean_accepted_tokens": rm.get("mean_accepted_tokens"),
         }
 
-    # Per-concurrency-level offline reliability blocks. Parallel array to
-    # `throughput` and `memory_gb` so the frontend can join by row index.
     def _offline_reliability(rows):
         return [r.get("throughput_tokens_per_sec_reliability") or {} for r in rows]
 
@@ -463,7 +516,6 @@ def _offline_reliability(rows):
             None
         )
 
-        # ── Online cross-format data ──────────────────────────────────────
         online_by_precision = None
         q_online = metrics.get("quantization_online", {})
         if q_online:
@@ -479,7 +531,6 @@ def _offline_reliability(rows):
                     "sla_met":       [r.get("sla_met") for r in qps_rows],
                 })
 
-        # ── Sustained cross-format data ───────────────────────────────────
         sustained_by_precision = None
         q_sus = metrics.get("quantization_sustained", {})
         if q_sus:
@@ -601,7 +652,6 @@ def extract_row(result: dict) -> dict:
     is_suite_level = result.get("_is_suite_level", False)
     suite_id       = result.get("suite_id", "")
 
-    # ── Offline ───────────────────────────────────────────────────────────────
     offline_throughput      = None
     tokens_per_sec_per_chip = None
     peak_memory_gb          = None
@@ -620,15 +670,12 @@ def extract_row(result: dict) -> dict:
                 valid_mem, key=lambda r: r.get("throughput_tokens_per_sec", 0)
             ).get("peak_memory_gb")
 
-    # ── Online ────────────────────────────────────────────────────────────────
     online         = metrics.get("online")
     online_max_qps = online.get("max_valid_qps") if online else None
 
-    # ── Interactive ───────────────────────────────────────────────────────────
     interactive          = metrics.get("interactive")
     interactive_ttft_p99 = interactive.get("ttft_ms_p99") if interactive else None
 
-    # ── Sustained ─────────────────────────────────────────────────────────────
     sustained_throughput   = None
     throttle_ratio         = None
     throttle_onset_minute  = None
@@ -643,7 +690,6 @@ def extract_row(result: dict) -> dict:
         ttft_p99_drift_ms     = sustained.get("ttft_p99_drift_ms")
         sustained_concurrency = sustained.get("sustained_concurrency")
 
-    # ── Speculative ─────────────────────────────────────────────────────────
     speculative_throughput = None
     speculative_speedup    = None
     speculative_acceptance = None
@@ -659,7 +705,6 @@ def extract_row(result: dict) -> dict:
         if speculative_throughput and offline_throughput and offline_throughput > 0:
             speculative_speedup = round(speculative_throughput / offline_throughput, 3)
 
-    # ── Burst ────────────────────────────────────────────────────────────────
     burst_degradation      = None
     burst_steady_p99       = None
     burst_p99              = None
@@ -672,7 +717,6 @@ def extract_row(result: dict) -> dict:
         burst_p99         = burst.get("burst_ttft_p99_ms")
         burst_sla_met     = burst.get("sla_met_during_burst")
 
-    # ── Primary metric ────────────────────────────────────────────────────────
     scenario = task.get("scenario", "offline")
     if is_suite_level and suite_id not in ("suite_E", "suite_C", "suite_F"):
         primary_metric       = offline_throughput
@@ -700,7 +744,6 @@ def extract_row(result: dict) -> dict:
         primary_metric       = None
         primary_metric_label = None
 
-    # ── Suite E scaling ───────────────────────────────────────────────────────
     scaling_efficiency_2x  = None
     scaling_efficiency_4x  = None
     scaling_base_throughput = None
@@ -726,13 +769,12 @@ def extract_row(result: dict) -> dict:
             primary_metric       = scaling_base_throughput
             primary_metric_label = "tokens/sec (1x baseline)"
 
-    # ── Suite C quantization ──────────────────────────────────────────────────
     quant_bf16_throughput  = None
     quant_best_throughput  = None
     quant_best_precision   = None
-    quant_int8_speedup     = None   # W8A16 tier (best of W8A8/W8A16)
-    quant_int4_speedup     = None   # W4A16 tier
-    quant_quality_eff      = None   # best quality_efficiency across all formats
+    quant_int8_speedup     = None
+    quant_int4_speedup     = None
+    quant_quality_eff      = None
 
     quantization = metrics.get("quantization")
     if quantization:
@@ -746,23 +788,19 @@ def extract_row(result: dict) -> dict:
             if p == "BF16":
                 quant_bf16_throughput = thr
             elif p in ("W8A8", "W8A16"):
-                # Use W8A16 as "int8-tier" speedup if available, fall back to W8A8
                 if quant_int8_speedup is None or p == "W8A16":
                     quant_int8_speedup = spd
             elif p == "W4A16":
                 quant_int4_speedup = spd
 
-            # Track best throughput across all precision formats
             if thr and (quant_best_throughput is None or thr > quant_best_throughput):
                 quant_best_throughput = thr
                 quant_best_precision  = p
 
-            # Track best quality_efficiency across all formats
             if qe and (best_qe is None or qe > best_qe):
                 best_qe           = qe
                 quant_quality_eff = qe
 
-        # Primary metric for Suite C: best throughput across all precision formats
         if quant_best_throughput:
             primary_metric       = quant_best_throughput
             primary_metric_label = f"tokens/sec ({quant_best_precision})"
@@ -770,7 +808,6 @@ def extract_row(result: dict) -> dict:
             primary_metric       = quant_bf16_throughput
             primary_metric_label = "tokens/sec (BF16 baseline)"
 
-    # ── Efficiency ────────────────────────────────────────────────────────────
     memory_gb_per_chip     = chip.get("memory_gb", 0)
     memory_efficiency      = (
         round(offline_throughput / peak_memory_gb, 1)
@@ -790,7 +827,6 @@ def extract_row(result: dict) -> dict:
         if offline_throughput and min_price and min_price > 0 else None
     )
 
-    # ── Precision fallback detection ──────────────────────────────────────────
     precision           = model.get("precision", "BF16")
     effective_dtype     = model.get("effective_dtype")
     quantization_method = model.get("quantization_method")
@@ -799,7 +835,6 @@ def extract_row(result: dict) -> dict:
         precision.upper() != suite_required.upper()
         if precision and suite_required else False
     )
-    # Emulated flag: precision was requested but compute was in a different dtype
     precision_emulated = (
         effective_dtype is not None
         and effective_dtype.replace("torch.", "") != _precision_to_dtype(precision)
@@ -827,22 +862,18 @@ def extract_row(result: dict) -> dict:
         "architecture":  model.get("architecture"),
         "suite":              suite_id,
         "scenario":           "all" if is_suite_level else scenario,
-        # Primary
         "primary_metric":          primary_metric,
         "primary_metric_label":    primary_metric_label,
         "tokens_per_sec_per_chip": tokens_per_sec_per_chip,
-        # Scenario metrics
         "offline_throughput":   offline_throughput,
         "online_max_qps":       online_max_qps,
         "interactive_ttft_p99": interactive_ttft_p99,
-        # Efficiency
         "peak_memory_gb":                     peak_memory_gb,
         "memory_utilization_pct":             memory_utilization_pct,
         "memory_efficiency_toks_per_gb":      memory_efficiency,
         "min_price_usd_per_hr":               min_price,
         "cost_efficiency_toks_per_dollar_hr": cost_efficiency,
         "tokens_per_watt":                    derived.get("tokens_per_sec_per_watt"),
-        # Metadata
         "accuracy_valid":   accuracy.get("valid"),
         "accuracy_score":   accuracy.get("subset_score"),
         "date":             meta.get("date"),
@@ -852,37 +883,30 @@ def extract_row(result: dict) -> dict:
         "run_id":           meta.get("run_id"),
         "run_name":         meta.get("run_name"),
         "flagged":          meta.get("flagged"),
-        # Suite E
         "scaling_efficiency_2x":   scaling_efficiency_2x,
         "scaling_efficiency_4x":   scaling_efficiency_4x,
         "scaling_base_throughput": scaling_base_throughput,
-        # Suite C
         "quant_bf16_throughput":  quant_bf16_throughput,
         "quant_best_throughput":  quant_best_throughput,
         "quant_best_precision":   quant_best_precision,
         "quant_int8_speedup":     quant_int8_speedup,
         "quant_int4_speedup":     quant_int4_speedup,
         "quant_quality_eff":      quant_quality_eff,
-        # Sustained
         "sustained_throughput":    sustained_throughput,
         "throttle_ratio":          throttle_ratio,
         "throttle_onset_minute":   throttle_onset_minute,
         "ttft_p99_drift_ms":       ttft_p99_drift_ms,
         "sustained_concurrency":   sustained_concurrency,
-        # Speculative
         "speculative_throughput":   speculative_throughput,
         "speculative_speedup":     speculative_speedup,
         "speculative_acceptance":  speculative_acceptance,
-        # Burst
         "burst_degradation":       burst_degradation,
         "burst_steady_p99":        burst_steady_p99,
         "burst_p99":               burst_p99,
         "burst_sla_met":           burst_sla_met,
-        # Panel data
         "detail": extract_detail(result),
         "viz":    extract_viz(result, metrics),
         "impl":   extract_impl(result),
-        # Implementation ID (flat, for filtering/display without loading impl)
         "implementation_id": result.get("implementation_id"),
     }
 
@@ -890,20 +914,11 @@ def extract_row(result: dict) -> dict:
 # ── API generation ────────────────────────────────────────────────────────────
 
 def generate_api(results: list[dict], output_dir: Path) -> None:
-    """
-    Generate static JSON API for external tooling (OpenClaw Skill etc.).
-
-      api/rank.json   — per-submission ranking within chip+suite group
-      api/chips.json  — chip summary list (best offline throughput)
-      api/index.json  — chip lookup with per-suite best metrics
-      api/suites.json — suite metadata for discovery
-    """
+    """Generate static JSON API for external tooling (OpenClaw Skill etc.)."""
     api_dir = output_dir / "api"
     api_dir.mkdir(exist_ok=True)
 
-    # Group by chip+suite for fair per-suite ranking
     by_chip_suite: dict[tuple, list] = defaultdict(list)
-    # Also track chip-level best across all suites for chips.json
     by_chip: dict[str, list] = defaultdict(list)
 
     for r in results:
@@ -912,7 +927,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
         submission_name = r.get("_submission_name", "unknown")
         tier            = r.get("_tier", "community")
 
-        # Primary metric per result
         offline = r.get("metrics", {}).get("offline")
         best_thr = None
         if offline:
@@ -923,7 +937,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
             if valid:
                 best_thr = max(row["throughput_tokens_per_sec"] for row in valid)
 
-        # Suite E fallback
         if best_thr is None:
             scaling = r.get("metrics", {}).get("scaling", {})
             if scaling:
@@ -934,7 +947,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
                             best_thr = entry.get("best_throughput_tokens_per_sec")
                             break
 
-        # Suite C: use best quality_efficiency as primary
         if best_thr is None:
             quant = r.get("metrics", {}).get("quantization", {})
             if quant:
@@ -950,7 +962,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
         by_chip_suite[(chip_name, suite_id)].append((submission_name, best_thr, tier))
         by_chip[chip_name].append((submission_name, best_thr, suite_id, tier))
 
-    # ── rank.json ─────────────────────────────────────────────────────────────
     rank_data: dict[str, dict] = {}
     for (chip_name, suite_id), entries in by_chip_suite.items():
         sorted_entries = sorted(entries, key=lambda x: x[1], reverse=True)
@@ -970,7 +981,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
     with open(api_dir / "rank.json", "w") as f:
         json.dump(rank_data, f, indent=2)
 
-    # ── chips.json ────────────────────────────────────────────────────────────
     chips = []
     chip_bests: dict[str, float] = {}
     for chip_name, entries in by_chip.items():
@@ -987,8 +997,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
     with open(api_dir / "chips.json", "w") as f:
         json.dump(chips, f, indent=2)
 
-    # ── index.json ────────────────────────────────────────────────────────────
-    # Per-chip lookup with best metric per suite
     chip_index: dict[str, dict] = {}
     for chip_name in by_chip:
         chip_index[chip_name] = {
@@ -1010,7 +1018,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
 
         suite_entry = chip_index[chip_name]["suites"].setdefault(suite_id, {})
 
-        # Offline throughput
         offline = metrics.get("offline")
         if offline:
             rows  = offline.get("results_by_concurrency") or \
@@ -1023,7 +1030,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
                 if cur is None or thr > cur:
                     suite_entry["best_throughput_tokens_per_sec"] = round(thr, 1)
 
-        # Online
         if online:
             qps = online.get("max_valid_qps")
             if qps is not None:
@@ -1031,7 +1037,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
                 if cur is None or qps > cur:
                     suite_entry["best_online_max_qps"] = qps
 
-        # Interactive
         if iv:
             ttft = iv.get("ttft_ms_p99")
             if ttft is not None:
@@ -1039,7 +1044,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
                 if cur is None or ttft < cur:
                     suite_entry["best_interactive_ttft_p99_ms"] = round(ttft, 1)
 
-        # Scaling (Suite E)
         if scaling:
             base_thr = (
                 scaling.get("base_throughput_tokens_per_sec") or
@@ -1062,7 +1066,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
                 elif count == 4 and eff:
                     suite_entry["best_scaling_efficiency_4x"] = eff
 
-        # Sustained
         if sustained:
             s_thr    = sustained.get("sustained_throughput_tokens_per_sec")
             throttle = sustained.get("throttle_ratio")
@@ -1073,7 +1076,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
             if throttle is not None:
                 suite_entry["throttle_ratio"] = throttle
 
-        # Suite C quality efficiency
         quant = metrics.get("quantization")
         if quant:
             qes = [(e.get("precision"), e.get("quality_efficiency"))
@@ -1087,8 +1089,6 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
     with open(api_dir / "index.json", "w") as f:
         json.dump(chip_index, f, indent=2)
 
-    # ── suites.json ───────────────────────────────────────────────────────────
-    # Static metadata about each suite for discovery
     suites_meta = {}
     for suite_dir in sorted(Path("suites").iterdir()):
         if not suite_dir.is_dir():
@@ -1097,7 +1097,7 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
         if not suite_json.exists():
             continue
         try:
-            with open(suite_json) as f:
+            with open(suite_json, encoding='utf-8') as f:
                 s = json.load(f)
             suite_id = s.get("suite_id", suite_dir.name)
             scenarios_cfg = s.get("scenarios", {})
@@ -1129,6 +1129,367 @@ def generate_api(results: list[dict], output_dir: Path) -> None:
     print(f"  suites.json: {len(suites_meta)} suites documented")
 
 
+# ── Distribution data generation (new) ────────────────────────────────────────
+
+def generate_distribution_data(results: list[dict], output_dir: Path) -> None:
+    """Write distribution.js: per-submission records plus (chip, suite) stats.
+
+    Deduplicates ``results`` by run_id, flattens each into one record (Suite C:
+    one per precision) carrying all scenario metrics for front-end filtering,
+    aggregates them per (chip, suite), and writes it all under ``output_dir``.
+    """
+
+    # ── 1. Deduplicate by run_id, keeping the best result per run ─────────
+    _seen: dict = {}
+    for r in results:
+        meta = r.get("meta") or {}
+        # Results without a run_id (older submissions) are kept as-is: each
+        # gets a unique per-object key instead of being silently dropped.
+        rid = meta.get("run_id") or ("__no_run_id__", id(r))
+        suite_id = r.get("suite_id", "")
+        # Metric used to pick the best result among duplicates of one run_id.
+        if suite_id == "suite_C":
+            quant = (r.get("metrics") or {}).get("quantization", {})
+            qes = [e.get("quality_efficiency") for e in quant.get("results_by_precision", [])
+                   if e.get("quality_efficiency")]
+            metric = max(qes) if qes else 0
+        elif suite_id == "suite_E":
+            scaling = (r.get("metrics") or {}).get("scaling", {})
+            metric = 0
+            for e in scaling.get("results_by_chip_count", []):
+                if e.get("chip_count") == 4:
+                    metric = e.get("scaling_efficiency") or 0
+            if not metric:
+                for e in scaling.get("results_by_chip_count", []):
+                    if e.get("chip_count") == 2:
+                        metric = e.get("scaling_efficiency") or 0
+            if not metric:
+                metric = scaling.get("base_throughput_tokens_per_sec") or 0
+        else:
+            offline = (r.get("metrics") or {}).get("offline", {})
+            rows = offline.get("results_by_concurrency") or offline.get("results_by_batch_size") or []
+            valid_rows = [row for row in rows
+                          if not row.get("oom") and row.get("throughput_tokens_per_sec")]
+            metric = max((row["throughput_tokens_per_sec"] for row in valid_rows), default=0)
+        if rid not in _seen or metric > _seen[rid]["metric"]:
+            _seen[rid] = {"result": r, "metric": metric}
+    deduped = [entry["result"] for entry in _seen.values()]
+    print(f"  distribution: {len(results)} raw → {len(deduped)} deduplicated results")
+
+    # ── 2. Build the detailed record for each submission ──────────────────
+    all_submissions = []
+
+    for r in deduped:
+        chip_obj    = r.get("chip") or {}
+        chip        = chip_obj.get("name", "Unknown")
+        chip_vendor = chip_obj.get("vendor", "")
+        chip_count  = chip_obj.get("count", 1)
+        memory_gb   = chip_obj.get("memory_gb", 0)
+        suite       = r.get("suite_id", "")
+
+        model_obj       = r.get("model") or {}
+        model_full      = model_obj.get("model_id", "")
+        model_short     = model_full.split("/")[-1] if model_full else ""
+        model_params_b  = model_obj.get("parameter_count_b")
+        precision       = model_obj.get("precision", "BF16")
+        effective_dtype = model_obj.get("effective_dtype")
+
+        software          = r.get("software") or {}
+        framework         = software.get("framework", "")
+        framework_version = software.get("framework_version", "")
+
+        meta             = r.get("meta") or {}
+        submission_name  = r.get("_submission_name", "")
+        tier             = r.get("_tier", "community")
+        submitted_by     = meta.get("submitted_by", "")
+        date             = meta.get("date", "")
+        reproduce_script = meta.get("reproduce_script", "")
+        run_id           = meta.get("run_id", "")
+        impl_id          = r.get("implementation_id", "")
+
+        # Scenarios this submission ran (read from task.scenarios_run); when
+        # the list is empty (old format) the offline/online fallback applies.
+        task           = r.get("task") or {}
+        scenarios_run  = task.get("scenarios_run") or []
+
+        # Extract the metric of every scenario that was run.
+        scenarios = {}
+        for sc_name in scenarios_run:
+            scenarios[sc_name] = _extract_scenario_metric(r, sc_name)
+
+        # Empty scenarios_run (old format): fall back to offline/online metrics.
+        if not scenarios_run:
+            offline_metric = _extract_scenario_metric(r, "offline")
+            if offline_metric["is_valid"]:
+                scenarios["offline"] = offline_metric
+            online_metric = _extract_scenario_metric(r, "online")
+            if online_metric["is_valid"]:
+                scenarios["online"] = online_metric
+
+        # Suite E (scaling): primary throughput comes from metrics.scaling.
+        suite_primary_thr = None
+        suite_primary_label = None
+        suite_primary_scenario = "offline"
+
+        if suite == "suite_E":
+            scaling = (r.get("metrics") or {}).get("scaling", {})
+            for entry in scaling.get("results_by_chip_count", []):
+                if entry.get("chip_count") == 1:
+                    suite_primary_thr = entry.get("best_throughput_tokens_per_sec")
+                    suite_primary_label = "tokens/sec (1x baseline)"
+                    suite_primary_scenario = "scaling"
+                    break
+            if not suite_primary_thr:
+                suite_primary_thr = scaling.get("base_throughput_tokens_per_sec")
+                suite_primary_label = "tokens/sec (1x baseline)"
+                suite_primary_scenario = "scaling"
+
+        # ── Suite C: explode by precision, one record per precision ───
+        if suite == "suite_C":
+            quant         = (r.get("metrics") or {}).get("quantization", {})
+            quant_online  = (r.get("metrics") or {}).get("quantization_online", {})
+            quant_sus     = (r.get("metrics") or {}).get("quantization_sustained", {})
+            prec_online   = {e.get("precision",""): e.get("max_valid_qps")
+                             for e in quant_online.get("results_by_precision", [])}
+            prec_sustained = {e.get("precision",""): e.get("sustained_throughput_tokens_per_sec")
+                              for e in quant_sus.get("results_by_precision", [])}
+            tp = (task.get("parallelism") or {}).get("tensor_parallel_size")
+
+            for entry in quant.get("results_by_precision", []):
+                prec = entry.get("precision", "")
+                thr  = entry.get("best_throughput_tokens_per_sec")
+                if not thr:
+                    continue
+                prec_sc = {}
+                prec_sc["offline"] = {"throughput": thr, "metric_label": f"tokens/sec ({prec})",
+                                      "concurrency": None, "peak_memory_gb": None, "is_valid": True}
+                qps = prec_online.get(prec)
+                if qps is not None:
+                    prec_sc["online"] = {"throughput": qps, "metric_label": "max valid QPS",
+                                         "concurrency": None, "peak_memory_gb": None, "is_valid": True}
+                sus = prec_sustained.get(prec)
+                if sus is not None:
+                    prec_sc["sustained"] = {"throughput": sus, "metric_label": "tok/s (sustained mean)",
+                                            "concurrency": None, "peak_memory_gb": None, "is_valid": True}
+                for sc_name in scenarios_run:
+                    if sc_name not in prec_sc and sc_name != "accuracy":
+                        m = _extract_scenario_metric(r, sc_name)
+                        if m["is_valid"]:
+                            prec_sc[sc_name] = m
+
+                config = {"concurrency": None, "batch_size": None,
+                          "tensor_parallel": tp, "peak_memory_gb": None}
+                sub = {
+                    "id": f"{run_id or submission_name}_{prec}",
+                    "chip": chip, "chip_vendor": chip_vendor,
+                    "chip_count": chip_count, "memory_gb": memory_gb,
+                    "suite": suite, "model": model_short, "model_full": model_full,
+                    "model_params_b": model_params_b,
+                    "precision": prec, "effective_dtype": effective_dtype,
+                    "framework": framework, "framework_version": framework_version,
+                    "tier": tier, "submitted_by": submitted_by,
+                    "date": date, "reproduce_script": reproduce_script,
+                    "runner_id": impl_id,
+                    "scenarios": prec_sc,
+                    "primary_scenario": "quantization",
+                    "primary_throughput": thr,
+                    "primary_metric_label": f"tokens/sec ({prec})",
+                    "config": config,
+                }
+                all_submissions.append(sub)
+
+        else:
+            # ── Other suites: one standard record per submission ────
+
+            # Determine primary_throughput and primary_scenario.
+            primary_throughput = suite_primary_thr
+            primary_scenario   = suite_primary_scenario
+            primary_label      = suite_primary_label
+
+            if primary_throughput is None:
+                _SCENARIO_PRIORITY = ["offline", "online", "sustained", "speculative", "burst"]
+                for sc in _SCENARIO_PRIORITY:
+                    if sc in scenarios and scenarios[sc]["is_valid"]:
+                        primary_throughput = scenarios[sc]["throughput"]
+                        primary_scenario   = sc
+                        primary_label      = scenarios[sc]["metric_label"]
+                        break
+
+            if primary_throughput is None:
+                continue
+
+            # Build the best-configuration info for the primary scenario.
+            best_sc = scenarios.get(primary_scenario)
+            config = {}
+            if best_sc:
+                tp = (task.get("parallelism") or {}).get("tensor_parallel_size")
+                config = {
+                    "concurrency":    best_sc.get("concurrency"),
+                    "batch_size":     None,
+                    "tensor_parallel": tp,
+                    "peak_memory_gb": best_sc.get("peak_memory_gb"),
+                }
+            else:
+                offline = (r.get("metrics") or {}).get("offline", {})
+                rows = offline.get("results_by_concurrency") or offline.get("results_by_batch_size") or []
+                valid_rows = [row for row in rows
+                              if not row.get("oom") and row.get("throughput_tokens_per_sec")]
+                if valid_rows:
+                    best_row = max(valid_rows, key=lambda row: row["throughput_tokens_per_sec"])
+                    tp = (task.get("parallelism") or {}).get("tensor_parallel_size")
+                    config = {
+                        "concurrency":    best_row.get("client_concurrency") or best_row.get("concurrency"),
+                        "batch_size":     best_row.get("batch_size"),
+                        "tensor_parallel": tp,
+                        "peak_memory_gb": best_row.get("peak_memory_gb"),
+                    }
+
+            sub = {
+                "id":                  run_id or submission_name,
+                "chip":                chip,
+                "chip_vendor":         chip_vendor,
+                "chip_count":          chip_count,
+                "memory_gb":           memory_gb,
+                "suite":               suite,
+                "model":               model_short,
+                "model_full":          model_full,
+                "model_params_b":      model_params_b,
+                "precision":           precision,
+                "effective_dtype":     effective_dtype,
+                "framework":           framework,
+                "framework_version":   framework_version,
+                "tier":                tier,
+                "submitted_by":        submitted_by,
+                "date":                date,
+                "reproduce_script":    reproduce_script,
+                "runner_id":           impl_id,
+                "scenarios":           scenarios,
+                "primary_scenario":    primary_scenario,
+                "primary_throughput":  primary_throughput,
+                "primary_metric_label": primary_label,
+                "config":              config,
+            }
+            all_submissions.append(sub)
+
+    # ── 3. Group and aggregate by (chip, suite) ───────────────────────────
+    groups: dict[tuple, dict] = defaultdict(lambda: {
+        "submissions": [],
+        "throughputs": [],
+    })
+    for sub in all_submissions:
+        key = (sub["chip"], sub["suite"])
+        groups[key]["submissions"].append(sub)
+        groups[key]["throughputs"].append(sub["primary_throughput"])
+
+    group_list = []
+    for (chip, suite), data in groups.items():
+        thr_list = sorted(data["throughputs"])
+        n = len(thr_list)
+        median = statistics.median(thr_list)  # averages the middle pair for even n
+        best_sub = max(data["submissions"], key=lambda s: s["primary_throughput"])
+
+        # Per-scenario summary across the group's submissions.
+        scenario_summary = {}
+        for sub in data["submissions"]:
+            for sc_name, sc_info in sub["scenarios"].items():
+                if sc_name not in scenario_summary:
+                    scenario_summary[sc_name] = {
+                        "count": 0,
+                        "best_throughput": None,
+                        "best_framework": "",
+                    }
+                sm = scenario_summary[sc_name]
+                sm["count"] += 1
+                if sc_info["is_valid"] and sc_info["throughput"]:
+                    if sm["best_throughput"] is None or sc_info["throughput"] > sm["best_throughput"]:
+                        sm["best_throughput"] = sc_info["throughput"]
+                        sm["best_framework"] = sub["framework"]
+
+        # Sample standard deviation (needs at least two data points).
+        stddev = None
+        if n >= 2:
+            stddev = round(statistics.stdev(thr_list), 2)
+
+        group_list.append({
+            "chip":               chip,
+            "chip_vendor":        best_sub["chip_vendor"],
+            "suite":              suite,
+            "model":              best_sub["model"],
+            "submission_count":   n,
+            "best_throughput":    thr_list[-1],
+            "median_throughput":  median,
+            "min_throughput":     thr_list[0],
+            "max_throughput":     thr_list[-1],
+            "stddev_throughput":  stddev,
+            "scenario_summary":   scenario_summary,
+            "best_submission_id": best_sub["id"],
+            "best_framework":     best_sub["framework"],
+            "best_submitted_by":  best_sub["submitted_by"],
+        })
+
+    # Sort by vendor priority, then by median throughput descending.
+    vendor_priority = {"NVIDIA": 1, "Nvidia": 1, "nvidia": 1,
+                       "Huawei": 2, "华为": 2,
+                       "AMD": 3, "amd": 3,
+                       "Google": 4, "Apple": 5,
+                       "Moore Threads": 6, "Iluvatar": 6, "Intel": 6}
+    group_list.sort(key=lambda g: (
+        vendor_priority.get(g["chip_vendor"], 99),
+        -(g["median_throughput"] or 0)
+    ))
+
+    # ── 4. Suite metadata ─────────────────────────────────────────────────
+    suite_meta = _collect_suite_specs()
+
+    # ── 5. Write distribution.js ──────────────────────────────────────────
+    out_path = output_dir / "distribution.js"
+    with open(out_path, "w", encoding="utf-8") as f:
+        f.write("// Auto-generated by leaderboard/generate.py. Do not edit manually.\n\n")
+        f.write(f"const DISTRIBUTION_SUBMISSIONS = {json.dumps(all_submissions, indent=2, ensure_ascii=False)};\n")
+        f.write("window.DISTRIBUTION_SUBMISSIONS = DISTRIBUTION_SUBMISSIONS;\n\n")
+        f.write(f"const DISTRIBUTION_GROUPS = {json.dumps(group_list, indent=2, ensure_ascii=False)};\n")
+        f.write("window.DISTRIBUTION_GROUPS = DISTRIBUTION_GROUPS;\n\n")
+        f.write(f"const DISTRIBUTION_SUITE_META = {json.dumps(suite_meta, indent=2, ensure_ascii=False)};\n")
+        f.write("window.DISTRIBUTION_SUITE_META = DISTRIBUTION_SUITE_META;\n")
+
+    group_count      = len(group_list)
+    submission_count = len(all_submissions)
+    print(f"Distribution data written to {out_path} "
+          f"({group_count} groups, {submission_count} submissions).")
+
+
+def _bust_index_cache(data_path: Path, index_path: Path) -> None:
+    """Stamp the leaderboard.js <script> tag in index_path with ?v=<sha8 of data_path>.
+
+    No-op if index_path is missing or the HTML would be left unchanged.
+    """
+    if not index_path.exists():
+        return
+    digest = hashlib.sha256(data_path.read_bytes()).hexdigest()[:8]
+    before = index_path.read_text(encoding='utf-8')
+    tag_re = r'(<script\s+src="leaderboard\.js)(?:\?v=[0-9a-f]+)?(")'
+    after, hits = re.subn(tag_re, rf'\1?v={digest}\2', before, flags=re.IGNORECASE)
+    if hits and after != before:
+        index_path.write_text(after, encoding='utf-8')
+        print(f"  cache-busted leaderboard.js → ?v={digest}")
+
+
+def _bust_distribution_cache(data_path: Path, html_path: Path) -> None:
+    """Stamp the distribution.js <script> tag in html_path with ?v=<sha8 of data_path>.
+
+    No-op if either file is missing or the HTML would be left unchanged.
+    """
+    if not (html_path.exists() and data_path.exists()):
+        return
+    digest = hashlib.sha256(data_path.read_bytes()).hexdigest()[:8]
+    before = html_path.read_text(encoding='utf-8')
+    tag_re = r'(<script\s+src="distribution\.js)(?:\?v=[0-9a-f]+)?(")'
+    after, hits = re.subn(tag_re, rf'\1?v={digest}\2', before, flags=re.IGNORECASE)
+    if hits and after != before:
+        html_path.write_text(after, encoding='utf-8')
+        print(f"  cache-busted distribution.js → ?v={digest}")
+
+
 # ── Entry point ───────────────────────────────────────────────────────────────
 
 def main():
@@ -1137,8 +1498,7 @@ def main():
 
     rows = [extract_row(r) for r in results]
 
-    # Deduplicate: for each run_id keep only the best result (highest primary metric).
-    # Results without run_id (older submissions) are always included as-is.
+    # Deduplicate: for each run_id keep only the best result
     _seen: dict = {}
     _deduped: list = []
 
@@ -1172,50 +1532,31 @@ def main():
 
     SITE_DIR.mkdir(parents=True, exist_ok=True)
     out_path = SITE_DIR / "leaderboard.js"
-    with open(out_path, "w") as f:
+    with open(out_path, "w", encoding='utf-8') as f:
         f.write("// Auto-generated by leaderboard/generate.py. Do not edit manually.\n")
-        # window.LEADERBOARD_DATA so ES modules (assets/js/data.js) can read it.
-        # Also exposed as bare LEADERBOARD_DATA for any legacy classic-script consumers.
         f.write(f"const LEADERBOARD_DATA = {json.dumps(rows, indent=2)};\n")
         f.write("window.LEADERBOARD_DATA = LEADERBOARD_DATA;\n")
-        # window.SUITE_SPECS — canonical per-suite spec from suites/suite_*/suite.json.
-        # data.js merges these into SUITE_META at init() so UI facts auto-sync
-        # with a suite contract edit (no JS to keep in step manually).
         f.write(f"const SUITE_SPECS = {json.dumps(suite_specs, indent=2)};\n")
         f.write("window.SUITE_SPECS = SUITE_SPECS;\n")
 
     print(f"Leaderboard data written to {out_path} "
           f"({len(rows)} rows, {len(suite_specs)} suite specs).")
 
-    # Cache-bust leaderboard.js in index.html so a stale CDN / browser
-    # cached copy never out-survives the data refresh.  GitHub Pages
-    # serves the static files with a 10-minute Cache-Control by default
-    # and *no* ETag-aware revalidation on cross-domain `<script src>`
-    # loads, so without a versioned query users routinely see "old"
-    # submissions for hours after a merge.  We hash the bytes we just
-    # wrote and rewrite the existing `?v=…` (or insert one) in place.
     _bust_index_cache(out_path, SITE_DIR / "index.html")
 
     generate_api(results, SITE_DIR)
-
-
-def _bust_index_cache(data_path: Path, index_path: Path) -> None:
-    """Rewrite ``<script src="leaderboard.js?v=<sha8>">`` to match the
-    short SHA-256 of the file we just wrote.  No-op if the index file
-    is missing (e.g. someone is running the generator outside the
-    repo)."""
-    if not index_path.exists():
-        return
-    sha8 = hashlib.sha256(data_path.read_bytes()).hexdigest()[:8]
-    html = index_path.read_text()
-    pattern = re.compile(
-        r'(<script\s+src="leaderboard\.js)(?:\?v=[0-9a-f]+)?(")',
-        re.IGNORECASE,
-    )
-    new_html, n = pattern.subn(rf'\1?v={sha8}\2', html)
-    if n and new_html != html:
-        index_path.write_text(new_html)
-        print(f"  cache-busted leaderboard.js → ?v={sha8}")
+    
+    # 生成分布数据
+    generate_distribution_data(results, SITE_DIR)
+    
+    # 为 distribution.html 做缓存破坏
+    dist_js = SITE_DIR / "distribution.js"
+    dist_html = SITE_DIR / "distribution.html"
+    if dist_js.exists() and dist_html.exists():
+        _bust_distribution_cache(dist_js, dist_html)
+        print("  distribution.html cache-busted")
+    else:
+        print(f"  distribution.html or distribution.js not found, skip cache bust")
 
 
 if __name__ == "__main__":
diff --git a/leaderboard/site/distribution.html b/leaderboard/site/distribution.html
new file mode 100644
index 00000000..4fee19a4
--- /dev/null
+++ b/leaderboard/site/distribution.html
@@ -0,0 +1,333 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8"><meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>AccelMark — Serving Recipe Distribution</title>
+<meta name="theme-color" content="#0b0c10" media="(prefers-color-scheme: dark)">
+<meta name="theme-color" content="#faf8f3" media="(prefers-color-scheme: light)">
+<link rel="icon" type="image/svg+xml" href="favicon.svg">
+<link rel="stylesheet" href="assets/css/base.css">
+<link rel="stylesheet" href="assets/css/layout.css">
+<link rel="stylesheet" href="assets/css/components.css">
+<script src="https://cdn.jsdelivr.net/npm/echarts@5.5.0/dist/echarts.min.js"></script>
+<style>
+main{max-width:1500px}
+#chart-area{width:100%;height:480px}
+.hero-row{display:flex;align-items:center;gap:20px;flex-wrap:wrap;padding:18px 28px;margin-bottom:1.5rem!important}
+.hero-row .hl{flex:1;min-width:240px}
+.hero-row .hl h1{font-size:1.1rem;font-weight:700;color:var(--fg-strong);margin-bottom:4px}
+.hero-row .hl p{color:var(--fg-muted);font-size:.78rem;line-height:1.5}
+.cmd-box{background:var(--bg);border:1px solid var(--border);border-radius:var(--r-md);padding:6px 12px;font-family:'Cascadia Code','Fira Code','JetBrains Mono','SF Mono',Consolas,monospace;font-size:.7rem;color:var(--fg-muted);display:flex;align-items:center;gap:8px;cursor:pointer;transition:border-color .15s;white-space:nowrap}
+.cmd-box:hover{border-color:var(--accent-2)}
+.cmd-box code{color:var(--good);font-size:.68rem;font-family:inherit}
+.copy-btn{background:var(--accent-3);border:none;color:#fff;padding:3px 10px;border-radius:var(--r-sm);cursor:pointer;font-size:.65rem;white-space:nowrap}
+.copy-btn:hover{background:var(--accent-2)}
+.stats-row{display:flex;gap:8px;margin-bottom:1.8rem!important;flex-wrap:wrap}
+.stats-row .si{flex:1;min-width:90px;text-align:center;padding:12px 10px}
+.stats-row .si .sn{font-size:1.3rem;font-weight:700;color:var(--accent)}
+.stats-row .si .sl{font-size:.6rem;color:var(--fg-muted);text-transform:uppercase;letter-spacing:.4px}
+.filter-row{display:flex;flex-wrap:wrap;gap:6px;align-items:flex-end}
+.filter-row .fi{flex:1;min-width:85px}
+.filter-row .fi label{display:block;font-size:.55rem;color:var(--fg-faint);margin-bottom:1px;text-transform:uppercase;letter-spacing:.4px}
+.filter-row .fi select{width:100%;padding:5px 7px;background:var(--bg);border:1px solid var(--border);border-radius:var(--r-sm);font-size:.68rem;color:var(--fg);outline:none}
+.filter-row .fi select:focus{border-color:var(--accent-2)}
+.filter-info{margin-top:4px;font-size:.6rem;color:var(--fg-faint)}
+
+/* Chart tabs */
+.chart-tabs{display:flex;gap:4px;margin-bottom:8px;margin-top:8px;flex-wrap:wrap;align-items:center}
+.chart-tab{padding:5px 14px;border-radius:16px;font-size:.68rem;font-weight:500;cursor:pointer;border:1px solid var(--border);background:var(--bg);color:var(--fg-muted);transition:.15s}
+.chart-tab:hover{border-color:var(--accent-2);color:var(--fg)}
+.chart-tab.active{background:var(--accent-soft);color:var(--accent-2);border-color:var(--accent-2)}
+.info-btn{width:20px;height:20px;border-radius:50%;border:1px solid var(--border);background:var(--bg);color:var(--fg-muted);font-size:.65rem;font-weight:700;cursor:pointer;display:flex;align-items:center;justify-content:center;position:relative;margin-left:4px;transition:.15s}
+.info-btn:hover{background:var(--accent-soft);color:var(--accent-2);border-color:var(--accent-2)}
+.info-btn .info-pop{display:none;position:absolute;top:28px;left:50%;transform:translateX(-50%);background:var(--bg-elev);border:1px solid var(--border);border-radius:var(--r-md);padding:10px 14px;width:320px;font-size:.62rem;font-weight:400;color:var(--fg);line-height:1.6;z-index:99;box-shadow:0 4px 16px rgba(0,0,0,.2);white-space:normal;text-align:left}
+.info-btn:hover .info-pop{display:block}
+.chart-sub{text-align:center;margin-top:4px;font-size:.55rem;color:var(--fg-faint)}
+
+.chip-cols{display:flex;gap:16px;flex-wrap:wrap}
+.chip-col{flex:1;min-width:170px;max-width:280px;border:1px solid var(--border);border-radius:var(--r-lg);padding:10px 12px;background:var(--bg-elev)}
+.chip-col-hd{display:flex;align-items:center;gap:6px;margin-bottom:6px;padding-bottom:4px;border-bottom:1px solid var(--border-soft)}
+.chip-col-hd .ccdot{width:8px;height:8px;border-radius:50%;flex-shrink:0}
+.chip-col-hd .ccname{font-size:.75rem;font-weight:600;color:var(--fg-strong)}
+.chip-col-hd .cccount{font-size:.6rem;color:var(--fg-muted)}
+.chip-col .cc-item{display:flex;align-items:center;gap:6px;padding:3px 6px;border-radius:6px;cursor:pointer;font-size:.68rem;transition:background .1s;margin-bottom:1px}
+.chip-col .cc-item:hover{background:var(--bg-elev-2)}
+.chip-col .cc-item .ccname2{font-weight:500;color:var(--fg);flex:1;white-space:nowrap;overflow:hidden;text-overflow:ellipsis}
+.chip-col .cc-item .ccthr{color:var(--fg-muted);font-size:.62rem;white-space:nowrap}
+.chip-col .cc-item .ccruns{color:var(--fg-faint);font-size:.58rem}
+.chip-col .cc-more{font-size:.62rem;color:var(--accent);cursor:pointer;padding:2px 6px;border-radius:var(--r-sm);margin-top:2px;display:inline-block}
+.chip-col .cc-more:hover{background:var(--bg-elev-2)}
+
+.data-table{width:100%;border-collapse:collapse;font-size:.7rem}
+.data-table thead th{text-align:left;padding:7px 10px;background:var(--bg);color:var(--fg-muted);font-weight:500;font-size:.6rem;text-transform:uppercase;letter-spacing:.4px;white-space:nowrap;border-bottom:1px solid var(--border-soft)}
+.data-table tbody td{padding:6px 10px;color:var(--fg)}
+.data-table tbody tr{border-bottom:1px solid var(--border-soft);transition:background .1s;cursor:pointer}
+.data-table tbody tr:hover{background:var(--bg-elev-2)}
+.data-table .td-recipe{max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap;font-weight:600}
+.v-badge{display:inline-block;padding:1px 6px;border-radius:6px;font-size:.58rem;font-weight:500}
+.vNVIDIA{background:rgba(27,122,61,.12);color:#1b7a3d}.vHuawei{background:rgba(194,65,12,.12);color:#c2410c}
+.vAMD{background:rgba(190,24,93,.12);color:#be185d}.vGoogle{background:rgba(29,78,216,.12);color:#1d4ed8}
+.vApple{background:rgba(120,113,108,.12);color:#78716c}.vMoore{background:rgba(124,58,237,.12);color:#7c3aed}
+.sdots{display:flex;gap:1px}
+.sdot{width:13px;height:13px;line-height:13px;text-align:center;border-radius:2px;font-size:.48rem;font-weight:700}
+.sdot.on{background:var(--border);color:var(--fg-muted)}.sdot.off{background:transparent;border:1px solid var(--border);color:var(--fg-faint)}
+.hroom{font-size:.62rem;color:var(--fg-muted)}
+.expand-icon{transition:transform .2s;display:inline-block;color:var(--fg-faint);font-size:.65rem}
+.expand-icon.open{transform:rotate(90deg)}
+.expand-row{background:var(--bg)}
+.expand-row td{padding:0 10px 7px 10px}
+.expand-inner{font-size:.62rem;padding:4px 0}
+.expand-inner table{width:100%;border-collapse:collapse}
+.expand-inner th{background:var(--bg-elev-2);font-size:.58rem;padding:3px 5px;color:var(--fg-muted);font-weight:500}
+.expand-inner td{padding:3px 5px;color:var(--fg)}
+
+/* Heatmap */
+#heatmap-area{width:100%;height:480px}
+
+.modal-overlay{display:none;position:fixed;inset:0;background:rgba(0,0,0,.5);z-index:1000;justify-content:center;align-items:center}
+.modal-overlay.show{display:flex}
+.modal-box{background:var(--bg-elev);border:1px solid var(--border);border-radius:var(--r-lg);max-width:520px;width:92%;max-height:82vh;overflow-y:auto}
+.modal-hd{padding:12px 16px;border-bottom:1px solid var(--border);display:flex;justify-content:space-between;align-items:center;position:sticky;top:0;background:var(--bg-elev);border-radius:var(--r-lg) var(--r-lg) 0 0;z-index:1}
+.modal-hd h4{font-size:.78rem;color:var(--fg-strong)}
+.modal-close{background:none;border:none;font-size:1rem;cursor:pointer;color:var(--fg-muted);padding:2px 6px;border-radius:var(--r-sm)}
+.modal-close:hover{background:var(--bg-elev-2);color:var(--fg)}
+.modal-body{padding:10px 14px 14px}
+.dsec{margin-bottom:10px}.dsec h5{font-size:.62rem;color:var(--fg-muted);margin-bottom:3px;text-transform:uppercase;letter-spacing:.4px}
+.drow{display:flex;margin-bottom:2px;font-size:.68rem}.dlbl{width:70px;color:var(--fg-faint);flex-shrink:0}.dval{color:var(--fg)}
+.dval code{background:var(--bg);padding:1px 4px;border-radius:3px;font-size:.6rem}
+.stbl{width:100%;border-collapse:collapse;font-size:.6rem;margin-top:2px}
+.stbl th{background:var(--bg);padding:2px 4px;text-align:left;font-weight:500;color:var(--fg-muted)}
+.stbl td{padding:2px 4px;color:var(--fg)}.na{color:var(--fg-faint)}
+.cfg-pre{background:var(--bg);padding:4px 6px;border-radius:var(--r-sm);font-size:.58rem;overflow-x:auto;margin:0;color:var(--fg-muted);border:1px solid var(--border-soft)}
+@media(max-width:768px){.hero-row{flex-direction:column;align-items:flex-start;gap:10px}.chip-cols{flex-direction:column}}
+</style>
+</head>
+<body>
+
+<nav class="topnav" aria-label="Primary">
+<a class="brand" href="index.html"><svg width="24" height="24" viewBox="0 0 72 72"><defs><linearGradient id="nb" x1="0%" y1="0%" x2="0%" y2="100%"><stop offset="0%" stop-color="#FBBF24"/><stop offset="100%" stop-color="#F59E0B"/></linearGradient><linearGradient id="ng" x1="0%" y1="0%" x2="100%" y2="0%"><stop offset="0%" stop-color="#60A5FA"/><stop offset="100%" stop-color="#2563EB"/></linearGradient></defs><path d="M 10 50 A 26 26 0 1 1 62 50" fill="none" stroke="url(#ng)" stroke-width="6" stroke-linecap="round"/><path d="M 40 14 L 24 42 L 33 42 L 28 58 L 50 30 L 41 30 Z" fill="url(#nb)"/></svg><span>AccelMark</span></a>
+<div class="nav-links"><a class="nav-link" href="index.html">Home</a><a class="nav-link" href="index.html#/rankings">Rankings</a><a class="nav-link" href="index.html#/compare">Compare</a><a class="nav-link" href="index.html#/suites">Suites</a><a class="nav-link active" href="distribution.html">📈 Distribution</a></div>
+<div class="nav-right"><a class="gh-link" href="https://github.com/FreedomIntelligence/AccelMark" target="_blank" rel="noopener" aria-label="GitHub"><svg width="18" height="18" viewBox="0 0 16 16" fill="currentColor"><path d="M8 0C3.58 0 0 3.58 0 8a8 8 0 0 0 5.47 7.59c.4.07.55-.17.55-.38v-1.32c-2.22.48-2.69-1.07-2.69-1.07-.36-.93-.89-1.18-.89-1.18-.73-.49.06-.48.06-.48.8.06 1.23.82 1.23.82.72 1.23 1.88.88 2.34.67.07-.52.28-.88.51-1.08-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.13 0 0 .67-.21 2.2.82a7.58 7.58 0 0 1 4 0c1.53-1.04 2.2-.82 2.2-.82.44 1.11.16 1.93.08 2.13.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48v2.2c0 .21.15.46.55.38A8 8 0 0 0 16 8c0-4.42-3.58-8-8-8z"/></svg></a></div>
+</nav>
+<main>
+
+<div class="card hero-row section"><div class="hl"><h1>🚀 Serving Recipe Distribution</h1><p>Each dot = one real benchmark run. Wider spread within a Suite = more optimization headroom.<br><strong style="color:var(--good)">As the community tunes, points shift upward — proof that sharing recipes works.</strong></p></div><div class="cmd-box" onclick="copyCmd()"><code id="cmd-text">git clone https://github.com/FreedomIntelligence/AccelMark && cd AccelMark && python run.py --suite A</code><button class="copy-btn" id="copy-btn">📋 Copy</button></div></div>
+<div class="stats-row section" id="stats-row"></div>
+
+<div class="card section" style="padding:16px 20px"><div class="section-header"><div class="section-title"><h2>📊 Performance Distribution</h2></div></div>
+<div class="filter-row" style="margin-bottom:10px">
+<div class="fi"><label>Suite</label><select id="f-suite"><option value="">All</option></select></div>
+<div class="fi"><label>Vendor</label><select id="f-vendor"><option value="">All</option></select></div>
+<div class="fi"><label>Framework</label><select id="f-framework"><option value="">All</option></select></div>
+<div class="fi"><label>Precision</label><select id="f-precision"><option value="">All</option></select></div>
+<div class="fi"><label>Chip</label><select id="f-chip"><option value="">All</option></select></div>
+<button class="btn" id="btn-reset" style="align-self:flex-end;font-size:.65rem;padding:5px 10px">↺ Reset</button>
+</div><div class="filter-info" id="filter-info">Loading…</div>
+<div class="chart-tabs" id="chart-tabs" style="justify-content:space-between">
+<span class="chart-tab active" data-view="beeswarm">Beeswarm</span>
+<span class="chart-tab" data-view="scatter">Scatter</span>
+<span class="chart-tab" data-view="density">Density</span>
+<span class="chart-tab" data-view="heatmap">Heatmap</span>
+<span class="chart-tab" data-view="small">By Suite</span>
+<span class="info-btn" style="margin-right:auto">ⓘ<span class="info-pop"><b>Beeswarm</b> — Grouped by Suite, each dot is a submission. Spread = optimization headroom.<br><b>Scatter</b> — Throughput × QPS. Top-right = both strong.<br><b>Density</b> — Overlap heatmap. Darker = more recipes.<br><b>Heatmap</b> — Suite × Chip matrix. Color = throughput.<br><b>By Suite</b> — One mini-chart per Suite. Compare per benchmark.</span></span>
+<span style="display:flex;align-items:center;gap:4px;margin-left:auto"><span style="font-size:.6rem;color:var(--fg-faint);text-transform:uppercase;letter-spacing:.4px">Y-axis</span><select id="f-metric" style="padding:4px 8px;background:var(--bg);border:1px solid var(--border);border-radius:var(--r-sm);font-size:.65rem;color:var(--fg);outline:none;min-width:160px"><option value="offline">Offline Throughput</option><option value="online">Online Max QPS</option><option value="sustained">Sustained Throughput</option><option value="speculative">Speculative Throughput</option></select></span>
+</div>
+<div id="chart-area"></div>
+<div id="heatmap-area" style="display:none"></div>
+<div class="chart-sub" id="chart-sub"></div>
+</div>
+
+<div class="section"><div class="section-header"><div class="section-title"><h2>🖥️ Chips</h2></div><span style="font-size:.65rem;color:var(--fg-muted)">Click to jump to table</span></div><div id="chip-cols"></div></div>
+
+<div class="card section" style="padding:16px 20px"><div class="section-header"><div class="section-title"><h2>📋 Recipe Overview</h2></div></div><div style="overflow-x:auto"><table class="data-table" id="stats-table"><thead><tr><th>Best Recipe</th><th>Vendor</th><th>Chip</th><th>Suite Coverage</th><th>Recipes</th><th>Best Throughput</th><th>Headroom</th><th></th></tr></thead><tbody id="stats-tbody"><tr><td colspan="8">Loading…</td></tr></tbody></table></div></div>
+</main>
+<footer><p>AccelMark · Open, reproducible benchmark for AI accelerators.<br><a href="https://github.com/FreedomIntelligence/AccelMark">Source code</a> · <a href="https://github.com/FreedomIntelligence/AccelMark/blob/main/CONTRIBUTING.md">Contribute results</a></p></footer>
+
+<div class="modal-overlay" id="drilldown-overlay"><div class="modal-box"><div class="modal-hd"><h4 id="drilldown-title"></h4><button class="modal-close" onclick="closeDrillDown()">✕</button></div><div class="modal-body" id="drilldown-body"></div></div></div>
+
+<script src="distribution.js?v=ce057b33"></script>
+<script>
+var SUITE_LIST=['suite_A','suite_B','suite_C','suite_D','suite_E','suite_F','suite_G']; /* Suite ids in display order; the chart views keep only those present in the filtered data. */
+var VCOLS={'NVIDIA':'#1b7a3d','Huawei':'#c2410c','AMD':'#be185d','Google':'#1d4ed8','Apple':'#78716c','Moore Threads':'#7c3aed'}; /* Vendor name -> chart colour, looked up by vc(). */
+var VCLS={'NVIDIA':'vNVIDIA','Huawei':'vHuawei','AMD':'vAMD','Google':'vGoogle','Apple':'vApple','Moore Threads':'vMoore'}; /* Vendor name -> badge CSS class (the .vNVIDIA ... .vMoore rules in this page's stylesheet). */
+var SLAB={'suite_A':'A·Single-chip','suite_B':'B·Multi-chip','suite_C':'C·Quant','suite_D':'D·Long-ctx','suite_E':'E·Scaling','suite_F':'F·Edge','suite_G':'G·MoE'}; /* Suite id -> short display label. */
+function vc(v){/* Chart colour for vendor v; vendors missing from VCOLS fall back to purple. */var colour=VCOLS[v];return colour?colour:'#7c3aed';}
+var allSubs=[],cChart=null,hChart=null,smCharts=[],cg=[],showAll=false,curView='beeswarm'; /* Page state: allSubs = loaded submissions, cChart/hChart = main and heatmap ECharts instances, smCharts = per-suite mini charts, curView = active chart tab. cg and showAll are presumably used by the table code further down - TODO confirm. */
+/* Y-axis metric switcher: one entry per <option> value of #f-metric. get(s) returns that scenario's throughput, or null when the scenario is missing or not valid; hi selects the up/down arrow shown in the beeswarm axis title. */
+var METRICS={offline:{label:'Offline Throughput',unit:'tok/s',hi:true,get:function(s){return s.scenarios.offline&&s.scenarios.offline.is_valid?s.scenarios.offline.throughput:null;}},online:{label:'Online Max QPS',unit:'QPS',hi:true,get:function(s){return s.scenarios.online&&s.scenarios.online.is_valid?s.scenarios.online.throughput:null;}},sustained:{label:'Sustained Throughput',unit:'tok/s',hi:true,get:function(s){return s.scenarios.sustained&&s.scenarios.sustained.is_valid?s.scenarios.sustained.throughput:null;}},speculative:{label:'Speculative Throughput',unit:'tok/s',hi:true,get:function(s){return s.scenarios.speculative&&s.scenarios.speculative.is_valid?s.scenarios.speculative.throughput:null;}}};
+function curMetric(){/* Metric descriptor selected in #f-metric; unknown values fall back to the offline metric. */var picked=METRICS[fid('f-metric')];if(picked)return picked;return METRICS.offline;}
+
+function isLight(){/* Truthy when the browser reports a light colour scheme; falsy when matchMedia is unavailable. */var mm=window.matchMedia;if(!mm)return mm;return window.matchMedia('(prefers-color-scheme:light)').matches;}
+var AL,SL,AT,TBG,TBR,TFG; /* Chart palette filled in by thm(): AL axis line, SL split line, AT axis/legend text, TBG/TBR/TFG tooltip background/border/text. */
+function thm(){/* Load the light or dark chart palette depending on isLight(). */if(isLight()){AL='#e5e0d8';SL='#ede8e0';AT='#a8a29e';TBG='rgba(255,255,255,.96)';TBR='#e5e0d8';TFG='#44403c';}else{AL='#2a2d36';SL='#20232b';AT='#71717a';TBG='rgba(20,22,28,.96)';TBR='#2a2d36';TFG='#d4d4d8';}}
+thm(); /* Set the palette once at load. */
+
+function ld(){/* Adopt window.DISTRIBUTION_SUBMISSIONS as the working data set; returns whether it was present. */var loaded=window.DISTRIBUTION_SUBMISSIONS;if(!loaded)return false;allSubs=loaded;return true;}
+function fid(id){/* Current value of the form control with the given id. */var el=document.getElementById(id);return el.value;}
+function gCD(){/* Chart data: submissions that pass the active dropdown filters and have a present, valid offline scenario. */return gAF().filter(function(sub){var off=sub.scenarios.offline;return off&&off.is_valid;});}
+function gAF(){/* All submissions that pass the dropdown filters; an empty dropdown value means no restriction on that field. */var want={suite:fid('f-suite'),chip_vendor:fid('f-vendor'),framework:fid('f-framework'),precision:fid('f-precision'),chip:fid('f-chip')};return allSubs.filter(function(sub){return Object.keys(want).every(function(k){return !want[k]||sub[k]===want[k];});});}
+function gCG(subs){/* Group submissions by chip. Each group records the vendor, its submissions, the set of suites covered, and the submission with the highest primary_throughput; groups are returned sorted by that best throughput, highest first. */var m=new Map();subs.forEach(function(s){if(!m.has(s.chip))m.set(s.chip,{chip:s.chip,vendor:s.chip_vendor,subs:[],suites:new Set(),bestThr:0,bestSub:null});var e=m.get(s.chip);e.subs.push(s);e.suites.add(s.suite);if(s.primary_throughput>e.bestThr){e.bestThr=s.primary_throughput;e.bestSub=s;}});var r=Array.from(m.values());r.sort(function(a,b){return b.bestThr-a.bestThr;});return r;}
+function su(f){/* Sorted list of the distinct truthy values of field f across all submissions. */var seen=new Set();allSubs.forEach(function(sub){if(sub[f])seen.add(sub[f]);});return Array.from(seen).sort();}
+function ps(id,o){/* Reset the <select> with this id to its "All" entry, then append one option per value in o. */var sel=document.getElementById(id);sel.innerHTML='<option value="">All</option>';for(var i=0;i<o.length;i++){sel.add(new Option(o[i],o[i]));}}
+function iF(){/* Fill every filter dropdown with the distinct values of its submission field. */[['f-suite','suite'],['f-vendor','chip_vendor'],['f-framework','framework'],['f-precision','precision'],['f-chip','chip']].forEach(function(pair){ps(pair[0],su(pair[1]));});}
+function copyCmd(){/* Copy the quick-start command to the clipboard and flash a confirmation on the button. navigator.clipboard only exists in secure contexts, so when it is missing or the write is rejected the command text is selected instead, letting the user copy it manually. */var el=document.getElementById('cmd-text'),b=document.getElementById('copy-btn');function ok(){b.textContent='✓ Copied';b.style.background='var(--good)';setTimeout(function(){b.textContent='📋 Copy';b.style.background='var(--accent-3)';},1500);}function pick(){var r=document.createRange();r.selectNodeContents(el);var sel=window.getSelection();sel.removeAllRanges();sel.addRange(r);}if(navigator.clipboard&&navigator.clipboard.writeText){navigator.clipboard.writeText(el.textContent).then(ok,pick);}else{pick();}}
+function fm(v){if(v===null||v===undefined)return'N/A';if(v>=10000)return(v/1000).toFixed(0)+'k';if(v>=1000)return(v/1000).toFixed(1)+'k';if(v>=1)return v.toFixed(1);return v.toFixed(4);}
+function hs(s){var h=5381;for(var i=0;i<s.length;i++){h=((h<<5)+h)+s.charCodeAt(i);h=h&h;}return Math.abs(h);}
+function axes(xMn,yMn){/* Shared log-log axis pair, returned as [xAxis, yAxis]: x is offline throughput with lower bound xMn, y is online max QPS with lower bound yMn. Colours come from the palette set by thm(). */return[{type:'log',min:xMn,name:'Offline Throughput (tok/s)',nameTextStyle:{color:AT,fontSize:9},axisLabel:{formatter:function(v){return v>=1000?(v/1000).toFixed(v>=10000?0:1)+'k':v;},color:AT,fontSize:8},axisLine:{lineStyle:{color:AL}},splitLine:{lineStyle:{color:SL}}},{type:'log',min:yMn,name:'Online Max QPS',nameTextStyle:{color:AT,fontSize:9},axisLabel:{formatter:function(v){return v>=100?(v).toFixed(0):v.toFixed(1);},color:AT,fontSize:8},axisLine:{lineStyle:{color:AL}},splitLine:{lineStyle:{color:SL}}}];}
+function tip(s){/* Tooltip HTML for one submission, showing the currently selected metric plus online QPS. Submission fields are community-supplied, so every interpolated string is HTML-escaped before it reaches the HTML tooltip; missing fields render as empty text. */function e(x){return String(x==null?'':x).replace(/[&<>"']/g,function(c){return'&#'+c.charCodeAt(0)+';';});}var mt=curMetric(),mv=mt.get(s);var ho=s.scenarios.online&&s.scenarios.online.is_valid;return'<div style="font-size:11px;line-height:1.55"><span style="color:'+vc(s.chip_vendor)+'">●</span> <strong>'+e(s.chip_vendor)+'</strong> · '+e(s.chip)+'<br/>📦 '+e(s.suite)+' · '+e(s.model)+'<br/>🔧 <strong>'+e(s.framework)+' '+e(s.framework_version||'')+'</strong> · '+e(s.precision||'-')+'<br/>'+mt.label+': <strong style="color:var(--gold)">'+(mv!==null?fm(mv):'N/A')+'</strong> '+mt.unit+'<br/>🌐 Online QPS: <strong style="color:var(--good)">'+(ho?fm(s.scenarios.online.throughput):'—')+'</strong><br/>👤 '+e(s.submitted_by||'community')+' · '+e(s.tier)+'<br/><span style="color:#a8a29e">Click → details</span></div>';}
+
+/* ── VIEW: Beeswarm (grouped by suite, jittered dots) ── Returns the ECharts option: x = suite index plus a deterministic jitter, y = the selected metric on a log axis; one series per vendor plus a larger "<vendor> best" marker for the top submission of each suite. */
+function rBeeswarm(data){
+    var mt=curMetric(),getV=mt.get;
+    var suites=SUITE_LIST.filter(function(s){return data.some(function(d){return d.suite===s;});});
+    var vendors=[...new Set(data.map(function(d){return d.chip_vendor;}))].sort();
+    /* only keep submissions with this metric (get() returns null when the scenario is missing or not valid) */
+    var valid=data.filter(function(d){return getV(d)!==null;});
+    var bestMap=new Map();valid.forEach(function(s){var k=s.suite;var vv=getV(s);if(!bestMap.has(k)||vv>bestMap.get(k).v)bestMap.set(k,{sub:s,v:vv});});
+    var allV=valid.map(function(d){return getV(d);}).filter(function(v){return v>0;});
+    var yMin=allV.length?Math.max(0.5,Math.min.apply(null,allV)*0.5):1; /* log-axis floor: half the smallest positive value, never below 0.5 */
+    var series=[];
+    vendors.forEach(function(v){ /* regular dots: jt is a hash-based x offset so a dot keeps its place across redraws; isExp marks suite_C runs at a precision other than BF16/FP16, drawn smaller and fainter */
+        var pts=[];valid.forEach(function(d){if(d.chip_vendor!==v)return;var si=suites.indexOf(d.suite);if(si<0)return;var jt=(hs(d.id+d.chip)%100-50)/100*0.35;var isExp=d.suite==='suite_C'&&d.precision!=='BF16'&&d.precision!=='FP16';pts.push({value:[si+jt,getV(d)],submission:d,symbolSize:isExp?5:7,itemStyle:{opacity:isExp?.35:.65}});});
+        if(pts.length)series.push({name:v,type:'scatter',data:pts,symbol:'circle',itemStyle:{color:vc(v),borderColor:'rgba(0,0,0,.06)',borderWidth:1},emphasis:{scale:1.6,itemStyle:{opacity:1}},z:1});
+    });
+    vendors.forEach(function(v){ /* per-suite best markers, drawn on top (z:5) at the same jittered position as the underlying dot */
+        var bps=[];bestMap.forEach(function(e){var s=e.sub;if(s.chip_vendor!==v)return;var si=suites.indexOf(s.suite);if(si<0)return;var jt=(hs(s.id+s.chip)%100-50)/100*0.35;bps.push({value:[si+jt,getV(s)],submission:s});});
+        if(bps.length)series.push({name:v+' best',type:'scatter',data:bps,symbol:'circle',symbolSize:13,itemStyle:{color:vc(v),opacity:1},emphasis:{scale:1.3},z:5});
+    });
+    var yLab=mt.label+(mt.hi?' ↑':' ↓')+' ('+mt.unit+')';
+    return{
+        tooltip:{trigger:'item',backgroundColor:TBG,borderColor:TBR,textStyle:{color:TFG,fontSize:11},formatter:function(p){var s=p.data&&p.data.submission;return s?tip(s):'';}},
+        legend:{data:vendors,bottom:4,textStyle:{color:AT,fontSize:9},itemGap:14},
+        grid:{left:'8%',right:'5%',bottom:'14%',top:'6%',containLabel:true},
+        xAxis:{type:'value',min:-0.5,max:suites.length-0.5,interval:1,axisLabel:{formatter:function(v){var i=Math.round(v);return(i>=0&&i<suites.length)?suites[i]:'';},rotate:30,fontSize:8,color:AT},axisTick:{alignWithLabel:true},splitLine:{show:false},name:'Suite',nameTextStyle:{color:AT,fontSize:9}},
+        yAxis:{type:'log',min:yMin,name:yLab,nameTextStyle:{color:AT,fontSize:9},axisLabel:{formatter:function(v){return v>=1000?(v/1000).toFixed(v>=10000?0:1)+'k':v;},color:AT,fontSize:8},axisLine:{lineStyle:{color:AL}},splitLine:{lineStyle:{color:SL}}},
+        series:series,animation:true,animationDuration:300
+    };
+}
+
+/* ── VIEW: Scatter (throughput × QPS) ── */
+function rScatter(data){
+    var wo=[],no=[];data.forEach(function(d){if(d.scenarios.online&&d.scenarios.online.is_valid&&d.scenarios.online.throughput>0)wo.push(d);else no.push(d);});
+    var aT=data.map(function(d){return d.primary_throughput;}).filter(function(v){return v>0;}),aQ=wo.map(function(d){return d.scenarios.online.throughput;});
+    var xMn=aT.length?Math.max(0.5,Math.min.apply(null,aT)*0.5):1,yMn=aQ.length?Math.max(0.1,Math.min.apply(null,aQ)*0.4):0.1,yNo=yMn*0.5;
+    var vendors=[...new Set(data.map(function(d){return d.chip_vendor;}))].sort(),bestMap=new Map();
+    data.forEach(function(s){var k=s.suite;if(!bestMap.has(k)||s.primary_throughput>bestMap.get(k).primary_throughput)bestMap.set(k,s);});
+    var series=[];
+    vendors.forEach(function(v){
+        var vSubs=data.filter(function(d){return d.chip_vendor===v;}),pts=[],bps=[];
+        vSubs.forEach(function(d){var x=d.primary_throughput,y;if(d.scenarios.online&&d.scenarios.online.is_valid&&d.scenarios.online.throughput>0){y=d.scenarios.online.throughput;}else{x+=x*((hs(d.id)%100-50)/100*0.1);y=yNo;}pts.push({value:[x,y],submission:d});});
+        bestMap.forEach(function(s){if(s.chip_vendor!==v)return;var x=s.primary_throughput,y=yNo;if(s.scenarios.online&&s.scenarios.online.is_valid&&s.scenarios.online.throughput>0)y=s.scenarios.online.throughput;bps.push({value:[x,y],submission:s});});
+        if(pts.length){series.push({name:v,type:'scatter',data:pts,symbol:'circle',symbolSize:9,itemStyle:{color:vc(v),opacity:.65,borderColor:'rgba(0,0,0,.06)',borderWidth:1},emphasis:{scale:1.5,itemStyle:{opacity:1}},z:1});}
+        if(bps.length){series.push({name:v+' best',type:'scatter',data:bps,symbol:'circle',symbolSize:14,itemStyle:{color:vc(v),opacity:1},emphasis:{scale:1.3},z:5});}
+    });
+    if(no.length)series.push({name:'',type:'scatter',data:[],markLine:{silent:true,symbol:'none',lineStyle:{color:AL,type:'dashed',width:1},label:{formatter:'offline-only →',position:'start',fontSize:8,color:AT},data:[{yAxis:yNo}]},z:0});
+    var ax=axes(xMn,yMn);
+    return{tooltip:{trigger:'item',backgroundColor:TBG,borderColor:TBR,textStyle:{color:TFG,fontSize:11},formatter:function(p){var s=p.data&&p.data.submission;return s?tip(s):'';}},legend:{data:vendors,bottom:4,textStyle:{color:AT,fontSize:9},itemGap:14},grid:{left:'8%',right:'5%',bottom:'12%',top:'6%',containLabel:true},xAxis:ax[0],yAxis:ax[1],series:series,animation:true,animationDuration:300};
+}
+
+/* ── VIEW: Density ── Returns the ECharts option for a single translucent scatter series, so overlapping recipes read as darker areas. Only submissions with positive primary_throughput and a valid, positive online throughput are plotted. */
+function rDensity(data){
+    var pts=[];data.forEach(function(d){if(d.primary_throughput>0){var q=(d.scenarios.online&&d.scenarios.online.is_valid&&d.scenarios.online.throughput>0)?d.scenarios.online.throughput:null;if(q)pts.push([d.primary_throughput,q,d]);}}); /* each point is [throughput, qps, submission]; the tooltip reads the submission from index 2 */
+    var lx=pts.map(function(p){return Math.log10(p[0]);}),ly=pts.map(function(p){return Math.log10(p[1]);});
+    var xMn=lx.length?Math.pow(10,Math.min.apply(null,lx)-0.3):1,xMx=lx.length?Math.pow(10,Math.max.apply(null,lx)+0.1):100000; /* axis bounds are padded in log10 space; fixed defaults apply when there are no points */
+    var yMn=ly.length?Math.pow(10,Math.min.apply(null,ly)-0.2):0.1,yMx=ly.length?Math.pow(10,Math.max.apply(null,ly)+0.1):1000;
+    var series=[{name:'Recipes',type:'scatter',data:pts,symbol:'circle',symbolSize:12,itemStyle:{color:'#3b82f6',opacity:.25,borderWidth:0},emphasis:{scale:1.4,itemStyle:{opacity:.6}},z:1}];
+    var ax=axes(xMn,yMn);ax[0].max=xMx;ax[1].max=yMx;
+    return{tooltip:{trigger:'item',backgroundColor:TBG,borderColor:TBR,textStyle:{color:TFG,fontSize:11},formatter:function(p){var s=p.data&&p.data[2];return s?tip(s):'';}},grid:{left:'8%',right:'5%',bottom:'8%',top:'6%',containLabel:true},xAxis:ax[0],yAxis:ax[1],series:series,animation:true,animationDuration:300};
+}
+
+/* ── VIEW: Heatmap (Suites × Chips) ── */
+function rHeatmap(data){
+    var suites=SUITE_LIST.filter(function(s){return data.some(function(d){return d.suite===s;});});
+    var chips=[];var cm=new Map();data.forEach(function(d){var k=d.chip;if(!cm.has(k)){cm.set(k,{chip:k,vendor:d.chip_vendor,bestThr:0});chips.push(cm.get(k));}var e=cm.get(k);if(d.primary_throughput>e.bestThr)e.bestThr=d.primary_throughput;});
+    chips.sort(function(a,b){return b.bestThr-a.bestThr;});
+    var hmd=[],xLabels=suites.map(function(s){return s.replace('suite_','');}),yLabels=[];
+    chips.forEach(function(c){yLabels.push(c.chip);var row=[];suites.forEach(function(suite){var subs=data.filter(function(d){return d.chip===c.chip&&d.suite===suite;});row.push(subs.length?Math.max.apply(null,subs.map(function(d){return d.primary_throughput;})):null);});hmd.push(row);});
+    var mx=0;hmd.forEach(function(r){r.forEach(function(v){if(v>mx)mx=v;});});
+    return{
+        tooltip:{trigger:'item',backgroundColor:TBG,borderColor:TBR,textStyle:{color:TFG,fontSize:11},formatter:function(p){if(!p.data||p.data[2]===null)return p.data[1]+' × '+p.data[0]+'<br/>No data';var subs=data.filter(function(d){return d.chip===p.data[1]&&d.suite==='suite_'+p.data[0];});var best=subs.reduce(function(a,b){return b.primary_throughput>a.primary_throughput?b:a;},subs[0]);return'<strong>'+best.chip+'</strong> · suite_'+p.data[0]+'<br/>📡 Best: <strong>'+fm(p.data[2])+'</strong> tok/s<br/>🔧 '+best.framework+' '+(best.framework_version||'')+'<br/>📦 '+subs.length+' recipes'+(best.precision?' · '+best.precision:'');}},
+        grid:{left:180,right:20,top:10,bottom:60},
+        xAxis:{type:'category',data:xLabels,axisLabel:{fontSize:9,color:AT,fontWeight:'bold'},position:'bottom'},
+        yAxis:{type:'category',data:yLabels,axisLabel:{fontSize:8,color:AT,width:170,overflow:'truncate'},inverse:true},
+        visualMap:{min:0,max:mx,calculable:true,orient:'vertical',right:4,bottom:'15%',textStyle:{color:AT,fontSize:8},inRange:{color:['#f0f4ff','#93c5fd','#3b82f6','#1d4ed8','#1e3a8a']}},
+        series:[{type:'heatmap',data:(function(){var d=[];hmd.forEach(function(row,y){row.forEach(function(v,x){if(v!==null)d.push([x,y,v]);});});return d;})(),label:{show:true,fontSize:7,color:AT,formatter:function(p){return p.data[2]?fm(p.data[2]):'';}},emphasis:{itemStyle:{shadowBlur:10,shadowColor:'rgba(0,0,0,.3)'}}}],
+        animation:true,animationDuration:300
+    };
+}
+
+/* ── VIEW: Small multiples ── Unlike the other views this one draws directly: it appends a CSS grid to #chart-area with one card and one small ECharts instance per suite, tracks the instances in smCharts, and returns null so the caller does not create a main chart. */
+function rSmall(data){
+    var container=document.createElement('div');container.style.display='grid';container.style.gridTemplateColumns='repeat(auto-fill,minmax(260px,1fr))';container.style.gap='8px';
+    document.getElementById('chart-area').appendChild(container);
+    smCharts.forEach(function(c){if(c)c.dispose();});smCharts=[]; /* drop charts left over from a previous render */
+    var suites=SUITE_LIST.filter(function(s){return data.some(function(d){return d.suite===s;});});
+    var aT=data.map(function(d){return d.primary_throughput;}).filter(function(v){return v>0;}),gxMn=aT.length?Math.max(0.5,Math.min.apply(null,aT)*0.5):1; /* gxMn: one x-axis floor shared by every mini chart */
+    suites.forEach(function(suite){
+        var sd=data.filter(function(d){return d.suite===suite;});if(!sd.length)return;
+        var inner=document.createElement('div');inner.style.cssText='background:var(--bg-elev);border:1px solid var(--border-soft);border-radius:var(--r-md);padding:6px';
+        inner.innerHTML='<div style="font-size:.65rem;font-weight:600;color:var(--fg-strong);margin-bottom:2px">'+suite+' <span style="font-weight:400;color:var(--fg-muted)">('+sd.length+')</span></div><div style="width:100%;height:180px" class="sm-chart"></div>';
+        container.appendChild(inner);
+        setTimeout(function(){ /* chart creation is deferred by 30 ms - presumably so the new element has a layout size before echarts.init measures it; TODO confirm */
+            var el=inner.querySelector('.sm-chart');if(!el)return;var c=echarts.init(el);smCharts.push(c);
+            var vendors=[...new Set(sd.map(function(d){return d.chip_vendor;}))],series=vendors.map(function(v){var pts=sd.filter(function(d){return d.chip_vendor===v&&d.primary_throughput>0;}).map(function(d){return{value:[d.primary_throughput,(d.scenarios.online&&d.scenarios.online.is_valid&&d.scenarios.online.throughput>0)?d.scenarios.online.throughput:0.05],submission:d};});return{name:v,type:'scatter',data:pts,symbol:'circle',symbolSize:6,itemStyle:{color:vc(v),opacity:.7},emphasis:{scale:1.5}};}); /* submissions without a usable online result are plotted at y = 0.05, the y-axis minimum */
+            c.setOption({tooltip:{trigger:'item',backgroundColor:TBG,borderColor:TBR,textStyle:{color:TFG,fontSize:9},formatter:function(p){var s=p.data&&p.data.submission;return s?tip(s):'';}},grid:{left:36,right:6,top:6,bottom:18},xAxis:{type:'log',min:gxMn,axisLabel:{fontSize:6,color:AT},splitLine:{show:false}},yAxis:{type:'log',axisLabel:{fontSize:6,color:AT},splitLine:{show:false},min:0.05},series:series});
+        },30);
+    });
+    return null;
+}
+
+/* ── Master ── */
+function uChart(){
+    thm();
+    var data=gCD();if(!data.length){if(cChart)cChart.clear();if(hChart)hChart.clear();smCharts.forEach(function(c){if(c)c.dispose();});smCharts=[];uTable([]);return;}
+    var ca=document.getElementById('chart-area'),ha=document.getElementById('heatmap-area'),cs=document.getElementById('chart-sub');
+    /* Clean up */
+    smCharts.forEach(function(c){if(c)c.dispose();});smCharts=[];
+    while(ca.children.length>0){ca.removeChild(ca.firstChild);}
+    ca.style.display='block';ha.style.display='none';
+    if(cChart){cChart.dispose();cChart=null;}
+    if(hChart){hChart.dispose();hChart=null;}
+
+    var opt=null;
+    if(curView==='heatmap'){
+        ca.style.display='none';ha.style.display='block';
+        hChart=echarts.init(ha);hChart.setOption(rHeatmap(data));
+        hChart.off('click');hChart.on('click',function(p){if(p.data){var subs=data.filter(function(d){return d.chip===p.data[1]&&d.suite==='suite_'+p.data[0];});if(subs.length)showDrill(p.data[1],'suite_'+p.data[0]);}});
+        cs.textContent='Click a cell to drill down. Color depth = best throughput.';
+    }else if(curView==='small'){
+        opt=rSmall(data);
+        if(!opt){cs.textContent='Each suite in its own chart. Click to drill down.';}
+        else{cChart=echarts.init(ca);cChart.setOption(opt);}
+    }else if(curView==='density'){
+        opt=rDensity(data);cChart=echarts.init(ca);cChart.setOption(opt);
+        cs.textContent='Darker blue = more recipes overlapping. Top-right = frontier.';
+    }else if(curView==='scatter'){
+        opt=rScatter(data);cChart=echarts.init(ca);cChart.setOption(opt);
+        cs.textContent='Colour = vendor · glow = best recipe · click to drill down';
+    }else{
+        opt=rBeeswarm(data);cChart=echarts.init(ca);cChart.setOption(opt);
+        cs.textContent='Grouped by Suite. Dot spread within a Suite shows optimization variance.';
+    }
+    if(cChart){cChart.off('click');cChart.on('click',function(p){if(p.data&&p.data.submission)showDrill(p.data.submission.chip,p.data.submission.suite);});}
+    document.getElementById('filter-info').textContent='Showing '+data.length+' recipes · '+([...new Set(data.map(function(d){return d.chip_vendor;}))].length)+' vendors';
+    var af=gAF();cg=gCG(af);uStats(af,data);uCards(cg);uTable(cg);
+}
+
+function uStats(data,cd){var chips=new Set(),fws=new Set(),suites=new Set();data.forEach(function(s){chips.add(s.chip);fws.add(s.framework);suites.add(s.suite);});var wo=cd.filter(function(s){return s.scenarios.online&&s.scenarios.online.is_valid&&s.scenarios.online.throughput>0;}).length;document.getElementById('stats-row').innerHTML=[[chips.size,'Chips'],[data.length,'Recipes'],[suites.size,'Suites'],[fws.size,'Frameworks'],[wo,'With QPS']].map(function(c){return'<div class="card si"><div class="sn">'+c[0]+'</div><div class="sl">'+c[1]+'</div></div>';}).join('');}
+
+function uCards(groups){var vg=new Map();groups.forEach(function(g){var v=g.vendor;if(!vg.has(v))vg.set(v,[]);vg.get(v).push(g);});var vO=['NVIDIA','Huawei','AMD','Google','Apple','Moore Threads'];var sv=[...vg.keys()].sort(function(a,b){var ia=vO.indexOf(a),ib=vO.indexOf(b);if(ia===-1)ia=99;if(ib===-1)ib=99;return ia-ib;});var h='<div class="chip-cols">';sv.forEach(function(v){var chips=vg.get(v),vc2=vc(v),MAX=(v==='NVIDIA')?6:99,show=showAll?chips:chips.slice(0,MAX);chips.sort(function(a,b){return b.bestThr-a.bestThr;});h+='<div class="chip-col"><div class="chip-col-hd"><span class="ccdot" style="background:'+vc2+'"></span><span class="ccname">'+v+'</span><span class="cccount">'+chips.length+' chips</span></div>';show.forEach(function(g){h+='<div class="cc-item" onclick="scrollToChip(\''+g.chip.replace(/'/g,"\\'")+'\')"><span class="ccname2">'+g.chip+'</span><span class="ccruns">'+g.subs.length+'</span><span class="ccthr">'+fm(g.bestThr)+' tok/s</span></div>';});if(chips.length>MAX)h+='<span class="cc-more" onclick="showAll=!showAll;uCards(cg)">'+(showAll?'▲ Less':'▼ +'+(chips.length-MAX)+' more')+'</span>';h+='</div>';});h+='</div>';document.getElementById('chip-cols').innerHTML=h;}
+function scrollToChip(chip){document.querySelectorAll('#stats-tbody tr[data-chip]').forEach(function(r){if(r.getAttribute('data-chip')===chip){r.scrollIntoView({behavior:'smooth',block:'center'});r.style.background='rgba(59,130,246,.08)';setTimeout(function(){r.style.background='';},1800);}});}
+
+function showDrill(chip,suite){var subs=allSubs.filter(function(s){return s.chip===chip&&s.suite===suite;});if(!subs.length)return;var best=subs.reduce(function(a,b){return b.primary_throughput>a.primary_throughput?b:a;},subs[0]);var scSet=new Set();subs.forEach(function(s){Object.keys(s.scenarios).forEach(function(sc){scSet.add(sc);});});var scs=[...scSet].sort(),sr='';scs.forEach(function(sc){var sb=subs.filter(function(s){return s.scenarios[sc]&&s.scenarios[sc].is_valid;});if(!sb.length){sr+='<tr><td>'+sc+'</td><td class="na">—</td><td class="na">—</td><td>0</td></tr>';}else{var b=sb.reduce(function(a,b){return b.scenarios[sc].throughput>a.scenarios[sc].throughput?b:a;},sb[0]);sr+='<tr><td><strong>'+sc+'</strong></td><td><strong>'+fm(b.scenarios[sc].throughput)+'</strong> '+b.scenarios[sc].metric_label+'</td><td>'+b.framework+' '+(b.framework_version||'')+'</td><td>'+sb.length+'</td></tr>';}});var sl='';subs.slice().sort(function(a,b){return b.primary_throughput-a.primary_throughput;}).forEach(function(s){sl+='<tr><td>'+s.primary_scenario+'</td><td>'+fm(s.primary_throughput)+'</td><td>'+s.framework+' '+(s.framework_version||'')+'</td><td>'+s.precision+'</td><td>'+s.tier+'</td><td>'+(s.submitted_by||'-')+'</td></tr>';});document.getElementById('drilldown-title').innerHTML='<span style="color:'+vc(best.chip_vendor)+'">●</span> '+chip+' · '+suite;document.getElementById('drilldown-body').innerHTML='<div class="dsec"><h5>Info</h5>'+['Chip','Framework','Suite','Model','Precision','Recipes'].map(function(k){var vv={Chip:'<strong>'+chip+'</strong> ×'+best.chip_count+' · '+best.memory_gb+'GB · '+best.chip_vendor,Framework:'<strong>'+best.framework+'</strong> '+(best.framework_version||''),Suite:suite+' — '+(SLAB[suite]||''),Model:'<code>'+(best.model_full||best.model)+'</code>'+(best.model_params_b?' ('+best.model_params_b+'B)':''),Precision:(best.precision||'-')+(best.effective_dtype?' 
(eff:'+best.effective_dtype+')':''),Recipes:subs.length};return'<div class="drow"><span class="dlbl">'+k+'</span><span class="dval">'+vv[k]+'</span></div>';}).join('')+'</div><div class="dsec"><h5>Per-Scenario Best</h5><table class="stbl"><thead><tr><th>Scenario</th><th>Best Metric</th><th>Best Framework</th><th>Count</th></tr></thead><tbody>'+sr+'</tbody></table></div><div class="dsec"><h5>Top Recipe</h5>'+['Throughput','Submitter','Config'].map(function(k){var vv={Throughput:'<strong style="color:var(--gold);font-size:.8rem">'+fm(best.primary_throughput)+' '+(best.primary_metric_label||'tok/s')+'</strong>',Submitter:(best.submitted_by||'-')+' · '+(best.date||'-'),Config:'<pre class="cfg-pre">'+JSON.stringify(best.config||{},null,2)+'</pre>'};return'<div class="drow"><span class="dlbl">'+k+'</span><span class="dval">'+vv[k]+'</span></div>';}).join('')+(best.reproduce_script?'<div class="drow"><span class="dlbl">Script</span><span class="dval"><code>'+best.reproduce_script+'</code></span></div>':'')+'</div><div class="dsec"><h5>All Recipes ('+subs.length+')</h5><table class="stbl"><thead><tr><th>Scenario</th><th>Throughput</th><th>Framework</th><th>Precision</th><th>Tier</th><th>Submitter</th></tr></thead><tbody>'+sl+'</tbody></table></div>';document.getElementById('drilldown-overlay').classList.add('show');}
+function closeDrillDown(){document.getElementById('drilldown-overlay').classList.remove('show');}
+
+function uTable(groups){var tb=document.getElementById('stats-tbody');tb.innerHTML='';if(!groups.length){tb.innerHTML='<tr><td colspan="8" style="text-align:center;color:var(--fg-faint)">No data</td></tr>';return;}var gBest=Math.max.apply(null,groups.map(function(g){return g.bestThr;}));groups.forEach(function(g,idx){var ip=gBest>0?((gBest-g.bestThr)/gBest*100):0,iTxt=ip<1?'best':('+'+ip.toFixed(0)+'%');var vc2=VCLS[g.vendor]||'vMoore',bs=g.bestSub||{},cs=(bs.framework||'?')+' '+(bs.framework_version||'');if(bs.primary_throughput)cs+=' — '+fm(bs.primary_throughput)+' tok/s';var tested={};g.subs.forEach(function(s){tested[s.suite]=true;});var sd='';SUITE_LIST.forEach(function(s){sd+='<span class="sdot '+(tested[s]?'on':'off')+'">'+s.replace('suite_','')+'</span>';});var row=tb.insertRow();row.setAttribute('data-chip',g.chip);row.innerHTML='<td class="td-recipe" title="'+cs+'">'+cs+'</td><td><span class="v-badge '+vc2+'">'+(g.vendor||'-')+'</span></td><td style="font-weight:600">'+g.chip+'</td><td><div class="sdots">'+sd+'</div></td><td>'+g.subs.length+'</td><td><strong>'+fm(g.bestThr)+'</strong></td><td><span class="hroom">'+iTxt+'</span></td><td><span class="expand-icon" id="exp-'+idx+'">▶</span></td>';(function(g,i){row.onclick=function(e){toggleRow(row,g,i);};})(g,idx);var er=tb.insertRow();er.className='expand-row';er.style.display='none';er.id='er-'+idx;var sgm=new Map();g.subs.forEach(function(s){if(!sgm.has(s.suite))sgm.set(s.suite,[]);sgm.get(s.suite).push(s);});var eh='<div class="expand-inner"><table><thead><tr><th>Suite</th><th>Recipes</th><th>Best Throughput</th><th>Best Framework</th><th>Precisions</th></tr></thead><tbody>';sgm.forEach(function(subs,suite){var b=subs.reduce(function(a,b){return b.primary_throughput>a.primary_throughput?b:a;},subs[0]);var pl=[...new Set(subs.map(function(s){return s.precision;}))].sort().join(', ');eh+='<tr><td><span class="slink" style="color:var(--accent);cursor:pointer;font-weight:600" data-chip="'+g.chip+'" 
data-suite="'+suite+'">'+suite+'</span></td><td>'+subs.length+'</td><td>'+fm(b.primary_throughput)+'</td><td>'+b.framework+' '+(b.framework_version||'')+'</td><td>'+pl+'</td></tr>';});eh+='</tbody></table></div>';er.innerHTML='<td colspan="8">'+eh+'</td>';er.querySelectorAll('.slink').forEach(function(l){l.addEventListener('click',function(e){e.stopPropagation();showDrill(this.dataset.chip,this.dataset.suite);});});});}
+function toggleRow(r,g,idx){var er=document.getElementById('er-'+idx),ic=document.getElementById('exp-'+idx);if(!er)return;if(er.style.display==='none'){er.style.display='table-row';if(ic)ic.classList.add('open');}else{er.style.display='none';if(ic)ic.classList.remove('open');}}
+
+function bindEvents(){var ids=['f-suite','f-vendor','f-framework','f-precision','f-chip','f-metric'];ids.forEach(function(id){document.getElementById(id).addEventListener('change',uChart);});document.getElementById('btn-reset').addEventListener('click',function(){ids.forEach(function(id){document.getElementById(id).value='';});uChart();});document.querySelectorAll('.chart-tab').forEach(function(t){t.addEventListener('click',function(){document.querySelectorAll('.chart-tab').forEach(function(tt){tt.classList.remove('active');});t.classList.add('active');curView=t.dataset.view;uChart();});});document.getElementById('drilldown-overlay').addEventListener('click',function(e){if(e.target===this)closeDrillDown();});document.addEventListener('keydown',function(e){if(e.key==='Escape')closeDrillDown();});window.matchMedia('(prefers-color-scheme:light)').addEventListener('change',function(){uChart();});window.addEventListener('resize',function(){if(cChart)cChart.resize();if(hChart)hChart.resize();smCharts.forEach(function(c){if(c)c.resize();});});}
+function init(){if(!ld())return;iF();bindEvents();uChart();}
+init();
+</script>
+</body>
+</html>
\ No newline at end of file
diff --git a/leaderboard/site/distribution.js b/leaderboard/site/distribution.js
new file mode 100644
index 00000000..47255b81
--- /dev/null
+++ b/leaderboard/site/distribution.js
@@ -0,0 +1,12383 @@
+// Auto-generated by leaderboard/generate.py. Do not edit manually.
+
+const DISTRIBUTION_SUBMISSIONS = [
+  {
+    "id": "2349a925",
+    "chip": "Apple M1",
+    "chip_vendor": "Apple",
+    "chip_count": 1,
+    "memory_gb": 16.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "mlx-lm",
+    "framework_version": "0.31.2",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-20",
+    "reproduce_script": "runners/apple_mlx_lm_9546b8b5/runner.py",
+    "runner_id": "apple_mlx_lm_9546b8b5",
+    "scenarios": {
+      "offline": {
+        "throughput": 51.0,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 1.24,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 51.0,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 1.24
+    }
+  },
+  {
+    "id": "300ac34c",
+    "chip": "Google TPU v5e",
+    "chip_vendor": "Google",
+    "chip_count": 1,
+    "memory_gb": 16.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-tpu",
+    "framework_version": "0.13.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/google_vllm_tpu_68cc9ffa/runner.py",
+    "runner_id": "google_vllm_tpu_68cc9ffa",
+    "scenarios": {
+      "offline": {
+        "throughput": 8127.1,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 8127.1,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "d9b3177f",
+    "chip": "Google TPU v6e",
+    "chip_vendor": "Google",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-tpu",
+    "framework_version": "0.13.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/google_vllm_tpu_68cc9ffa/runner.py",
+    "runner_id": "google_vllm_tpu_68cc9ffa",
+    "scenarios": {
+      "offline": {
+        "throughput": 2775.55,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 2774.41,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 2775.55,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "d037f60f",
+    "chip": "Google TPU v6e",
+    "chip_vendor": "Google",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-tpu",
+    "framework_version": "0.13.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/google_vllm_tpu_68cc9ffa/runner.py",
+    "runner_id": "google_vllm_tpu_68cc9ffa",
+    "scenarios": {
+      "offline": {
+        "throughput": 54.52,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 54.52,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "9318bfeb",
+    "chip": "Google TPU v6e",
+    "chip_vendor": "Google",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-tpu",
+    "framework_version": "0.13.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/google_vllm_tpu_68cc9ffa/runner.py",
+    "runner_id": "google_vllm_tpu_68cc9ffa",
+    "scenarios": {
+      "offline": {
+        "throughput": 6857.53,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 6857.53,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 64,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "a2777c30",
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "chip_count": 1,
+    "memory_gb": 64.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 1699.04,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1699.04,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 268.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 1718.23,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1699.04,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "a3547ba9",
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "chip_count": 1,
+    "memory_gb": 64.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 53.22,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 53.22,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 53.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 53.22,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "bd7d8f87",
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "chip_count": 1,
+    "memory_gb": 64.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 4941.13,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 4941.13,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1238.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 4941.13,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "fcb9725c",
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "chip_count": 8,
+    "memory_gb": 64.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 769.88,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 2,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 53.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 769.88,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 769.88,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "354e5562",
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "chip_count": 8,
+    "memory_gb": 64.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 2430.05,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "d726144e",
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "chip_count": 8,
+    "memory_gb": 64.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 1631.87,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 2,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1631.87,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 226.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1631.87,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "635ecf42",
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "chip_count": 16,
+    "memory_gb": 64.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 723.06,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 2,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 53.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 723.06,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 723.06,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 16,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b1fe92eb",
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "chip_count": 16,
+    "memory_gb": 64.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 2499.29,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "329a2b9e",
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "chip_count": 16,
+    "memory_gb": 64.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 1638.62,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 2,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1638.62,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 262.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1638.62,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 16,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "74d19743",
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "chip_count": 1,
+    "memory_gb": 64.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 1888.72,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1888.72,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 376.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 1858.56,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1888.72,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6c1e7ffe",
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "chip_count": 1,
+    "memory_gb": 64.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 55.0,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 55.0,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 54.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 55.0,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "8826a63d",
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "chip_count": 1,
+    "memory_gb": 64.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vllm-ascend",
+    "framework_version": "0.18.0rc1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/ascend_vllm_ascend_d4aa9fda/runner.py",
+    "runner_id": "ascend_vllm_ascend_d4aa9fda",
+    "scenarios": {
+      "offline": {
+        "throughput": 7848.84,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 7848.84,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2217.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 7848.84,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b4a92b30",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2415.99,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 33.52,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 2415.99,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 33.52,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 484.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 947.4,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 2415.99,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 33.52
+    }
+  },
+  {
+    "id": "57cc3fdf_BF16",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2387.7,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 491.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2387.7,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "57cc3fdf_FP8",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2574.53,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 709.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2574.53,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "57cc3fdf_W8A8",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3205.63,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 657.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3205.63,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "57cc3fdf_W8A16",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2465.33,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 700.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2465.33,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "57cc3fdf_W4A16",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1563.39,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 813.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1563.39,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "8e114cbe",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 56.79,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 35.46,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 56.79,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 35.46,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 57.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 56.79,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 35.46
+    }
+  },
+  {
+    "id": "fe3156b5",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 10805.85,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 33.24,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 10805.85,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 33.24,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 3972.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 10805.85,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 33.24
+    }
+  },
+  {
+    "id": "14410aea",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 40.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1704.71,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 30.57,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 25,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 164.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1704.71,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 30.57,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1704.71,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 30.57
+    }
+  },
+  {
+    "id": "e76a4402",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 40.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 3000.59,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "08de2dc2",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 40.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3217.83,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 32.24,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3217.83,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 32.24,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 472.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3217.83,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 32.24
+    }
+  },
+  {
+    "id": "8f83bfab",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 3916.69,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3916.69,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 712.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3916.69,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "ed4b0557",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2700.61,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 69.33,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 2700.61,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 69.33,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 551.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 999.61,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 2700.61,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 69.33
+    }
+  },
+  {
+    "id": "ffd81462_BF16",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 3888.91,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 706.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3888.91,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "ffd81462_FP8",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 4141.71,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 438.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 4141.71,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "ffd81462_W8A8",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 3208.11,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 399.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3208.11,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "ffd81462_W8A16",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 3547.44,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 494.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3547.44,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "ffd81462_W4A16",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 1889.19,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 437.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1889.19,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6940965a_BF16",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2666.87,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 537.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2666.87,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6940965a_FP8",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2823.04,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 709.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2823.04,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6940965a_W8A8",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3521.09,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 643.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3521.09,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6940965a_W8A16",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2661.86,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 694.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2661.86,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6940965a_W4A16",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1761.96,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 757.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1761.96,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "43e96189",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 65.15,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 65.15,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 58.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 65.15,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "7bef8eef",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 70.21,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 71.21,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 70.21,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 71.21,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 67.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 70.21,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 71.21
+    }
+  },
+  {
+    "id": "a4e6a6e4",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.20.1+transformers-5.8.1",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_vllm020_0f6c56e4/runner.py",
+    "runner_id": "nvidia_vllm020_0f6c56e4",
+    "scenarios": {
+      "offline": {
+        "throughput": 22884.92,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 22884.92,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 11576.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 22884.92,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 64,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "52ad2fe3",
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 11011.89,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 69.0,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 11011.89,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 69.0,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2386.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 11011.89,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 69.0
+    }
+  },
+  {
+    "id": "298e6500",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2723.49,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 69.33,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 2723.49,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 69.33,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 546.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 1050.9,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 2723.49,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 69.33
+    }
+  },
+  {
+    "id": "944773aa_BF16",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2710.52,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 534.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2710.52,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "944773aa_FP8",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2859.89,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 760.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2859.89,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "944773aa_W8A8",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3570.3,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 715.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3570.3,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "944773aa_W8A16",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2695.72,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 745.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2695.72,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "944773aa_W4A16",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1770.93,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 829.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1770.93,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4d0e7990",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 70.34,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 71.21,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 70.34,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 71.21,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 67.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 70.34,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 71.21
+    }
+  },
+  {
+    "id": "54d0e7aa",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 80.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 11972.12,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": 69.0,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 11972.12,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": 69.0,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2804.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 11972.12,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 64,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 69.0
+    }
+  },
+  {
+    "id": "de0853fa",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 80.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1803.59,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 67.58,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 25,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 184.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1803.59,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 67.58,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1803.59,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 67.58
+    }
+  },
+  {
+    "id": "74d08a7a",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 80.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 3341.37,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "d31ba78b",
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 80.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3428.42,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 69.21,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3428.42,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 69.21,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 569.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3428.42,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 69.21
+    }
+  },
+  {
+    "id": "e95e2caa",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1001.2,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 19.33,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1001.2,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 19.33,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 309.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1001.2,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 19.33
+    }
+  },
+  {
+    "id": "4955fbb1_BF16",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 898.24,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 306.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 898.24,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4955fbb1_FP8",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1404.18,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 472.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1404.18,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4955fbb1_W8A8",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2065.49,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 475.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2065.49,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4955fbb1_W8A16",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1374.59,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 475.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1374.59,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4955fbb1_W4A16",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1054.17,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 588.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1054.17,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "faf550ec",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 8961.38,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 19.03,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 8961.38,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 19.03,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2693.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 8961.38,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 19.03
+    }
+  },
+  {
+    "id": "81ca6d0e",
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 4,
+    "memory_gb": 24.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 1065.49,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "d6543f77",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1461.83,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 19.17,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1461.83,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 19.17,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 400.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1461.83,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 19.17
+    }
+  },
+  {
+    "id": "b59b0798_BF16",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1292.71,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 393.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1292.71,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b59b0798_FP8",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2933.85,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 640.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2933.85,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b59b0798_W8A8",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2678.33,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 567.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2678.33,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b59b0798_W8A16",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2226.02,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 628.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2226.02,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b59b0798_W4A16",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1343.61,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 854.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1343.61,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "06662a14",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 14273.14,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 18.86,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 14273.14,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 18.86,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 5995.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 14273.14,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 18.86
+    }
+  },
+  {
+    "id": "bba67533",
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 4,
+    "memory_gb": 24.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 1633.89,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "675e325e",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1452.3,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 19.21,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1452.3,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 19.21,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 339.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1452.3,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 19.21
+    }
+  },
+  {
+    "id": "6d7e1d48_BF16",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1299.02,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 334.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1299.02,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6d7e1d48_FP8",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2888.23,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 472.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2888.23,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6d7e1d48_W8A8",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2622.06,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 438.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2622.06,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6d7e1d48_W8A16",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2171.95,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 506.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2171.95,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6d7e1d48_W4A16",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1345.66,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 606.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1345.66,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b228454f",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 24.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 11440.55,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 18.89,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 11440.55,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 18.89,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1698.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 11440.55,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 18.89
+    }
+  },
+  {
+    "id": "cfd0bdc8",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 24.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-21",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 442.95,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 18.89,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 104.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 442.95,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 18.89,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 442.95,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 18.89
+    }
+  },
+  {
+    "id": "54dccbd0",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 24.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-21",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 1609.89,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "a4179ecc",
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 24.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-21",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1361.9,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 20.58,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 2,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1361.9,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 20.58,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 325.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1361.9,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 20.58
+    }
+  },
+  {
+    "id": "b8f8ed0f",
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 31.8,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.19.1rc1.dev339+gedc364896",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3487.52,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3487.52,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 707.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3487.52,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "d1baa050_BF16",
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 31.8,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.19.1rc1.dev339+gedc364896",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3519.52,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 676.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3519.52,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "d1baa050_W8A16",
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 31.8,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.19.1rc1.dev339+gedc364896",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5303.83,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1148.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 5303.83,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "d1baa050_W4A16",
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 31.8,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.19.1rc1.dev339+gedc364896",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2934.27,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1381.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2934.27,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "e87e6c36",
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 31.8,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.19.1rc1.dev339+gedc364896",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 54.26,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 54.26,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 51.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 54.26,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "776d2702",
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 31.8,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.19.1rc1.dev339+gedc364896",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 15323.19,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 15323.19,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 3941.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 15323.19,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "831c95a7",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5128.31,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 69.2,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 25,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 5128.31,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 69.2,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 907.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 1733.92,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 5128.31,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 69.2
+    }
+  },
+  {
+    "id": "a4a8716a_BF16",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5052.11,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 5052.11,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "a4a8716a_FP8",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 6314.92,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 6314.92,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "a4a8716a_W8A8",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 6203.69,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 6203.69,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "a4a8716a_W8A16",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5369.94,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 5369.94,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "a4a8716a_W4A16",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3156.46,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3156.46,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "02748da4",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 160.38,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 71.13,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 160.38,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 71.13,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 142.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 160.38,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 71.13
+    }
+  },
+  {
+    "id": "2c0b7beb",
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 79.6,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 18910.96,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 68.91,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 18910.96,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 68.91,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 6144.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 18910.96,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 68.91
+    }
+  },
+  {
+    "id": "29b2ec38",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5730.96,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 123.76,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 5730.96,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 123.76,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 709.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 1965.64,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 5730.96,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 123.76
+    }
+  },
+  {
+    "id": "f07c60f8_BF16",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5333.35,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 709.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 5333.35,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "f07c60f8_FP8",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 6195.58,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 713.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 6195.58,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "f07c60f8_W8A8",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 6146.56,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 694.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 6146.56,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "f07c60f8_W8A16",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5093.12,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 708.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 5093.12,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "f07c60f8_W4A16",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3278.96,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 649.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3278.96,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "62a36028",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 171.93,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 125.69,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 171.93,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 125.69,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 132.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 170.37,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 171.93,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 125.69
+    }
+  },
+  {
+    "id": "53471efa",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-09",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 12862.12,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 123.42,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 12862.12,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 123.42,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1425.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 12862.12,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 123.42
+    }
+  },
+  {
+    "id": "b727568e",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 140.4,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-09",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3553.46,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 116.69,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 241.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3553.46,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 116.69,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3553.46,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 116.69
+    }
+  },
+  {
+    "id": "f005e907",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 140.4,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-09",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 6560.55,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "7f7a270e",
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 140.4,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 7.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-14",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5334.28,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 118.32,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 5334.28,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 118.32,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 591.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 5334.28,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 118.32
+    }
+  },
+  {
+    "id": "3f6269bb",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2297.65,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 123.81,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 2297.65,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 123.81,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 486.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 783.62,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 2297.65,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 123.81
+    }
+  },
+  {
+    "id": "1bcdc710_BF16",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2214.68,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 484.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2214.68,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "1bcdc710_FP8",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2945.86,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 494.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2945.86,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "1bcdc710_W8A8",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3120.93,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 533.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3120.93,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "1bcdc710_W8A16",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2257.42,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 645.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2257.42,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "1bcdc710_W4A16",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1572.76,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 648.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1572.76,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "60c91bf0",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 44.09,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 125.72,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 44.09,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 125.72,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 41.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 44.09,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 125.72
+    }
+  },
+  {
+    "id": "1e7ed8ca",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 11716.68,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 123.52,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 11716.68,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 123.52,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1771.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 11716.68,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 123.52
+    }
+  },
+  {
+    "id": "76ce4cd0",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 140.4,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1694.75,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 116.75,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 176.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1694.75,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 116.75,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1694.75,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 116.75
+    }
+  },
+  {
+    "id": "f0d031f5",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 140.4,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 2485.62,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "7bd76bb5",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 140.4,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 3757.41,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 118.44,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3757.41,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 118.44,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 561.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3757.41,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 118.44
+    }
+  },
+  {
+    "id": "b991b4c1",
+    "chip": "NVIDIA L4",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 22.5,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 366.08,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 17.85,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 366.08,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 17.85,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 116.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 261.34,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 366.08,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 17.85
+    }
+  },
+  {
+    "id": "d58fa923",
+    "chip": "NVIDIA L4",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 22.5,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 7188.02,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 17.55,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 7188.02,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 17.55,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2837.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 7188.02,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 17.55
+    }
+  },
+  {
+    "id": "125c6b61",
+    "chip": "NVIDIA RTX 4000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 20.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-21",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 9380.99,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 15.33,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 9380.99,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 15.33,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 3880.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 9380.99,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 15.33
+    }
+  },
+  {
+    "id": "bd3b5d27",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1859.98,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 40.64,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1859.98,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 40.64,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 376.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 854.15,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1859.98,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 40.64
+    }
+  },
+  {
+    "id": "e60276e9_BF16",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1845.04,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 371.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1845.04,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "e60276e9_FP8",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2667.64,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 586.1,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2667.64,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "e60276e9_W8A8",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2543.08,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 534.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2543.08,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "e60276e9_W8A16",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 2093.62,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 584.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 2093.62,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "e60276e9_W4A16",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1271.3,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 816.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1271.3,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "42ab3af7",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 33.21,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 42.53,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 33.21,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 42.53,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 32.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 33.21,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 42.53
+    }
+  },
+  {
+    "id": "2b905f5e",
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-19",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 8248.35,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 40.31,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 8248.35,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 40.31,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2895.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 8248.35,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 40.31
+    }
+  },
+  {
+    "id": "7cd0b745",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1567.35,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 40.71,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1567.35,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": 40.71,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 265.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 657.85,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1567.35,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 40.71
+    }
+  },
+  {
+    "id": "b87c1621_BF16",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1542.59,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 272.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1542.59,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b87c1621_FP8",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1469.08,
+        "metric_label": "tokens/sec (FP8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 435.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1469.08,
+    "primary_metric_label": "tokens/sec (FP8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b87c1621_W8A8",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A8",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1971.81,
+        "metric_label": "tokens/sec (W8A8)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 419.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1971.81,
+    "primary_metric_label": "tokens/sec (W8A8)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b87c1621_W8A16",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1469.7,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 433.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1469.7,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b87c1621_W4A16",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 940.08,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 541.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 940.08,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "f2197473",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 31.11,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 42.6,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 31.11,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 42.6,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 30.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 31.11,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 42.6
+    }
+  },
+  {
+    "id": "a33d6eb3",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 10433.7,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 40.41,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 10433.7,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": 40.41,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1917.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 10433.7,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 40.41
+    }
+  },
+  {
+    "id": "0981ecf7",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 48.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 798.25,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 40.12,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 105.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 798.25,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": 40.12,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 798.25,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 40.12
+    }
+  },
+  {
+    "id": "334507e5",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 48.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 1929.75,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "a8cf2a0f",
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 48.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-17",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1681.33,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": 41.83,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1681.33,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": 41.83,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 343.0,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1681.33,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 64,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 41.83
+    }
+  },
+  {
+    "id": "4660bc0b",
+    "chip": "Tesla T4",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 15.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 5125.58,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 10.83,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 5125.58,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 10.83,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2006.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 5125.58,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 10.83
+    }
+  },
+  {
+    "id": "48261ecc",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 933.93,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 26.54,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 933.93,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 26.54,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 268.3,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 431.31,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 933.93,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 26.54
+    }
+  },
+  {
+    "id": "b957e789_FP16",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 936.85,
+        "metric_label": "tokens/sec (FP16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 265.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 936.85,
+    "primary_metric_label": "tokens/sec (FP16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "b957e789_W4A16",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "W4A16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 783.65,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 416.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 783.65,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "6eb549a8",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 15.01,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 28.46,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 15.01,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": 28.46,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 14.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 15.01,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 28.46
+    }
+  },
+  {
+    "id": "04fce6f6",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-15",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 7870.64,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 26.25,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 10,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 7870.64,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 26.25,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 2789.7,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 7870.64,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": 26.25
+    }
+  },
+  {
+    "id": "48f19c22",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 32.0,
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_params_b": 70.0,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 466.49,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 26.16,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 92.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 4,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 466.49,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": 26.16,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.0,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 466.49,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 26.16
+    }
+  },
+  {
+    "id": "865d778c",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 32.0,
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-16",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": null,
+        "metric_label": "",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": false
+      }
+    },
+    "primary_scenario": "scaling",
+    "primary_throughput": 1172.42,
+    "primary_metric_label": "tokens/sec (1x baseline)",
+    "config": {}
+  },
+  {
+    "id": "2ef567be",
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 8,
+    "memory_gb": 32.0,
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "model_full": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_params_b": 46.7,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "vLLM",
+    "framework_version": "0.7.3",
+    "tier": "verified",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-04-18",
+    "reproduce_script": "runners/nvidia_vllm_47f5d58e/runner.py",
+    "runner_id": "nvidia_vllm_47f5d58e",
+    "scenarios": {
+      "offline": {
+        "throughput": 1026.12,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 27.83,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 2,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 1026.12,
+        "metric_label": "tokens/sec",
+        "concurrency": 4,
+        "peak_memory_gb": 27.83,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 293.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 1026.12,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 4,
+      "batch_size": null,
+      "tensor_parallel": 8,
+      "peak_memory_gb": 27.83
+    }
+  },
+  {
+    "id": "cabb7bd0",
+    "chip": "MTT S4000",
+    "chip_vendor": "Moore Threads",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "float16",
+    "framework": "vllm-musa",
+    "framework_version": "0.4.2",
+    "tier": "community",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
+    "runner_id": "moorethreads_vllm_musa_f2f6f965",
+    "scenarios": {
+      "offline": {
+        "throughput": 332.62,
+        "metric_label": "tokens/sec",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 5,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 332.62,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 8,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4f66d29d",
+    "chip": "MTT S4000",
+    "chip_vendor": "Moore Threads",
+    "chip_count": 1,
+    "memory_gb": 48.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "float16",
+    "framework": "vllm-musa",
+    "framework_version": "0.4.2",
+    "tier": "community",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/moorethreads_vllm_musa_f2f6f965/runner.py",
+    "runner_id": "moorethreads_vllm_musa_f2f6f965",
+    "scenarios": {
+      "offline": {
+        "throughput": 2004.02,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 2004.02,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 2004.02,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 64,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "958afbbd",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-05-06",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 3146.66,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 100,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 3146.66,
+        "metric_label": "tokens/sec",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 562.5,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 705.16,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.975,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 3146.66,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 32,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "651fefa6_BF16",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-04-30",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 3160.74,
+        "metric_label": "tokens/sec (BF16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 50,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 558.6,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3160.74,
+    "primary_metric_label": "tokens/sec (BF16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "651fefa6_W8A16",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8,
+    "precision": "W8A16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-04-30",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 3396.91,
+        "metric_label": "tokens/sec (W8A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 50,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 841.8,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 3396.91,
+    "primary_metric_label": "tokens/sec (W8A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "651fefa6_W4A16",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40,
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8,
+    "precision": "W4A16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-04-30",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 1817.91,
+        "metric_label": "tokens/sec (W4A16)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 50,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 760.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "quantization",
+    "primary_throughput": 1817.91,
+    "primary_metric_label": "tokens/sec (W4A16)",
+    "config": {
+      "concurrency": null,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "99c43b97",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40,
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "model_full": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_params_b": 8,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-05-07",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 59.89,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 59.89,
+        "metric_label": "tokens/sec",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 54.9,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 36.86,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 1,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 59.89,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 1,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "435424a8",
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 40,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-05-07",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 11509.2,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 40,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 11509.2,
+        "metric_label": "tokens/sec",
+        "concurrency": 64,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 7095.4,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 32,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 11509.2,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 64,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "83e3ec26",
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 140.4,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "BF16",
+    "effective_dtype": "bfloat16",
+    "framework": "SGLang",
+    "framework_version": "0.5.6",
+    "tier": "community",
+    "submitted_by": "Gong-K",
+    "date": "2026-06-25",
+    "reproduce_script": "runners/nvidia_sglang_c43a8309/runner.py",
+    "runner_id": "nvidia_sglang_c43a8309",
+    "scenarios": {
+      "offline": {
+        "throughput": 4342.21,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 100,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 4342.21,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "sustained": {
+        "throughput": 1272.2,
+        "metric_label": "tok/s (sustained mean)",
+        "concurrency": 8,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "speculative": {
+        "throughput": 613.8,
+        "metric_label": "tok/s (speculative)",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "burst": {
+        "throughput": 0.835,
+        "metric_label": "1 − degradation_ratio",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 4342.21,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "4e0e6eba",
+    "chip": "Tesla V100-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "model_full": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_params_b": 8.0,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "1Cat-vLLM",
+    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
+    "tier": "community",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
+    "runner_id": "nvidia_onecat_vllm_12a253c2",
+    "scenarios": {
+      "offline": {
+        "throughput": 671.43,
+        "metric_label": "tokens/sec",
+        "concurrency": 128,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 671.43,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 128,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  },
+  {
+    "id": "419b138c",
+    "chip": "Tesla V100-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "chip_count": 1,
+    "memory_gb": 32.0,
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "model_full": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_params_b": 0.5,
+    "precision": "FP16",
+    "effective_dtype": "float16",
+    "framework": "1Cat-vLLM",
+    "framework_version": "1.0.0+flash_attn_v100-1.0.0",
+    "tier": "community",
+    "submitted_by": "JuhaoLiang1997",
+    "date": "2026-05-18",
+    "reproduce_script": "runners/nvidia_onecat_vllm_12a253c2/runner.py",
+    "runner_id": "nvidia_onecat_vllm_12a253c2",
+    "scenarios": {
+      "offline": {
+        "throughput": 6292.79,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "online": {
+        "throughput": 0.0,
+        "metric_label": "max valid QPS",
+        "concurrency": null,
+        "peak_memory_gb": null,
+        "is_valid": true
+      },
+      "interactive": {
+        "throughput": 6292.79,
+        "metric_label": "tokens/sec",
+        "concurrency": 16,
+        "peak_memory_gb": null,
+        "is_valid": true
+      }
+    },
+    "primary_scenario": "offline",
+    "primary_throughput": 6292.79,
+    "primary_metric_label": "tokens/sec",
+    "config": {
+      "concurrency": 16,
+      "batch_size": null,
+      "tensor_parallel": 1,
+      "peak_memory_gb": null
+    }
+  }
+];
+window.DISTRIBUTION_SUBMISSIONS = DISTRIBUTION_SUBMISSIONS;
+
+const DISTRIBUTION_GROUPS = [
+  {
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 22884.92,
+    "median_throughput": 22884.92,
+    "min_throughput": 11011.89,
+    "max_throughput": 22884.92,
+    "stddev_throughput": 8395.5,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 22884.92,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 22884.92,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 11576.2,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "a4e6a6e4",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 18910.96,
+    "median_throughput": 18910.96,
+    "min_throughput": 18910.96,
+    "max_throughput": 18910.96,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 18910.96,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 18910.96,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 6144.7,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "2c0b7beb",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 15323.19,
+    "median_throughput": 15323.19,
+    "min_throughput": 15323.19,
+    "max_throughput": 15323.19,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 15323.19,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 15323.19,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 3941.2,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "776d2702",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 14273.14,
+    "median_throughput": 14273.14,
+    "min_throughput": 14273.14,
+    "max_throughput": 14273.14,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 14273.14,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 14273.14,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 5995.2,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "06662a14",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 12862.12,
+    "median_throughput": 12862.12,
+    "min_throughput": 12862.12,
+    "max_throughput": 12862.12,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 12862.12,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 12862.12,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 1425.4,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "53471efa",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 11972.12,
+    "median_throughput": 11972.12,
+    "min_throughput": 11972.12,
+    "max_throughput": 11972.12,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 11972.12,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 11972.12,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2804.8,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "54d0e7aa",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 11716.68,
+    "median_throughput": 11716.68,
+    "min_throughput": 11716.68,
+    "max_throughput": 11716.68,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 11716.68,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 11716.68,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 1771.6,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "1e7ed8ca",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 11509.2,
+    "median_throughput": 11509.2,
+    "min_throughput": 10805.85,
+    "max_throughput": 11509.2,
+    "stddev_throughput": 497.34,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 11509.2,
+        "best_framework": "SGLang"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 11509.2,
+        "best_framework": "SGLang"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 7095.4,
+        "best_framework": "SGLang"
+      }
+    },
+    "best_submission_id": "435424a8",
+    "best_framework": "SGLang",
+    "best_submitted_by": "Gong-K"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 11440.55,
+    "median_throughput": 11440.55,
+    "min_throughput": 11440.55,
+    "max_throughput": 11440.55,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 11440.55,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 11440.55,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 1698.1,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "b228454f",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 10433.7,
+    "median_throughput": 10433.7,
+    "min_throughput": 10433.7,
+    "max_throughput": 10433.7,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 10433.7,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 10433.7,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 1917.3,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "a33d6eb3",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX 4000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 9380.99,
+    "median_throughput": 9380.99,
+    "min_throughput": 9380.99,
+    "max_throughput": 9380.99,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 9380.99,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 9380.99,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 3880.8,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "125c6b61",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 8961.38,
+    "median_throughput": 8961.38,
+    "min_throughput": 8961.38,
+    "max_throughput": 8961.38,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 8961.38,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 8961.38,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2693.3,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "faf550ec",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 8248.35,
+    "median_throughput": 8248.35,
+    "min_throughput": 8248.35,
+    "max_throughput": 8248.35,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 8248.35,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 8248.35,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2895.0,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "2b905f5e",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 7870.64,
+    "median_throughput": 7870.64,
+    "min_throughput": 7870.64,
+    "max_throughput": 7870.64,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 7870.64,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 7870.64,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2789.7,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "04fce6f6",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA L4",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 7188.02,
+    "median_throughput": 7188.02,
+    "min_throughput": 7188.02,
+    "max_throughput": 7188.02,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 7188.02,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 40,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 7188.02,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2837.3,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "d58fa923",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 6560.55,
+    "median_throughput": 6560.55,
+    "min_throughput": 6560.55,
+    "max_throughput": 6560.55,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "f005e907",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 6292.79,
+    "median_throughput": 6292.79,
+    "min_throughput": 6292.79,
+    "max_throughput": 6292.79,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 6292.79,
+        "best_framework": "1Cat-vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 6292.79,
+        "best_framework": "1Cat-vLLM"
+      }
+    },
+    "best_submission_id": "419b138c",
+    "best_framework": "1Cat-vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 5730.96,
+    "median_throughput": 5730.96,
+    "min_throughput": 5730.96,
+    "max_throughput": 5730.96,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 5730.96,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 5730.96,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 709.2,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 1965.64,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "29b2ec38",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 6314.92,
+    "median_throughput": 5369.94,
+    "min_throughput": 3156.46,
+    "max_throughput": 6314.92,
+    "stddev_throughput": 1272.34,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 6314.92,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "a4a8716a_FP8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 5334.28,
+    "median_throughput": 5334.28,
+    "min_throughput": 5334.28,
+    "max_throughput": 5334.28,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 5334.28,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 5334.28,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 591.5,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "7f7a270e",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 6195.58,
+    "median_throughput": 5333.35,
+    "min_throughput": 3278.96,
+    "max_throughput": 6195.58,
+    "stddev_throughput": 1183.88,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 6195.58,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 713.4,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "f07c60f8_FP8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 5128.31,
+    "median_throughput": 5128.31,
+    "min_throughput": 5128.31,
+    "max_throughput": 5128.31,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 5128.31,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 25,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 5128.31,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 907.1,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 1733.92,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "831c95a7",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla T4",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 5125.58,
+    "median_throughput": 5125.58,
+    "min_throughput": 5125.58,
+    "max_throughput": 5125.58,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 5125.58,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 5125.58,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2006.9,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "4660bc0b",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 4342.21,
+    "median_throughput": 4342.21,
+    "min_throughput": 2297.65,
+    "max_throughput": 4342.21,
+    "stddev_throughput": 1445.72,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 4342.21,
+        "best_framework": "SGLang"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": 100,
+        "best_framework": "SGLang"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 4342.21,
+        "best_framework": "SGLang"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 1272.2,
+        "best_framework": "SGLang"
+      },
+      "speculative": {
+        "count": 2,
+        "best_throughput": 783.62,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 2,
+        "best_throughput": 0.835,
+        "best_framework": "SGLang"
+      }
+    },
+    "best_submission_id": "83e3ec26",
+    "best_framework": "SGLang",
+    "best_submitted_by": "Gong-K"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 3916.69,
+    "median_throughput": 3916.69,
+    "min_throughput": 2700.61,
+    "max_throughput": 3916.69,
+    "stddev_throughput": 859.9,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 3916.69,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 3916.69,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 712.3,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 2,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 999.61,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "8f83bfab",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 3757.41,
+    "median_throughput": 3757.41,
+    "min_throughput": 3757.41,
+    "max_throughput": 3757.41,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 3757.41,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 3757.41,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 561.2,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "7bd76bb5",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 3553.46,
+    "median_throughput": 3553.46,
+    "min_throughput": 3553.46,
+    "max_throughput": 3553.46,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 3553.46,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 241.2,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 3553.46,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "b727568e",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 3,
+    "best_throughput": 5303.83,
+    "median_throughput": 3519.52,
+    "min_throughput": 2934.27,
+    "max_throughput": 5303.83,
+    "stddev_throughput": 1234.31,
+    "scenario_summary": {
+      "offline": {
+        "count": 3,
+        "best_throughput": 5303.83,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 3,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 3,
+        "best_throughput": 1381.4,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "d1baa050_W8A16",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 3487.52,
+    "median_throughput": 3487.52,
+    "min_throughput": 3487.52,
+    "max_throughput": 3487.52,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 3487.52,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 3487.52,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 707.5,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "b8f8ed0f",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 3428.42,
+    "median_throughput": 3428.42,
+    "min_throughput": 3428.42,
+    "max_throughput": 3428.42,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 3428.42,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 3428.42,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 569.1,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "d31ba78b",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 3341.37,
+    "median_throughput": 3341.37,
+    "min_throughput": 3341.37,
+    "max_throughput": 3341.37,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "74d08a7a",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 3217.83,
+    "median_throughput": 3217.83,
+    "min_throughput": 3217.83,
+    "max_throughput": 3217.83,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 3217.83,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 3217.83,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 472.7,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "08de2dc2",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 10,
+    "best_throughput": 4141.71,
+    "median_throughput": 3208.11,
+    "min_throughput": 1761.96,
+    "max_throughput": 4141.71,
+    "stddev_throughput": 797.93,
+    "scenario_summary": {
+      "offline": {
+        "count": 10,
+        "best_throughput": 4141.71,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 10,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 10,
+        "best_throughput": 757.0,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "ffd81462_FP8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 3146.66,
+    "median_throughput": 3146.66,
+    "min_throughput": 2415.99,
+    "max_throughput": 3146.66,
+    "stddev_throughput": 516.66,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 3146.66,
+        "best_framework": "SGLang"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": 100,
+        "best_framework": "SGLang"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 3146.66,
+        "best_framework": "SGLang"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 562.5,
+        "best_framework": "SGLang"
+      },
+      "speculative": {
+        "count": 2,
+        "best_throughput": 947.4,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 2,
+        "best_throughput": 0.975,
+        "best_framework": "SGLang"
+      }
+    },
+    "best_submission_id": "958afbbd",
+    "best_framework": "SGLang",
+    "best_submitted_by": "Gong-K"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 3000.59,
+    "median_throughput": 3000.59,
+    "min_throughput": 3000.59,
+    "max_throughput": 3000.59,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "e76a4402",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 2723.49,
+    "median_throughput": 2723.49,
+    "min_throughput": 2723.49,
+    "max_throughput": 2723.49,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 2723.49,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 2723.49,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 546.4,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 1050.9,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "298e6500",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 3570.3,
+    "median_throughput": 2710.52,
+    "min_throughput": 1770.93,
+    "max_throughput": 3570.3,
+    "stddev_throughput": 641.09,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 3570.3,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 829.8,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "944773aa_W8A8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 8,
+    "best_throughput": 3396.91,
+    "median_throughput": 2574.53,
+    "min_throughput": 1563.39,
+    "max_throughput": 3396.91,
+    "stddev_throughput": 661.15,
+    "scenario_summary": {
+      "offline": {
+        "count": 8,
+        "best_throughput": 3396.91,
+        "best_framework": "SGLang"
+      },
+      "online": {
+        "count": 8,
+        "best_throughput": 50,
+        "best_framework": "SGLang"
+      },
+      "sustained": {
+        "count": 8,
+        "best_throughput": 841.8,
+        "best_framework": "SGLang"
+      }
+    },
+    "best_submission_id": "651fefa6_W8A16",
+    "best_framework": "SGLang",
+    "best_submitted_by": "Gong-K"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 2485.62,
+    "median_throughput": 2485.62,
+    "min_throughput": 2485.62,
+    "max_throughput": 2485.62,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "f0d031f5",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 3120.93,
+    "median_throughput": 2257.42,
+    "min_throughput": 1572.76,
+    "max_throughput": 3120.93,
+    "stddev_throughput": 623.34,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 3120.93,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 648.8,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "1bcdc710_W8A8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 2933.85,
+    "median_throughput": 2226.02,
+    "min_throughput": 1292.71,
+    "max_throughput": 2933.85,
+    "stddev_throughput": 753.22,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 2933.85,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 854.9,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "b59b0798_FP8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 2888.23,
+    "median_throughput": 2171.95,
+    "min_throughput": 1299.02,
+    "max_throughput": 2888.23,
+    "stddev_throughput": 725.2,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 2888.23,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 606.2,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "6d7e1d48_FP8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 2667.64,
+    "median_throughput": 2093.62,
+    "min_throughput": 1271.3,
+    "max_throughput": 2667.64,
+    "stddev_throughput": 563.26,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 2667.64,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 816.3,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "e60276e9_FP8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1929.75,
+    "median_throughput": 1929.75,
+    "min_throughput": 1929.75,
+    "max_throughput": 1929.75,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "334507e5",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1859.98,
+    "median_throughput": 1859.98,
+    "min_throughput": 1859.98,
+    "max_throughput": 1859.98,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1859.98,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1859.98,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 376.2,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 854.15,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "bd3b5d27",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1803.59,
+    "median_throughput": 1803.59,
+    "min_throughput": 1803.59,
+    "max_throughput": 1803.59,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1803.59,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 25,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 184.0,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1803.59,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "de0853fa",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1704.71,
+    "median_throughput": 1704.71,
+    "min_throughput": 1704.71,
+    "max_throughput": 1704.71,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1704.71,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 25,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 164.3,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1704.71,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "14410aea",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1694.75,
+    "median_throughput": 1694.75,
+    "min_throughput": 1694.75,
+    "max_throughput": 1694.75,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1694.75,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 176.0,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1694.75,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "76ce4cd0",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 1681.33,
+    "median_throughput": 1681.33,
+    "min_throughput": 1681.33,
+    "max_throughput": 1681.33,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1681.33,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1681.33,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 343.0,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "a8cf2a0f",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1633.89,
+    "median_throughput": 1633.89,
+    "min_throughput": 1633.89,
+    "max_throughput": 1633.89,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "bba67533",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1609.89,
+    "median_throughput": 1609.89,
+    "min_throughput": 1609.89,
+    "max_throughput": 1609.89,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "54dccbd0",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1567.35,
+    "median_throughput": 1567.35,
+    "min_throughput": 1567.35,
+    "max_throughput": 1567.35,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1567.35,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1567.35,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 265.3,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 657.85,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "7cd0b745",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 1971.81,
+    "median_throughput": 1469.7,
+    "min_throughput": 940.08,
+    "max_throughput": 1971.81,
+    "stddev_throughput": 366.58,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 1971.81,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 10,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 541.0,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "b87c1621_W8A8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090 D",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1461.83,
+    "median_throughput": 1461.83,
+    "min_throughput": 1461.83,
+    "max_throughput": 1461.83,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1461.83,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1461.83,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 400.1,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "d6543f77",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1452.3,
+    "median_throughput": 1452.3,
+    "min_throughput": 1452.3,
+    "max_throughput": 1452.3,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1452.3,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1452.3,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 339.8,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "675e325e",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 5,
+    "best_throughput": 2065.49,
+    "median_throughput": 1374.59,
+    "min_throughput": 898.24,
+    "max_throughput": 2065.49,
+    "stddev_throughput": 449.06,
+    "scenario_summary": {
+      "offline": {
+        "count": 5,
+        "best_throughput": 2065.49,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 5,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 5,
+        "best_throughput": 588.9,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "4955fbb1_W8A8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 1361.9,
+    "median_throughput": 1361.9,
+    "min_throughput": 1361.9,
+    "max_throughput": 1361.9,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1361.9,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 2,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1361.9,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 325.5,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "a4179ecc",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1172.42,
+    "median_throughput": 1172.42,
+    "min_throughput": 1172.42,
+    "max_throughput": 1172.42,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "865d778c",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1065.49,
+    "median_throughput": 1065.49,
+    "min_throughput": 1065.49,
+    "max_throughput": 1065.49,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "81ca6d0e",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 1026.12,
+    "median_throughput": 1026.12,
+    "min_throughput": 1026.12,
+    "max_throughput": 1026.12,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1026.12,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 2,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1026.12,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 293.9,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "2ef567be",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 3090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1001.2,
+    "median_throughput": 1001.2,
+    "min_throughput": 1001.2,
+    "max_throughput": 1001.2,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1001.2,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1001.2,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 309.9,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "e95e2caa",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_C",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 936.85,
+    "median_throughput": 936.85,
+    "min_throughput": 783.65,
+    "max_throughput": 936.85,
+    "stddev_throughput": 108.33,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 936.85,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 416.4,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "b957e789_FP16",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 933.93,
+    "median_throughput": 933.93,
+    "min_throughput": 933.93,
+    "max_throughput": 933.93,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 933.93,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 933.93,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 268.3,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 431.31,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "48261ecc",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 798.25,
+    "median_throughput": 798.25,
+    "min_throughput": 798.25,
+    "max_throughput": 798.25,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 798.25,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 105.3,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 798.25,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "0981ecf7",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 671.43,
+    "median_throughput": 671.43,
+    "min_throughput": 671.43,
+    "max_throughput": 671.43,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 671.43,
+        "best_framework": "1Cat-vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "4e0e6eba",
+    "best_framework": "1Cat-vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 466.49,
+    "median_throughput": 466.49,
+    "min_throughput": 466.49,
+    "max_throughput": 466.49,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 466.49,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 92.8,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 466.49,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "48f19c22",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 4090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 442.95,
+    "median_throughput": 442.95,
+    "min_throughput": 442.95,
+    "max_throughput": 442.95,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 442.95,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 104.5,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 442.95,
+        "best_framework": "vLLM"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "cfd0bdc8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA L4",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 366.08,
+    "median_throughput": 366.08,
+    "min_throughput": 366.08,
+    "max_throughput": 366.08,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 366.08,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 366.08,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 116.6,
+        "best_framework": "vLLM"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 261.34,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "b991b4c1",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H200",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 171.93,
+    "median_throughput": 171.93,
+    "min_throughput": 171.93,
+    "max_throughput": 171.93,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 171.93,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 171.93,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 132.9,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 170.37,
+        "best_framework": "vLLM"
+      }
+    },
+    "best_submission_id": "62a36028",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H100 80GB HBM3",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 160.38,
+    "median_throughput": 160.38,
+    "min_throughput": 160.38,
+    "max_throughput": 160.38,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 160.38,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 160.38,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 142.6,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "02748da4",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A800-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 70.34,
+    "median_throughput": 70.34,
+    "min_throughput": 70.34,
+    "max_throughput": 70.34,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 70.34,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 70.34,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 67.0,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "4d0e7990",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-80GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 70.21,
+    "median_throughput": 70.21,
+    "min_throughput": 65.15,
+    "max_throughput": 70.21,
+    "stddev_throughput": 3.58,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 70.21,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 70.21,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 67.1,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "7bef8eef",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA A100-SXM4-40GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 2,
+    "best_throughput": 59.89,
+    "median_throughput": 59.89,
+    "min_throughput": 56.79,
+    "max_throughput": 59.89,
+    "stddev_throughput": 2.19,
+    "scenario_summary": {
+      "offline": {
+        "count": 2,
+        "best_throughput": 59.89,
+        "best_framework": "SGLang"
+      },
+      "interactive": {
+        "count": 2,
+        "best_throughput": 59.89,
+        "best_framework": "SGLang"
+      },
+      "sustained": {
+        "count": 2,
+        "best_throughput": 57.0,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 2,
+        "best_throughput": null,
+        "best_framework": ""
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 36.86,
+        "best_framework": "SGLang"
+      }
+    },
+    "best_submission_id": "99c43b97",
+    "best_framework": "SGLang",
+    "best_submitted_by": "Gong-K"
+  },
+  {
+    "chip": "NVIDIA GeForce RTX 5090",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 54.26,
+    "median_throughput": 54.26,
+    "min_throughput": 54.26,
+    "max_throughput": 54.26,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 54.26,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 54.26,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 51.5,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "e87e6c36",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA H20-3e",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 44.09,
+    "median_throughput": 44.09,
+    "min_throughput": 44.09,
+    "max_throughput": 44.09,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 44.09,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 44.09,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 41.4,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "60c91bf0",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX 6000 Ada Generation",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 33.21,
+    "median_throughput": 33.21,
+    "min_throughput": 33.21,
+    "max_throughput": 33.21,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 33.21,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 33.21,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 32.3,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "42ab3af7",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "NVIDIA RTX A6000",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 31.11,
+    "median_throughput": 31.11,
+    "min_throughput": 31.11,
+    "max_throughput": 31.11,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 31.11,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 31.11,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 30.5,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "f2197473",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Tesla V100S-PCIE-32GB",
+    "chip_vendor": "NVIDIA",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 15.01,
+    "median_throughput": 15.01,
+    "min_throughput": 15.01,
+    "max_throughput": 15.01,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 15.01,
+        "best_framework": "vLLM"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 15.01,
+        "best_framework": "vLLM"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 14.9,
+        "best_framework": "vLLM"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "6eb549a8",
+    "best_framework": "vLLM",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 7848.84,
+    "median_throughput": 7848.84,
+    "min_throughput": 7848.84,
+    "max_throughput": 7848.84,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 7848.84,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 7848.84,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 2217.9,
+        "best_framework": "vllm-ascend"
+      }
+    },
+    "best_submission_id": "8826a63d",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 4941.13,
+    "median_throughput": 4941.13,
+    "min_throughput": 4941.13,
+    "max_throughput": 4941.13,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 4941.13,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 10,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 4941.13,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 1238.9,
+        "best_framework": "vllm-ascend"
+      }
+    },
+    "best_submission_id": "bd7d8f87",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 2499.29,
+    "median_throughput": 2499.29,
+    "min_throughput": 2499.29,
+    "max_throughput": 2499.29,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "b1fe92eb",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "suite": "suite_E",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 2430.05,
+    "median_throughput": 2430.05,
+    "min_throughput": 2430.05,
+    "max_throughput": 2430.05,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "354e5562",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1888.72,
+    "median_throughput": 1888.72,
+    "min_throughput": 1888.72,
+    "max_throughput": 1888.72,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1888.72,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1888.72,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 376.6,
+        "best_framework": "vllm-ascend"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 1858.56,
+        "best_framework": "vllm-ascend"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "74d19743",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 1699.04,
+    "median_throughput": 1699.04,
+    "min_throughput": 1699.04,
+    "max_throughput": 1699.04,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1699.04,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1699.04,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 268.0,
+        "best_framework": "vllm-ascend"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 1718.23,
+        "best_framework": "vllm-ascend"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "a2777c30",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 1638.62,
+    "median_throughput": 1638.62,
+    "min_throughput": 1638.62,
+    "max_throughput": 1638.62,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1638.62,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 2,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1638.62,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 262.2,
+        "best_framework": "vllm-ascend"
+      }
+    },
+    "best_submission_id": "329a2b9e",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "suite": "suite_G",
+    "model": "Mixtral-8x7B-Instruct-v0.1",
+    "submission_count": 1,
+    "best_throughput": 1631.87,
+    "median_throughput": 1631.87,
+    "min_throughput": 1631.87,
+    "max_throughput": 1631.87,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 1631.87,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 2,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 1631.87,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 226.6,
+        "best_framework": "vllm-ascend"
+      }
+    },
+    "best_submission_id": "d726144e",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 769.88,
+    "median_throughput": 769.88,
+    "min_throughput": 769.88,
+    "max_throughput": 769.88,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 769.88,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 2,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 53.2,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 769.88,
+        "best_framework": "vllm-ascend"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "fcb9725c",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "suite": "suite_B",
+    "model": "Meta-Llama-3-70B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 723.06,
+    "median_throughput": 723.06,
+    "min_throughput": 723.06,
+    "max_throughput": 723.06,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 723.06,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 2,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 53.5,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 723.06,
+        "best_framework": "vllm-ascend"
+      },
+      "burst": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "635ecf42",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend Ascend910",
+    "chip_vendor": "Huawei",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 55.0,
+    "median_throughput": 55.0,
+    "min_throughput": 55.0,
+    "max_throughput": 55.0,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 55.0,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 55.0,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 54.2,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "6c1e7ffe",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Huawei Ascend 910B2",
+    "chip_vendor": "Huawei",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 53.22,
+    "median_throughput": 53.22,
+    "min_throughput": 53.22,
+    "max_throughput": 53.22,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 53.22,
+        "best_framework": "vllm-ascend"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 53.22,
+        "best_framework": "vllm-ascend"
+      },
+      "sustained": {
+        "count": 1,
+        "best_throughput": 53.2,
+        "best_framework": "vllm-ascend"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": null,
+        "best_framework": ""
+      }
+    },
+    "best_submission_id": "a3547ba9",
+    "best_framework": "vllm-ascend",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Google TPU v5e",
+    "chip_vendor": "Google",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 8127.1,
+    "median_throughput": 8127.1,
+    "min_throughput": 8127.1,
+    "max_throughput": 8127.1,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 8127.1,
+        "best_framework": "vllm-tpu"
+      }
+    },
+    "best_submission_id": "300ac34c",
+    "best_framework": "vllm-tpu",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Google TPU v6e",
+    "chip_vendor": "Google",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 6857.53,
+    "median_throughput": 6857.53,
+    "min_throughput": 6857.53,
+    "max_throughput": 6857.53,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 6857.53,
+        "best_framework": "vllm-tpu"
+      }
+    },
+    "best_submission_id": "9318bfeb",
+    "best_framework": "vllm-tpu",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Google TPU v6e",
+    "chip_vendor": "Google",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 2775.55,
+    "median_throughput": 2775.55,
+    "min_throughput": 2775.55,
+    "max_throughput": 2775.55,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 2775.55,
+        "best_framework": "vllm-tpu"
+      },
+      "speculative": {
+        "count": 1,
+        "best_throughput": 2774.41,
+        "best_framework": "vllm-tpu"
+      }
+    },
+    "best_submission_id": "d9b3177f",
+    "best_framework": "vllm-tpu",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Google TPU v6e",
+    "chip_vendor": "Google",
+    "suite": "suite_D",
+    "model": "Llama-3.1-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 54.52,
+    "median_throughput": 54.52,
+    "min_throughput": 54.52,
+    "max_throughput": 54.52,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 54.52,
+        "best_framework": "vllm-tpu"
+      }
+    },
+    "best_submission_id": "d037f60f",
+    "best_framework": "vllm-tpu",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "Apple M1",
+    "chip_vendor": "Apple",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 51.0,
+    "median_throughput": 51.0,
+    "min_throughput": 51.0,
+    "max_throughput": 51.0,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 51.0,
+        "best_framework": "mlx-lm"
+      }
+    },
+    "best_submission_id": "2349a925",
+    "best_framework": "mlx-lm",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "MTT S4000",
+    "chip_vendor": "Moore Threads",
+    "suite": "suite_F",
+    "model": "Qwen2.5-0.5B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 2004.02,
+    "median_throughput": 2004.02,
+    "min_throughput": 2004.02,
+    "max_throughput": 2004.02,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 2004.02,
+        "best_framework": "vllm-musa"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 40,
+        "best_framework": "vllm-musa"
+      },
+      "interactive": {
+        "count": 1,
+        "best_throughput": 2004.02,
+        "best_framework": "vllm-musa"
+      }
+    },
+    "best_submission_id": "4f66d29d",
+    "best_framework": "vllm-musa",
+    "best_submitted_by": "JuhaoLiang1997"
+  },
+  {
+    "chip": "MTT S4000",
+    "chip_vendor": "Moore Threads",
+    "suite": "suite_A",
+    "model": "Meta-Llama-3-8B-Instruct",
+    "submission_count": 1,
+    "best_throughput": 332.62,
+    "median_throughput": 332.62,
+    "min_throughput": 332.62,
+    "max_throughput": 332.62,
+    "stddev_throughput": null,
+    "scenario_summary": {
+      "offline": {
+        "count": 1,
+        "best_throughput": 332.62,
+        "best_framework": "vllm-musa"
+      },
+      "online": {
+        "count": 1,
+        "best_throughput": 5,
+        "best_framework": "vllm-musa"
+      }
+    },
+    "best_submission_id": "cabb7bd0",
+    "best_framework": "vllm-musa",
+    "best_submitted_by": "JuhaoLiang1997"
+  }
+];
+window.DISTRIBUTION_GROUPS = DISTRIBUTION_GROUPS;
+
+const DISTRIBUTION_SUITE_META = {
+  "suite_A": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "dataset": "sharegpt_standard_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "BF16",
+      "FP16"
+    ],
+    "max_model_len": 4096,
+    "concurrency_levels": [
+      8,
+      32,
+      128
+    ],
+    "online_qps_levels": [
+      5,
+      25,
+      100
+    ],
+    "online_sla_ttft_ms": 500,
+    "input_tokens_p50": 280,
+    "output_tokens_p50": 310,
+    "scenarios_default": [
+      "accuracy",
+      "offline",
+      "online"
+    ],
+    "scenarios_extra": [
+      "interactive",
+      "sustained",
+      "speculative",
+      "burst"
+    ]
+  },
+  "suite_B": {
+    "model_id": "meta-llama/Meta-Llama-3-70B-Instruct",
+    "model_revision": "50fd307e57011801c7833c87efa1984ddf2db42f",
+    "dataset": "sharegpt_standard_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "BF16",
+      "FP16"
+    ],
+    "max_model_len": 4096,
+    "concurrency_levels": [
+      8,
+      32,
+      128
+    ],
+    "online_qps_levels": [
+      2,
+      5,
+      10,
+      25
+    ],
+    "online_sla_ttft_ms": 1000,
+    "input_tokens_p50": 280,
+    "output_tokens_p50": 310,
+    "scenarios_default": [
+      "accuracy",
+      "offline",
+      "online"
+    ],
+    "scenarios_extra": [
+      "sustained",
+      "interactive",
+      "burst"
+    ]
+  },
+  "suite_C": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "dataset": "sharegpt_standard_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "BF16",
+      "FP16"
+    ],
+    "max_model_len": 4096,
+    "concurrency_levels": [
+      1,
+      4,
+      16,
+      64
+    ],
+    "online_qps_levels": [
+      5,
+      10,
+      25,
+      50
+    ],
+    "online_sla_ttft_ms": 500,
+    "scenarios_default": [
+      "accuracy",
+      "offline"
+    ],
+    "scenarios_extra": [
+      "online",
+      "sustained"
+    ]
+  },
+  "suite_D": {
+    "model_id": "meta-llama/Llama-3.1-8B-Instruct",
+    "model_revision": "0e9e39f249a16976918f6564b8830bc894c89659",
+    "dataset": "sharegpt_longctx_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "BF16",
+      "FP16"
+    ],
+    "max_model_len": 30208,
+    "concurrency_levels": [
+      1,
+      4
+    ],
+    "online_qps_levels": [
+      0.5,
+      1,
+      2
+    ],
+    "online_sla_ttft_ms": 5000,
+    "input_tokens_p50": 28650,
+    "output_tokens_p50": 256,
+    "scenarios_default": [
+      "accuracy",
+      "offline"
+    ],
+    "scenarios_extra": [
+      "interactive",
+      "sustained",
+      "online"
+    ]
+  },
+  "suite_E": {
+    "model_id": "meta-llama/Meta-Llama-3-8B-Instruct",
+    "model_revision": "8afb486c1db24fe5011ec46dfbe5b5dccdb575c2",
+    "dataset": "sharegpt_standard_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "BF16",
+      "FP16"
+    ],
+    "max_model_len": 4096,
+    "concurrency_levels": [
+      8,
+      32,
+      128
+    ],
+    "input_tokens_p50": 280,
+    "output_tokens_p50": 310,
+    "scenarios_default": [
+      "accuracy",
+      "offline"
+    ]
+  },
+  "suite_F": {
+    "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
+    "model_revision": "7ae557604adf67be50417f59c2c2f167def9a775",
+    "dataset": "sharegpt_edge_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "FP16",
+      "BF16"
+    ],
+    "max_model_len": 2048,
+    "concurrency_levels": [
+      4,
+      16,
+      64
+    ],
+    "online_qps_levels": [
+      10,
+      40
+    ],
+    "online_sla_ttft_ms": 500,
+    "input_tokens_p50": 95,
+    "output_tokens_p50": 150,
+    "scenarios_default": [
+      "accuracy",
+      "offline",
+      "online",
+      "interactive"
+    ],
+    "scenarios_extra": [
+      "sustained"
+    ]
+  },
+  "suite_G": {
+    "model_id": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    "model_revision": "eba92302a2861cdc0098cc54bc9f17cb2c47eb61",
+    "dataset": "sharegpt_standard_v1",
+    "precision_required": "BF16",
+    "allowed_precisions": [
+      "BF16",
+      "FP16"
+    ],
+    "max_model_len": 4096,
+    "concurrency_levels": [
+      4,
+      16,
+      64
+    ],
+    "online_qps_levels": [
+      2,
+      10,
+      40
+    ],
+    "online_sla_ttft_ms": 500,
+    "input_tokens_p50": 280,
+    "output_tokens_p50": 310,
+    "scenarios_default": [
+      "accuracy",
+      "offline",
+      "online"
+    ],
+    "scenarios_extra": [
+      "interactive",
+      "sustained"
+    ]
+  }
+};
+window.DISTRIBUTION_SUITE_META = DISTRIBUTION_SUITE_META;