From 7c339abeaf55ddff04d4f7615324131d86c4d233 Mon Sep 17 00:00:00 2001
From: "promptless[bot]" <promptless[bot]@users.noreply.github.com>
Date: Thu, 25 Jun 2026 16:06:17 +0000
Subject: [PATCH] Remove Serverless pricing page, add pricing linkout to
 overview

---
 accounts-billing/billing.mdx |  2 +-
 docs.json                    |  5 ++-
 flash/pricing.mdx            |  6 ++--
 release-notes.mdx            |  2 +-
 serverless/overview.mdx      |  6 +++-
 serverless/pricing.mdx       | 66 ------------------------------------
 6 files changed, 14 insertions(+), 73 deletions(-)
 delete mode 100644 serverless/pricing.mdx
diff --git a/accounts-billing/billing.mdx b/accounts-billing/billing.mdx
index 134f13e1..d4e45bcd 100644
--- a/accounts-billing/billing.mdx
+++ b/accounts-billing/billing.mdx
@@ -89,7 +89,7 @@ Each Runpod product has its own pricing structure. See the detailed pricing page
   <Card title="Pods pricing" icon="server" href="/pods/pricing">
     On-demand and savings plan pricing for GPU Pods.
   </Card>
-  <Card title="Serverless pricing" icon="bolt" href="/serverless/pricing">
+  <Card title="Serverless pricing" icon="bolt" href="https://www.runpod.io/pricing">
     Pay-per-second pricing for flex and active workers.
   </Card>
 </CardGroup>
diff --git a/docs.json b/docs.json
index a70e9368..65af9b00 100644
--- a/docs.json
+++ b/docs.json
@@ -94,7 +94,6 @@
               "serverless/overview",
               "serverless/quickstart",
               "serverless/sdks",
-              "serverless/pricing",
               {
                 "group": "Create handler functions",
                 "pages": [
@@ -612,6 +611,10 @@
     }
   },
   "redirects": [
+    {
+      "source": "/serverless/pricing",
+      "destination": "/serverless/overview"
+    },
     {
       "source": "/public-endpoints/models/seedance-1-pro",
       "destination": "/public-endpoints/overview"
diff --git a/flash/pricing.mdx b/flash/pricing.mdx
index a02eacd3..82b76969 100644
--- a/flash/pricing.mdx
+++ b/flash/pricing.mdx
@@ -4,7 +4,7 @@ sidebarTitle: "Pricing"
 description: "Understand Flash pricing and optimize your costs."
 ---
 
-Flash follows the same pricing model as [Runpod Serverless](/serverless/pricing). You pay per second of compute time, with no charges when your code isn't running. Pricing depends on the GPU or CPU type you configure for your endpoints.
+Flash follows the same pricing model as [Runpod Serverless](/serverless/overview#pricing). You pay per second of compute time, with no charges when your code isn't running. Pricing depends on the GPU or CPU type you configure for your endpoints.
 
 ## How pricing works
 
@@ -25,7 +25,7 @@ Flash supports both GPU and CPU workers. Pricing varies based on the hardware ty
 - **GPU workers**: Use `@Endpoint(gpu=...)` configuration. Pricing depends on the GPU type (e.g., RTX 4090, A100 80GB).
 - **CPU workers**: Use `@Endpoint(cpu=...)` configuration. Pricing depends on the CPU instance type.
 
-See the [Serverless pricing page](/serverless/pricing) for current rates by GPU and CPU type.
+See the [Runpod pricing page](https://www.runpod.io/pricing) for current rates by GPU and CPU type.
 
 ## How to estimate and optimize costs
 
@@ -114,5 +114,5 @@ Monitor your usage in the [Runpod console](https://www.runpod.io/console/serverl
 ## Next steps
 
 - [Create endpoint functions](/flash/create-endpoints) with optimized configurations.
-- [View Serverless pricing details](/serverless/pricing) for current rates.
+- [View Runpod pricing details](https://www.runpod.io/pricing) for current rates.
 - [Configure resources](/flash/configuration/parameters) for your workloads.
diff --git a/release-notes.mdx b/release-notes.mdx
index 12f832ce..7b20db10 100644
--- a/release-notes.mdx
+++ b/release-notes.mdx
@@ -261,7 +261,7 @@ Flash now supports deploying endpoints to [multiple datacenters](/flash/configur
 - **Self-service worker upgrade**: Rebuild and roll workers from the dashboard without support tickets.
 - **Edit template from endpoint page**: Inline edit and redeploy the underlying template directly from the endpoint view.
 - **Improved Serverless metrics page**: Refinements to charts and filters for quicker root-cause analysis.
-- [Flex and active workers](/serverless/pricing): Always-on "active" workers for baseline load with on-demand "flex" workers for bursts.
+- [Flex and active workers](/serverless/overview): Always-on "active" workers for baseline load with on-demand "flex" workers for bursts.
 - **Billing explorer**: Inspect costs by resource, region, and time to identify optimization opportunities.
 
 </Update>
diff --git a/serverless/overview.mdx b/serverless/overview.mdx
index fecc5d0f..ecbf390d 100644
--- a/serverless/overview.mdx
+++ b/serverless/overview.mdx
@@ -172,4 +172,8 @@ flowchart TD
 
     linkStyle default stroke-width:2px,stroke:#5F4CFE
 ```
-</div>
\ No newline at end of file
+</div>
+
+## Pricing
+
+Serverless uses pay-per-second pricing with no upfront costs. You're billed for the time your workers are running, from the moment a worker starts until it fully stops, which includes start time, execution time, and the idle timeout period after a request completes. You aren't charged while an endpoint is scaled down to zero workers. For current rates by GPU type, see the [Runpod pricing page](https://www.runpod.io/pricing).
\ No newline at end of file
diff --git a/serverless/pricing.mdx b/serverless/pricing.mdx
deleted file mode 100644
index b5ff0105..00000000
--- a/serverless/pricing.mdx
+++ /dev/null
@@ -1,66 +0,0 @@
----
-title: "Pricing"
-sidebarTitle: "Pricing"
-description: "Learn how Serverless billing works to optimize your costs."
-mode: "wide"
----
-
-import GPUTable from '/snippets/serverless-gpu-pricing-table.mdx';
-
-<div className="overview-page-wrapper" />
-
-<Tip>
-Runpod offers custom pricing plans for large scale and enterprise workloads. [Contact our sales team](https://ecykq.share.hsforms.com/2MZdZATC3Rb62Dgci7knjbA) to learn more.
-</Tip>
-
-Serverless offers pay-per-second pricing with no upfront costs. You're billed from when a worker starts until it fully stops, rounded up to the nearest second.
-
-## Worker types
-
-| | Flex workers | Active workers |
-|---|--------------|----------------|
-| **Behavior** | Scale to zero when idle | Always running (24/7) |
-| **Pricing** | Standard per-second rate | Discounts available through sales inquiry |
-| **Best for** | Variable workloads, cost optimization | Consistent traffic, low-latency requirements |
-
-## GPU pricing
-
-<GPUTable/>
-
-For the latest pricing, visit the [Runpod pricing page](https://www.runpod.io/pricing).
-
-## What you're billed for
-
-Your total cost includes compute time and storage:
-
-| Cost component | Description | Rate |
-|----------------|-------------|------|
-| **Compute** | GPU time while workers run | See pricing table above |
-| **Container disk** | Worker storage (5-min intervals) | ~\$0.10/GB/month |
-| **Network volume** | Shared persistent storage | \$0.07/GB/month (< 1TB), \$0.05/GB/month (> 1TB) |
-
-### Compute cost breakdown
-
-Workers incur charges during three phases:
-
-1. **Start time**: Initializing the container and loading models into GPU memory. Minimize with [FlashBoot](/serverless/endpoints/endpoint-configurations#flashboot) or [model caching](/serverless/endpoints/model-caching).
-
-2. **Execution time**: Processing requests. Set [execution timeouts](/serverless/endpoints/endpoint-configurations#execution-timeout) to prevent runaway jobs.
-
-3. **Idle timeout duration**: The time a worker remains active (running) after completing a request, waiting for additional requests before scaling down (default: 5 seconds). Configure in [endpoint settings](/serverless/endpoints/endpoint-configurations#idle-timeout).
-
-<Tip>
-For high-volume workloads with significant storage needs, use [network volumes](/storage/network-volumes) to share data across workers and reduce per-worker storage costs.
-</Tip>
-
-## Account limits
-
-**Spend limit**: Default limit of \$80/hour across all resources. [Contact support](https://www.runpod.io/contact) to increase.
-
-## Billing support
-
-If you believe you've been billed incorrectly, [contact support](https://www.runpod.io/contact), including the following information in your ticket:
-
-- Endpoint ID
-- Request ID (if applicable)
-- Approximate time of the issue