From 7c339abeaf55ddff04d4f7615324131d86c4d233 Mon Sep 17 00:00:00 2001 From: "promptless[bot]" Date: Thu, 25 Jun 2026 16:06:17 +0000 Subject: [PATCH] Remove Serverless pricing page, add pricing linkout to overview --- accounts-billing/billing.mdx | 2 +- docs.json | 5 ++- flash/pricing.mdx | 6 ++-- release-notes.mdx | 2 +- serverless/overview.mdx | 6 +++- serverless/pricing.mdx | 66 ------------------------------------ 6 files changed, 14 insertions(+), 73 deletions(-) delete mode 100644 serverless/pricing.mdx diff --git a/accounts-billing/billing.mdx b/accounts-billing/billing.mdx index 134f13e1..d4e45bcd 100644 --- a/accounts-billing/billing.mdx +++ b/accounts-billing/billing.mdx @@ -89,7 +89,7 @@ Each Runpod product has its own pricing structure. See the detailed pricing page On-demand and savings plan pricing for GPU Pods. - + Pay-per-second pricing for flex and active workers. diff --git a/docs.json b/docs.json index a70e9368..65af9b00 100644 --- a/docs.json +++ b/docs.json @@ -94,7 +94,6 @@ "serverless/overview", "serverless/quickstart", "serverless/sdks", - "serverless/pricing", { "group": "Create handler functions", "pages": [ @@ -612,6 +611,10 @@ } }, "redirects": [ + { + "source": "/serverless/pricing", + "destination": "/serverless/overview" + }, { "source": "/public-endpoints/models/seedance-1-pro", "destination": "/public-endpoints/overview" diff --git a/flash/pricing.mdx b/flash/pricing.mdx index a02eacd3..82b76969 100644 --- a/flash/pricing.mdx +++ b/flash/pricing.mdx @@ -4,7 +4,7 @@ sidebarTitle: "Pricing" description: "Understand Flash pricing and optimize your costs." --- -Flash follows the same pricing model as [Runpod Serverless](/serverless/pricing). You pay per second of compute time, with no charges when your code isn't running. Pricing depends on the GPU or CPU type you configure for your endpoints. +Flash follows the same pricing model as [Runpod Serverless](/serverless/overview). 
You pay per second of compute time, with no charges when your code isn't running. Pricing depends on the GPU or CPU type you configure for your endpoints. ## How pricing works @@ -25,7 +25,7 @@ Flash supports both GPU and CPU workers. Pricing varies based on the hardware ty - **GPU workers**: Use `@Endpoint(gpu=...)` configuration. Pricing depends on the GPU type (e.g., RTX 4090, A100 80GB). - **CPU workers**: Use `@Endpoint(cpu=...)` configuration. Pricing depends on the CPU instance type. -See the [Serverless pricing page](/serverless/pricing) for current rates by GPU and CPU type. +See the [Runpod pricing page](https://www.runpod.io/pricing) for current rates by GPU and CPU type. ## How to estimate and optimize costs @@ -114,5 +114,5 @@ Monitor your usage in the [Runpod console](https://www.runpod.io/console/serverl ## Next steps - [Create endpoint functions](/flash/create-endpoints) with optimized configurations. -- [View Serverless pricing details](/serverless/pricing) for current rates. +- [View Runpod pricing details](https://www.runpod.io/pricing) for current rates. - [Configure resources](/flash/configuration/parameters) for your workloads. diff --git a/release-notes.mdx b/release-notes.mdx index 12f832ce..7b20db10 100644 --- a/release-notes.mdx +++ b/release-notes.mdx @@ -261,7 +261,7 @@ Flash now supports deploying endpoints to [multiple datacenters](/flash/configur - **Self-service worker upgrade**: Rebuild and roll workers from the dashboard without support tickets. - **Edit template from endpoint page**: Inline edit and redeploy the underlying template directly from the endpoint view. - **Improved Serverless metrics page**: Refinements to charts and filters for quicker root-cause analysis. -- [Flex and active workers](/serverless/pricing): Always-on "active" workers for baseline load with on-demand "flex" workers for bursts. 
+- [Flex and active workers](/serverless/overview): Always-on "active" workers for baseline load with on-demand "flex" workers for bursts. - **Billing explorer**: Inspect costs by resource, region, and time to identify optimization opportunities. diff --git a/serverless/overview.mdx b/serverless/overview.mdx index fecc5d0f..ecbf390d 100644 --- a/serverless/overview.mdx +++ b/serverless/overview.mdx @@ -172,4 +172,8 @@ flowchart TD linkStyle default stroke-width:2px,stroke:#5F4CFE ``` - \ No newline at end of file + + +## Pricing + +Serverless uses pay-per-second pricing with no upfront costs. You're billed only for the compute time your workers use, from the moment a worker starts until it fully stops, including the idle timeout period after a request completes, and you're not charged once workers have scaled down. For current rates by GPU type, see the [Runpod pricing page](https://www.runpod.io/pricing). \ No newline at end of file diff --git a/serverless/pricing.mdx b/serverless/pricing.mdx deleted file mode 100644 index b5ff0105..00000000 --- a/serverless/pricing.mdx +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: "Pricing" -sidebarTitle: "Pricing" -description: "Learn how Serverless billing works to optimize your costs." -mode: "wide" ---- - -import GPUTable from '/snippets/serverless-gpu-pricing-table.mdx'; - -
- - -Runpod offers custom pricing plans for large scale and enterprise workloads. [Contact our sales team](https://ecykq.share.hsforms.com/2MZdZATC3Rb62Dgci7knjbA) to learn more. - - -Serverless offers pay-per-second pricing with no upfront costs. You're billed from when a worker starts until it fully stops, rounded up to the nearest second. - -## Worker types - -| | Flex workers | Active workers | -|---|--------------|----------------| -| **Behavior** | Scale to zero when idle | Always running (24/7) | -| **Pricing** | Standard per-second rate | Discounts available through sales inquiry | -| **Best for** | Variable workloads, cost optimization | Consistent traffic, low-latency requirements | - -## GPU pricing - - - -For the latest pricing, visit the [Runpod pricing page](https://www.runpod.io/pricing). - -## What you're billed for - -Your total cost includes compute time and storage: - -| Cost component | Description | Rate | -|----------------|-------------|------| -| **Compute** | GPU time while workers run | See pricing table above | -| **Container disk** | Worker storage (5-min intervals) | ~\$0.10/GB/month | -| **Network volume** | Shared persistent storage | \$0.07/GB/month (< 1TB), \$0.05/GB/month (> 1TB) | - -### Compute cost breakdown - -Workers incur charges during three phases: - -1. **Start time**: Initializing the container and loading models into GPU memory. Minimize with [FlashBoot](/serverless/endpoints/endpoint-configurations#flashboot) or [model caching](/serverless/endpoints/model-caching). - -2. **Execution time**: Processing requests. Set [execution timeouts](/serverless/endpoints/endpoint-configurations#execution-timeout) to prevent runaway jobs. - -3. **Idle timeout duration**: The time a worker remains active (running) after completing a request, waiting for additional requests before scaling down (default: 5 seconds). Configure in [endpoint settings](/serverless/endpoints/endpoint-configurations#idle-timeout). 
- - -For high-volume workloads with significant storage needs, use [network volumes](/storage/network-volumes) to share data across workers and reduce per-worker storage costs. - - -## Account limits - -**Spend limit**: Default limit of \$80/hour across all resources. [Contact support](https://www.runpod.io/contact) to increase. - -## Billing support - -If you believe you've been billed incorrectly, [contact support](https://www.runpod.io/contact), including the following information in your ticket: - -- Endpoint ID -- Request ID (if applicable) -- Approximate time of the issue