From e4762717d1b5eb8494fba9dd2a32e139ed525b19 Mon Sep 17 00:00:00 2001
From: Tim Pietrusky
Date: Fri, 9 Jan 2026 14:43:50 +0100
Subject: [PATCH 1/3] feat(ai-sdk): add alibaba wan-2.6 image generation example

---
 .../generate-image-alibaba-wan-2.6.js | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 ai-sdk/getting-started/generate-image-alibaba-wan-2.6.js

diff --git a/ai-sdk/getting-started/generate-image-alibaba-wan-2.6.js b/ai-sdk/getting-started/generate-image-alibaba-wan-2.6.js
new file mode 100644
index 0000000..6ca6fd6
--- /dev/null
+++ b/ai-sdk/getting-started/generate-image-alibaba-wan-2.6.js
@@ -0,0 +1,29 @@
+import dotenv from "dotenv";
+import { runpod } from "@runpod/ai-sdk-provider";
+import { generateImage } from "ai";
+import { writeFileSync } from "fs";
+
+dotenv.config({ quiet: true });
+
+console.log("generate-image-alibaba-wan-2.6\n");
+
+async function main() {
+  const { image } = await generateImage({
+    model: runpod.image("alibaba/wan-2.6"),
+    prompt:
+      'A spectacular complete Chinese dragon sculpture made of neon light tubes, full body visible from head to flowing tail, positioned behind a large street-level neon sign reading "WAN 2.6" in bright cyan and magenta, cyberpunk city background, wet streets reflecting all the neon colors, the dragon wraps gracefully but its entire form is visible, photorealistic, cinematic lighting. Negative prompt: incomplete, cropped, blurry',
+    size: "1280x1280",
+  });
+
+  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+  const filename = `generated-image-alibaba-wan-2.6-${timestamp}.jpg`;
+
+  writeFileSync(filename, image.uint8Array);
+  console.log(`saved image: ${filename}`);
+  console.log(`size: ${(image.uint8Array.length / 1024).toFixed(1)}KB`);
+}
+
+main().catch((err) => {
+  console.error("failed:", err?.message || err);
+  process.exit(1);
+});

From d50c064d20dfa98a66d1162518c45edfe6c06f7c Mon Sep 17 00:00:00 2001
From: Tim Pietrusky
Date: Fri, 9 Jan 2026 20:08:19 +0100
Subject: [PATCH 2/3] feat(ai-sdk): add qwen image edit 2511 examples with lora support

---
 .../edit-image-qwen-image-edit-2511-lora.js | 39 +++++++++++++++++++
 .../edit-image-qwen-image-edit-2511.js | 34 ++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 ai-sdk/getting-started/edit-image-qwen-image-edit-2511-lora.js
 create mode 100644 ai-sdk/getting-started/edit-image-qwen-image-edit-2511.js

diff --git a/ai-sdk/getting-started/edit-image-qwen-image-edit-2511-lora.js b/ai-sdk/getting-started/edit-image-qwen-image-edit-2511-lora.js
new file mode 100644
index 0000000..70ef2a2
--- /dev/null
+++ b/ai-sdk/getting-started/edit-image-qwen-image-edit-2511-lora.js
@@ -0,0 +1,39 @@
+import dotenv from "dotenv";
+import { runpod } from "@runpod/ai-sdk-provider";
+import { generateImage } from "ai";
+import { writeFileSync } from "fs";
+
+dotenv.config({ quiet: true });
+
+console.log("edit-image-qwen-image-edit-2511-lora\n");
+
+async function main() {
+  const { image } = await generateImage({
+    model: runpod.image("qwen/qwen-image-edit-2511"),
+    prompt: "Transform into anime style, vibrant colors, detailed illustration",
+    size: "1024x1024",
+    providerOptions: {
+      runpod: {
+        images: ["https://image.runpod.ai/asset/qwen/qwen-image-edit-2511.png"],
+        loras: [
+          {
+            path: "https://huggingface.co/flymy-ai/qwen-image-anime-irl-lora/resolve/main/flymy_anime_irl.safetensors",
+            scale: 1,
+          },
+        ],
+      },
+    },
+  });
+
+  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+  const filename = `edited-image-qwen-2511-lora-${timestamp}.jpg`;
+
+  writeFileSync(filename, image.uint8Array);
+  console.log(`saved image: ${filename}`);
+  console.log(`size: ${(image.uint8Array.length / 1024).toFixed(1)}KB`);
+}
+
+main().catch((err) => {
+  console.error("failed:", err?.message || err);
+  process.exit(1);
+});
diff --git a/ai-sdk/getting-started/edit-image-qwen-image-edit-2511.js b/ai-sdk/getting-started/edit-image-qwen-image-edit-2511.js
new file mode 100644
index 0000000..4025052
--- /dev/null
+++ b/ai-sdk/getting-started/edit-image-qwen-image-edit-2511.js
@@ -0,0 +1,34 @@
+import dotenv from "dotenv";
+import { runpod } from "@runpod/ai-sdk-provider";
+import { generateImage } from "ai";
+import { writeFileSync } from "fs";
+
+dotenv.config({ quiet: true });
+
+console.log("edit-image-qwen-image-edit-2511\n");
+
+async function main() {
+  const { image } = await generateImage({
+    model: runpod.image("qwen/qwen-image-edit-2511"),
+    prompt:
+      "A futuristic city with a slightly dark neon atmosphere and glowing street lights. The girl in the foreground, her face and body well lit by the street lighting",
+    size: "1024x1024",
+    providerOptions: {
+      runpod: {
+        images: ["https://image.runpod.ai/asset/qwen/qwen-image-edit-2511.png"],
+      },
+    },
+  });
+
+  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
+  const filename = `edited-image-qwen-2511-${timestamp}.jpg`;
+
+  writeFileSync(filename, image.uint8Array);
+  console.log(`saved image: ${filename}`);
+  console.log(`size: ${(image.uint8Array.length / 1024).toFixed(1)}KB`);
+}
+
+main().catch((err) => {
+  console.error("failed:", err?.message || err);
+  process.exit(1);
+});

From c3f06a67406d8471970e3439cbb9bdc3ffc98f64 Mon Sep 17 00:00:00 2001
From: Tim Pietrusky
Date: Fri, 16 Jan 2026 17:46:51 +0100
Subject: [PATCH 3/3] feat: add transcription examples for pruna/whisper-v3-large

- Add transcribe-audio-url.js - transcription from URL
- Add transcribe-audio-file.js - transcription from local file
- Add generate-transcription-demo-audio.js - generate demo audio
- Fix generate-speech-chatterbox-turbo.js import (experimental_generateSpeech)
- Add *.wav to gitignore
---
 .gitignore | 1 +
 .../generate-speech-chatterbox-turbo.js | 2 +-
 .../generate-transcription-demo-audio.js | 38 ++++++++++++
 .../getting-started/transcribe-audio-file.js | 59 +++++++++++++++++++
 .../getting-started/transcribe-audio-url.js | 45 ++++++++++++++
 5 files changed, 144 insertions(+), 1 deletion(-)
 create mode 100644 ai-sdk/getting-started/generate-transcription-demo-audio.js
 create mode 100644 ai-sdk/getting-started/transcribe-audio-file.js
 create mode 100644 ai-sdk/getting-started/transcribe-audio-url.js

diff --git a/.gitignore b/.gitignore
index fd0a3c0..085c0ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -181,6 +181,7 @@ node_modules
 generated-image*
 edited-image*
 *.mp4
+*.wav
 
 # mastra
 mastra.db
diff --git a/ai-sdk/getting-started/generate-speech-chatterbox-turbo.js b/ai-sdk/getting-started/generate-speech-chatterbox-turbo.js
index f611914..0535a08 100644
--- a/ai-sdk/getting-started/generate-speech-chatterbox-turbo.js
+++ b/ai-sdk/getting-started/generate-speech-chatterbox-turbo.js
@@ -1,7 +1,7 @@
 import dotenv from "dotenv";
 import { writeFileSync } from "fs";
 import { runpod } from "@runpod/ai-sdk-provider";
-import { generateSpeech } from "ai";
+import { experimental_generateSpeech as generateSpeech } from "ai";
 
 dotenv.config({ quiet: true });
 
diff --git a/ai-sdk/getting-started/generate-transcription-demo-audio.js b/ai-sdk/getting-started/generate-transcription-demo-audio.js
new file mode 100644
index 0000000..bb6e62f
--- /dev/null
+++ b/ai-sdk/getting-started/generate-transcription-demo-audio.js
@@ -0,0 +1,38 @@
+import dotenv from "dotenv";
+import { writeFileSync } from "fs";
+import { runpod } from "@runpod/ai-sdk-provider";
+import { experimental_generateSpeech as generateSpeech } from "ai";
+
+dotenv.config({ quiet: true });
+
+// This is the text we'll generate and then transcribe back
+const DEMO_TEXT = `Welcome to Runpod. This is a demonstration of the Whisper transcription model.
+Whisper can accurately transcribe speech in over 90 languages.
+Let's see how well it works with this audio sample.`;
+
+async function main() {
+  console.log("Generating demo audio for transcription testing...");
+  console.log("Text:", DEMO_TEXT);
+
+  const { audio, providerMetadata, warnings } = await generateSpeech({
+    model: runpod.speech("resembleai/chatterbox-turbo"),
+    voice: "abigail",
+    text: DEMO_TEXT,
+  });
+
+  const filename = "transcription-demo.wav";
+  writeFileSync(filename, audio.uint8Array);
+
+  console.log("\nSaved:", filename);
+  console.log("File size:", audio.uint8Array.length, "bytes");
+  console.log("providerMetadata:", providerMetadata);
+  console.log("warnings:", warnings);
+  console.log("\nNext steps:");
+  console.log("1. Run transcribe-audio-file.js to transcribe the local transcription-demo.wav");
+  console.log("2. Or upload it to publicly accessible storage and set audioUrl in transcribe-audio-url.js to its public URL");
+}
+
+main().catch((err) => {
+  console.error("failed:", err?.message || err);
+  process.exit(1);
+});
diff --git a/ai-sdk/getting-started/transcribe-audio-file.js b/ai-sdk/getting-started/transcribe-audio-file.js
new file mode 100644
index 0000000..c6b333d
--- /dev/null
+++ b/ai-sdk/getting-started/transcribe-audio-file.js
@@ -0,0 +1,59 @@
+import dotenv from "dotenv";
+import { readFileSync } from "fs";
+import { runpod } from "@runpod/ai-sdk-provider";
+import { experimental_transcribe as transcribe } from "ai";
+
+dotenv.config({ quiet: true });
+
+/**
+ * Transcribe audio from a local file using RunPod's Whisper model.
+ *
+ * This example demonstrates transcription using a local audio file.
+ * The file is read as a Uint8Array and sent to the API as base64.
+ */
+async function main() {
+  const filePath = "transcription-demo.wav";
+
+  console.log("Reading audio file...");
+  console.log("File:", filePath);
+
+  // Read the file as a Uint8Array
+  const audioBuffer = readFileSync(filePath);
+  const audioData = new Uint8Array(audioBuffer);
+
+  console.log("File size:", audioData.length, "bytes");
+  console.log("");
+  console.log("Transcribing...");
+  console.log("");
+
+  const result = await transcribe({
+    model: runpod.transcription("pruna/whisper-v3-large"),
+    audio: audioData,
+    providerOptions: {
+      runpod: {
+        // Optional: specify language for better accuracy
+        // language: "en",
+      },
+    },
+  });
+
+  console.log("Transcription:");
+  console.log(result.text);
+  console.log("");
+  console.log("Language:", result.language);
+  console.log("Duration:", result.durationInSeconds, "seconds");
+  console.log("Segments:", result.segments?.length || 0);
+
+  if (result.segments && result.segments.length > 0) {
+    console.log("");
+    console.log("Segment details:");
+    result.segments.forEach((seg) => {
+      console.log(` [${seg.startSecond.toFixed(2)}s - ${seg.endSecond.toFixed(2)}s] ${seg.text}`);
+    });
+  }
+}
+
+main().catch((err) => {
+  console.error("failed:", err?.message || err);
+  process.exit(1);
+});
diff --git a/ai-sdk/getting-started/transcribe-audio-url.js b/ai-sdk/getting-started/transcribe-audio-url.js
new file mode 100644
index 0000000..c115132
--- /dev/null
+++ b/ai-sdk/getting-started/transcribe-audio-url.js
@@ -0,0 +1,45 @@
+import dotenv from "dotenv";
+import { runpod } from "@runpod/ai-sdk-provider";
+import { experimental_transcribe as transcribe } from "ai";
+
+dotenv.config({ quiet: true });
+
+/**
+ * Transcribe audio from a URL using RunPod's Whisper model.
+ *
+ * This example demonstrates transcription using an audio URL.
+ * The URL is passed via providerOptions.runpod.audio.
+ */
+async function main() {
+  const audioUrl = "https://image.runpod.ai/demo/transcription-demo.wav";
+
+  console.log("Transcribing audio from URL...");
+  console.log("URL:", audioUrl);
+  console.log("");
+
+  const result = await transcribe({
+    model: runpod.transcription("pruna/whisper-v3-large"),
+    // Note: When passing a URL, use new URL() - strings are interpreted as base64
+    audio: new URL(audioUrl),
+    providerOptions: {
+      runpod: {
+        // Pass the audio URL directly to RunPod (avoids downloading and re-uploading)
+        audio: audioUrl,
+      },
+    },
+  });
+
+  console.log("Transcription:");
+  console.log(result.text);
+  console.log("");
+  console.log("Language:", result.language);
+  console.log("Duration:", result.durationInSeconds, "seconds");
+  console.log("Segments:", result.segments?.length || 0);
+  console.log("");
+  console.log("Provider metadata:", result.providerMetadata);
+}
+
+main().catch((err) => {
+  console.error("failed:", err?.message || err);
+  process.exit(1);
+});