diff --git a/frontend-snippets/generated.mdx b/frontend-snippets/generated.mdx new file mode 100644 index 0000000..9881289 --- /dev/null +++ b/frontend-snippets/generated.mdx @@ -0,0 +1,1990 @@ + + + + +```python python +# pip install retab +from retab import Retab + +# --------------------------------------------- +## Variables from your configuration +# --------------------------------------------- +api_key = "YOUR_RETAB_API_KEY" +document = "path/to/your/file" +json_schema = { + "type": "object", + "properties": { + "invoice_number": { + "type": "string" + }, + "total": { + "type": "number" + } + }, + "required": ["invoice_number", "total"] + } +image_resolution_dpi = 150 +model = "retab-small" +# --------------------------------------------- +# --------------------------------------------- + +client = Retab(api_key=api_key) +extraction = client.extractions.create( + json_schema = json_schema, + document = document, + model = model, + image_resolution_dpi = image_resolution_dpi +) + +print("Result:", extraction.output) +``` + +```typescript typescript +// npm install @retab/node +import { Retab } from '@retab/node'; + +// --------------------------------------------- +// Variables from your configuration +// --------------------------------------------- +const apiKey = "YOUR_RETAB_API_KEY"; +const document = "path/to/your/file"; +const jsonSchema = { + "type": "object", + "properties": { + "invoice_number": { + "type": "string" + }, + "total": { + "type": "number" + } + }, + "required": [ + "invoice_number", + "total" + ] +}; +const imageResolutionDpi = 150; +const model = "retab-small"; + +// --------------------------------------------- +// --------------------------------------------- + +const client = new Retab({ apiKey }); + +const extraction = await client.extractions.create( + document, + jsonSchema, + model, + imageResolutionDpi, + undefined, + undefined +); + +console.log("Result:", extraction.output); +``` + +```go go +import ( + "context" + 
"encoding/json" + "fmt" + + retab "github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +var jsonSchema map[string]interface{} +if err := json.Unmarshal([]byte(`{ + "type": "object", + "properties": { + "invoice_number": { + "type": "string" + }, + "total": { + "type": "number" + } + }, + "required": [ + "invoice_number", + "total" + ] +}`), &jsonSchema); err != nil { + panic(err) +} + +model := "retab-small" +imageResolutionDpi := 150 +extraction, err := client.Extractions.Create(ctx, &retab.ExtractionsCreateParams{ + Document: "path/to/your/file", + JSONSchema: jsonSchema, + Model: &model, + ImageResolutionDpi: &imageResolutionDpi, +}) +if err != nil { + panic(err) +} + +fmt.Println("Result:", extraction.Output) +``` + +```php php +extractions()->create( + document: 'path/to/your/file', + jsonSchema: $jsonSchema, + model: 'retab-small', + imageResolutionDpi: 150, +); + +print_r($extraction->output); +``` + +```csharp dotnet +using Newtonsoft.Json; +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); +var jsonSchema = JsonConvert.DeserializeObject>(@"{ + ""type"": ""object"", + ""properties"": { + ""invoice_number"": { + ""type"": ""string"" + }, + ""total"": { + ""type"": ""number"" + } + }, + ""required"": [ + ""invoice_number"", + ""total"" + ] +}")!; + +var extraction = await client.Extractions.CreateAsync(new ExtractionsCreateOptions +{ + Document = MimeData.FromFile("path/to/your/file.pdf"), + JsonSchema = jsonSchema, + Model = "retab-small", + ImageResolutionDpi = 150, +}); + +Console.WriteLine($"Extraction: {extraction.Id}"); +``` + +```ruby ruby +require 'json' +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") +json_schema = JSON.parse(%q({ + "type": "object", + "properties": { + "invoice_number": { + "type": "string" + }, + "total": { + "type": "number" + } + }, + 
"required": [ + "invoice_number", + "total" + ] +})) + +extraction = client.extractions.create( + document: 'path/to/your/file', + json_schema: json_schema, + model: "retab-small", + image_resolution_dpi: 150, +) + +puts extraction.output +``` + +```rust rust +use retab::{resources::extractions, Retab}; +use std::collections::HashMap; + +let client = Retab::new("YOUR_RETAB_API_KEY"); +let json_schema: HashMap = serde_json::from_str(r#"{ + "type": "object", + "properties": { + "invoice_number": { + "type": "string" + }, + "total": { + "type": "number" + } + }, + "required": [ + "invoice_number", + "total" + ] +}"#)?; + +let mut params = extractions::CreateParams::new("path/to/your/file.pdf", json_schema); +params.body.model = Some("retab-small".to_string()); +params.body.image_resolution_dpi = Some(150); +let _extraction = client.extractions().create(params).await?; + +println!("Extraction completed"); +``` + +```java java +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.retab.RetabClient; +import com.retab.models.Extraction; +import java.util.Map; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + ObjectMapper objectMapper = new ObjectMapper(); + Map jsonSchema = objectMapper.readValue( + "{\n \"type\": \"object\",\n \"properties\": {\n \"invoice_number\": {\n \"type\": \"string\"\n },\n \"total\": {\n \"type\": \"number\"\n }\n },\n \"required\": [\n \"invoice_number\",\n \"total\"\n ]\n}", + new TypeReference>() {}); + + Extraction extraction = client.extractions().create( + "path/to/your/file.pdf", + jsonSchema, + "retab-small", + 150L, + null, + null, + null, + null, + null, + false, + null, + null); + + System.out.println("Extraction completed: " + extraction); + } +} +``` + +```bash curl +curl -X POST "https://api.retab.com/v1/extractions" \ + -H "Content-Type: application/json" \ + -H 
"Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "document": { + "filename": "your-document.pdf", + "url": "data:application/pdf;base64," + }, + "json_schema": { + "type": "object", + "properties": { + "invoice_number": { + "type": "string" + }, + "total": { + "type": "number" + } + }, + "required": [ + "invoice_number", + "total" + ] + }, + "model": "retab-small", + "image_resolution_dpi": 150 +}' + +# To encode your file as base64: +# base64 -i your-document.pdf +``` + + + + + + + +```python python +import base64 +from retab import Retab + +api_key = "YOUR_RETAB_API_KEY" + +client = Retab(api_key=api_key) + +# Edit the form using AI agent to detect and fill fields +# The SDK accepts file paths directly +edit = client.edits.create( + document="form.pdf", + instructions="Fill all visible invoice fields.", + model="retab-small", +) + +# Save the filled document (MIMEData with data URI) +# Extract base64 content from data URI +base64_content = edit.output.filled_document.url.split(",")[1] +filled_document_bytes = base64.b64decode(base64_content) +with open("filled_form.pdf", "wb") as f: + f.write(filled_document_bytes) + +# Access form data with filled values +print(f"Filled {len(edit.output.form_data)} form fields") +for field in edit.output.form_data: + if field.value: + print(f"Field: {field.description} = {field.value}") +``` + +```typescript typescript +import { Retab, type Edit } from '@retab/node'; +import { readFileSync, writeFileSync } from 'fs'; + +const apiKey = "YOUR_RETAB_API_KEY"; + +const client = new Retab({ apiKey }); + +// Read and encode document as MIMEData +const docBuffer = readFileSync("form.pdf"); +const docBase64 = docBuffer.toString('base64'); + +// Edit the document using AI agent to detect and fill fields +const edit: Edit = await client.edits.create( + "Fill all visible invoice fields.", + { + filename: "form.pdf", + url: `data:application/pdf;base64,${docBase64}` + }, + undefined, + "retab-small" +); + +// Save the filled document 
(MIMEData with data URI) +const base64Content = edit.output.filledDocument.url.split(",")[1]; +const filledBuffer = Buffer.from(base64Content, 'base64'); +writeFileSync("filled_form.pdf", filledBuffer); + +// Access form data with filled values +console.log(`Filled ${edit.output.formData.length} form fields`); +edit.output.formData.forEach(field => { + if (field.value) { + console.log(`Field: ${field.description} = ${field.value}`); + } +}); +``` + +```go go +import ( + "context" + "fmt" + + retab "github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +model := "retab-small" +edit, err := client.Edits.Create(ctx, &retab.EditsCreateParams{ + Document: "form.pdf", + Instructions: "Fill all visible invoice fields.", + Model: &model, +}) +if err != nil { + panic(err) +} + +fmt.Println("Filled document:", edit.Output.FilledDocument.URL) +fmt.Println("Field count:", len(edit.Output.FormData)) +``` + +```php php +edits()->create( + document: 'form.pdf', + instructions: 'Fill all visible invoice fields.', + model: 'retab-small', +); + +echo 'Filled document: ' . $edit->output->filled_document->url . PHP_EOL; +echo 'Field count: ' . count($edit->output->form_data) . 
PHP_EOL; +``` + +```csharp dotnet +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); + +var edit = await client.Edits.CreateAsync(new EditsCreateOptions +{ + Document = MimeData.FromFile("form.pdf"), + Instructions = "Fill all visible invoice fields.", + Model = "retab-small", +}); + +Console.WriteLine($"Filled document: {edit.Output.FilledDocument.Url}"); +Console.WriteLine($"Field count: {edit.Output.FormData.Count}"); +``` + +```ruby ruby +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") + +edit = client.edits.create( + document: 'form.pdf', + instructions: "Fill all visible invoice fields.", + model: "retab-small", +) + +puts "Filled document: #{edit.output.filled_document.url}" +puts "Field count: #{edit.output.form_data.length}" +``` + +```rust rust +use retab::{resources::edits, Retab}; + +let client = Retab::new("YOUR_RETAB_API_KEY"); +let mut params = edits::CreateParams::new("Fill all visible invoice fields."); +params.body.document = Some(retab::models::ClassificationRequestDocumentOneOf::from(retab::MimeData::from("form.pdf"))); +params.body.model = Some("retab-small".to_string()); + +let _edit = client.edits().create(params).await?; + +println!("Edit completed"); +``` + +```java java +import com.retab.RetabClient; +import com.retab.models.Edit; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + Edit edit = client.edits().create( + "Fill all visible invoice fields.", + "form.pdf", + null, + "retab-small", + null, + false, + false); + + System.out.println("Edit completed: " + edit); + } +} +``` + +```bash curl +curl -X POST "https://api.retab.com/v1/edits" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "document": { + "filename": "form.pdf", + "url": "data:application/pdf;base64,JVBERi0xLjQK..." 
+ }, + "instructions": "Fill all visible invoice fields.", + "model": "retab-small" +}' + +# To encode your document as base64 data URI: +# echo "data:application/pdf;base64,$(base64 -i form.pdf)" + +# The response is an Edit resource: +# - output.form_data: list of form fields with filled values +# - output.filled_document: the filled document as MIMEData (filename + data URI) +``` + + + + + + + +```python python +# --------------------------------------------- +## Variables from your configuration +# --------------------------------------------- +api_key = "YOUR_RETAB_API_KEY" +document = "path/to/your/file" +model = "gpt-5.4" +table_parsing_format = "html" +image_resolution_dpi = 150 +# --------------------------------------------- +# --------------------------------------------- + +from retab import Retab + +client = Retab(api_key=api_key) +result = client.parses.create( + document = document, + model = model, + table_parsing_format = table_parsing_format, + image_resolution_dpi = image_resolution_dpi +) + +print("Parsed content:") +for i, page_content in enumerate(result.output.pages): + print(f"Page {i + 1}:") + print(page_content) + print("\n" + "="*50 + "\n") + +if result.usage: + print(f"Total pages: {len(result.output.pages)}") + print(f"Credits used: {result.usage.credits}") +``` + +```typescript typescript +import { Retab } from '@retab/node'; + +const apiKey = "YOUR_RETAB_API_KEY"; + +const client = new Retab({ apiKey }); + +const result = await client.parses.create( + "path/to/your/file", + "gpt-5.4", + "html", + 150 +); + +// Access parsed content +result.output.pages.forEach((pageContent: string, index: number) => { + console.log(`Page ${index + 1}:`); + console.log(pageContent); + console.log("\n" + "=".repeat(50) + "\n"); +}); + +if (result.usage) { + console.log(`Total pages: ${result.output.pages.length}`); + console.log(`Credits used: ${result.usage.credits}`); +} +``` + +```go go +import ( + "context" + "fmt" + + retab 
"github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +model := "gpt-5.4" +tableParsingFormat := retab.ParseRequestTableParsingFormatHTML +imageResolutionDpi := 150 +result, err := client.Parses.Create(ctx, &retab.ParsesCreateParams{ + Document: "path/to/your/file", + Model: &model, + TableParsingFormat: &tableParsingFormat, + ImageResolutionDpi: &imageResolutionDpi, +}) +if err != nil { + panic(err) +} + +for index, page := range result.Output.Pages { + fmt.Printf("Page %d:\n%s\n", index+1, page) +} +``` + +```php php +parses()->create( + document: 'path/to/your/file', + model: 'gpt-5.4', + tableParsingFormat: 'html', + imageResolutionDpi: 150, +); + +foreach ($result->output->pages as $index => $page) { + echo 'Page ' . ($index + 1) . ':' . PHP_EOL; + echo $page . PHP_EOL; +} +``` + +```csharp dotnet +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); + +var result = await client.Parses.CreateAsync(new ParsesCreateOptions +{ + Document = MimeData.FromFile("path/to/your/file.pdf"), + Model = "gpt-5.4", + TableParsingFormat = ParseRequestTableParsingFormat.Html, + ImageResolutionDpi = 150, +}); + +for (var index = 0; index < result.Output.Pages.Count; index++) +{ + Console.WriteLine($"Page {index + 1}:"); + Console.WriteLine(result.Output.Pages[index]); +} +``` + +```ruby ruby +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") + +result = client.parses.create( + document: 'path/to/your/file', + model: "gpt-5.4", + table_parsing_format: "html", + image_resolution_dpi: 150, +) + +result.output.pages.each_with_index do |page, index| + puts "Page #{index + 1}:" + puts page +end +``` + +```rust rust +use retab::{resources::parses, Retab}; + +let client = Retab::new("YOUR_RETAB_API_KEY"); +let mut params = parses::CreateParams::new("path/to/your/file.pdf"); +params.body.model = 
Some("gpt-5.4".to_string()); +params.body.table_parsing_format = Some(retab::enums::ParseRequestTableParsingFormat::Html); +params.body.image_resolution_dpi = Some(150); + +let _result = client.parses().create(params).await?; + +println!("Parse completed"); +``` + +```java java +import com.retab.RetabClient; +import com.retab.models.Parse; +import com.retab.types.ParseRequestTableParsingFormat; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + Parse result = client.parses().create( + "path/to/your/file.pdf", + "gpt-5.4", + ParseRequestTableParsingFormat.HTML, + 150L, + null, + false, + false); + + System.out.println("Parse completed: " + result); + } +} +``` + +```bash curl +curl -X POST "https://api.retab.com/v1/parses" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "document": { + "filename": "your-document.pdf", + "url": "data:application/pdf;base64," + }, + "model": "gpt-5.4", + "table_parsing_format": "html", + "image_resolution_dpi": 150 +}' + +# To encode your file as base64: +# base64 -i your-document.pdf + +# Example response: +# { +# "pages": [ +# "Content of page 1...", +# "Content of page 2...", +# ... 
+# ] +# } +``` + + + + + + + +```python python +# --------------------------------------------- +# Variables from your configuration +# --------------------------------------------- +api_key = "YOUR_RETAB_API_KEY" +document = "path/to/your/file" +key = "invoice_number" +instructions = """Group pages by invoice number.""" +model = "retab-small" +n_consensus = 1 +allow_overlap = False + +# --------------------------------------------- +# --------------------------------------------- + +from retab import Retab + +client = Retab(api_key=api_key) +partition_result = client.partitions.create( + document=document, + key=key, + instructions=instructions, + model=model, + n_consensus=n_consensus, + allow_overlap=allow_overlap, +) + +print("Document partition results:") +for chunk in partition_result.output: + print(f" {chunk.key}: pages {', '.join(map(str, chunk.pages))}") + +print(f"\nTotal chunks: {len(partition_result.output)}") +``` + +```typescript typescript +import { Retab } from '@retab/node'; + +const apiKey = "YOUR_RETAB_API_KEY"; + +const client = new Retab({ apiKey }); + +const result = await client.partitions.create( + "path/to/your/file", + "invoice_number", + "Group pages by invoice number.", + "retab-small", + 1, + false +); + +console.log("Document partition results:"); +result.output.forEach((chunk) => { + console.log(` ${chunk.key}: pages ${chunk.pages.join(', ')}`); +}); + +console.log(`\nTotal chunks: ${result.output.length}`); +``` + +```go go +import ( + "context" + "fmt" + + retab "github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +model := "retab-small" +nConsensus := 1 +allowOverlap := false +result, err := client.Partitions.Create(ctx, &retab.PartitionsCreateParams{ + Document: "path/to/your/file", + Key: "invoice_number", + Instructions: "Group pages by invoice number.", + Model: &model, + NConsensus: &nConsensus, + AllowOverlap: 
&allowOverlap, +}) +if err != nil { + panic(err) +} + +fmt.Println("Document partition results:") +for _, chunk := range result.Output { + fmt.Printf(" %s: pages %v\n", chunk.Key, chunk.Pages) +} +``` + +```php php +partitions()->create( + document: 'path/to/your/file', + key: 'invoice_number', + instructions: 'Group pages by invoice number.', + model: 'retab-small', + nConsensus: 1, + allowOverlap: false, +); + +foreach ($result->output as $chunk) { + echo ' ' . $chunk->key . ': pages ' . implode(', ', $chunk->pages) . PHP_EOL; +} +``` + +```csharp dotnet +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); + +var result = await client.Partitions.CreateAsync(new PartitionsCreateOptions +{ + Document = MimeData.FromFile("path/to/your/file.pdf"), + Key = "invoice_number", + Instructions = "Group pages by invoice number.", + Model = "retab-small", + NConsensus = 1, + AllowOverlap = false, +}); + +foreach (var chunk in result.Output) +{ + Console.WriteLine($" {chunk.Key}: pages {string.Join(", ", chunk.Pages)}"); +} +``` + +```ruby ruby +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") + +result = client.partitions.create( + document: 'path/to/your/file', + key: "invoice_number", + instructions: "Group pages by invoice number.", + model: "retab-small", + n_consensus: 1, + allow_overlap: false, +) + +result.output.each do |chunk| + puts " #{chunk.key}: pages #{chunk.pages.join(', ')}" +end +``` + +```rust rust +use retab::{resources::partitions, Retab}; + +let client = Retab::new("YOUR_RETAB_API_KEY"); +let mut params = partitions::CreateParams::new( + "path/to/your/file.pdf", + "invoice_number", + "Group pages by invoice number.", +); +params.body.model = Some("retab-small".to_string()); +params.body.n_consensus = Some(1); +params.body.allow_overlap = Some(false); + +let _result = client.partitions().create(params).await?; + +println!("Partition completed"); +``` + +```java java +import 
com.retab.RetabClient; +import com.retab.models.Partition; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + Partition result = client.partitions().create( + "path/to/your/file.pdf", + "invoice_number", + "Group pages by invoice number.", + "retab-small", + 1L, + false, + false, + false); + + System.out.println("Partition completed: " + result); + } +} +``` + +```bash curl +curl -X POST "https://api.retab.com/v1/partitions" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "document": { + "filename": "your-document.pdf", + "url": "data:application/pdf;base64," + }, + "key": "invoice_number", + "instructions": "Group pages by invoice number.", + "model": "retab-small", + "n_consensus": 1, + "allow_overlap": false +}' + +# To encode your file as base64: +# base64 -i your-document.pdf + +# Example response: +# { +# "output": [ +# {"key": "INV-001", "pages": [1, 2]}, +# {"key": "INV-002", "pages": [3, 4, 5]} +# ] +# } +``` + + + + + + + +```python python +# --------------------------------------------- +# Variables from your configuration +# --------------------------------------------- +api_key = "YOUR_RETAB_API_KEY" +document = "path/to/your/file" +model = "retab-small" +categories = [ + {"name": "Invoice", "description": "Commercial invoices with totals"}, + {"name": "Contract", "description": "Legal agreements"} +] +first_n_pages = 2 + +# --------------------------------------------- +# --------------------------------------------- + +from retab import Retab + +client = Retab(api_key=api_key) +result = client.classifications.create( + document=document, + model=model, + categories=categories, + first_n_pages=first_n_pages, +) + +print(f"Classification: {result.output.category}") +print(f"Reasoning: {result.output.reasoning}") +``` + +```typescript typescript +import { Retab } from '@retab/node'; + +const apiKey = 
"YOUR_RETAB_API_KEY"; + +const client = new Retab({ apiKey }); + +const categories = [ + { name: "Invoice", description: "Commercial invoices with totals" }, + { name: "Contract", description: "Legal agreements" } +]; + +const result = await client.classifications.create( + "path/to/your/file", + categories, + "retab-small", + 2 +); + +console.log("Classification:", result.output.category); +console.log("Reasoning:", result.output.reasoning); +``` + +```go go +import ( + "context" + "encoding/json" + "fmt" + + retab "github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +var categoriesPayload []struct { + Name string `json:"name"` + Description *string `json:"description,omitempty"` +} +if err := json.Unmarshal([]byte(`[ + { + "name": "Invoice", + "description": "Commercial invoices with totals" + }, + { + "name": "Contract", + "description": "Legal agreements" + } +]`), &categoriesPayload); err != nil { + panic(err) +} + +categories := make([]*retab.Category, 0, len(categoriesPayload)) +for _, category := range categoriesPayload { + categories = append(categories, &retab.Category{ + Name: category.Name, + Description: category.Description, + }) +} + +model := "retab-small" +firstNPages := 2 +result, err := client.Classifications.Create(ctx, &retab.ClassificationsCreateParams{ + Document: "path/to/your/file", + Categories: categories, + Model: &model, + FirstNPages: &firstNPages, +}) +if err != nil { + panic(err) +} + +fmt.Println("Classification:", result.Output.Category) +fmt.Println("Reasoning:", result.Output.Reasoning) +``` + +```php php +classifications()->create( + document: 'path/to/your/file', + categories: $categories, + model: 'retab-small', + firstNPages: 2, +); + +echo "Classification: " . $result->output->category . PHP_EOL; +echo "Reasoning: " . $result->output->reasoning . 
PHP_EOL; +``` + +```csharp dotnet +using Newtonsoft.Json; +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); +var categories = JsonConvert.DeserializeObject>(@"[ + { + ""name"": ""Invoice"", + ""description"": ""Commercial invoices with totals"" + }, + { + ""name"": ""Contract"", + ""description"": ""Legal agreements"" + } +]")!; + +var result = await client.Classifications.CreateAsync(new ClassificationsCreateOptions +{ + Document = MimeData.FromFile("path/to/your/file.pdf"), + Categories = categories, + Model = "retab-small", + FirstNPages = 2, +}); + +Console.WriteLine($"Classification: {result.Output.Category}"); +Console.WriteLine($"Reasoning: {result.Output.Reasoning}"); +``` + +```ruby ruby +require 'json' +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") +categories = JSON.parse(%q([ + { + "name": "Invoice", + "description": "Commercial invoices with totals" + }, + { + "name": "Contract", + "description": "Legal agreements" + } +])) + +result = client.classifications.create( + document: 'path/to/your/file', + categories: categories, + model: "retab-small", + first_n_pages: 2, +) + +puts "Classification: #{result.output.category}" +puts "Reasoning: #{result.output.reasoning}" +``` + +```rust rust +use retab::{models::Category, resources::classifications, Retab}; + +let client = Retab::new("YOUR_RETAB_API_KEY"); +let categories = vec![ + { + let mut category = Category::new("Invoice"); + category.description = Some("Commercial invoices with totals".to_string()); + category + }, + { + let mut category = Category::new("Contract"); + category.description = Some("Legal agreements".to_string()); + category + }, +]; + +let mut params = classifications::CreateParams::new("path/to/your/file.pdf", categories); +params.body.model = Some("retab-small".to_string()); +params.body.first_n_pages = Some(2); +let _result = client.classifications().create(params).await?; + +println!("Classification 
completed"); +``` + +```java java +import com.retab.RetabClient; +import com.retab.models.Category; +import com.retab.models.Classification; +import java.util.List; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + List<Category> categories = List.of( + new Category("Invoice", null, "Commercial invoices with totals"), + new Category("Contract", null, "Legal agreements") + ); + + Classification result = client.classifications().create( + "path/to/your/file.pdf", + categories, + "retab-small", + 2L, + null, + null, + false, + false); + + System.out.println("Classification completed: " + result); + } +} +``` + +```bash curl +curl -X POST "https://api.retab.com/v1/classifications" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "document": { + "filename": "your-document.pdf", + "url": "data:application/pdf;base64," + }, + "model": "retab-small", + "categories": [ + { + "name": "Invoice", + "description": "Commercial invoices with totals" + }, + { + "name": "Contract", + "description": "Legal agreements" + } + ], + "first_n_pages": 2 +}' + +# To encode your file as base64: +# base64 -i your-document.pdf + +# Example response: +# { +# "id": "classification_abc123", +# "output": { +# "reasoning": "The document contains billing details...", +# "category": "Invoice" +# }, +# "consensus": { +# "choices": [], +# "likelihoods": null +# } +# } +``` + + + + + + + +```python python +# --------------------------------------------- +# Variables from your configuration +# --------------------------------------------- +api_key = "YOUR_RETAB_API_KEY" +document = "path/to/your/file" +model = "retab-small" +subdocuments = [ + {"name": "Invoice", "description": "Invoice pages", "allow_multiple_instances": True}, + {"name": "Contract", "description": "Contract pages"} +] + +# --------------------------------------------- +# 
--------------------------------------------- + +from retab import Retab + +client = Retab(api_key=api_key) +split_result = client.splits.create( + document=document, + model=model, + subdocuments=subdocuments, +) + +print("Document split results:") +for split in split_result.output: + print(f" {split.name}: pages {', '.join(map(str, split.pages))}") + +print(f"\nTotal sections: {len(split_result.output)}") +``` + +```typescript typescript +import { Retab } from '@retab/node'; + +const apiKey = "YOUR_RETAB_API_KEY"; + +const client = new Retab({ apiKey }); + +const subdocuments = [ + { name: "Invoice", description: "Invoice pages", allow_multiple_instances: true }, + { name: "Contract", description: "Contract pages" } +]; + +const result = await client.splits.create( + "path/to/your/file", + subdocuments, + "retab-small" +); + +console.log("Document split results:"); +result.output.forEach((split) => { + console.log(` ${split.name}: pages ${split.pages.join(', ')}`); +}); + +console.log(`\nTotal sections: ${result.output.length}`); +``` + +```go go +import ( + "context" + "encoding/json" + "fmt" + + retab "github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +var payload []struct { + Name string `json:"name"` + Description *string `json:"description,omitempty"` + AllowMultipleInstances *bool `json:"allow_multiple_instances,omitempty"` +} +if err := json.Unmarshal([]byte(`[ + { + "name": "Invoice", + "description": "Invoice pages", + "allow_multiple_instances": true + }, + { + "name": "Contract", + "description": "Contract pages" + } +]`), &payload); err != nil { + panic(err) +} + +subdocuments := make([]*retab.Subdocument, 0, len(payload)) +for _, item := range payload { + subdocuments = append(subdocuments, &retab.Subdocument{ + Name: item.Name, + Description: item.Description, + AllowMultipleInstances: item.AllowMultipleInstances, + }) +} + +model := 
"retab-small" +result, err := client.Splits.Create(ctx, &retab.SplitsCreateParams{ + Document: "path/to/your/file", + Subdocuments: subdocuments, + Model: &model, +}) +if err != nil { + panic(err) +} + +fmt.Println("Document split results:") +for _, split := range result.Output { + fmt.Printf(" %s: pages %v\n", split.Name, split.Pages) +} +``` + +```php php +splits()->create( + document: 'path/to/your/file', + subdocuments: $subdocuments, + model: 'retab-small', +); + +foreach ($result->output as $split) { + echo ' ' . $split->name . ': pages ' . implode(', ', $split->pages) . PHP_EOL; +} +``` + +```csharp dotnet +using Newtonsoft.Json; +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); +var subdocuments = JsonConvert.DeserializeObject>(@"[ + { + ""name"": ""Invoice"", + ""description"": ""Invoice pages"", + ""allow_multiple_instances"": true + }, + { + ""name"": ""Contract"", + ""description"": ""Contract pages"" + } +]")!; + +var result = await client.Splits.CreateAsync(new SplitsCreateOptions +{ + Document = MimeData.FromFile("path/to/your/file.pdf"), + Subdocuments = subdocuments, + Model = "retab-small", +}); + +foreach (var split in result.Output) +{ + Console.WriteLine($" {split.Name}: pages {string.Join(", ", split.Pages)}"); +} +``` + +```ruby ruby +require 'json' +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") +subdocuments = JSON.parse(%q([ + { + "name": "Invoice", + "description": "Invoice pages", + "allow_multiple_instances": true + }, + { + "name": "Contract", + "description": "Contract pages" + } +])) + +result = client.splits.create( + document: 'path/to/your/file', + subdocuments: subdocuments, + model: "retab-small", +) + +result.output.each do |split| + puts " #{split.name}: pages #{split.pages.join(', ')}" +end +``` + +```rust rust +use retab::{models::Subdocument, resources::splits, Retab}; + +let client = Retab::new("YOUR_RETAB_API_KEY"); +let subdocuments = 
vec![ + { + let mut subdocument = Subdocument::new("Invoice"); + subdocument.description = Some("Invoice pages".to_string()); + subdocument.allow_multiple_instances = Some(true); + subdocument + }, + { + let mut subdocument = Subdocument::new("Contract"); + subdocument.description = Some("Contract pages".to_string()); + + subdocument + }, +]; + +let mut params = splits::CreateParams::new("path/to/your/file.pdf", subdocuments); +params.body.model = Some("retab-small".to_string()); + +let _result = client.splits().create(params).await?; + +println!("Split completed"); +``` + +```java java +import com.retab.RetabClient; +import com.retab.models.Split; +import com.retab.models.Subdocument; +import java.util.List; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + List subdocuments = List.of( + new Subdocument("Invoice", "Invoice pages", true), + new Subdocument("Contract", "Contract pages", false) + ); + + Split result = client.splits().create( + "path/to/your/file.pdf", + subdocuments, + "retab-small", + null, + null, + false, + false); + + System.out.println("Split completed: " + result); + } +} +``` + +```bash curl +curl -X POST "https://api.retab.com/v1/splits" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "document": { + "filename": "your-document.pdf", + "url": "data:application/pdf;base64," + }, + "model": "retab-small", + "subdocuments": [ + { + "name": "Invoice", + "description": "Invoice pages", + "allow_multiple_instances": true + }, + { + "name": "Contract", + "description": "Contract pages" + } + ] +}' + +# To encode your file as base64: +# base64 -i your-document.pdf + +# Example response: +# { +# "output": [ +# {"name": "Invoice", "pages": [1, 2, 3]}, +# {"name": "Contract", "pages": [4, 5, 6, 7, 8]} +# ] +# } +``` + + + + + + + +```python python +""" +Run Workflow: Invoice workflow + +This script runs 
the workflow using the Retab SDK. +The workflow execution happens asynchronously on the server. +""" + +import time +from pathlib import Path +from retab import Retab + +# Initialize the Retab client +client = Retab(api_key="YOUR_RETAB_API_KEY") + +def get_run_duration_ms(run): + if run.timing.started_at is None or run.timing.completed_at is None: + return None + elapsed_ms = int((run.timing.completed_at - run.timing.started_at).total_seconds() * 1000) + return max(0, elapsed_ms) + +# Input documents - replace with your document paths +document = Path("path/to/your/file.pdf") # Invoice PDF + +# JSON inputs - replace with your data +json_data = { + "customer_id": "Sample customer_id", + "priority": "Sample priority", +} # Run metadata + +# Run the workflow +initial_run = client.workflows.runs.create( + workflow_id="wf_abc123", + version="production", + documents={ + "document_input": document, # Invoice PDF + }, + json_inputs={ + "json_input": json_data, # Run metadata + } +) + +print(f"Workflow run started: {initial_run.id}") + +run = initial_run +deadline = time.monotonic() + 600 +terminal_kinds = {"completed", "error", "cancelled"} +while run.lifecycle.status not in terminal_kinds and run.lifecycle.status != "awaiting_review": + if time.monotonic() >= deadline: + raise TimeoutError(f"Timed out waiting for workflow run {run.id}") + time.sleep(2) + run = client.workflows.runs.get(run.id) + print(f"Status: {run.lifecycle.status}") + +if run.lifecycle.status == "awaiting_review": + print("\nWorkflow is awaiting review.") + print(f"Waiting on blocks: {run.lifecycle.waiting_for_block_ids}") +elif run.lifecycle.status == "error": + raise RuntimeError(run.lifecycle.message) +elif run.lifecycle.status == "cancelled": + raise RuntimeError(run.lifecycle.reason or "Workflow run was cancelled") +else: + print("\nWorkflow completed successfully!") + duration_ms = get_run_duration_ms(run) + if duration_ms is not None: + print(f"Duration: {duration_ms}ms") + steps = 
client.workflows.steps.list(run.id) + for step in steps.data: + if step.handle_outputs: + print(f"{step.block_label or step.block_id} outputs:", step.handle_outputs) + if step.artifact: + artifact = client.workflows.artifacts.get(step.artifact) + print(f"{step.block_label or step.block_id} artifact:", artifact) +``` + +```typescript typescript +/** + * Run Workflow: Invoice workflow + * + * This script runs the workflow using the Retab SDK. + * The workflow execution happens asynchronously on the server. + */ + +import { Retab } from "@retab/node"; + +const client = new Retab({ apiKey: "YOUR_RETAB_API_KEY" }); + +function getRunDurationMs(run: { timing?: { startedAt?: Date | null; completedAt?: Date | null } }) { + if (!run.timing?.startedAt || !run.timing.completedAt) { + return null; + } + return Math.max(0, run.timing.completedAt.getTime() - run.timing.startedAt.getTime()); +} + +// Input documents - replace with your document paths +const document = "./path/to/your/file.pdf"; // Invoice PDF + +// JSON inputs - replace with your data +const jsonData = { + "customer_id": "Sample customer_id", + "priority": "Sample priority" +}; // Run metadata + +async function runWorkflow() { + let run = await client.workflows.runs.create( + "wf_abc123", + { + "document_input": document, // Invoice PDF + }, + { + "json_input": jsonData, // Run metadata + }, + "production" + ); + const deadline = Date.now() + 600000; + while (run.lifecycle?.status === "pending" || run.lifecycle?.status === "running") { + if (Date.now() >= deadline) { + throw new Error(`Timed out waiting for workflow run ${run.id}`); + } + await new Promise((resolve) => setTimeout(resolve, 2000)); + run = await client.workflows.runs.get(run.id); + console.log(`Run ${run.id}: ${run.lifecycle?.status ?? 
"unknown"}`); + } + + if (run.lifecycle?.status === "awaiting_review") { + console.log("\nWorkflow is awaiting review."); + console.log("Waiting on blocks:", run.lifecycle.waitingForBlockIds); + return; + } + + if (run.lifecycle?.status === "error") { + throw new Error(run.lifecycle.message); + } + if (run.lifecycle?.status === "cancelled") { + throw new Error(run.lifecycle.reason ?? "Workflow run was cancelled"); + } + + console.log("\nWorkflow completed successfully!"); + const durationMs = getRunDurationMs(run); + if (durationMs != null) { + console.log(`Duration: ${durationMs}ms`); + } + const steps = await client.workflows.steps.list({ runId: run.id }); + for (const step of steps.data) { + const outputs = step.handleOutputs ?? {}; + if (Object.keys(outputs).length > 0) { + console.log(`${step.blockLabel || step.blockId} outputs:`, outputs); + } + if (step.artifact) { + const artifact = await client.workflows.artifacts.get(step.artifact.id); + console.log(`${step.blockLabel || step.blockId} artifact:`, artifact); + } + } +} + +runWorkflow().catch((error) => { + console.error(error); + process.exitCode = 1; +}); +``` + +```go go +import ( + "context" + "encoding/json" + "fmt" + + retab "github.com/retab-dev/retab/clients/go" +) + +ctx := context.Background() +client, err := retab.NewClient("YOUR_RETAB_API_KEY") +if err != nil { + panic(err) +} + +documents := map[string]interface{}{ + "document_input": "path/to/document.pdf", // Invoice PDF +} + +var jsonInputs map[string]interface{} +if err := json.Unmarshal([]byte(`{ + "json_input": { + "customer_id": "Sample customer_id", + "priority": "Sample priority" + } +}`), &jsonInputs); err != nil { + panic(err) +} + +version := "production" +run, err := client.Workflows.Runs.Create(ctx, &retab.WorkflowRunsCreateParams{ + WorkflowID: "wf_abc123", + Documents: &documents, + JSONInputs: &jsonInputs, + Version: &version, +}) +if err != nil { + panic(err) +} + +fmt.Println("Workflow run started:", run.ID) 
+fmt.Println("Workflow:", "Invoice workflow") +``` + +```php php +workflowRuns()->create( + workflowId: 'wf_abc123', + documents: $documents, + jsonInputs: $jsonInputs, + version: 'production', +); + +echo 'Workflow run started: ' . $run->id . PHP_EOL; +echo 'Workflow: ' . 'Invoice workflow' . PHP_EOL; +``` + +```csharp dotnet +using Newtonsoft.Json; +using Retab; +using RetabClient = Retab.Retab; + +var client = new RetabClient("YOUR_RETAB_API_KEY"); +var documents = new Dictionary +{ + ["document_input"] = MimeData.FromFile("path/to/document.pdf"), +}; +var jsonInputs = JsonConvert.DeserializeObject>(@"{ + ""json_input"": { + ""customer_id"": ""Sample customer_id"", + ""priority"": ""Sample priority"" + } +}")!; + +var run = await client.WorkflowRuns.CreateAsync(new WorkflowRunsCreateOptions +{ + WorkflowId = "wf_abc123", + Documents = documents, + JsonInputs = jsonInputs, + Version = "production", +}); + +Console.WriteLine($"Workflow run started: {run.Id}"); +Console.WriteLine("Workflow: Invoice workflow"); +``` + +```ruby ruby +require 'json' +require 'retab' + +client = Retab::Client.new(api_key: "YOUR_RETAB_API_KEY") +documents = JSON.parse(%q({ + "document_input": "path/to/document.pdf" +})) +json_inputs = JSON.parse(%q({ + "json_input": { + "customer_id": "Sample customer_id", + "priority": "Sample priority" + } +})) + +run = client.workflow_runs.create( + workflow_id: "wf_abc123", + documents: documents, + json_inputs: json_inputs, + version: 'production', +) + +puts "Workflow run started: #{run.id}" +puts "Workflow: Invoice workflow" +``` + +```rust rust +use retab::{ + models::{CreateWorkflowRunRequest, CreateWorkflowRunRequestDocumentsOneOf}, + resources::workflow_runs, + MimeData, + Retab, +}; +use std::collections::HashMap; + +let client = Retab::new("YOUR_RETAB_API_KEY"); + +let mut body = CreateWorkflowRunRequest::new("wf_abc123"); +body.version = Some("production".to_string()); + +let mut documents = HashMap::new(); +documents.insert( + 
"document_input".to_string(), + CreateWorkflowRunRequestDocumentsOneOf::from(MimeData::from("path/to/document.pdf")), +); +body.documents = Some(documents); + +let json_inputs: HashMap = + serde_json::from_str(r#"{ + "json_input": { + "customer_id": "Sample customer_id", + "priority": "Sample priority" + } +}"#)?; +body.json_inputs = Some(json_inputs); + +let _run = client + .workflows() + .runs() + .create(workflow_runs::CreateParams::new(body)) + .await?; + +println!("Workflow run started for Invoice workflow"); +``` + +```java java +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.retab.RetabClient; +import com.retab.models.WorkflowRun; +import java.util.LinkedHashMap; +import java.util.Map; + +public class Example { + public static void main(String[] args) throws Exception { + RetabClient client = new RetabClient("YOUR_RETAB_API_KEY"); + ObjectMapper objectMapper = new ObjectMapper(); + Map documents = new LinkedHashMap<>(); + documents.put("document_input", "path/to/document.pdf"); + Map jsonInputs = objectMapper.readValue( + "{\n \"json_input\": {\n \"customer_id\": \"Sample customer_id\",\n \"priority\": \"Sample priority\"\n }\n}", + new TypeReference>() {}); + + WorkflowRun run = client.workflows().runs().create( + "wf_abc123", + documents, + jsonInputs, + "production"); + + System.out.println("Workflow run started for Invoice workflow: " + run); + } +} +``` + +```bash curl +# Run Workflow: Invoice workflow +# +# Document block mappings: +# document_input: Invoice PDF +# +# JSON input block mappings: +# json_input: Run metadata + +curl -X POST "https://api.retab.com/v1/workflows/runs" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer YOUR_RETAB_API_KEY" \ + -d '{ + "workflow_id": "wf_abc123", + "version": "production", + "documents": { + "document_input": { + "filename": "document.pdf", + "url": "data:application/pdf;base64," + } + }, + "json_inputs": { + 
"json_input": { + "customer_id": "Sample customer_id", + "priority": "Sample priority" + } + } +}' + +# To encode your file as base64: +# base64 -i your-document.pdf + +# Example response: +# { +# "id": "run_abc123", +# "workflow": { +# "workflow_id": "wf_abc123", +# "version_id": "ver_abc123" +# }, +# "lifecycle": { "status": "running" }, +# "timing": { "created_at": "2026-05-11T12:00:00Z" }, +# ... +# } + +# Check run lifecycle: +# curl -X GET "https://api.retab.com/v1/workflows/runs/run_abc123" \ +# -H "Authorization: Bearer YOUR_RETAB_API_KEY" +``` + + \ No newline at end of file