Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ window.onload = connectWebSocket
```

<h3>SQL Dump</h3>
You can request a `database_dump.sql` file that exports your database schema and data into a single file.
You can request a `database_dump.sql` file that exports your database schema and data into a single file. The dump is generated and streamed page-by-page, so the database is never buffered into memory all at once and tables of any size can be exported.

<pre>
<code>
Expand All @@ -243,6 +243,15 @@ curl --location 'https://starbasedb.YOUR-ID-HERE.workers.dev/export/dump' \
</code>
</pre>

For very large databases, the export can exceed the 30-second request limit. Bind an R2 bucket as `DATABASE_DUMP_BUCKET` (see `wrangler.toml`) and pass `?location=r2` to stream the dump into an R2 object named `dump_YYYYMMDD-HHMMSS.sql` instead of returning it in the response. The request returns immediately with the object key while the upload finishes in the background. Provide an optional `&callback=<url>` to receive a `POST` notification once the file is ready.

<pre>
<code>
curl --location 'https://starbasedb.YOUR-ID-HERE.workers.dev/export/dump?location=r2&callback=https://example.com/notify' \
--header 'Authorization: Bearer ABC123'
</code>
</pre>

<h3>JSON Data Export</h3>
<pre>
<code>
Expand Down
203 changes: 202 additions & 1 deletion src/export/dump.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { describe, it, expect, vi, beforeEach } from 'vitest'
import { dumpDatabaseRoute } from './dump'
import {
dumpDatabaseRoute,
generateDumpChunks,
escapeSqlValue,
formatDumpTimestamp,
} from './dump'
import { executeOperation } from '.'
import { createResponse } from '../utils'
import type { DataSource } from '../types'
Expand All @@ -24,6 +29,7 @@ let mockConfig: StarbaseDBConfiguration

beforeEach(() => {
vi.clearAllMocks()
vi.unstubAllGlobals()

mockDataSource = {
source: 'external',
Expand Down Expand Up @@ -141,5 +147,200 @@ describe('Database Dump Module', () => {
expect(response.status).toBe(500)
const jsonResponse: { error: string } = await response.json()
expect(jsonResponse.error).toBe('Failed to create database dump')
consoleErrorMock.mockRestore()
})
})

// Checks how escapeSqlValue renders individual JavaScript values as SQL literals.
describe('escapeSqlValue', () => {
    it('should render null and undefined as NULL', () => {
        // Both "missing" values must collapse to the same SQL keyword.
        const missingValues = [null, undefined] as const
        for (const missing of missingValues) {
            expect(escapeSqlValue(missing)).toBe('NULL')
        }
    })

    it('should render numbers and booleans without quotes', () => {
        // [input, expected literal]; booleans are expected as 1 / 0.
        const unquotedCases = [
            [42, '42'],
            [3.14, '3.14'],
            [true, '1'],
            [false, '0'],
        ] as const

        for (const [input, literal] of unquotedCases) {
            expect(escapeSqlValue(input)).toBe(literal)
        }
    })

    it('should render binary BLOB values as hex literals', () => {
        // Bytes include a leading zero and a sub-0x10 value to exercise zero-padding.
        const bytes = Uint8Array.of(0x00, 0x0f, 0xff)
        const rendered = escapeSqlValue(bytes)
        expect(rendered).toBe("X'000fff'")
    })

    it('should escape embedded single quotes in strings', () => {
        // The embedded quote is expected doubled, with the whole value wrapped in quotes.
        const rendered = escapeSqlValue("O'Brien")
        expect(rendered).toBe("'O''Brien'")
    })
})

// Checks the YYYYMMDD-HHMMSS string that formatDumpTimestamp produces for a Date.
describe('formatDumpTimestamp', () => {
    it('should format a date as YYYYMMDD-HHMMSS in UTC', () => {
        // Input is given in UTC ("Z"); the expected output carries the same wall-clock fields.
        const instant = new Date('2024-01-01T17:00:00Z')
        const stamp = formatDumpTimestamp(instant)
        expect(stamp).toBe('20240101-170000')
    })

    it('should zero-pad single digit components', () => {
        // Month, day, hour, minute and second are all single-digit here.
        const instant = new Date('2024-03-05T07:08:09Z')
        const stamp = formatDumpTimestamp(instant)
        expect(stamp).toBe('20240305-070809')
    })
})

// Checks that generateDumpChunks fetches table rows in pages rather than in one query.
describe('generateDumpChunks pagination', () => {
    it('should page through table data without loading it all at once', async () => {
        const executeMock = vi.mocked(executeOperation)

        // Results are consumed in call order: one schema row, then a page of
        // two rows, then a final page holding a single row.
        executeMock.mockResolvedValueOnce([
            { sql: 'CREATE TABLE t (id INTEGER);' },
        ])
        executeMock.mockResolvedValueOnce([{ id: 1 }, { id: 2 }])
        executeMock.mockResolvedValueOnce([{ id: 3 }])

        // Drain the async generator with a page size of 2, concatenating as we go.
        const pageSize = 2
        let dump = ''
        const generator = generateDumpChunks(
            ['t'],
            mockDataSource,
            mockConfig,
            pageSize
        )
        for await (const piece of generator) {
            dump += piece
        }

        // Rows from both pages must appear in the output.
        const expectedInserts = [
            'INSERT INTO t VALUES (1);',
            'INSERT INTO t VALUES (2);',
            'INSERT INTO t VALUES (3);',
        ]
        for (const statement of expectedInserts) {
            expect(dump).toContain(statement)
        }

        // One schema query followed by two paged data queries (offset 0, then offset 2).
        const { calls } = executeMock.mock
        expect(calls).toHaveLength(3)
        const [, firstPageCall, secondPageCall] = calls
        expect(firstPageCall[0][0].sql).toContain('OFFSET 0')
        expect(secondPageCall[0][0].sql).toContain('OFFSET 2')
    })
})

// Tests for the `?location=r2` mode of dumpDatabaseRoute, where the dump is
// written to a bucket via a multipart upload instead of being returned in the
// response body. The bucket and upload objects are hand-rolled mocks.
describe('Database Dump R2 offload', () => {
// Builds a fake multipart-upload handle exposing uploadPart / complete / abort.
// uploadPart resolves to `{ partNumber, etag: 'etag-<partNumber>' }` so tests
// can assert exactly which parts were handed to complete().
function createMockUpload() {
return {
uploadPart: vi.fn((partNumber: number) =>
Promise.resolve({ partNumber, etag: `etag-${partNumber}` })
),
complete: vi.fn().mockResolvedValue({}),
abort: vi.fn().mockResolvedValue(undefined),
}
}

// Synchronous path: the test expects a 200 response whose status is
// 'completed'. NOTE(review): assumes beforeEach leaves executionContext
// unset on mockDataSource — confirm against the full beforeEach.
it('should stream the dump into an R2 multipart object', async () => {
const upload = createMockUpload()
const bucket = {
createMultipartUpload: vi.fn().mockResolvedValue(upload),
}
mockDataSource.dumpBucket = bucket as any

// Queued in call order. Presumably consumed as: table list, table schema,
// one page of rows — inferred from the result shapes; verify against dump.ts.
vi.mocked(executeOperation)
.mockResolvedValueOnce([{ name: 'logs' }])
.mockResolvedValueOnce([{ sql: 'CREATE TABLE logs (id INTEGER);' }])
.mockResolvedValueOnce([{ id: 1 }])

const request = new Request(
'https://example.com/export/dump?location=r2'
)
const response = await dumpDatabaseRoute(
mockDataSource,
mockConfig,
request
)

expect(response.status).toBe(200)
const body: { result: { key: string; status: string } } =
await response.json()
// Object key must follow the dump_YYYYMMDD-HHMMSS.sql naming pattern.
expect(body.result.key).toMatch(/^dump_\d{8}-\d{6}\.sql$/)
expect(body.result.status).toBe('completed')

// The upload must be opened under the same key that is reported to the caller.
expect(bucket.createMultipartUpload).toHaveBeenCalledWith(
body.result.key
)
// This small dump is expected to be sent as a single part, and complete()
// must receive exactly what uploadPart resolved to.
expect(upload.uploadPart).toHaveBeenCalledTimes(1)
expect(upload.complete).toHaveBeenCalledWith([
{ partNumber: 1, etag: 'etag-1' },
])
})

// With `&callback=<url>` the route is expected to POST a JSON body containing
// `status: 'completed'` to that URL exactly once.
it('should notify the callback URL once the upload completes', async () => {
const upload = createMockUpload()
const bucket = {
createMultipartUpload: vi.fn().mockResolvedValue(upload),
}
mockDataSource.dumpBucket = bucket as any

// Replace global fetch so the callback request can be inspected.
const fetchMock = vi.fn().mockResolvedValue(new Response('ok'))
vi.stubGlobal('fetch', fetchMock)

// Same query sequence as the previous test, but the last result is an empty array.
vi.mocked(executeOperation)
.mockResolvedValueOnce([{ name: 'logs' }])
.mockResolvedValueOnce([{ sql: 'CREATE TABLE logs (id INTEGER);' }])
.mockResolvedValueOnce([])

const request = new Request(
'https://example.com/export/dump?location=r2&callback=https://hooks.example.com/done'
)
await dumpDatabaseRoute(mockDataSource, mockConfig, request)

expect(fetchMock).toHaveBeenCalledTimes(1)
const [calledUrl, calledInit] = fetchMock.mock.calls[0]
expect(calledUrl).toBe('https://hooks.example.com/done')
expect(calledInit.method).toBe('POST')
expect(JSON.parse(calledInit.body).status).toBe('completed')
})

// Background path: when the data source carries an executionContext, the test
// expects a 202 response and the work to be handed to waitUntil().
it('should return 202 and run in the background when an execution context is present', async () => {
const upload = createMockUpload()
const bucket = {
createMultipartUpload: vi.fn().mockResolvedValue(upload),
}
// Collects every promise passed to waitUntil so the test can await them below.
const pending: Promise<unknown>[] = []
mockDataSource.dumpBucket = bucket as any
mockDataSource.executionContext = {
waitUntil: vi.fn((p: Promise<unknown>) => pending.push(p)),
} as any

vi.mocked(executeOperation)
.mockResolvedValueOnce([{ name: 'logs' }])
.mockResolvedValueOnce([{ sql: 'CREATE TABLE logs (id INTEGER);' }])
.mockResolvedValueOnce([])

const request = new Request(
'https://example.com/export/dump?location=r2'
)
const response = await dumpDatabaseRoute(
mockDataSource,
mockConfig,
request
)

expect(response.status).toBe(202)
expect(
mockDataSource.executionContext!.waitUntil
).toHaveBeenCalledTimes(1)

// Let the backgrounded upload settle so assertions are deterministic.
await Promise.all(pending)
expect(upload.complete).toHaveBeenCalledTimes(1)
})

// No dumpBucket is assigned in this test; the route is expected to reject the
// request with a 400 whose error message names the DATABASE_DUMP_BUCKET binding.
// NOTE(review): relies on beforeEach rebuilding mockDataSource without a
// dumpBucket — confirm against the full beforeEach.
it('should return 400 when R2 offload is requested without a bucket binding', async () => {
vi.mocked(executeOperation).mockResolvedValueOnce([])

const request = new Request(
'https://example.com/export/dump?location=r2'
)
const response = await dumpDatabaseRoute(
mockDataSource,
mockConfig,
request
)

expect(response.status).toBe(400)
const body: { error: string } = await response.json()
expect(body.error).toContain('DATABASE_DUMP_BUCKET')
})
})
Loading