|
1 | | -import { Readable } from 'node:stream' |
| 1 | +import { Transform } from 'node:stream' |
2 | 2 | import { createLogger } from '@sim/logger' |
3 | 3 | import { getErrorMessage } from '@sim/utils/errors' |
4 | 4 | import { generateId } from '@sim/utils/id' |
@@ -26,7 +26,7 @@ import { |
26 | 26 | setTableSchemaForImport, |
27 | 27 | updateImportProgress, |
28 | 28 | } from '@/lib/table/service' |
29 | | -import { downloadFile } from '@/lib/uploads/core/storage-service' |
| 29 | +import { downloadFileStream, headObject } from '@/lib/uploads/core/storage-service' |
30 | 30 | import { normalizeColumn } from '@/app/api/table/utils' |
31 | 31 |
|
32 | 32 | const logger = createLogger('TableImportRunner') |
@@ -70,38 +70,31 @@ export async function runTableImport(payload: TableImportPayload): Promise<void> |
70 | 70 | if (!loaded) throw new Error(`Import target table ${tableId} not found`) |
71 | 71 | const table = loaded |
72 | 72 |
|
73 | | - const buffer = await downloadFile({ key: fileKey, context: 'workspace' }) |
| 73 | + // Total byte size for the progress estimate — a cheap HEAD, no download. May be null on |
| 74 | + // the local dev provider, in which case the bar stays indeterminate (rows still show). |
| 75 | + const totalBytes = (await headObject(fileKey, 'workspace'))?.size ?? 0 |
74 | 76 |
|
75 | | - // Delete only after the download succeeds — otherwise a failed download would wipe the |
76 | | - // table with nothing to replace it with. |
77 | | - if (mode === 'replace') await deleteAllTableRows(tableId) |
| 77 | + // Stream the file rather than buffering it — a ~1M-row import must never be held in memory. |
| 78 | + const source = await downloadFileStream({ key: fileKey, context: 'workspace' }) |
78 | 79 |
|
79 | | - // Estimate total data rows by counting line breaks (minus the header) for a |
80 | | - // determinate progress bar. It's an estimate — quoted newlines and blank lines |
81 | | - // make it imprecise — so the client caps the bar below 100% until the terminal |
82 | | - // `ready` event lands. Cheap: one O(bytes) pass over the already-buffered file. |
83 | | - let newlineCount = 0 |
84 | | - for (let i = 0; i < buffer.length; i++) { |
85 | | - if (buffer[i] === 0x0a) newlineCount++ |
86 | | - } |
87 | | - const estimatedTotal = Math.max(0, newlineCount - 1) |
| 80 | + // Delete only after the stream opens (a missing object rejects above) — otherwise a failed |
| 81 | + // download would wipe the table with nothing to replace it with. |
| 82 | + if (mode === 'replace') await deleteAllTableRows(tableId) |
88 | 83 |
|
89 | | - // Publish the estimated total up front so the client shows a determinate bar at 0% |
90 | | - // immediately, instead of "0 rows and counting" until the first batch lands. |
91 | | - void appendTableEvent({ |
92 | | - kind: 'import', |
93 | | - tableId, |
94 | | - importId, |
95 | | - status: 'importing', |
96 | | - progress: 0, |
97 | | - total: estimatedTotal, |
| 84 | + // Count bytes as they flow so the row total can be extrapolated from byte progress. |
| 85 | + let bytesRead = 0 |
| 86 | + const byteCounter = new Transform({ |
| 87 | + transform(chunk: Buffer, _enc, cb) { |
| 88 | + bytesRead += chunk.length |
| 89 | + cb(null, chunk) |
| 90 | + }, |
98 | 91 | }) |
99 | 92 |
|
100 | 93 | const parser = createCsvParser(delimiter) |
101 | 94 | // `.pipe` doesn't forward source errors; forward so the iterator throws. |
102 | | - const source = Readable.from(buffer) |
103 | 95 | source.on('error', (err) => parser.destroy(err)) |
104 | | - source.pipe(parser) |
| 96 | + byteCounter.on('error', (err) => parser.destroy(err)) |
| 97 | + source.pipe(byteCounter).pipe(parser) |
105 | 98 |
|
106 | 99 | let schema: TableSchema | null = null |
107 | 100 | let headerToColumn: Map<string, string> | null = null |
@@ -173,9 +166,19 @@ export async function runTableImport(payload: TableImportPayload): Promise<void> |
173 | 166 | { ...table, schema }, |
174 | 167 | requestId |
175 | 168 | ) |
176 | | - if (inserted - lastReported >= PROGRESS_INTERVAL_ROWS) { |
| 169 | + // Emit after the first batch lands, then every interval, so the bar appears early. |
| 170 | + if ( |
| 171 | + inserted - lastReported >= PROGRESS_INTERVAL_ROWS || |
| 172 | + (lastReported === 0 && inserted > 0) |
| 173 | + ) { |
177 | 174 | lastReported = inserted |
178 | 175 | await updateImportProgress(tableId, inserted) |
| 176 | + // Extrapolate the total from rows-per-byte observed so far; self-refines as it runs. |
| 177 | + // `Math.max(inserted, …)` keeps the total from dropping below rows inserted; omit when the byte size is unknown. |
| 178 | + const estimatedTotal = |
| 179 | + totalBytes > 0 && bytesRead > 0 |
| 180 | + ? Math.max(inserted, Math.round((inserted / bytesRead) * totalBytes)) |
| 181 | + : undefined |
179 | 182 | void appendTableEvent({ |
180 | 183 | kind: 'import', |
181 | 184 | tableId, |
|
0 commit comments