diff --git a/packages/backend/src/repoIndexManager.ts b/packages/backend/src/repoIndexManager.ts index 0fb055a31..d965fd79c 100644 --- a/packages/backend/src/repoIndexManager.ts +++ b/packages/backend/src/repoIndexManager.ts @@ -100,6 +100,7 @@ export class RepoIndexManager { logger.debug('Starting scheduler'); // Cleanup any orphaned disk resources on startup await this.cleanupOrphanedDiskResources(); + await this.reconcileMissingShards(); this.interval = setIntervalAsync(async () => { await this.scheduleIndexJobs(); await this.scheduleCleanupJobs(); @@ -703,6 +704,8 @@ export class RepoIndexManager { } } + + // --- Index shards --- // Shard files are prefixed with _: DATA_CACHE_DIR/index/__*.zoekt if (existsSync(INDEX_CACHE_DIR)) { @@ -736,6 +739,67 @@ export class RepoIndexManager { } } + // Scans the DB for repos marked as indexed but missing their shard files + // on disk. This handles the case where the index directory is wiped (e.g. + // ephemeral storage on a pod restart) but the DB still has stale indexedAt + // timestamps, causing the scheduler to skip re-indexing them. + private async reconcileMissingShards() { + if (!existsSync(INDEX_CACHE_DIR)) { + logger.debug('Index cache directory does not exist, skipping shard reconciliation.'); + return; + } + + const entries = await readdir(INDEX_CACHE_DIR); + const repoIdsOnDisk = new Set(); + for (const entry of entries) { + const repoId = getRepoIdFromShardFileName(entry); + if (repoId !== undefined) { + repoIdsOnDisk.add(repoId); + } + } + + const indexedRepos = await this.db.repo.findMany({ + where: { + indexedAt: { + not: null, + }, + }, + select: { + id: true, + name: true, + indexedAt: true, + }, + }); + + if (indexedRepos.length === 0) { + return; + } + + // Only treat a missing shard as stale if indexedAt is older than + // the reindex interval. This avoids resetting repos that legitimately + // produce zero shards (e.g. empty repos). + const thresholdDate = new Date(Date.now() - this.settings.reindexIntervalMs); + + let resetCount = 0; + + for (const repo of indexedRepos) { + if (!repoIdsOnDisk.has(repo.id) && repo.indexedAt! < thresholdDate) { + logger.warn(`Repo "${repo.name}" (id: ${repo.id}) is marked as indexed in the DB but has no shard file on disk. Marking as stale.`); + await this.db.repo.update({ + where: { id: repo.id }, + data: { indexedAt: null }, + }); + resetCount++; + } + } + + if (resetCount > 0) { + logger.info(`Shard reconciliation complete. Reset ${resetCount} repo(s) to stale — they will be re-indexed shortly.`); + } else { + logger.debug('Shard reconciliation complete. All indexed repos have shard files on disk.'); + } + } + public async dispose() { if (this.interval) { clearInterval(this.interval);