Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ export class RepoIndexManager {
logger.debug('Starting scheduler');
// Cleanup any orphaned disk resources on startup
await this.cleanupOrphanedDiskResources();
await this.reconcileMissingShards();
this.interval = setIntervalAsync(async () => {
await this.scheduleIndexJobs();
await this.scheduleCleanupJobs();
Expand Down Expand Up @@ -703,6 +704,8 @@ export class RepoIndexManager {
}
}



// --- Index shards ---
// Shard files are prefixed with <orgId>_<repoId>: DATA_CACHE_DIR/index/<orgId>_<repoId>_*.zoekt
if (existsSync(INDEX_CACHE_DIR)) {
Expand Down Expand Up @@ -736,6 +739,67 @@ export class RepoIndexManager {
}
}

// Scans the DB for repos marked as indexed but missing their shard files
// on disk. This handles the case where the index directory is wiped (e.g.
// ephemeral storage on a pod restart) but the DB still has stale indexedAt
// timestamps, causing the scheduler to skip re-indexing them.
private async reconcileMissingShards() {
if (!existsSync(INDEX_CACHE_DIR)) {
logger.debug('Index cache directory does not exist, skipping shard reconciliation.');
return;
}

const entries = await readdir(INDEX_CACHE_DIR);
const repoIdsOnDisk = new Set<number>();
for (const entry of entries) {
const repoId = getRepoIdFromShardFileName(entry);
if (repoId !== undefined) {
repoIdsOnDisk.add(repoId);
}
}

const indexedRepos = await this.db.repo.findMany({
where: {
indexedAt: {
not: null,
},
},
select: {
id: true,
name: true,
indexedAt: true,
},
});

if (indexedRepos.length === 0) {
return;
}

// Only treat a missing shard as stale if indexedAt is older than
// the reindex interval. This avoids resetting repos that legitimately
// produce zero shards (e.g. empty repos).
const thresholdDate = new Date(Date.now() - this.settings.reindexIntervalMs);

let resetCount = 0;

for (const repo of indexedRepos) {
if (!repoIdsOnDisk.has(repo.id) && repo.indexedAt! < thresholdDate) {
logger.warn(`Repo "${repo.name}" (id: ${repo.id}) is marked as indexed in the DB but has no shard file on disk. Marking as stale.`);
await this.db.repo.update({
where: { id: repo.id },
data: { indexedAt: null },
});
resetCount++;
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if (resetCount > 0) {
logger.info(`Shard reconciliation complete. Reset ${resetCount} repo(s) to stale — they will be re-indexed shortly.`);
} else {
logger.debug('Shard reconciliation complete. All indexed repos have shard files on disk.');
}
}

public async dispose() {
if (this.interval) {
clearInterval(this.interval);
Expand Down