From 929f9a70df98db40719298ba5c7d0d235e5c2af5 Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Tue, 14 Apr 2026 08:15:45 +0000
Subject: [PATCH] fix: clean up stale vectors on directory deletion and add Qdrant collection alias

- Detect directory deletions in FileWatcher by checking cache for child paths,
  queuing all children for vector deletion (including directories that hold
  only a single indexed file)
- Add human-readable Qdrant collection alias based on workspace folder name
- Add tests for both features

Addresses #12115
---
 .../processors/__tests__/file-watcher.spec.ts | 92 +++++++++++++++++++
 .../code-index/processors/file-watcher.ts     | 31 ++++++-
 .../__tests__/qdrant-client.spec.ts           | 83 +++++++++++++++++
 .../code-index/vector-store/qdrant-client.ts  | 41 +++++++++
 4 files changed, 244 insertions(+), 3 deletions(-)

diff --git a/src/services/code-index/processors/__tests__/file-watcher.spec.ts b/src/services/code-index/processors/__tests__/file-watcher.spec.ts
index 2a3b7e11677..60e16bc12c7 100644
--- a/src/services/code-index/processors/__tests__/file-watcher.spec.ts
+++ b/src/services/code-index/processors/__tests__/file-watcher.spec.ts
@@ -108,6 +108,7 @@ describe("FileWatcher", () => {
 			getHash: vi.fn(),
 			updateHash: vi.fn(),
 			deleteHash: vi.fn(),
+			getAllHashes: vi.fn().mockReturnValue({}),
 		}
 
 		mockEmbedder = {
@@ -277,6 +278,97 @@ describe("FileWatcher", () => {
 		})
 	})
 
+	describe("directory deletion handling", () => {
+		it("should queue all cached child files for deletion when a directory is deleted", async () => {
+			// Setup cache with files that are children of a directory
+			const directoryPath = "/mock/workspace/src/components"
+			mockCacheManager.getAllHashes.mockReturnValue({
+				[`${directoryPath}/Button.tsx`]: "hash1",
+				[`${directoryPath}/Modal.tsx`]: "hash2",
+				[`${directoryPath}/utils/helpers.ts`]: "hash3",
+				["/mock/workspace/src/index.ts"]: "hash4",
+			})
+
+			await fileWatcher.initialize()
+
+			// Trigger directory deletion event
+			await mockOnDidDelete({ fsPath: directoryPath })
+
+			// Wait for batch processing
+			await new Promise((resolve) => setTimeout(resolve, 600))
+
+			// Verify that deletePointsByMultipleFilePaths was called with all child paths
+			expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled()
+			const deletedPaths = mockVectorStore.deletePointsByMultipleFilePaths.mock.calls[0][0]
+			expect(deletedPaths).toContain(`${directoryPath}/Button.tsx`)
+			expect(deletedPaths).toContain(`${directoryPath}/Modal.tsx`)
+			expect(deletedPaths).toContain(`${directoryPath}/utils/helpers.ts`)
+			// Should NOT include files outside the deleted directory
+			expect(deletedPaths).not.toContain("/mock/workspace/src/index.ts")
+		})
+
+		it("should queue the cached child when a deleted directory holds a single indexed file", async () => {
+			const directoryPath = "/mock/workspace/src/solo"
+			mockCacheManager.getAllHashes.mockReturnValue({
+				[`${directoryPath}/only.ts`]: "hash1",
+				["/mock/workspace/src/index.ts"]: "hash2",
+			})
+
+			await fileWatcher.initialize()
+
+			// Trigger directory deletion event
+			await mockOnDidDelete({ fsPath: directoryPath })
+
+			// Wait for batch processing
+			await new Promise((resolve) => setTimeout(resolve, 600))
+
+			// The lone child must be deleted, not the directory path itself
+			expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled()
+			const deletedPaths = mockVectorStore.deletePointsByMultipleFilePaths.mock.calls[0][0]
+			expect(deletedPaths).toContain(`${directoryPath}/only.ts`)
+			expect(deletedPaths).not.toContain(directoryPath)
+		})
+
+		it("should handle single file deletion normally when no cached children exist", async () => {
+			const filePath = "/mock/workspace/src/index.ts"
+			mockCacheManager.getAllHashes.mockReturnValue({
+				[filePath]: "hash1",
+				["/mock/workspace/src/other.ts"]: "hash2",
+			})
+
+			await fileWatcher.initialize()
+
+			// Trigger single file deletion
+			await mockOnDidDelete({ fsPath: filePath })
+
+			// Wait for batch processing
+			await new Promise((resolve) => setTimeout(resolve, 600))
+
+			// Should process deletion for just the one file
+			expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled()
+			const deletedPaths = mockVectorStore.deletePointsByMultipleFilePaths.mock.calls[0][0]
+			expect(deletedPaths).toContain(filePath)
+			expect(deletedPaths).not.toContain("/mock/workspace/src/other.ts")
+		})
+
+		it("should handle deletion of path not in cache", async () => {
+			mockCacheManager.getAllHashes.mockReturnValue({
+				["/mock/workspace/src/other.ts"]: "hash1",
+			})
+
+			await fileWatcher.initialize()
+
+			// Trigger deletion of a file not in cache
+			await mockOnDidDelete({ fsPath: "/mock/workspace/src/nonexistent.ts" })
+
+			// Wait for batch processing
+			await new Promise((resolve) => setTimeout(resolve, 600))
+
+			// Should still attempt deletion (the vector store will handle the no-op)
+			expect(mockVectorStore.deletePointsByMultipleFilePaths).toHaveBeenCalled()
+		})
+	})
+
 	describe("dispose", () => {
 		it("should dispose of the watcher when disposed", async () => {
 			await fileWatcher.initialize()
diff --git a/src/services/code-index/processors/file-watcher.ts b/src/services/code-index/processors/file-watcher.ts
index a6a3122c36c..89431843d42 100644
--- a/src/services/code-index/processors/file-watcher.ts
+++ b/src/services/code-index/processors/file-watcher.ts
@@ -1,3 +1,4 @@
+import * as path from "path"
 import * as vscode from "vscode"
 import {
 	QDRANT_CODE_BLOCK_NAMESPACE,
@@ -152,11 +153,35 @@ export class FileWatcher implements IFileWatcher {
 	}
 
 	/**
-	 * Handles file deletion events
-	 * @param uri URI of the deleted file
+	 * Handles file deletion events.
+	 * When a directory is deleted, VSCode's FileSystemWatcher may not fire
+	 * individual delete events for each file inside it. This method detects
+	 * directory deletions by checking the cache for any files whose paths
+	 * start with the deleted path prefix, and queues them all for deletion.
+	 * @param uri URI of the deleted file or directory
 	 */
 	private async handleFileDeleted(uri: vscode.Uri): Promise<void> {
-		this.accumulatedEvents.set(uri.fsPath, { uri, type: "delete" })
+		const deletedPath = uri.fsPath
+
+		// Collect the cached entry for the path itself plus every cached file underneath it
+		const allHashes = this.cacheManager.getAllHashes()
+		const childPaths = Object.keys(allHashes).filter(
+			(cachedPath) => cachedPath.startsWith(deletedPath + path.sep) || cachedPath === deletedPath,
+		)
+
+		if (childPaths.some((cachedPath) => cachedPath !== deletedPath)) {
+			// A cached file lives under the path, so a directory was deleted (even if it held one file) - queue every match
+			for (const childPath of childPaths) {
+				this.accumulatedEvents.set(childPath, {
+					uri: vscode.Uri.file(childPath),
+					type: "delete",
+				})
+			}
+		} else {
+			// Single file deletion (or a path that is not in the cache)
+			this.accumulatedEvents.set(deletedPath, { uri, type: "delete" })
+		}
+
 		this.scheduleBatchProcessing()
 	}
 
diff --git a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
index ab7b15783e3..0c939252bd9 100644
--- a/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
+++ b/src/services/code-index/vector-store/__tests__/qdrant-client.spec.ts
@@ -35,6 +35,7 @@ const mockQdrantClientInstance = {
 	createCollection: vitest.fn(),
 	deleteCollection: vitest.fn(),
 	createPayloadIndex: vitest.fn(),
+	updateCollectionAliases: vitest.fn(),
 	upsert: vitest.fn(),
 	query: vitest.fn(),
 	delete: vitest.fn(),
@@ -980,6 +981,88 @@ describe("QdrantVectorStore", () => {
 			})
 		})
 
+		describe("workspace alias creation", () => {
+			it("should create a workspace alias during initialization", async () => {
+				mockQdrantClientInstance.getCollection.mockRejectedValue({
+					response: { status: 404 },
+					message: "Not found",
+				})
+				mockQdrantClientInstance.createCollection.mockResolvedValue(true as any)
+				mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any)
+				mockQdrantClientInstance.updateCollectionAliases.mockResolvedValue(true as any)
+				vitest.spyOn(console, "log").mockImplementation(() => {})
+
+				await vectorStore.initialize()
+
+				expect(mockQdrantClientInstance.updateCollectionAliases).toHaveBeenCalledTimes(1)
+				expect(mockQdrantClientInstance.updateCollectionAliases).toHaveBeenCalledWith({
+					actions: [
+						{
+							create_alias: {
+								collection_name: expectedCollectionName,
+								alias_name: "workspace",
+							},
+						},
+					],
+				})
+				;(console.log as any).mockRestore()
+			})
+
+			it("should not fail initialization if alias creation fails", async () => {
+				mockQdrantClientInstance.getCollection.mockRejectedValue({
+					response: { status: 404 },
+					message: "Not found",
+				})
+				mockQdrantClientInstance.createCollection.mockResolvedValue(true as any)
+				mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any)
+				mockQdrantClientInstance.updateCollectionAliases.mockRejectedValue(new Error("Alias creation failed"))
+				vitest.spyOn(console, "warn").mockImplementation(() => {})
+
+				const result = await vectorStore.initialize()
+
+				// Should still succeed even if alias creation fails
+				expect(result).toBe(true)
+				expect(mockQdrantClientInstance.updateCollectionAliases).toHaveBeenCalledTimes(1)
+				expect(console.warn).toHaveBeenCalledWith(
+					expect.stringContaining("Could not create workspace alias"),
+					expect.any(String),
+				)
+				;(console.warn as any).mockRestore()
+			})
+
+			it("should sanitize workspace name for alias", async () => {
+				// Create a vector store with a workspace path that has special characters
+				const specialPathStore = new QdrantVectorStore(
+					"/test/My Project (v2)",
+					mockQdrantUrl,
+					mockVectorSize,
+					mockApiKey,
+				)
+				mockQdrantClientInstance.getCollection.mockRejectedValue({
+					response: { status: 404 },
+					message: "Not found",
+				})
+				mockQdrantClientInstance.createCollection.mockResolvedValue(true as any)
+				mockQdrantClientInstance.createPayloadIndex.mockResolvedValue({} as any)
+				mockQdrantClientInstance.updateCollectionAliases.mockResolvedValue(true as any)
+				vitest.spyOn(console, "log").mockImplementation(() => {})
+
+				await specialPathStore.initialize()
+
+				expect(mockQdrantClientInstance.updateCollectionAliases).toHaveBeenCalledWith({
+					actions: [
+						{
+							create_alias: {
+								collection_name: expect.any(String),
+								alias_name: "my-project--v2-",
+							},
+						},
+					],
+				})
+				;(console.log as any).mockRestore()
+			})
+		})
+
 		it("should return true when collection exists", async () => {
 			mockQdrantClientInstance.getCollection.mockResolvedValue({
 				config: {
diff --git a/src/services/code-index/vector-store/qdrant-client.ts b/src/services/code-index/vector-store/qdrant-client.ts
index ba62afc5f81..cc2bdd90733 100644
--- a/src/services/code-index/vector-store/qdrant-client.ts
+++ b/src/services/code-index/vector-store/qdrant-client.ts
@@ -194,6 +194,10 @@ export class QdrantVectorStore implements IVectorStore {
 
 			// Create payload indexes
 			await this._createPayloadIndexes()
+
+			// Create a human-readable alias for the collection using the workspace folder name
+			await this._createWorkspaceAlias()
+
 			return created
 		} catch (error: any) {
 			const errorMessage = error?.message || error
@@ -331,6 +335,43 @@ export class QdrantVectorStore implements IVectorStore {
 		}
 	}
 
+	/**
+	 * Creates a human-readable Qdrant alias for the collection using the workspace folder name.
+	 * This allows external tools to discover and query the collection without reverse-engineering
+	 * the hashed naming scheme. Non-fatal: failures are logged but do not block initialization.
+	 * NOTE(review): the alias is derived from the folder name alone, so two workspaces with the
+	 * same basename resolve to the same alias - confirm how Qdrant handles that clash.
+	 */
+	private async _createWorkspaceAlias(): Promise<void> {
+		try {
+			const workspaceName = path.basename(this.workspacePath)
+			if (!workspaceName) {
+				return
+			}
+
+			// Sanitize the alias name: only allow alphanumeric, hyphens, underscores
+			const aliasName = workspaceName.replace(/[^a-zA-Z0-9_-]/g, "-").toLowerCase()
+			if (!aliasName) {
+				return
+			}
+
+			await this.client.updateCollectionAliases({
+				actions: [
+					{
+						create_alias: {
+							collection_name: this.collectionName,
+							alias_name: aliasName,
+						},
+					},
+				],
+			})
+			console.log(`[QdrantVectorStore] Created alias "${aliasName}" for collection "${this.collectionName}"`)
+		} catch (aliasError: any) {
+			// Non-fatal - log warning but don't fail initialization
+			console.warn(`[QdrantVectorStore] Could not create workspace alias:`, aliasError?.message || aliasError)
+		}
+	}
+
 	/**
 	 * Upserts points into the vector store
 	 * @param points Array of points to upsert