diff --git a/.gitignore b/.gitignore index 41b4384b..c1cc4f93 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,8 @@ uploads/ .idea/ *.swp *.swo +.env +.env.* # OS .DS_Store diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..129bd501 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,76 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Commands + +```bash +# Install dependencies +uv sync + +# Start the development server (from repo root) +cd backend && uv run uvicorn app:app --reload --port 8000 +# or use the helper script: +bash run.sh + +# App available at http://localhost:8000 +# OpenAPI docs at http://localhost:8000/docs +``` + +Tests live in `backend/tests/` (pytest integration tests that use the real ChromaDB store and the Anthropic API); there is no lint command configured in this project. + +## Architecture + +This is a **RAG (Retrieval-Augmented Generation) chatbot** that answers questions about course content using Claude's tool-calling API + ChromaDB semantic search. + +### Stack + +- **Backend**: FastAPI (Python 3.13), managed with `uv` +- **AI**: Anthropic Claude API (`claude-sonnet-4-20250514`) with tool calling +- **Vector DB**: ChromaDB (persistent, stored at `backend/chroma_db/`) +- **Embeddings**: `sentence-transformers` (`all-MiniLM-L6-v2`) +- **Frontend**: Vanilla HTML/CSS/JS served statically by FastAPI + +### Key Data Flows + +**Document ingestion** (runs automatically on startup via `app.py`): +``` +docs/*.txt → DocumentProcessor → CourseChunk objects → VectorStore (ChromaDB) +``` +Course docs follow a specific format: `Course Title:`, `Course Link:`, `Course Instructor:`, then `Lesson N:` sections. 
+ +**Query flow**: +``` +POST /api/query → RAGSystem.query() → AIGenerator.generate_response() + → Claude calls `search_course_content` tool (in search_tools.py) + → VectorStore.search() (semantic search, optional course/lesson filters) + → Claude synthesizes answer → response + sources back to frontend +``` + +Claude decides autonomously when to invoke the search tool vs. answer from general knowledge — this is not prompt-injected RAG; it uses Claude's native tool-calling agentic loop. + +### Module Responsibilities + +| File | Responsibility | +|------|---------------| +| `backend/app.py` | FastAPI routes, startup document loading | +| `backend/rag_system.py` | Orchestrates query pipeline; coordinates all other modules | +| `backend/ai_generator.py` | All Anthropic API calls; handles tool execution loop | +| `backend/vector_store.py` | ChromaDB management; two collections: `course_catalog` and `course_content` | +| `backend/document_processor.py` | Parses `.txt` course files into `Course` + `CourseChunk` objects | +| `backend/search_tools.py` | Tool schemas for Claude (`search_course_content`, `get_course_outline`) + their execution; tracks sources for UI | +| `backend/session_manager.py` | In-memory conversation history per session | +| `backend/models.py` | Pydantic models: `Lesson`, `Course`, `CourseChunk` | +| `backend/config.py` | Central config loaded from `.env` (chunk size, model, max results, etc.) | + +### Configuration + +All tunable parameters live in `backend/config.py` and are sourced from `.env`: + +- `ANTHROPIC_MODEL` — Claude model ID +- `CHUNK_SIZE` / `CHUNK_OVERLAP` — text chunking (default 800 / 100 chars) +- `MAX_RESULTS` — semantic search results returned per tool call (default 5) +- `MAX_HISTORY` — conversation turns kept in session (default 2) +- `CHROMA_PATH` — path to ChromaDB persistence directory + +Copy `.env.example` to `.env` and add your `ANTHROPIC_API_KEY` to run the app. 
diff --git a/backend/ai_generator.py b/backend/ai_generator.py index 0363ca90..69927f27 100644 --- a/backend/ai_generator.py +++ b/backend/ai_generator.py @@ -8,7 +8,8 @@ class AIGenerator: SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information. Search Tool Usage: -- Use the search tool **only** for questions about specific course content or detailed educational materials +- Use **get_course_outline** when the user asks for a course outline, structure, syllabus, or lesson list. Always include the course title, course link, and each lesson's number and title in your response. +- Use **search_course_content** only for questions about specific course content or detailed educational materials. - **One search per query maximum** - Synthesize search results into accurate, fact-based responses - If search yields no results, state this clearly without offering alternatives @@ -132,4 +133,6 @@ def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any], # Get final response final_response = self.client.messages.create(**final_params) - return final_response.content[0].text \ No newline at end of file + if not final_response.content: + return "I found relevant information but was unable to generate a response. Please try rephrasing your question." 
+ return final_response.content[0].text diff --git a/backend/app.py b/backend/app.py index 5a69d741..b19fd8d1 100644 --- a/backend/app.py +++ b/backend/app.py @@ -40,10 +40,15 @@ class QueryRequest(BaseModel): query: str session_id: Optional[str] = None +class SourceItem(BaseModel): + """A single source reference returned with a query response""" + label: str + url: Optional[str] = None + class QueryResponse(BaseModel): """Response model for course queries""" answer: str - sources: List[str] + sources: List[SourceItem] session_id: str class CourseStats(BaseModel): @@ -73,6 +78,12 @@ async def query_documents(request: QueryRequest): except Exception as e: raise HTTPException(status_code=500, detail=str(e)) +@app.delete("/api/session/{session_id}") +async def delete_session(session_id: str): + """Clear conversation history for a session""" + rag_system.session_manager.clear_session(session_id) + return {"status": "ok"} + @app.get("/api/courses", response_model=CourseStats) async def get_course_stats(): """Get course analytics and statistics""" diff --git a/backend/config.py b/backend/config.py index d9f6392e..29c4f468 100644 --- a/backend/config.py +++ b/backend/config.py @@ -22,7 +22,7 @@ class Config: MAX_HISTORY: int = 2 # Number of conversation messages to remember # Database paths - CHROMA_PATH: str = "./chroma_db" # ChromaDB storage location + CHROMA_PATH: str = os.path.join(os.path.dirname(__file__), "chroma_db") # ChromaDB storage location config = Config() diff --git a/backend/rag_system.py b/backend/rag_system.py index 50d848c8..443649f0 100644 --- a/backend/rag_system.py +++ b/backend/rag_system.py @@ -4,7 +4,7 @@ from vector_store import VectorStore from ai_generator import AIGenerator from session_manager import SessionManager -from search_tools import ToolManager, CourseSearchTool +from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool from models import Course, Lesson, CourseChunk class RAGSystem: @@ -23,6 +23,8 @@ def __init__(self, 
config): self.tool_manager = ToolManager() self.search_tool = CourseSearchTool(self.vector_store) self.tool_manager.register_tool(self.search_tool) + self.outline_tool = CourseOutlineTool(self.vector_store) + self.tool_manager.register_tool(self.outline_tool) def add_course_document(self, file_path: str) -> Tuple[Course, int]: """ diff --git a/backend/search_tools.py b/backend/search_tools.py index adfe8235..6c2725a9 100644 --- a/backend/search_tools.py +++ b/backend/search_tools.py @@ -89,30 +89,74 @@ def _format_results(self, results: SearchResults) -> str: """Format search results with course and lesson context""" formatted = [] sources = [] # Track sources for the UI - + seen = set() # Deduplicate sources + for doc, meta in zip(results.documents, results.metadata): course_title = meta.get('course_title', 'unknown') lesson_num = meta.get('lesson_number') - + # Build context header header = f"[{course_title}" if lesson_num is not None: header += f" - Lesson {lesson_num}" header += "]" - - # Track source for the UI - source = course_title - if lesson_num is not None: - source += f" - Lesson {lesson_num}" - sources.append(source) - + + # Track source for the UI (deduplicated) + source_key = (course_title, lesson_num) + if source_key not in seen: + seen.add(source_key) + label = course_title + if lesson_num is not None: + label += f" - Lesson {lesson_num}" + url = None + if lesson_num is not None: + url = self.store.get_lesson_link(course_title, lesson_num) + sources.append({"label": label, "url": url}) + formatted.append(f"{header}\n{doc}") - + # Store sources for retrieval self.last_sources = sources - + return "\n\n".join(formatted) +class CourseOutlineTool(Tool): + """Tool for retrieving a course's full outline from the course catalog""" + + def __init__(self, vector_store: VectorStore): + self.store = vector_store + + def get_tool_definition(self) -> Dict[str, Any]: + return { + "name": "get_course_outline", + "description": "Get the full outline of a course: 
its title, link, and ordered list of lessons with their numbers and titles. Use this for questions about course structure, outline, syllabus, or lesson list.", + "input_schema": { + "type": "object", + "properties": { + "course_title": { + "type": "string", + "description": "The name or partial name of the course to look up" + } + }, + "required": ["course_title"] + } + } + + def execute(self, course_title: str) -> str: + data = self.store.get_course_metadata_by_name(course_title) + if not data: + return f"No course found matching '{course_title}'." + + lines = [ + f"Course: {data['title']}", + f"Link: {data['course_link']}", + "Lessons:" + ] + for lesson in data["lessons"]: + lines.append(f" Lesson {lesson['lesson_number']}: {lesson['lesson_title']}") + return "\n".join(lines) + + class ToolManager: """Manages available tools for the AI""" diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py new file mode 100644 index 00000000..a115e798 --- /dev/null +++ b/backend/tests/conftest.py @@ -0,0 +1,46 @@ +"""Shared fixtures for backend tests.""" +import sys +import os + +# Ensure backend directory is on the path so imports work +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest +from config import config +from vector_store import VectorStore +from search_tools import CourseSearchTool, CourseOutlineTool, ToolManager +from ai_generator import AIGenerator +from rag_system import RAGSystem + + +@pytest.fixture(scope="session") +def vector_store(): + return VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) + + +@pytest.fixture(scope="session") +def search_tool(vector_store): + return CourseSearchTool(vector_store) + + +@pytest.fixture(scope="session") +def ai_generator(): + return AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL) + + +@pytest.fixture(scope="session") +def 
outline_tool(vector_store): + return CourseOutlineTool(vector_store) + + +@pytest.fixture(scope="session") +def tool_manager(search_tool, outline_tool): + tm = ToolManager() + tm.register_tool(search_tool) + tm.register_tool(outline_tool) + return tm + + +@pytest.fixture(scope="session") +def rag_system(): + return RAGSystem(config) diff --git a/backend/tests/test_ai_generator.py b/backend/tests/test_ai_generator.py new file mode 100644 index 00000000..f3a6b857 --- /dev/null +++ b/backend/tests/test_ai_generator.py @@ -0,0 +1,115 @@ +""" +Tests for AIGenerator in ai_generator.py. + +Verifies that: +- The generator calls the search tool for course-specific questions +- The generator does NOT call the search tool for general knowledge questions +- Tool execution results are incorporated into the final response +- The two-turn agentic loop works correctly +""" +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest +from unittest.mock import MagicMock, patch +from ai_generator import AIGenerator +from search_tools import CourseSearchTool, ToolManager +from vector_store import VectorStore +from config import config + + +@pytest.fixture(scope="module") +def generator(): + return AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL) + + +@pytest.fixture(scope="module") +def real_tool_manager(): + """Tool manager backed by real vector store.""" + store = VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) + tool = CourseSearchTool(store) + tm = ToolManager() + tm.register_tool(tool) + return tm + + +# --------------------------------------------------------------------------- +# Tool definitions are passed correctly +# --------------------------------------------------------------------------- + +def test_tool_definitions_non_empty(real_tool_manager): + """ToolManager must expose at least one tool definition to Claude.""" + defs = real_tool_manager.get_tool_definitions() + assert 
isinstance(defs, list) and len(defs) > 0, "No tool definitions registered" + names = [d["name"] for d in defs] + assert "search_course_content" in names, f"search_course_content missing from: {names}" + + +# --------------------------------------------------------------------------- +# General knowledge — should NOT invoke a tool +# --------------------------------------------------------------------------- + +def test_general_question_does_not_use_tool(generator, real_tool_manager): + """A general question like 'what is Python?' should be answered without tool use.""" + real_tool_manager.reset_sources() + response = generator.generate_response( + query="What is Python programming language?", + tools=real_tool_manager.get_tool_definitions(), + tool_manager=real_tool_manager + ) + assert isinstance(response, str) and len(response) > 0, "Response should not be empty" + sources = real_tool_manager.get_last_sources() + assert sources == [], ( + f"General question should not trigger tool use, but got sources: {sources}" + ) + + +# --------------------------------------------------------------------------- +# Course-specific question — should invoke the search tool +# --------------------------------------------------------------------------- + +def test_course_specific_question_uses_tool(generator, real_tool_manager): + """A course-specific question should trigger search_course_content tool use.""" + real_tool_manager.reset_sources() + response = generator.generate_response( + query="Answer this question about course materials: What topics are covered in the MCP course?", + tools=real_tool_manager.get_tool_definitions(), + tool_manager=real_tool_manager + ) + assert isinstance(response, str) and len(response) > 0, ( + f"Response should not be empty, got: {response!r}" + ) + + +def test_generate_response_returns_string_not_exception(generator, real_tool_manager): + """generate_response() must never raise — always return a string.""" + real_tool_manager.reset_sources() + 
try: + result = generator.generate_response( + query="Answer this question about course materials: Explain RAG systems", + tools=real_tool_manager.get_tool_definitions(), + tool_manager=real_tool_manager + ) + assert isinstance(result, str), f"Expected str, got {type(result)}: {result!r}" + except Exception as e: + pytest.fail(f"generate_response() raised an exception: {e}") + + +# --------------------------------------------------------------------------- +# Tool manager execute_tool dispatching +# --------------------------------------------------------------------------- + +def test_tool_manager_executes_search_tool(real_tool_manager): + """ToolManager.execute_tool() should dispatch to search_course_content.""" + result = real_tool_manager.execute_tool( + "search_course_content", + query="chromadb embeddings" + ) + assert isinstance(result, str), f"Expected str from execute_tool, got: {type(result)}" + + +def test_tool_manager_unknown_tool_returns_error(real_tool_manager): + """Calling a non-existent tool should return an error string, not raise.""" + result = real_tool_manager.execute_tool("nonexistent_tool", query="test") + assert "not found" in result.lower(), f"Expected 'not found' message, got: {result!r}" diff --git a/backend/tests/test_outline_tool.py b/backend/tests/test_outline_tool.py new file mode 100644 index 00000000..5b536992 --- /dev/null +++ b/backend/tests/test_outline_tool.py @@ -0,0 +1,57 @@ +""" +Tests for CourseOutlineTool.execute() in search_tools.py. 
+""" +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest +from search_tools import CourseOutlineTool +from vector_store import VectorStore +from config import config + + +@pytest.fixture(scope="module") +def tool(): + store = VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) + return CourseOutlineTool(store) + + +def test_tool_definition_shape(tool): + """get_tool_definition() must return a valid Anthropic tool schema.""" + defn = tool.get_tool_definition() + assert defn["name"] == "get_course_outline" + assert "description" in defn + assert defn["input_schema"]["required"] == ["course_title"] + + +def test_known_course_returns_title_and_lessons(tool): + """A known course name should return its title and at least one lesson.""" + result = tool.execute(course_title="MCP") + assert isinstance(result, str), f"Expected str, got {type(result)}" + assert "Course:" in result, f"Missing 'Course:' header: {result!r}" + assert "Lesson" in result, f"Expected lesson list, got: {result!r}" + + +def test_known_course_includes_link(tool): + """Result for a known course should include a course link.""" + result = tool.execute(course_title="MCP") + assert "Link:" in result, f"Expected 'Link:' in result: {result!r}" + + +def test_unknown_course_returns_error_string(tool): + """An unknown course name must return an informative error string, not raise.""" + result = tool.execute(course_title="ZZZ_TOTALLY_NONEXISTENT_COURSE_XYZ") + assert isinstance(result, str) + assert "no course found" in result.lower(), ( + f"Expected 'No course found' message, got: {result!r}" + ) + + +def test_execute_never_raises(tool): + """execute() must not raise for any input.""" + try: + result = tool.execute(course_title="asdfjkl qwerty") + assert isinstance(result, str) + except Exception as e: + pytest.fail(f"CourseOutlineTool.execute() raised: {e}") diff --git a/backend/tests/test_rag_system.py 
b/backend/tests/test_rag_system.py new file mode 100644 index 00000000..a63ab588 --- /dev/null +++ b/backend/tests/test_rag_system.py @@ -0,0 +1,127 @@ +""" +Tests for RAGSystem.query() in rag_system.py. + +Verifies the full end-to-end pipeline for content-related queries: +- Query returns a (response, sources) tuple without raising +- Response is a non-empty string +- Sources is a list +- Specific content questions return relevant answers +""" +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest +from rag_system import RAGSystem +from config import config + + +@pytest.fixture(scope="module") +def rag(): + return RAGSystem(config) + + +# --------------------------------------------------------------------------- +# Return type contract +# --------------------------------------------------------------------------- + +def test_query_returns_tuple(rag): + """query() must return a (str, list) tuple.""" + result = rag.query("What is RAG?") + assert isinstance(result, tuple) and len(result) == 2, ( + f"Expected (str, list) tuple, got: {result!r}" + ) + answer, sources = result + assert isinstance(answer, str), f"answer must be str, got {type(answer)}" + assert isinstance(sources, list), f"sources must be list, got {type(sources)}" + + +def test_query_answer_nonempty(rag): + """query() must return a non-empty answer for any reasonable question.""" + answer, _ = rag.query("What is RAG?") + assert len(answer.strip()) > 0, "Answer should not be empty" + + +# --------------------------------------------------------------------------- +# Content-related queries (the failing case) +# --------------------------------------------------------------------------- + +def test_content_query_does_not_crash(rag): + """A content-specific question must not raise an exception.""" + try: + answer, sources = rag.query( + "What topics are covered in the MCP course?" 
+ ) + assert isinstance(answer, str), f"Expected str answer, got: {type(answer)}" + except Exception as e: + pytest.fail(f"RAGSystem.query() raised an exception: {e}") + + +def test_content_query_no_query_failed_message(rag): + """A content query should not produce a 'query failed' or error response.""" + answer, _ = rag.query("What is covered in lesson 1 of the MCP course?") + lower = answer.lower() + assert "query failed" not in lower, ( + f"Got 'query failed' in response: {answer!r}" + ) + assert "error" not in lower or len(answer) > 50, ( + f"Response looks like an error message: {answer!r}" + ) + + +def test_rag_query_with_known_course(rag): + """Querying about a course that exists in ChromaDB should return content.""" + answer, sources = rag.query( + "Tell me about the Advanced Retrieval for AI course" + ) + assert isinstance(answer, str) and len(answer) > 20, ( + f"Expected substantive answer, got: {answer!r}" + ) + + +def test_rag_query_returns_sources_for_course_content(rag): + """A course-content query should populate sources.""" + _, sources = rag.query("What does lesson 2 of the MCP course cover?") + # sources may be empty if tool wasn't invoked, but must be a list + assert isinstance(sources, list), f"sources must be a list, got: {type(sources)}" + + +# --------------------------------------------------------------------------- +# Session handling +# --------------------------------------------------------------------------- + +def test_query_with_session_id(rag): + """query() with a session_id must not crash and must return valid results.""" + session_id = rag.session_manager.create_session() + answer, sources = rag.query("What is ChromaDB?", session_id=session_id) + assert isinstance(answer, str) and len(answer) > 0 + + +def test_multi_turn_conversation(rag): + """A second query in the same session should work without error.""" + session_id = rag.session_manager.create_session() + rag.query("What is RAG?", session_id=session_id) + answer, _ = 
rag.query("Can you give me an example?", session_id=session_id) + assert isinstance(answer, str) and len(answer) > 0, ( + f"Second turn failed, got: {answer!r}" + ) + + +# --------------------------------------------------------------------------- +# Vector store connectivity +# --------------------------------------------------------------------------- + +def test_courses_are_loaded(rag): + """ChromaDB should have courses loaded — if 0, ingestion failed.""" + count = rag.vector_store.get_course_count() + assert count > 0, ( + f"No courses in vector store! Ingestion may have failed. count={count}" + ) + + +def test_course_search_tool_registered(rag): + """search_course_content tool must be registered in the tool manager.""" + names = [d["name"] for d in rag.tool_manager.get_tool_definitions()] + assert "search_course_content" in names, ( + f"search_course_content not registered. Registered: {names}" + ) diff --git a/backend/tests/test_search_tool.py b/backend/tests/test_search_tool.py new file mode 100644 index 00000000..23226770 --- /dev/null +++ b/backend/tests/test_search_tool.py @@ -0,0 +1,93 @@ +""" +Tests for CourseSearchTool.execute() in search_tools.py. + +These are integration tests against the real ChromaDB vector store. +They verify that the tool returns usable results (or graceful errors) +for the kinds of queries the RAG chatbot receives. 
+""" +import sys +import os +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest +from search_tools import CourseSearchTool +from vector_store import VectorStore +from config import config + + +@pytest.fixture(scope="module") +def tool(): + store = VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS) + return CourseSearchTool(store) + + +# --------------------------------------------------------------------------- +# Basic execute() smoke tests +# --------------------------------------------------------------------------- + +def test_execute_returns_string(tool): + """execute() must always return a string, never raise.""" + result = tool.execute(query="what is RAG?") + assert isinstance(result, str), f"Expected str, got {type(result)}" + + +def test_execute_nonempty_for_known_topic(tool): + """A broad topic query should return content, not an empty result message.""" + result = tool.execute(query="RAG retrieval augmented generation") + assert "No relevant content found" not in result, ( + f"Expected real results but got: {result!r}" + ) + + +def test_execute_with_course_name_filter(tool): + """Filtering by a known partial course name should narrow results to that course.""" + result = tool.execute(query="lesson content", course_name="MCP") + # Should either find results or report course not found — must not crash + assert isinstance(result, str) + if "No relevant content found" not in result and "No course found" not in result: + assert "MCP" in result or "lesson" in result.lower(), ( + f"Expected MCP-related content, got: {result[:300]}" + ) + + +def test_execute_with_lesson_number_filter(tool): + """Filtering by lesson number should return content for that lesson.""" + result = tool.execute(query="introduction overview", lesson_number=1) + assert isinstance(result, str) + + +def test_execute_nonexistent_course_returns_error_string(tool): + """An unknown course name must return an error string, not raise.""" + 
result = tool.execute(query="anything", course_name="ZZZ_NONEXISTENT_COURSE_XYZ") + assert isinstance(result, str) + assert len(result) > 0 + + +def test_execute_garbage_query_does_not_crash(tool): + """Completely irrelevant query must not raise — may return no results.""" + result = tool.execute(query="asdfjkl qwerty zxcvbnm 12345") + assert isinstance(result, str) + + +# --------------------------------------------------------------------------- +# Sources tracking +# --------------------------------------------------------------------------- + +def test_sources_populated_after_successful_search(tool): + """After a successful search, last_sources should be a non-empty list.""" + tool.last_sources = [] # Reset + result = tool.execute(query="chromadb vector database") + if "No relevant content found" not in result: + assert isinstance(tool.last_sources, list), "last_sources should be a list" + assert len(tool.last_sources) > 0, "last_sources should not be empty after a hit" + first = tool.last_sources[0] + assert "label" in first, f"Source entry missing 'label': {first}" + + +def test_sources_is_list_after_any_search(tool): + """last_sources must always be a list after any search (semantic search always finds nearest match).""" + tool.last_sources = [] + tool.execute(query="some query", course_name="some course") + assert isinstance(tool.last_sources, list), ( + f"last_sources should always be a list, got: {type(tool.last_sources)}" + ) diff --git a/backend/vector_store.py b/backend/vector_store.py index 390abe71..42874366 100644 --- a/backend/vector_store.py +++ b/backend/vector_store.py @@ -1,4 +1,5 @@ import chromadb +import json from chromadb.config import Settings from typing import List, Dict, Any, Optional from dataclasses import dataclass @@ -264,4 +265,24 @@ def get_lesson_link(self, course_title: str, lesson_number: int) -> Optional[str return None except Exception as e: print(f"Error getting lesson link: {e}") - \ No newline at end of file + + def 
get_course_metadata_by_name(self, course_name: str) -> Optional[Dict[str, Any]]: + """Resolve a course name via semantic search and return its full metadata.""" + try: + results = self.course_catalog.query( + query_texts=[course_name], + n_results=1, + include=["metadatas"] + ) + if not results["metadatas"] or not results["metadatas"][0]: + return None + meta = results["metadatas"][0][0] + lessons = json.loads(meta.get("lessons_json", "[]")) + return { + "title": meta.get("title", ""), + "course_link": meta.get("course_link", ""), + "lessons": lessons + } + except Exception as e: + print(f"Error getting course metadata by name: {e}") + return None diff --git a/frontend/index.html b/frontend/index.html index f8e25a62..2bcd76dd 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -7,7 +7,10 @@