https-deeplearning-ai · Raveendra-Pai · Mar 31, 2026 · Apr 7, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/.gitignore b/.gitignore
@@ -25,6 +25,8 @@ uploads/
 .idea/
 *.swp
 *.swo
+.env
+.env.*
 
 # OS
 .DS_Store

diff --git a/CLAUDE.md b/CLAUDE.md
@@ -0,0 +1,76 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Commands
+
+```bash
+# Install dependencies
+uv sync
+
+# Start the development server (from repo root)
+cd backend && uv run uvicorn app:app --reload --port 8000
+# or use the helper script:
+bash run.sh
+
+# App available at http://localhost:8000
+# OpenAPI docs at http://localhost:8000/docs
+```
+
+No lint command is configured in this project, and there is no dedicated test command; pytest fixtures for the backend tests live in `backend/tests/conftest.py`.
+
+## Architecture
+
+This is a **RAG (Retrieval-Augmented Generation) chatbot** that answers questions about course content using Claude's tool-calling API + ChromaDB semantic search.
+
+### Stack
+
+- **Backend**: FastAPI (Python 3.13), managed with `uv`
+- **AI**: Anthropic Claude API (`claude-sonnet-4-20250514`) with tool calling
+- **Vector DB**: ChromaDB (persistent, stored at `backend/chroma_db/`)
+- **Embeddings**: `sentence-transformers` (`all-MiniLM-L6-v2`)
+- **Frontend**: Vanilla HTML/CSS/JS served statically by FastAPI
+
+### Key Data Flows
+
+**Document ingestion** (runs automatically on startup via `app.py`):
+```
+docs/*.txt → DocumentProcessor → CourseChunk objects → VectorStore (ChromaDB)
+```
+Course docs follow a specific format: `Course Title:`, `Course Link:`, `Course Instructor:`, then `Lesson N:` sections.
+
+**Query flow**:
+```
+POST /api/query → RAGSystem.query() → AIGenerator.generate_response()
+  → Claude calls `search_course_content` tool (in search_tools.py)
+  → VectorStore.search() (semantic search, optional course/lesson filters)
+  → Claude synthesizes answer → response + sources back to frontend
+```
+
+Claude decides autonomously when to invoke the search tool vs. answer from general knowledge. This is not prompt-injected RAG; it relies on Claude's native tool-calling agentic loop.
+
+### Module Responsibilities
+
+| File | Responsibility |
+|------|---------------|
+| `backend/app.py` | FastAPI routes, startup document loading |
+| `backend/rag_system.py` | Orchestrates query pipeline; coordinates all other modules |
+| `backend/ai_generator.py` | All Anthropic API calls; handles tool execution loop |
+| `backend/vector_store.py` | ChromaDB management; two collections: `course_catalog` and `course_content` |
+| `backend/document_processor.py` | Parses `.txt` course files into `Course` + `CourseChunk` objects |
+| `backend/search_tools.py` | Tool schema for Claude + search execution; tracks sources for UI |
+| `backend/session_manager.py` | In-memory conversation history per session |
+| `backend/models.py` | Pydantic models: `Lesson`, `Course`, `CourseChunk` |
+| `backend/config.py` | Central config loaded from `.env` (chunk size, model, max results, etc.) |
+
+### Configuration
+
+All tunable parameters live in `backend/config.py`, which defines their defaults; secrets such as `ANTHROPIC_API_KEY` are supplied through `.env`. The main settings are:
+
+- `ANTHROPIC_MODEL` — Claude model ID
+- `CHUNK_SIZE` / `CHUNK_OVERLAP` — text chunking (default 800 / 100 chars)
+- `MAX_RESULTS` — semantic search results returned per tool call (default 5)
+- `MAX_HISTORY` — conversation turns kept in session (default 2)
+- `CHROMA_PATH` — path to ChromaDB persistence directory
+
+Copy `.env.example` to `.env` and add your `ANTHROPIC_API_KEY` to run the app.
diff --git a/backend/ai_generator.py b/backend/ai_generator.py
@@ -8,7 +8,8 @@ class AIGenerator:
     SYSTEM_PROMPT = """ You are an AI assistant specialized in course materials and educational content with access to a comprehensive search tool for course information.
 
 Search Tool Usage:
-- Use the search tool **only** for questions about specific course content or detailed educational materials
+- Use **get_course_outline** when the user asks for a course outline, structure, syllabus, or lesson list. Always include the course title, course link, and each lesson's number and title in your response.
+- Use **search_course_content** only for questions about specific course content or detailed educational materials.
 - **One search per query maximum**
 - Synthesize search results into accurate, fact-based responses
 - If search yields no results, state this clearly without offering alternatives
@@ -132,4 +133,6 @@ def _handle_tool_execution(self, initial_response, base_params: Dict[str, Any],
 
         # Get final response
         final_response = self.client.messages.create(**final_params)
-        return final_response.content[0].text
+        if not final_response.content:
+            return "I found relevant information but was unable to generate a response. Please try rephrasing your question."
+        return final_response.content[0].text
diff --git a/backend/app.py b/backend/app.py
@@ -40,10 +40,15 @@ class QueryRequest(BaseModel):
     query: str
     session_id: Optional[str] = None
 
+class SourceItem(BaseModel):
+    """A single source reference returned with a query response"""
+    label: str  # text identifying the source (the search tool builds it from course title and lesson number)
+    url: Optional[str] = None  # link for the source; stays None when no URL is supplied
+
 class QueryResponse(BaseModel):
     """Response model for course queries"""
     answer: str
-    sources: List[str]
+    sources: List[SourceItem]
     session_id: str
 
 class CourseStats(BaseModel):
@@ -73,6 +78,12 @@ async def query_documents(request: QueryRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+@app.delete("/api/session/{session_id}")
+async def delete_session(session_id: str):
+    """Clear conversation history for a session"""
+    rag_system.session_manager.clear_session(session_id)  # delegate to the session manager owned by the RAG system
+    return {"status": "ok"}  # fixed acknowledgement payload; nothing session-specific is reported back
+
 @app.get("/api/courses", response_model=CourseStats)
 async def get_course_stats():
     """Get course analytics and statistics"""

diff --git a/backend/config.py b/backend/config.py
@@ -22,7 +22,7 @@ class Config:
     MAX_HISTORY: int = 2         # Number of conversation messages to remember
 
     # Database paths
-    CHROMA_PATH: str = "./chroma_db"  # ChromaDB storage location
+    CHROMA_PATH: str = os.path.join(os.path.dirname(__file__), "chroma_db")  # ChromaDB storage location
 
 config = Config()
 

diff --git a/backend/rag_system.py b/backend/rag_system.py
@@ -4,7 +4,7 @@
 from vector_store import VectorStore
 from ai_generator import AIGenerator
 from session_manager import SessionManager
-from search_tools import ToolManager, CourseSearchTool
+from search_tools import ToolManager, CourseSearchTool, CourseOutlineTool
 from models import Course, Lesson, CourseChunk
 
 class RAGSystem:
@@ -23,6 +23,8 @@ def __init__(self, config):
         self.tool_manager = ToolManager()
         self.search_tool = CourseSearchTool(self.vector_store)
         self.tool_manager.register_tool(self.search_tool)
+        self.outline_tool = CourseOutlineTool(self.vector_store)
+        self.tool_manager.register_tool(self.outline_tool)
 
     def add_course_document(self, file_path: str) -> Tuple[Course, int]:
         """

diff --git a/backend/search_tools.py b/backend/search_tools.py
@@ -89,30 +89,74 @@ def _format_results(self, results: SearchResults) -> str:
         """Format search results with course and lesson context"""
         formatted = []
         sources = []  # Track sources for the UI
-
+        seen = set()  # Deduplicate sources
+
         for doc, meta in zip(results.documents, results.metadata):
             course_title = meta.get('course_title', 'unknown')
             lesson_num = meta.get('lesson_number')
-            
+
             # Build context header
             header = f"[{course_title}"
             if lesson_num is not None:
                 header += f" - Lesson {lesson_num}"
             header += "]"
-
-            # Track source for the UI
-            source = course_title
-            if lesson_num is not None:
-                source += f" - Lesson {lesson_num}"
-            sources.append(source)
-
+
+            # Track source for the UI (deduplicated)
+            source_key = (course_title, lesson_num)
+            if source_key not in seen:
+                seen.add(source_key)
+                label = course_title
+                if lesson_num is not None:
+                    label += f" - Lesson {lesson_num}"
+                url = None
+                if lesson_num is not None:
+                    url = self.store.get_lesson_link(course_title, lesson_num)
+                sources.append({"label": label, "url": url})
+
             formatted.append(f"{header}\n{doc}")
-        
+
         # Store sources for retrieval
         self.last_sources = sources
-        
+
         return "\n\n".join(formatted)
 
+class CourseOutlineTool(Tool):
+    """Tool for retrieving a course's full outline from the course catalog"""
+
+    def __init__(self, vector_store: VectorStore):
+        self.store = vector_store  # queried by execute() for the course metadata
+
+    def get_tool_definition(self) -> Dict[str, Any]:  # schema advertised to the model for this tool
+        return {
+            "name": "get_course_outline",
+            "description": "Get the full outline of a course: its title, link, and ordered list of lessons with their numbers and titles. Use this for questions about course structure, outline, syllabus, or lesson list.",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "course_title": {
+                        "type": "string",
+                        "description": "The name or partial name of the course to look up"
+                    }
+                },
+                "required": ["course_title"]  # the single declared parameter is mandatory
+            }
+        }
+
+    def execute(self, course_title: str) -> str:  # returns the outline as plain text, or a not-found message
+        data = self.store.get_course_metadata_by_name(course_title)  # presumably resolves partial names; verify in VectorStore
+        if not data:  # any falsy lookup result is reported as "no match"
+            return f"No course found matching '{course_title}'."
+
+        lines = [  # header lines first; one line per lesson is appended below
+            f"Course: {data['title']}",
+            f"Link: {data['course_link']}",
+            "Lessons:"
+        ]
+        for lesson in data["lessons"]:  # each entry is expected to carry 'lesson_number' and 'lesson_title'
+            lines.append(f"  Lesson {lesson['lesson_number']}: {lesson['lesson_title']}")
+        return "\n".join(lines)  # single newline-separated block
+
+
 class ToolManager:
     """Manages available tools for the AI"""
 

diff --git a/backend/tests/__init__.py b/backend/tests/__init__.py
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
@@ -0,0 +1,46 @@
+"""Shared fixtures for backend tests."""
+import sys
+import os
+
+# Ensure backend directory is on the path so imports work
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+import pytest
+from config import config
+from vector_store import VectorStore
+from search_tools import CourseSearchTool, CourseOutlineTool, ToolManager
+from ai_generator import AIGenerator
+from rag_system import RAGSystem
+
+
+@pytest.fixture(scope="session")  # built once and shared for the whole test session
+def vector_store():  # VectorStore configured from the project config rather than test-specific values
+    return VectorStore(config.CHROMA_PATH, config.EMBEDDING_MODEL, config.MAX_RESULTS)
+
+
+@pytest.fixture(scope="session")  # built once and shared for the whole test session
+def search_tool(vector_store):  # CourseSearchTool backed by the shared vector_store fixture
+    return CourseSearchTool(vector_store)
+
+
+@pytest.fixture(scope="session")  # built once and shared for the whole test session
+def ai_generator():  # AIGenerator built with the API key and model taken from the project config
+    return AIGenerator(config.ANTHROPIC_API_KEY, config.ANTHROPIC_MODEL)
+
+
+@pytest.fixture(scope="session")  # built once and shared for the whole test session
+def outline_tool(vector_store):  # CourseOutlineTool backed by the shared vector_store fixture
+    return CourseOutlineTool(vector_store)
+
+
+@pytest.fixture(scope="session")  # built once and shared for the whole test session
+def tool_manager(search_tool, outline_tool):  # ToolManager with both tool fixtures registered
+    tm = ToolManager()
+    tm.register_tool(search_tool)  # same registration order as RAGSystem.__init__: search tool first
+    tm.register_tool(outline_tool)
+    return tm
+
+
+@pytest.fixture(scope="session")  # built once and shared for the whole test session
+def rag_system():  # takes no fixture arguments: RAGSystem is constructed from config alone
+    return RAGSystem(config)
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,6 +25,8 @@ uploads/ @@
     .idea/
     *.swp
     *.swo
+    .env
+    .env.*
     # OS
     .DS_Store
@@ Expand Down @@