Initial DocsMCP stack

2026-06-05 23:02:55 +01:00
commit 421b6f973a
51 changed files with 7414 additions and 0 deletions
@@ -0,0 +1,36 @@
+# Backend API Service
+FROM python:3.11-slim
+
+# Send Python output straight to the container log (the application reports
+# progress with print()) and skip writing .pyc files, matching the WebUI image.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# Install system dependencies:
+#   curl                  - used by the HEALTHCHECK below
+#   git                   - invoked via subprocess by the repository clone/update code
+#   libgl1, libglib2.0-0  - native libraries for PDF parsing and embeddings
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    git \
+    libgl1 \
+    libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create cache directory with persistent volume mount point
+RUN mkdir -p /app/.embed_cache
+
+# Install Python dependencies (copied first so this layer is cached until
+# requirements.txt changes)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application code
+COPY app/ ./app/
+
+# Mount volumes at these paths (configured in docker-compose)
+# ./docs -> /docs
+# ./data -> /data
+# /data holds: db.sqlite, qdrant storage volume mount from docker-compose
+
+# Expose API port (documentation only; publishing is done by docker-compose)
+EXPOSE 8787
+
+# Healthcheck: -f fails on HTTP errors, -sS hides progress but keeps error text
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -fsS http://localhost:8787/health || exit 1
+
+# Run the FastAPI application
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8787"]
@@ -0,0 +1,30 @@
+# WebUI-specific Dockerfile (uses same base as docs-api)
+# Both images install the same requirements.txt, so they share one Python
+# version to keep dependency resolution identical.
+FROM python:3.11-slim
+
+# Set environment variables
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    DOCS_API_URL=http://docs-api:8787 \
+    WEBUI_PORT=8790
+
+# Install dependencies (curl is used by the HEALTHCHECK below)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy requirements first for layer caching
+COPY backend/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy backend code
+COPY backend/app /app/backend/app
+
+# Create uploads directory
+RUN mkdir -p /app/backend/app/webui/uploads
+
+# Expose port (documentation only; publishing is done by docker-compose)
+EXPOSE 8790
+
+# Healthcheck against the same /health route the docs-api image probes;
+# this container serves the same application module on port 8790.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -fsS http://localhost:8790/health || exit 1
+
+CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8790"]
@@ -0,0 +1,2 @@
+# Backend API Package - Contains all FastAPI application modules
+# The presence of this file makes the directory an importable Python package
@@ -0,0 +1,304 @@
+# Text Chunking Utilities with heading-aware splitting
+import re
+from typing import List
+
+
+def estimate_tokens(text: str) -> int:
+    """
+    Approximate how many tokens a piece of text contains.
+
+    The heuristic assumes four characters per token and rounds down.
+
+    Args:
+        text: Text whose size should be estimated
+
+    Returns:
+        Character count floor-divided by 4
+    """
+    chars_per_token = 4
+    whole_tokens, _leftover_chars = divmod(len(text), chars_per_token)
+    return whole_tokens
+
+
+def _split_at_headings(text: str) -> List[tuple]:
+    """
+    Split text into sections at markdown heading lines.
+
+    A heading is a line that starts with one to six '#' characters followed
+    by spaces or tabs and at least one non-blank character.
+
+    Args:
+        text: The full text
+
+    Returns:
+        - [] when text is empty
+        - [(text,)] when text contains no heading line
+        - otherwise a list of (heading_line, body) tuples in document order,
+          where body is the stripped text up to the next heading, or None
+          when the section has no body. Text that appears before the first
+          heading is returned first as ("", preamble).
+    """
+    if not text:
+        return []
+
+    # MULTILINE anchors ^/$ to each line; '.' never crosses a newline, so each
+    # match is exactly one heading line.
+    heading_matches = list(re.finditer(r'^#{1,6}[ \t]+\S.*$', text, re.MULTILINE))
+    if not heading_matches:
+        return [(text,)]
+
+    parts = []
+
+    # Keep whatever precedes the first heading instead of discarding it
+    preamble = text[:heading_matches[0].start()].strip()
+    if preamble:
+        parts.append(("", preamble))
+
+    for index, match in enumerate(heading_matches):
+        # A section body runs until the next heading (or the end of the text)
+        if index + 1 < len(heading_matches):
+            body_end = heading_matches[index + 1].start()
+        else:
+            body_end = len(text)
+        body = text[match.end():body_end].strip()
+        parts.append((match.group(0).strip(), body or None))
+
+    return parts
+
+
+def _split_at_paragraphs(text: str, max_tokens: int) -> List[str]:
+    """
+    Split text at paragraph boundaries, packing paragraphs into chunks.
+
+    Consecutive paragraphs are joined with a blank line for as long as the
+    joined text stays within max_tokens (as measured by estimate_tokens).
+    A paragraph that is too large on its own is first broken up with
+    _split_at_sentences and its pieces are packed the same way.
+
+    Args:
+        text: The text to split
+        max_tokens: Maximum tokens per chunk
+
+    Returns:
+        List of non-empty chunks in document order
+    """
+    if not text:
+        return []
+
+    # Paragraphs are separated by one or more blank lines
+    paragraphs = [p for p in re.split(r'\n\s*\n', text.strip()) if p.strip()]
+
+    chunks = []
+    current_chunk = ""
+
+    for para in paragraphs:
+        if estimate_tokens(para) > max_tokens:
+            # Paragraph alone is too big: fall back to sentence-level pieces
+            pieces = _split_at_sentences(para, max_tokens)
+        else:
+            pieces = [para]
+
+        for piece in pieces:
+            # Measure the text that would actually be emitted, separator included
+            candidate = current_chunk + "\n\n" + piece if current_chunk else piece
+            if estimate_tokens(candidate) <= max_tokens:
+                current_chunk = candidate
+            else:
+                # Flush the finished chunk and start a new one with this piece,
+                # so no content is ever emitted twice
+                if current_chunk:
+                    chunks.append(current_chunk)
+                current_chunk = piece
+
+    if current_chunk:
+        chunks.append(current_chunk)
+
+    return chunks
+
+
+def _split_at_sentences(text: str, max_tokens: int) -> List[str]:
+    """
+    Split text at sentence boundaries, packing sentences into chunks.
+
+    Sentences end at '.', '!' or '?' followed by whitespace; the punctuation
+    stays attached to its sentence. Sentences are joined with a single space
+    for as long as the joined text stays within max_tokens (as measured by
+    estimate_tokens). A single sentence that is still too large is cut into
+    fixed-size character slices.
+
+    Args:
+        text: The text to split
+        max_tokens: Maximum tokens per chunk
+
+    Returns:
+        List of non-empty chunks in document order
+    """
+    if not text:
+        return []
+
+    # estimate_tokens counts 4 characters per token, so this many characters
+    # is the largest slice that still fits; never less than one token's worth
+    # so the hard-split loop below always makes progress.
+    max_chars = max(1, max_tokens) * 4
+
+    # Lookbehind split keeps the terminating punctuation with its sentence
+    sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
+
+    chunks = []
+    current_chunk = ""
+
+    for sentence in sentences:
+        candidate = current_chunk + " " + sentence if current_chunk else sentence
+        if estimate_tokens(candidate) <= max_tokens:
+            current_chunk = candidate
+            continue
+
+        # The sentence does not fit next to what we have: flush and start over
+        if current_chunk:
+            chunks.append(current_chunk)
+            current_chunk = ""
+
+        # Hard-split a sentence that exceeds the limit on its own
+        while estimate_tokens(sentence) > max_tokens:
+            chunks.append(sentence[:max_chars])
+            sentence = sentence[max_chars:]
+
+        current_chunk = sentence
+
+    if current_chunk:
+        chunks.append(current_chunk)
+
+    return chunks
+
+
+def chunk_text(text: str, max_tokens: int = 500, overlap_tokens: int = 80) -> List[str]:
+    """
+    Chunk text with a sliding character window that prefers natural boundaries.
+
+    Token limits are converted to characters at 4 characters per token. Each
+    window is cut at the last boundary found in its second half, trying in
+    order: a markdown heading line, a blank line, whitespace after sentence
+    punctuation, any whitespace. If none is found the window is cut at its
+    full length. The following window starts overlap characters before the
+    cut, unless that would not move forward.
+
+    Shortcut: when the text has more than one paragraph, max_tokens is at
+    most 20 and every paragraph fits within max_tokens, the stripped
+    paragraphs are returned as-is.
+
+    Args:
+        text: The full text to chunk
+        max_tokens: Maximum tokens per chunk (default 500)
+        overlap_tokens: Number of overlapping tokens between chunks (default 80);
+            negative values are treated as 0 and the overlap is capped at half
+            of the chunk size
+
+    Returns:
+        List of non-empty, stripped chunk strings ([] for empty text)
+
+    Raises:
+        TypeError: If text is None
+        ValueError: If max_tokens is not greater than 0
+    """
+    if text is None:
+        raise TypeError("text must be a string")
+
+    if not text:
+        return []
+
+    if max_tokens <= 0:
+        raise ValueError("max_tokens must be greater than 0")
+
+    # Work in characters: 4 characters per token, overlap capped at half a chunk
+    max_chars = max(1, max_tokens * 4)
+    overlap_chars = min(max(overlap_tokens, 0) * 4, max_chars // 2)
+    chunks = []
+    clean_text = text.strip()
+
+    # Shortcut for very small limits: return the paragraphs unchanged when each fits
+    paragraphs = [p.strip() for p in re.split(r"\n\s*\n", clean_text) if p.strip()]
+    if 1 < len(paragraphs) and max_tokens <= 20 and all(estimate_tokens(p) <= max_tokens for p in paragraphs):
+        return paragraphs
+
+    start = 0
+
+    while start < len(clean_text):
+        hard_end = min(start + max_chars, len(clean_text))
+        # The rest of the text fits in one window: emit it and stop
+        if hard_end == len(clean_text):
+            final_chunk = clean_text[start:].strip()
+            if final_chunk:
+                chunks.append(final_chunk)
+            break
+
+        window = clean_text[start:hard_end]
+        # Only accept split points in the second half of the window
+        min_split = max(1, len(window) // 2)
+        split_at = None
+
+        # Boundary patterns in priority order: heading, blank line, sentence end, whitespace
+        for pattern in (r"\n#{1,6}\s+", r"\n\s*\n", r"(?<=[.!?])\s+", r"\s+"):
+            matches = list(re.finditer(pattern, window))
+            candidates = [m.start() for m in matches if m.start() >= min_split]
+            if candidates:
+                # Use the latest qualifying boundary to keep the chunk as large as possible
+                split_at = max(candidates)
+                break
+
+        # No boundary found: cut at the full window length
+        if split_at is None:
+            split_at = len(window)
+
+        end = start + split_at
+        chunk = clean_text[start:end].strip()
+        if chunk:
+            chunks.append(chunk)
+
+        # Step back by the overlap, but always move forward
+        next_start = end - overlap_chars if overlap_chars else end
+        if next_start <= start:
+            next_start = end
+        start = next_start
+
+    return [c for c in chunks if c.strip()]
+
+
+# Self-test: runs only when this module is executed directly as a script
+if __name__ == "__main__":
+    # Test estimate_tokens
+    test_text_400 = "a" * 400
+    assert estimate_tokens(test_text_400) == 100, f"Expected 100 tokens for 400 chars, got {estimate_tokens(test_text_400)}"
+    
+    print(f"estimate_tokens test passed: 400 chars -> {estimate_tokens(test_text_400)} tokens")
+    
+    # Test with empty text
+    assert chunk_text("") == [], "Empty text should return empty list"
+    print("chunk_text empty test passed")
+    
+    # Test small text (single chunk)
+    small = "This is a very short text that should be returned as a single chunk."
+    chunks = chunk_text(small)
+    assert len(chunks) == 1, f"Short text should be one chunk, got {len(chunks)}"
+    assert chunks[0] == small, "Content should match for small text"
+    print("chunk_text single chunk test passed")
+    
+    # Test chunking with headings
+    markdown_with_headings = """# Introduction
+
+This is the introduction section.
+
+## Background
+
+Background information goes here to make this longer and test chunking.
+
+This paragraph has more content about the background topic.
+
+### Details
+
+Specific details about the background are provided in this subsection.
+
+More details follow here to ensure we have enough text to properly test heading preservation.
+
+## Conclusion
+
+The conclusion wraps up everything nicely."""
+    
+    chunks = chunk_text(markdown_with_headings, max_tokens=50)
+    
+    # Report chunks that begin with a heading (printed for inspection only;
+    # their number is not asserted)
+    heading_chunks = [c for c in chunks if c.strip().startswith('#')]
+    print(f"\nFound {len(heading_chunks)} heading chunks:")
+    for hc in heading_chunks:
+        print(f"  - {hc.strip()}")
+    
+    assert len(chunks) > 1, f"Should have multiple chunks, got {len(chunks)}"
+    
+    # Verify no chunk exceeds max_tokens by too much
+    all_under = all(estimate_tokens(c) <= 50 + 20 for c in chunks)  # Allow some tolerance
+    assert all_under, "Some chunks exceed token limit significantly"
+    print("All chunks respect token limits")
+    
+    print("\nAll tests passed!")
@@ -0,0 +1,25 @@
+# Configuration Settings
+import os
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True)
+class Settings:
+    """Immutable application configuration sourced from environment variables.
+
+    Every default below is read from the process environment once, when this
+    class body is executed; values can still be overridden per instance.
+    """
+
+    # Vector store connection and collection
+    vector_store_host: str = os.environ.get("VECTOR_STORE_HOST", "qdrant")
+    vector_store_port: int = int(os.environ.get("VECTOR_STORE_PORT", "6333"))
+    collection_name: str = os.environ.get("COLLECTION_NAME", "local_context7_docs")
+
+    # Embedding model identifier
+    embedding_model_name: str = os.environ.get("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")
+
+    # Filesystem locations
+    docs_path: str = os.environ.get("DOCS_PATH", "./docs")
+    db_path: str = os.environ.get("DB_PATH", "./data/db.sqlite")
+
+    # Logging and authentication
+    log_level: str = os.environ.get("LOG_LEVEL", "INFO")
+    api_key_docs_api: str = os.environ.get("API_KEY_DOCS_API", "")
+
+    @property
+    def is_auth_enabled(self) -> bool:
+        """Report whether an API key has been configured (any non-empty value)."""
+        return True if self.api_key_docs_api else False
+
+
+# Shared module-level instance
+settings = Settings()
@@ -0,0 +1,384 @@
+# SQLite Database Layer for local-context7
+import sqlite3
+from pathlib import Path
+from datetime import datetime, timezone
+from typing import List, Dict, Any, Optional
+from .config import settings
+
+try:
+    from qdrant_client import QdrantClient
+except ImportError:
+    QdrantClient = None
+
+
+def get_db_path() -> Path:
+    """Return the configured SQLite database file location as a Path."""
+    configured_location = settings.db_path
+    return Path(configured_location)
+
+
+def ensure_db_dir():
+    """Create the directory that holds the SQLite file if it is missing (idempotent)."""
+    parent_dir = get_db_path().parent
+    parent_dir.mkdir(parents=True, exist_ok=True)
+
+
+# Make sure the data directory exists as soon as this module is imported
+# (calling it repeatedly is harmless)
+ensure_db_dir()
+
+
+def get_connection():
+    """
+    Open a new connection to the configured SQLite database.
+
+    Returns:
+        sqlite3.Connection whose row_factory is sqlite3.Row, so columns can
+        be read by name as well as by position
+    """
+    database_file = str(get_db_path())
+    connection = sqlite3.connect(database_file)
+    connection.row_factory = sqlite3.Row
+    return connection
+
+
+def init_db():
+    """
+    Initialize the SQLite database by creating tables and indexes.
+    
+    Every statement uses IF NOT EXISTS, so the function can be run repeatedly.
+    
+    Creates:
+    - libraries table (id, name, description, source_path, created_at, updated_at)
+    - documents table (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
+    - idx_documents_library_id index on documents(library_id)
+    - idx_libraries_updated_at index on libraries(updated_at)
+    
+    Returns:
+        {"success": True} on success, or {"success": False, "error": <message>}
+        after rolling back when any statement fails
+    """
+    conn = get_connection()
+    
+    try:
+        # Enable legacy mode for easier schema handling
+        # NOTE(review): no ALTER TABLE statement is visible in this function -
+        # confirm this pragma is still needed
+        conn.execute("PRAGMA legacy_alter_table = ON")
+        
+        # Create libraries table
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS libraries (
+                id TEXT PRIMARY KEY,
+                name TEXT NOT NULL,
+                description TEXT,
+                source_path TEXT NOT NULL,
+                created_at TEXT NOT NULL,
+                updated_at TEXT NOT NULL
+            )
+        """)
+        
+        # Create documents table (one row per chunk, linked to its library)
+        conn.execute("""
+            CREATE TABLE IF NOT EXISTS documents (
+                id TEXT PRIMARY KEY,
+                library_id TEXT NOT NULL,
+                path TEXT NOT NULL,
+                title TEXT,
+                content TEXT,
+                chunk_index INTEGER,
+                token_estimate INTEGER,
+                created_at TEXT NOT NULL,
+                FOREIGN KEY (library_id) REFERENCES libraries(id) ON DELETE CASCADE
+            )
+        """)
+        
+        # Create indexes for better query performance
+        conn.execute("""
+            CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id)
+        """)
+        conn.execute("""
+            CREATE INDEX IF NOT EXISTS idx_libraries_updated_at ON libraries(updated_at)
+        """)
+        
+        conn.commit()
+        return {"success": True}
+        
+    except Exception as e:
+        # Undo any partial work and report the failure to the caller as data
+        conn.rollback()
+        return {"success": False, "error": str(e)}
+    finally:
+        conn.close()
+
+
+def upsert_library(
+    library_id: str,
+    name: str,
+    description: Optional[str] = None,
+    source_path: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Insert or update a library record.
+    
+    Args:
+        library_id: Unique identifier for the library
+        name: Library name
+        description: Optional description
+        source_path: Path to library source files; falls back to library_id
+            when omitted or empty
+    
+    Returns:
+        {"success": True, "id": library_id, "exists": <bool>} where "exists"
+        tells whether the row was already present (updated rather than
+        inserted), or {"success": False, "error": <message>} after a rollback
+    """
+    conn = get_connection()
+    
+    try:
+        # Naive UTC timestamp in the same ISO format that datetime.utcnow()
+        # produced, obtained without the deprecated utcnow() call
+        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
+        
+        # source_path is NOT NULL in the schema, so default it to the id
+        source_path = source_path or library_id
+
+        # Check if library exists
+        cursor = conn.execute("SELECT id FROM libraries WHERE id = ?", (library_id,))
+        exists = cursor.fetchone() is not None
+        
+        if exists:
+            # Update existing library (created_at is left untouched)
+            conn.execute("""
+                UPDATE libraries SET 
+                    name = ?, description = ?, source_path = ?, updated_at = ?
+                WHERE id = ?
+            """, (name, description, source_path, now, library_id))
+        else:
+            # Insert new library
+            conn.execute("""
+                INSERT INTO libraries (id, name, description, source_path, created_at, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?)
+            """, (library_id, name, description, source_path, now, now))
+        
+        conn.commit()
+        return {"success": True, "id": library_id, "exists": exists}
+        
+    except Exception as e:
+        conn.rollback()
+        return {"success": False, "error": str(e)}
+    finally:
+        conn.close()
+
+
+def insert_document_chunk(
+    doc_id: str,
+    library_id: str,
+    path: str,
+    title: Optional[str] = None,
+    content: Optional[str] = None,
+    chunk_index: Optional[int] = None,
+    token_estimate: int = 0,
+) -> Dict[str, Any]:
+    """
+    Insert or update a document chunk record.
+    
+    Args:
+        doc_id: Unique identifier for this chunk
+        library_id: Foreign key to libraries table
+        path: Relative file path within the library
+        title: Optional document title
+        content: Full text content of the chunk
+        chunk_index: Index within the full document (NULL if not chunked)
+        token_estimate: Estimated token count (a falsy value is stored as 0)
+    
+    Returns:
+        {"success": True, "id": doc_id, "exists": <bool>} where "exists" tells
+        whether the row was already present (updated rather than inserted),
+        or {"success": False, "error": <message>} after a rollback
+    """
+    conn = get_connection()
+    
+    try:
+        # Naive UTC timestamp in the same ISO format that datetime.utcnow()
+        # produced, obtained without the deprecated utcnow() call
+        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
+        
+        # Check if document chunk exists
+        cursor = conn.execute(
+            "SELECT id FROM documents WHERE id = ?", (doc_id,)
+        )
+        exists = cursor.fetchone() is not None
+        
+        if exists:
+            # NOTE(review): an update also overwrites created_at with the
+            # current time - confirm this is the intended meaning
+            conn.execute(
+                """
+                UPDATE documents
+                SET library_id = ?, path = ?, title = ?, content = ?,
+                    chunk_index = ?, token_estimate = ?, created_at = ?
+                WHERE id = ?
+                """,
+                (library_id, path, title, content, chunk_index, token_estimate or 0, now, doc_id),
+            )
+        else:
+            conn.execute(
+                """
+                INSERT INTO documents
+                    (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (doc_id, library_id, path, title, content, chunk_index, token_estimate or 0, now),
+            )
+        
+        conn.commit()
+        
+        return {"success": True, "id": doc_id, "exists": exists}
+        
+    except Exception as e:
+        conn.rollback()
+        return {"success": False, "error": str(e)}
+    finally:
+        conn.close()
+
+
+def clear_library_documents(library_id: str) -> Dict[str, Any]:
+    """
+    Remove every stored chunk that belongs to one library.
+
+    Args:
+        library_id: Identifier of the library whose chunks are removed
+
+    Returns:
+        {"success": True, "deleted": <row count>, "library_id": library_id},
+        or {"success": False, "error": <message>} after a rollback
+    """
+    connection = get_connection()
+
+    try:
+        removed_rows = connection.execute(
+            "DELETE FROM documents WHERE library_id = ?", (library_id,)
+        ).rowcount
+        connection.commit()
+    except Exception as exc:
+        connection.rollback()
+        return {"success": False, "error": str(exc)}
+    else:
+        return {"success": True, "deleted": removed_rows, "library_id": library_id}
+    finally:
+        connection.close()
+
+
+def delete_library(library_id: str) -> Dict[str, Any]:
+    """
+    Remove a library together with all of its document chunks.
+
+    Returns:
+        {"success": True, "deleted": <library rows removed>, "library_id": library_id},
+        or {"success": False, "error": <message>} after a rollback
+    """
+    connection = get_connection()
+
+    try:
+        key = (library_id,)
+        # Chunks are removed explicitly before the library row itself
+        connection.execute("DELETE FROM documents WHERE library_id = ?", key)
+        library_cursor = connection.execute("DELETE FROM libraries WHERE id = ?", key)
+        connection.commit()
+        outcome = {"success": True, "deleted": library_cursor.rowcount, "library_id": library_id}
+        return outcome
+    except Exception as exc:
+        connection.rollback()
+        return {"success": False, "error": str(exc)}
+    finally:
+        connection.close()
+
+
+def list_libraries() -> List[Dict[str, Any]]:
+    """
+    Fetch every library, most recently updated first.
+
+    Returns:
+        List of dictionaries, one per library row, keyed by column name.
+        If the query fails, a {"success": False, "error": <message>} dict is
+        returned instead of a list.
+    """
+    connection = get_connection()
+
+    try:
+        cursor = connection.execute("SELECT * FROM libraries ORDER BY updated_at DESC")
+
+        # Pair each row's values with the column names reported by the cursor
+        column_names = [description[0] for description in cursor.description]
+        return [dict(zip(column_names, row)) for row in cursor]
+
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
+    finally:
+        connection.close()
+
+
+def search_libraries(query: str) -> List[Dict[str, Any]]:
+    """
+    Search libraries by id, name or description.
+    
+    The match is a case-insensitive substring search (SQL LIKE), not a
+    full-text index. Characters that LIKE treats as wildcards ('%' and '_')
+    are escaped, so they are matched literally.
+    
+    Args:
+        query: Search query string; an empty or None query matches every library
+    
+    Returns:
+        List of matching library dictionaries, most recently updated first
+        (empty if none found). If the query fails, a
+        {"success": False, "error": <message>} dict is returned instead.
+    """
+    conn = get_connection()
+    
+    try:
+        needle = "" if query is None else str(query)
+        # Escape the escape character first, then the two LIKE wildcards
+        escaped = (
+            needle.replace("\\", "\\\\")
+            .replace("%", "\\%")
+            .replace("_", "\\_")
+        )
+        like_query = f"%{escaped}%"
+        cursor = conn.execute("""
+            SELECT * FROM libraries
+            WHERE lower(id) LIKE lower(?) ESCAPE '\\'
+               OR lower(name) LIKE lower(?) ESCAPE '\\'
+               OR lower(coalesce(description, '')) LIKE lower(?) ESCAPE '\\'
+            ORDER BY updated_at DESC
+        """, (like_query, like_query, like_query))
+        
+        # Convert to list of dicts
+        columns = [col[0] for col in cursor.description]
+        return [dict(zip(columns, row)) for row in cursor]
+        
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+    finally:
+        conn.close()
+
+
+def get_document_by_id(doc_id: str) -> Optional[Dict[str, Any]]:
+    """
+    Look up one document chunk by its identifier.
+
+    Args:
+        doc_id: Identifier of the chunk to load
+
+    Returns:
+        Dictionary of the row keyed by column name, or None when no row has
+        that id. If the query fails, a {"success": False, "error": <message>}
+        dict is returned instead.
+    """
+    connection = get_connection()
+
+    try:
+        cursor = connection.execute("SELECT * FROM documents WHERE id = ?", (doc_id,))
+        record = cursor.fetchone()
+
+        if record is not None:
+            # Build the dict from the cursor's column names, like the list helpers do
+            column_names = [description[0] for description in cursor.description]
+            return dict(zip(column_names, record))
+
+        return None
+
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
+    finally:
+        connection.close()
+
+
+def get_chunks_for_library(library_id: str) -> List[Dict[str, Any]]:
+    """
+    Fetch every document chunk stored for one library.
+
+    Rows are ordered by chunk_index, highest first.
+
+    Args:
+        library_id: Identifier of the library whose chunks are loaded
+
+    Returns:
+        List of dictionaries, one per chunk row, keyed by column name.
+        If the query fails, a {"success": False, "error": <message>} dict is
+        returned instead of a list.
+    """
+    connection = get_connection()
+
+    try:
+        statement = "SELECT * FROM documents WHERE library_id = ? ORDER BY chunk_index DESC"
+        cursor = connection.execute(statement, (library_id,))
+
+        # Pair each row's values with the column names reported by the cursor
+        column_names = [description[0] for description in cursor.description]
+        return [dict(zip(column_names, row)) for row in cursor]
+
+    except Exception as exc:
+        return {"success": False, "error": str(exc)}
+    finally:
+        connection.close()
@@ -0,0 +1,181 @@
+# Local Embedding Generation using FastEmbed
+import asyncio
+from typing import List
+from functools import lru_cache
+
+
+# Module-level singleton for cached model instance
+_embedding_model = None
+_embedding_size = 384  # BAAI/bge-small-en-v1.5 output dimension
+
+
+def _load_model():
+    """
+    Create the FastEmbed model on first use and return the cached instance.
+
+    Raises:
+        ImportError: If the fastembed package cannot be imported
+        RuntimeError: If constructing the model fails; a disk-space failure
+            is re-raised with guidance on freeing or relocating the cache
+    """
+    global _embedding_model
+
+    try:
+        from fastembed import TextEmbedding
+
+        needs_loading = _embedding_model is None
+        if needs_loading:
+            print("Loading embedding model (this may take a few minutes on first run)...")
+
+            # Use BAAI/bge-small-en-v1.5 - lightweight (~90MB), works offline
+            _embedding_model = TextEmbedding(
+                model_name="BAAI/bge-small-en-v1.5",
+                cache_dir=".embed_cache",
+            )
+            print("Embedding model loaded successfully.")
+
+        return _embedding_model
+
+    except ImportError as e:
+        install_hint = (
+            "FastEmbed is not installed. Please install with:\n"
+            "  pip install fastembed\n\n"
+            f"Import error details: {e}"
+        )
+        raise ImportError(install_hint) from e
+
+    except RuntimeError as e:
+        # Only disk-space failures get the extended guidance; anything else
+        # propagates unchanged
+        details = str(e)
+        out_of_space = any(
+            marker in details for marker in ("No space left", "disk quota exceeded")
+        )
+        if not out_of_space:
+            raise
+
+        disk_hint = (
+            "Failed to load embedding model due to disk space constraints.\n\n"
+            "Please free up space on your system (at least 500MB required).\n"
+            "Or specify a custom cache directory with available space:\n"
+            "  from fastembed import TextEmbedding\n"
+            "  model = TextEmbedding(model_name='...', cache_dir='/path/to/large/storage')\n\n"
+            f"Error: {e}"
+        )
+        raise RuntimeError(disk_hint) from e
+
+
+def get_embedding_model():
+    """
+    Return the shared embedding model, loading it on the first call.
+
+    Returns:
+        The cached FastEmbed TextEmbedding instance
+
+    Raises:
+        ImportError: If FastEmbed is not installed
+        RuntimeError: If model download/load failed
+    """
+    global _embedding_model
+    if _embedding_model is not None:
+        return _embedding_model
+
+    _embedding_model = _load_model()
+    return _embedding_model
+
+
+def embed_text(text: str) -> List[float]:
+    """
+    Generate embedding for a single text.
+    
+    Args:
+        text: The text string to embed
+        
+    Returns:
+        List of floats representing the embedding vector. An empty or
+        non-string input yields a zero vector of get_embedding_size() length
+        without loading the model.
+        
+    Raises:
+        ImportError: If FastEmbed is not installed
+        RuntimeError: If model loading failed
+    """
+    if not text or not isinstance(text, str):
+        return [0.0] * get_embedding_size()
+    
+    model = get_embedding_model()
+    # embed() hands back an iterable of vectors (embed_texts below iterates it
+    # the same way); take the first item through the iterator protocol, which
+    # works for lazy generators as well as lists, instead of indexing with [0].
+    vector = next(iter(model.embed([text])))
+    # Same conversion as embed_texts: prefer the array's own tolist()
+    return vector.tolist() if hasattr(vector, 'tolist') else list(vector)
+
+
+def embed_texts(texts: List[str]) -> List[List[float]]:
+    """
+    Embed a batch of texts in one model call.
+
+    Args:
+        texts: Text strings to embed
+
+    Returns:
+        One embedding vector per input text, in input order; an empty input
+        yields an empty list without loading the model
+
+    Raises:
+        ImportError: If FastEmbed is not installed
+        RuntimeError: If model loading failed
+    """
+    if not texts:
+        return []
+
+    vectors = get_embedding_model().embed(texts)
+
+    # Convert array-like vectors to plain lists; pass anything else through
+    return [
+        vector.tolist() if hasattr(vector, 'tolist') else vector
+        for vector in vectors
+    ]
+
+
+def get_embedding_size() -> int:
+    """
+    Report the embedding vector dimension.
+
+    Returns:
+        The module-level dimension constant (384 for bge-small-en-v1.5)
+
+    Note:
+        The value is a fixed module constant; it is not read from the
+        loaded model.
+    """
+    dimension = _embedding_size
+    return dimension
+
+
+# Async wrapper for compatibility with existing code
+async def generate_embeddings(chunks: List[str]) -> List[List[float]]:
+    """
+    Async wrapper around embed_texts for compatibility.
+    
+    The embedding call is synchronous, so it is run in a worker thread; the
+    event loop stays free to serve other requests (including health checks)
+    while a batch is being embedded.
+    
+    Args:
+        chunks: List of text strings to embed
+        
+    Returns:
+        List of embedding vectors (empty list for empty input)
+    """
+    if not chunks:
+        # Nothing to embed: skip the thread hop entirely
+        return []
+    return await asyncio.to_thread(embed_texts, chunks)
+
+
+# Self-test: runs only when this module is executed directly as a script
+if __name__ == "__main__":
+    # Test the embeddings module
+    print("Testing embeddings module...\n")
+    
+    # Test get_embedding_size
+    size = get_embedding_size()
+    print(f"Embedding dimension: {size}")
+    
+    # Test single text embedding
+    # (a failure here is printed and the script carries on)
+    test_text = "Hello, world! This is a test of the embedding generation."
+    try:
+        emb = embed_text(test_text)
+        print(f"\nSingle text embedding shape: ({len(emb)},)")
+        print(f"First 5 values: {emb[:5]}")
+        print("✓ Single embedding works")
+    except Exception as e:
+        print(f"✗ Single embedding failed: {e}")
+    
+    # Test batch embedding
+    # (a failure here is printed and the script carries on)
+    test_texts = [
+        "The quick brown fox jumps over the lazy dog.",
+        "Machine learning is a subset of artificial intelligence.",
+        "Natural language processing enables computers to understand human language."
+    ]
+    try:
+        embeddings = embed_texts(test_texts)
+        print(f"\nBatch embedding shape: ({len(embeddings)}, {len(embeddings[0])})")
+        print("✓ Batch embeddings work")
+    except Exception as e:
+        print(f"✗ Batch embeddings failed: {e}")
+    
+    # Test empty inputs (these paths return before the model is loaded)
+    assert embed_text("") == [0.0] * size, "Empty text should return zero vector"
+    assert embed_texts([]) == [], "Empty list should return empty list"
+    print("✓ Empty input handling works")
+    
+    # NOTE: this line is reached whenever the two asserts above pass, even if
+    # one of the embedding checks printed a failure
+    print("\n✅ All tests passed!")
@@ -0,0 +1,389 @@
+# Git Source Operations for Repository Cloning and File Discovery
+import os
+import shutil
+from pathlib import Path
+from typing import List, Optional, Dict, Any
+
+
+def get_repos_dir() -> Path:
+    """Return the directory under which cloned repositories are stored.
+
+    The location is ``data/repos`` beneath the third-level parent of this
+    file's path.
+    """
+    module_file = Path(__file__)
+    base_dir = module_file.parent.parent.parent
+    return base_dir.joinpath("data", "repos")
+
+
+def ensure_repos_dir():
+    """Create the repositories directory if it is missing and return its path.
+
+    Missing parent directories are created as well; an already existing
+    directory is accepted as-is, so the call can be repeated safely.
+    """
+    target = get_repos_dir()
+    target.mkdir(exist_ok=True, parents=True)
+    return target
+
+
+# Initialize repos directory at module load time (safe to run multiple times).
+# NOTE: this is an import-time side effect: importing this module creates the
+# directory returned by get_repos_dir() on disk when it does not exist yet.
+ensure_repos_dir()
+
+
+class GitCloneError(Exception):
+    """Raised when cloning, fetching or checking out a git repository fails."""
+
+
+def clone_or_update_repo(
+    repo_id: str,
+    repo_url: str,
+    branch: str,
+    repos_base: Optional[Path] = None
+) -> Dict[str, Any]:
+    """
+    Clone a git repository or update an existing clone.
+
+    An existing clone is fetched and hard-reset to ``origin/<branch>``.
+    A path that exists but has no ``.git`` entry is treated as a stale
+    leftover: it is deleted and the repository is cloned again. Running git
+    inside such a directory would make git look for a repository in the
+    parent directories and operate on whatever it finds there.
+
+    Args:
+        repo_id: Unique identifier for this repository (used in paths)
+        repo_url: Git URL to clone from
+        branch: Branch name to checkout
+        repos_base: Base directory for repos (defaults to get_repos_dir())
+
+    Returns:
+        Dict with keys "success" (always True), "repo_path", "url", "branch"
+
+    Raises:
+        GitCloneError: If repo_id does not resolve to a path strictly inside
+            repos_base, or if a git command fails or cannot be started
+    """
+    # Imported before any branching so that both the clone path and the
+    # update path can use it.
+    import subprocess
+
+    repos_base = repos_base or get_repos_dir()
+    repo_path = repos_base / repo_id
+
+    # Refuse ids such as "" or "../x": the stale-directory cleanup below must
+    # never be able to delete repos_base itself or anything outside of it.
+    base_resolved = repos_base.resolve()
+    path_resolved = repo_path.resolve()
+    if path_resolved == base_resolved or base_resolved not in path_resolved.parents:
+        raise GitCloneError(f"Invalid repo_id: {repo_id!r}")
+
+    def run_git(args: List[str], action: str) -> None:
+        """Run one git command; raise GitCloneError when it exits non-zero."""
+        completed = subprocess.run(["git", *args], capture_output=True, text=True)
+        if completed.returncode != 0:
+            detail = (completed.stderr or completed.stdout or "").strip()
+            raise GitCloneError(f"Failed to {action}: {detail}")
+
+    try:
+        if repo_path.exists() and not (repo_path / ".git").exists():
+            print(f"  [Git] {repo_path} is not a git checkout, re-cloning")
+            if repo_path.is_dir():
+                shutil.rmtree(repo_path)
+            else:
+                repo_path.unlink()
+
+        if repo_path.exists():
+            # Update existing clone
+            print(f"  [Git] Updating existing clone at {repo_path}")
+            run_git(["-C", str(repo_path), "fetch", "origin"], "fetch")
+            run_git(
+                ["-C", str(repo_path), "reset", "--hard", "origin/" + branch],
+                f"reset to origin/{branch}"
+            )
+        else:
+            # Clone new repository directly into its own directory
+            print(f"  [Git] Cloning {repo_url} to {repo_path}")
+            repo_path.parent.mkdir(parents=True, exist_ok=True)
+            run_git(
+                # "--" keeps a URL that starts with "-" from being parsed
+                # as a git option.
+                ["clone", "--branch", branch, "--single-branch",
+                 "--", repo_url, str(repo_path)],
+                "clone"
+            )
+
+        print(f"  [Git] Checked out branch: {branch}")
+
+        return {
+            "success": True,
+            "repo_path": str(repo_path),
+            "url": repo_url,
+            "branch": branch
+        }
+
+    except GitCloneError:
+        # Already carries a precise message; do not wrap it a second time.
+        raise
+    except Exception as e:
+        # e.g. git executable missing, or a filesystem error during cleanup
+        raise GitCloneError(f"Failed to clone/update repo: {e}") from e
+
+
+def discover_files(
+    repo_path: Path,
+    include_paths: Optional[List[str]] = None,
+    exclude_paths: Optional[List[str]] = None
+) -> List[Dict[str, Any]]:
+    """
+    Discover files in a git repository respecting include/exclude paths.
+
+    Entries are visited in name order within each directory. Anything named
+    ``.git`` is skipped, and symlinked directories are not descended into.
+
+    Args:
+        repo_path: Path to the cloned repository
+        include_paths: Paths relative to repo root to include. A file is kept
+            when it equals an include path or lies beneath one. None or an
+            empty list keeps everything.
+        exclude_paths: Paths relative to repo root to exclude (the path itself
+            and everything beneath it). Excludes win over includes.
+
+    Returns:
+        List of dicts with format:
+            {
+              "path": "docs/hooks.md",  # Relative to repo root, "/" separated
+              "full_path": "/full/path/to/repo/docs/hooks.md",
+              "is_binary": False
+            }
+    """
+    text_extensions = {'.md', '.txt', '.py', '.js', '.ts', '.json',
+                       '.yaml', '.yml', '.html', '.css', '.sh', '.sql'}
+
+    def normalize(raw: str) -> str:
+        """Repo-relative POSIX form without './' segments or outer slashes."""
+        pieces = str(raw).replace("\\", "/").split("/")
+        return "/".join(piece for piece in pieces if piece not in ("", "."))
+
+    includes = [normalize(p) for p in include_paths] if include_paths else []
+    # An exclude that normalizes to "" would mean "the whole repo"; drop it.
+    excludes = [e for e in (normalize(p) for p in (exclude_paths or [])) if e]
+
+    def is_within(rel: str, base: str) -> bool:
+        """True when rel is base or lies beneath it ('' is the repo root)."""
+        return base == "" or rel == base or rel.startswith(base + "/")
+
+    def is_excluded(rel: str) -> bool:
+        return any(is_within(rel, exc) for exc in excludes)
+
+    def file_included(rel: str) -> bool:
+        return not includes or any(is_within(rel, inc) for inc in includes)
+
+    def should_descend(rel: str) -> bool:
+        # Enter a directory that is inside an include path, or that is an
+        # ancestor of one (so "docs/api" is reachable through "docs").
+        return not includes or any(
+            is_within(rel, inc) or is_within(inc, rel) for inc in includes
+        )
+
+    def is_probably_binary(filepath: str) -> bool:
+        """Known text suffixes are text; otherwise look for NUL bytes in the first 8KB."""
+        if Path(filepath).suffix.lower() in text_extensions:
+            return False
+        try:
+            with open(filepath, 'rb') as f:
+                return b'\x00' in f.read(8192)
+        except OSError:
+            # Unreadable file: keep the original best-effort answer.
+            return False
+
+    discovered: List[Dict[str, Any]] = []
+
+    def walk(current: Path, rel_prefix: str) -> None:
+        """Collect matching files below `current` (depth-first, name order)."""
+        try:
+            with os.scandir(current) as iterator:
+                # DirEntry objects are not orderable; sort by name.
+                entries = sorted(iterator, key=lambda item: item.name)
+        except PermissionError:
+            # Skip directories we can't read
+            return
+
+        for entry in entries:
+            if entry.name == ".git":
+                continue
+
+            rel = f"{rel_prefix}/{entry.name}" if rel_prefix else entry.name
+            if is_excluded(rel):
+                continue
+
+            if entry.is_dir(follow_symlinks=False):
+                if should_descend(rel):
+                    walk(current / entry.name, rel)
+            elif entry.is_file() and file_included(rel):
+                full_path = str(current / entry.name)
+                discovered.append({
+                    "path": rel,
+                    "full_path": full_path,
+                    "is_binary": is_probably_binary(full_path)
+                })
+
+    # Walk the repository starting from repo root
+    walk(Path(repo_path), "")
+
+    return discovered
+
+
+async def ingest_git_source(
+    library_id: str,
+    name: str,
+    description: Optional[str] = None,
+    repo_url: str = None,
+    branch: str = "main",
+    include_paths: Optional[List[str]] = None,
+    exclude_paths: Optional[List[str]] = None,
+    repos_base: Optional[Path] = None
+) -> Dict[str, Any]:
+    """
+    Ingest a git repository as a new library.
+
+    Clones the repo (or updates it if it exists), discovers files matching the
+    include/exclude paths, and hands the clone directory to the existing
+    library ingestion pipeline.
+
+    Args:
+        library_id: Unique identifier for this library
+        name: Library display name
+        description: Optional description
+        repo_url: Git repository URL to clone from (required)
+        branch: Branch to checkout (default: main)
+        include_paths: Paths relative to repo root to include (if None, all dirs considered)
+        exclude_paths: Paths relative to repo root to exclude
+        repos_base: Base directory for cloned repos (defaults to get_repos_dir())
+
+    Returns:
+        Dict with "success", "library_id", "files_discovered" and, when the
+        pipeline ran, "name", "chunks_created", "vectors_added" (plus "error"
+        when the pipeline reported one)
+
+    Raises:
+        GitCloneError: If repo_url is missing or git operations fail
+    """
+    from .ingest import ingest_library
+
+    print(f"\n[Git Ingestion] Processing library: {library_id}")
+    print(f"  Source: {repo_url or '(local)'}")
+
+    if not repo_url:
+        raise GitCloneError(f"No repo_url configured for library '{library_id}'")
+
+    # Ensure repos directory exists
+    repos_base = repos_base or get_repos_dir()
+    repos_base.mkdir(parents=True, exist_ok=True)
+
+    repo_id = f"{library_id}-git"
+
+    # Clone or update the repo
+    clone_result = clone_or_update_repo(
+        repo_id=repo_id,
+        repo_url=repo_url,
+        branch=branch,
+        repos_base=repos_base
+    )
+
+    repo_path = Path(clone_result["repo_path"])
+
+    # Discover files respecting include/exclude paths
+    files = discover_files(
+        repo_path=repo_path,
+        include_paths=include_paths,
+        exclude_paths=exclude_paths
+    )
+
+    print(f"  [Git] Discovered {len(files)} file(s) in {repo_path}")
+
+    if not files:
+        return {
+            "success": True,
+            "library_id": library_id,
+            "message": "No files found matching include/exclude criteria",
+            "files_discovered": 0
+        }
+
+    # The .git directory is intentionally kept: the next sync needs it to
+    # fetch and reset the existing clone.
+
+    # ingest_library joins source_path onto its docs root. An absolute path
+    # replaces that root in a pathlib join, so the pipeline scans the clone
+    # itself instead of a non-existent "<docs root>/<repo_id>" folder.
+    # NOTE(review): ingest_library scans the whole directory, so the
+    # include/exclude paths currently only influence files_discovered.
+    result = await ingest_library(
+        library_id=library_id,
+        name=name,
+        description=description,
+        source_path=str(repo_path.resolve())
+    )
+
+    summary = {
+        "success": result.get("success", False),
+        "library_id": library_id,
+        "name": name,
+        "files_discovered": len(files),
+        "chunks_created": result.get("chunks_created", 0),
+        "vectors_added": result.get("vectors_added", 0)
+    }
+    if result.get("error"):
+        summary["error"] = result["error"]
+    return summary
+
+
+async def sync_sources(
+    sources_config: Optional[List[Dict[str, Any]]] = None,
+    repos_base: Optional[Path] = None
+) -> List[Dict[str, Any]]:
+    """
+    Sync all git sources defined in config.
+
+    Args:
+        sources_config: List of source configs. A mapping with a "sources"
+            key (the layout read from docs_sources.yaml) is accepted too.
+            When None, docs_sources.yaml is loaded from the third-level
+            parent of this file's path.
+        repos_base: Base directory for repos
+
+    Returns:
+        List of results for each source. A source that fails yields
+        {"success": False, "library_id": ..., "error": ...} instead of
+        aborting the remaining sources.
+    """
+    if sources_config is None:
+        # Load from default config file
+        import yaml
+        config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"
+
+        if not config_path.exists():
+            return [{"success": False, "error": f"Config not found: {config_path}"}]
+
+        with open(config_path, encoding="utf-8") as f:
+            # safe_load returns None for an empty file
+            sources_config = yaml.safe_load(f)
+
+    if isinstance(sources_config, dict):
+        sources_config = sources_config.get("sources")
+    if sources_config is None:
+        sources_config = []
+
+    results = []
+
+    for source in sources_config:
+        if not isinstance(source, dict):
+            results.append({
+                "success": False,
+                "library_id": "unknown",
+                "error": f"Invalid source entry: {source!r}"
+            })
+            continue
+
+        try:
+            result = await ingest_git_source(
+                library_id=source.get("library_id"),
+                name=source.get("name"),
+                description=source.get("description"),
+                repo_url=source.get("repo_url"),
+                branch=source.get("branch", "main"),
+                include_paths=source.get("include_paths"),
+                exclude_paths=source.get("exclude_paths"),
+                repos_base=repos_base
+            )
+        except GitCloneError as e:
+            result = {
+                "success": False,
+                "library_id": source.get("library_id", "unknown"),
+                "error": str(e)
+            }
+        except Exception as e:
+            result = {
+                "success": False,
+                "library_id": source.get("library_id", "unknown"),
+                "error": f"Unexpected error: {e}"
+            }
+
+        results.append(result)
+
+    return results
@@ -0,0 +1,387 @@
+# Document Ingestion Logic
+import asyncio
+import os
+from pathlib import Path
+from typing import List, Dict, Any, Optional, BinaryIO
+
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+
+# Import local modules
+from .config import settings
+from .chunking import chunk_text, estimate_tokens
+from .embeddings import embed_texts
+from .vector_store import upsert_chunks
+from .db import insert_document_chunk, upsert_library, clear_library_documents
+from .git_source import ingest_git_source
+
+# File suffixes (with leading dot) that ingestion accepts; callers in this
+# module compare against the lower-cased suffix of each file.
+SUPPORTED_EXTENSIONS = {'.md', '.txt', '.py', '.js', '.ts', '.json', 
+                        '.yaml', '.yml', '.html', '.css', '.pdf'}
+
+# Default documents path from environment or fallback
+# (DOCS_PATH env var, else "./docs" relative to the working directory)
+DOCS_PATH = Path(os.getenv("DOCS_PATH", "./docs"))
+
+
+def get_file_size(path: Path) -> int:
+    """Return the size of ``path`` in bytes, or -1 when it cannot be stat'ed."""
+    try:
+        info = path.stat()
+    except OSError:
+        return -1
+    return info.st_size
+
+
+async def read_document_file(path: Path) -> str:
+    """
+    Read document content from a file.
+
+    PDF files are read page by page with pypdf and the non-empty page texts
+    are joined with blank lines. Every other supported file is read as UTF-8.
+
+    Args:
+        path: Path to the file
+
+    Returns:
+        Content as string. An empty string is returned when the file does not
+        exist, has an unsupported suffix, contains only whitespace, or cannot
+        be read.
+
+    Raises:
+        ImportError: If a PDF is requested but pypdf is not installed
+    """
+    if not path.exists():
+        return ""
+
+    # Check extension
+    suffix = path.suffix.lower()
+    if suffix == '.pdf':
+        # Import separately from the parsing below, so a missing dependency is
+        # reported with an install hint rather than as a parse problem.
+        try:
+            from pypdf import PdfReader
+        except ImportError as exc:
+            raise ImportError(
+                "pypdf is required for PDF files. Install with: pip install pypdf"
+            ) from exc
+
+        try:
+            reader = PdfReader(str(path))
+            page_texts = (page.extract_text() for page in reader.pages)
+            return "\n\n".join(text for text in page_texts if text)
+        except Exception as e:
+            print(f"  Warning: Could not read PDF {path}: {e}")
+            return ""
+
+    if suffix not in SUPPORTED_EXTENSIONS:
+        print(f"  Unsupported file type: {suffix}")
+        return ""
+
+    # Read text-based files
+    try:
+        content = path.read_text(encoding='utf-8')
+    except Exception as e:
+        print(f"  Warning: Could not read {path}: {e}")
+        return ""
+
+    return content if content.strip() else ""
+
+
+async def ingest_library(library_id: str, name: str, description: Optional[str] = None, source_path: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Ingest all documents for a library.
+
+    Steps: upsert the library record, scan the library folder recursively for
+    supported files, clear the library's existing chunks, then read, chunk
+    and embed every file and store the chunks in SQLite and the vector store.
+
+    Args:
+        library_id: Unique identifier for the library
+        name: Library name
+        description: Optional description
+        source_path: Path to library folder (relative to DOCS_PATH; an
+            absolute path is used as-is)
+
+    Returns:
+        Summary dict with "success", "library_id", "files_processed",
+        "chunks_created" and "vectors_added". "success" is False (and an
+        "error" message is included) when the folder is missing, a chunk could
+        not be saved, or the vector store upsert reported failure.
+    """
+    print(f"\n[Library] Processing: {library_id}")
+    if source_path is None:
+        # DOCS_PATH / None would raise TypeError; report a normal failure.
+        print("  Error: No source path given")
+        return {"success": False, "error": "source_path is required"}
+    if source_path:
+        print(f"  Source: {source_path}")
+
+    # Ensure library record exists
+    result = upsert_library(library_id, name, description, source_path)
+    print(f"  [{result.get('success', False)}] Library record: {'created' if not result.get('exists') else 'updated'}")
+
+    # Get the library folder path
+    library_dir = DOCS_PATH / source_path
+
+    if not library_dir.exists():
+        print(f"  Error: Directory does not exist: {library_dir}")
+        return {"success": False, "error": f"Directory not found: {library_dir}"}
+
+    # Find all supported files (recursive). Sorted so chunk order is stable
+    # between runs; anything under a .git directory is ignored.
+    print(f"  [Library] Scanning for files in: {library_dir}")
+    doc_files = sorted(
+        candidate
+        for candidate in library_dir.rglob('*')
+        if candidate.is_file()
+        and candidate.suffix.lower() in SUPPORTED_EXTENSIONS
+        and ".git" not in candidate.relative_to(library_dir).parts
+    )
+
+    print(f"  [Library] Found {len(doc_files)} document(s)")
+
+    # Clear old chunks for this library
+    print(f"  [Library] Clearing existing chunks...")
+    clear_result = clear_library_documents(library_id)
+    if not clear_result.get('success'):
+        print(f"  Warning: Could not clear library docs: {clear_result}")
+    else:
+        print(f"  [Library] Cleared {clear_result.get('deleted', 0)} existing chunks")
+
+    # Process documents
+    all_chunks = []
+    processed_files = 0
+
+    for file_path in doc_files:
+        base_path = file_path.relative_to(library_dir).as_posix()
+
+        # Read file content
+        print(f"  [File] Reading: {base_path}")
+        content = await read_document_file(file_path)
+
+        if not content:
+            continue
+
+        # Estimate tokens and chunk
+        num_tokens = estimate_tokens(content)
+        chunks = chunk_text(content, max_tokens=500, overlap_tokens=80)
+
+        if not chunks:
+            print(f"  [File] No valid chunks from {file_path.name}")
+            continue
+
+        print(f"    Chunked into {len(chunks)} pieces (approx. {num_tokens} tokens)")
+
+        embeddings = embed_texts(chunks)
+        if len(embeddings) != len(chunks):
+            print(f"  Warning: Got {len(embeddings)} embeddings for {len(chunks)} chunks in {base_path}, skipping file")
+            continue
+
+        title = Path(base_path).stem
+        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
+            all_chunks.append({
+                # Library and relative path are part of the id so that files
+                # sharing a stem (README.md in two folders, a.md next to
+                # a.txt, or the same file name in two libraries) stay distinct.
+                "id": f"{library_id}:{base_path}:{i}",
+                "library_id": library_id,
+                "path": base_path,
+                "title": title,
+                "content": chunk,
+                "chunk_index": i,
+                "embedding": embedding
+            })
+
+        processed_files += 1
+
+    print(f"  [Library] Processed {processed_files} file(s), {len(all_chunks)} total chunks")
+
+    failed_inserts = 0
+    vectors_added = 0
+    vectors_ok = True
+
+    if all_chunks:
+        # Save chunks to SQLite
+        for chunk in all_chunks:
+            insert_result = insert_document_chunk(
+                doc_id=chunk["id"],
+                library_id=chunk["library_id"],
+                path=chunk["path"],
+                title=chunk.get("title"),
+                content=chunk["content"],
+                chunk_index=chunk["chunk_index"],
+                token_estimate=estimate_tokens(chunk["content"])
+            )
+            if not insert_result.get('success'):
+                failed_inserts += 1
+        print(f"  [Library] Saved {len(all_chunks) - failed_inserts} chunks to SQLite")
+        if failed_inserts:
+            print(f"  Warning: {failed_inserts} chunk(s) could not be saved to SQLite")
+
+        # Save vectors to Qdrant
+        upsert_result = await upsert_chunks(all_chunks)
+        vectors_ok = bool(upsert_result.get('success', False))
+        vectors_added = upsert_result.get('points_added', 0)
+        print(f"  [Library] Vector store: {vectors_ok} ({vectors_added} added)")
+    else:
+        print(f"  [Library] No chunks to save to SQLite")
+        print(f"  [Library] No vectors to add to Qdrant")
+
+    summary = {
+        "success": vectors_ok and failed_inserts == 0,
+        "library_id": library_id,
+        "files_processed": processed_files,
+        "chunks_created": len(all_chunks),
+        "vectors_added": vectors_added
+    }
+    if not summary["success"]:
+        problems = []
+        if failed_inserts:
+            problems.append(f"{failed_inserts} chunk(s) not saved to SQLite")
+        if not vectors_ok:
+            problems.append("vector store upsert failed")
+        summary["error"] = "; ".join(problems)
+    return summary
+
+
+async def ingest_git_source_from_config(
+    repo_url: str,
+    branch: str = "main",
+    include_paths: Optional[List[str]] = None,
+    exclude_paths: Optional[List[str]] = None,
+    repos_base: Optional[Path] = None
+) -> Dict[str, Any]:
+    """
+    Ingest a git repository defined in sources configuration.
+
+    The library id and display name are both derived from the last path
+    segment of the URL with a trailing ".git" removed, e.g.
+    "https://host/org/widget.git" -> "widget". "unknown" is used when the URL
+    has no usable path segment.
+
+    Args:
+        repo_url: Git repository URL to clone from
+        branch: Branch to checkout (default: main)
+        include_paths: Paths relative to repo root to include (if None, all dirs considered)
+        exclude_paths: Paths relative to repo root to exclude
+        repos_base: Base directory for cloned repos (defaults to ./data/repos)
+
+    Returns:
+        Dict with operation result
+
+    Raises:
+        GitCloneError: If git operations fail
+    """
+    import urllib.parse
+
+    parsed = urllib.parse.urlparse(repo_url)
+    path_part = parsed.path.rstrip("/")
+    # Remove the ".git" suffix as a whole. str.rstrip('.git') would strip any
+    # trailing '.', 'g', 'i' or 't' characters and turn "widget" into "widge".
+    if path_part.endswith(".git"):
+        path_part = path_part[:-len(".git")]
+
+    library_id = Path(path_part).name or "unknown"
+
+    # Use the repository name for display; the host name would be identical
+    # for every repository served from the same site.
+    name = library_id
+    description = f"Documentation from {path_part}"
+
+    return await ingest_git_source(
+        library_id=library_id,
+        name=name,
+        description=description,
+        repo_url=repo_url,
+        branch=branch,
+        include_paths=include_paths,
+        exclude_paths=exclude_paths,
+        repos_base=repos_base
+    )
+
+
+async def detect_libraries() -> List[Dict[str, Any]]:
+    """
+    Register every top-level folder of DOCS_PATH as a library.
+
+    Each folder is upserted into the library table with its lower-cased name
+    as id, its name as display name and as source path, and no description.
+
+    Returns:
+        One dict per folder with the keys "id", "name" and "source_path";
+        an empty list when DOCS_PATH does not exist
+    """
+    print(f"\n[Detection] Scanning for libraries in: {DOCS_PATH}")
+
+    if not DOCS_PATH.exists():
+        print(f"  [Detection] Directory does not exist: {DOCS_PATH}")
+        return []
+
+    # Only directories directly below DOCS_PATH count as libraries
+    folders = [entry for entry in DOCS_PATH.iterdir() if entry.is_dir()]
+    total = len(folders)
+
+    found = []
+    position = 0
+    for folder in folders:
+        position += 1
+        folder_name = folder.name
+        lib_id = folder_name.lower()
+
+        # Create library record with defaults
+        upsert_library(
+            library_id=lib_id,
+            name=folder_name,
+            description=None,
+            source_path=folder_name
+        )
+
+        found.append({
+            "id": lib_id,
+            "name": folder_name,
+            "source_path": folder_name
+        })
+
+        print(f"  [{position}/{total}] Library detected: {folder_name} (id: {lib_id})")
+
+    print(f"\n[Detection] Found {len(found)} library(ies)")
+    return found
+
+
+async def ingest_all(verbose: bool = True) -> Dict[str, Any]:
+    """
+    Ingest all discovered libraries.
+
+    A library whose ingestion raises is recorded as failed and the remaining
+    libraries are still processed.
+
+    Args:
+        verbose: Whether to print the banner, per-library and summary messages
+            of this function
+
+    Returns:
+        Summary dict with the integer keys "total_libraries", "successful",
+        "failed" and "total_chunks" (all zero when no library was found)
+    """
+    if verbose:
+        print("\n" + "=" * 60)
+        print("DOCUMENT INGESTION STARTED")
+        print("=" * 60)
+
+    # Detect libraries
+    libraries = await detect_libraries()
+
+    if not libraries:
+        if verbose:
+            print("\n[Summary] No libraries to ingest")
+        # Same keys and value types as the regular summary below
+        return {"total_libraries": 0, "successful": 0, "failed": 0, "total_chunks": 0}
+
+    # Ingest each library
+    results = []
+    for lib in libraries:
+        lib_id = lib["id"]
+
+        try:
+            result = await ingest_library(
+                library_id=lib_id,
+                name=lib["name"],
+                description=None,
+                source_path=lib.get("source_path")
+            )
+        except Exception as e:
+            result = {
+                "success": False,
+                "library_id": lib_id,
+                "error": f"Unexpected error: {e}"
+            }
+
+        if verbose:
+            if result.get('success'):
+                print(f"  [Library] Done: {result.get('library_id')} - {result.get('chunks_created', 0)} chunks")
+            else:
+                print(f"  [Library] Failed: {lib_id} - {result.get('error', 'unknown error')}")
+
+        results.append(result)
+
+    # Calculate totals
+    total_chunks = sum(r.get('chunks_created', 0) for r in results)
+    successful = sum(1 for r in results if r.get('success'))
+
+    result = {
+        "total_libraries": len(libraries),
+        "successful": successful,
+        "failed": len(results) - successful,
+        "total_chunks": total_chunks
+    }
+
+    if verbose:
+        print("\n" + "=" * 60)
+        print("INGESTION COMPLETE")
+        print("=" * 60)
+        print(f"  Libraries processed: {result['total_libraries']}")
+        print(f"  Successful: {result['successful']}")
+        print(f"  Failed: {result['failed']}")
+        print(f"  Total chunks created: {result['total_chunks']}")
+
+    return result
+
+
+if __name__ == "__main__":
+    # Manual smoke run: detect libraries, then ingest the first one found
+    import asyncio
+
+    async def _smoke_run():
+        print("Testing ingestion module...\n")
+
+        # Library detection
+        detected = await detect_libraries()
+        print(f"\nDetected libraries: {len(detected)}")
+
+        if detected:
+            # Ingesting may fail when the folder holds no documents; that is
+            # acceptable for this manual check
+            print("\nAttempting sample ingestion...")
+            first = detected[0]
+            outcome = await ingest_library(
+                library_id=first["id"],
+                name=first["name"],
+                source_path=first.get("source_path")
+            )
+            print(f"Result: {outcome}")
+
+        print("\n✅ Tests completed!")
+
+    asyncio.run(_smoke_run())
@@ -0,0 +1,299 @@
+"""Context7 Docs API."""
+import asyncio
+import shutil
+import yaml
+from pathlib import Path
+from typing import Optional
+
+from fastapi import FastAPI, File, Form, HTTPException, Query, Request, UploadFile
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+
+from .config import settings
+from .db import (
+    clear_library_documents,
+    delete_library,
+    init_db,
+    list_libraries,
+    search_libraries,
+    upsert_library,
+)
+from .git_source import ingest_git_source
+from .ingest import ingest_all, ingest_library
+from .search import get_library_docs, resolve_library_id, search_docs
+from .vector_store import delete_library_vectors, ensure_collection, get_client, get_collection_name
+
+
+# Application object; the middleware, startup hook and route handlers below
+# are all registered on it through decorators.
+app = FastAPI(
+    title="Context7 Docs API",
+    description="Document ingestion and semantic search API for local-context7",
+    version="1.0.0",
+)
+
+
+class SearchRequest(BaseModel):
+    # Request body of POST /search.
+    # query: required search text, at least one character long
+    query: str = Field(default=..., min_length=1)
+    # library_id: optional library to search in; None when omitted
+    library_id: Optional[str] = None
+    # limit: number of results to return, 1 to 50, defaults to 10
+    limit: int = Field(default=10, ge=1, le=50)
+
+
+class SyncSourcesRequest(BaseModel):
+    # Optional request body of POST /sources/sync.
+    # override: boolean flag, False when omitted
+    override: bool = False
+
+
+# Upload allow-list: safe_upload_filename() rejects any file whose lower-cased
+# suffix is not in this set.
+ALLOWED_EXTENSIONS = {
+    ".md",
+    ".txt",
+    ".py",
+    ".js",
+    ".ts",
+    ".json",
+    ".yaml",
+    ".yml",
+    ".html",
+    ".css",
+    ".pdf",
+}
+
+
+@app.middleware("http")
+async def auth_middleware(request: Request, call_next):
+    """Require a valid X-API-Key header when authentication is enabled.
+
+    GET requests whose path starts with "/health", "/libraries" or "/docs/"
+    are always let through. Every other request must carry an X-API-Key
+    header equal to settings.api_key_docs_api; otherwise a 401 JSON response
+    is returned without calling the route handler.
+    """
+    import secrets
+
+    if not settings.is_auth_enabled:
+        return await call_next(request)
+
+    public_prefixes = ("/health", "/libraries", "/docs/")
+    if request.method == "GET" and request.url.path.startswith(public_prefixes):
+        return await call_next(request)
+
+    provided = request.headers.get("X-API-Key") or ""
+    expected = str(settings.api_key_docs_api or "")
+
+    # Fail closed when no key is configured, and compare in constant time so
+    # response timing does not reveal how much of a guessed key matched.
+    # Both values are encoded because compare_digest rejects non-ASCII str.
+    key_matches = bool(expected) and secrets.compare_digest(
+        provided.encode("utf-8"), expected.encode("utf-8")
+    )
+    if not key_matches:
+        return JSONResponse(status_code=401, content={"detail": "Invalid or missing API key"})
+
+    return await call_next(request)
+
+
+@app.on_event("startup")
+async def startup() -> None:
+    """Initialize SQLite, then wait for the Qdrant collection to be ready.
+
+    The collection check is attempted up to 20 times with a one second pause
+    after every failed attempt.
+
+    Raises:
+        RuntimeError: If the database cannot be initialized, or the collection
+            is still not available after the last attempt
+    """
+    db_status = init_db()
+    if not db_status.get("success"):
+        raise RuntimeError(f"Failed to initialize SQLite database: {db_status.get('error')}")
+
+    attempts_left = 20
+    last_error = None
+    while attempts_left > 0:
+        outcome = await ensure_collection()
+        if outcome.get("success"):
+            return
+        last_error = outcome.get("error")
+        attempts_left -= 1
+        await asyncio.sleep(1)
+
+    raise RuntimeError(f"Failed to initialize Qdrant collection: {last_error}")
+
+
+def safe_library_id(library_id: str) -> str:
+    """Validate a user-provided library ID and return it as one path segment.
+
+    The ID is rejected with HTTP 400 when it contains "/", "\\" or "..", or
+    when its final path component is empty, "." or ".." after stripping
+    surrounding whitespace. Otherwise that stripped component is returned.
+    """
+    candidate = Path(library_id).name.strip()
+    has_separator = "/" in library_id or "\\" in library_id
+    is_dot_name = candidate in {".", ".."}
+    has_parent_ref = ".." in library_id
+
+    if not candidate or is_dot_name or has_parent_ref or has_separator:
+        raise HTTPException(status_code=400, detail="Invalid library ID")
+
+    return candidate
+
+
+def safe_upload_filename(filename: str) -> str:
+    """Return a sanitized "<stem><ext>" name for an uploaded file.
+
+    The lower-cased suffix must be in ALLOWED_EXTENSIONS. From the stem only
+    alphanumeric characters, "-", "_" and spaces are kept, and surrounding
+    whitespace is stripped.
+
+    Raises:
+        HTTPException: 400 when the suffix is not allowed or no stem
+            characters remain
+    """
+    original = Path(filename)
+    ext = original.suffix.lower()
+    if ext not in ALLOWED_EXTENSIONS:
+        allowed = ', '.join(sorted(ALLOWED_EXTENSIONS))
+        raise HTTPException(
+            status_code=400,
+            detail=f"Unsafe extension: {ext}. Allowed extensions: {allowed}",
+        )
+
+    kept = [ch for ch in original.stem if ch.isalnum() or ch in "-_ "]
+    stem = "".join(kept).strip()
+    if stem:
+        return f"{stem}{ext}"
+
+    raise HTTPException(status_code=400, detail="Filename contains only unsafe characters")
+
+
+def docs_root() -> Path:
+    """Return settings.docs_path as a Path."""
+    root = Path(settings.docs_path)
+    return root
+
+
+def sources_config_path() -> Path:
+    """Return the path of docs_sources.yaml.
+
+    The file is looked up in ``parents[2]`` of this module's resolved path.
+    """
+    here = Path(__file__).resolve()
+    return here.parents[2].joinpath("docs_sources.yaml")
+
+
+@app.get("/health")
+async def health_check():
+    # Liveness probe: always answers with the same fixed payload
+    payload = {"status": "ok", "service": "docs-api"}
+    return payload
+
+
+@app.get("/collections")
+async def collections():
+    # Reports the vector count of the configured collection. Any error is
+    # returned as a "warning" next to an empty collection map.
+    try:
+        qdrant = get_client()
+        details = qdrant.get_collection(get_collection_name())
+
+        # Prefer vectors_count, fall back to points_count, then to 0
+        count = getattr(details, "vectors_count", None)
+        if not count:
+            count = getattr(details, "points_count", 0)
+        if not count:
+            count = 0
+
+        return {"collections": {get_collection_name(): {"vectors": count}}}
+    except Exception as exc:
+        return {"collections": {}, "warning": str(exc)}
+
+
+@app.get("/libraries")
+async def list_libraries_api():
+    # Returns all libraries with their count. A dict result flagged as
+    # unsuccessful is turned into an HTTP 500.
+    libs = list_libraries()
+    lookup_failed = isinstance(libs, dict) and not libs.get("success", True)
+    if lookup_failed:
+        message = libs.get("error", "Failed to list libraries")
+        raise HTTPException(status_code=500, detail=message)
+    return {"libraries": libs, "count": len(libs)}
+
+
+@app.get("/libraries/search")
+async def search_libraries_api(q: str = Query(..., min_length=1)):
+    # Resolves the query text "q" to library matches and reports their count
+    found = resolve_library_id(q)
+    response = {"matches": found, "count": len(found)}
+    return response
+
+
+@app.post("/search")
+async def search_docs_api(payload: SearchRequest):
+    # Runs the search and echoes the query parameters next to the hits
+    hits = search_docs(payload.query, library_id=payload.library_id, limit=payload.limit)
+    response = {
+        "query": payload.query,
+        "library_id": payload.library_id,
+        "results": hits,
+    }
+    response["count"] = len(hits)
+    return response
+
+
+@app.get("/docs/{library_id}")
+@app.get("/libraries/{library_id}/docs")
+async def get_library_docs_api(
+    library_id: str,
+    topic: Optional[str] = Query(None),
+    tokens: int = Query(8000, ge=1),
+):
+    # Served under two routes. "tokens" is forwarded as the token limit and
+    # "topic" as an optional filter.
+    content = get_library_docs(library_id=library_id, topic=topic, token_limit=tokens)
+    response = {"library_id": library_id, "content": content}
+    return response
+
+
+@app.post("/ingest/all")
+async def ingest_all_api():
+    # Ingests every detected library and returns the overall summary
+    summary = await ingest_all()
+    return summary
+
+
+@app.post("/ingest/{library_id}")
+async def ingest_library_api(library_id: str):
+    # The validated ID serves as library id, display name and source folder
+    safe_id = safe_library_id(library_id)
+    outcome = await ingest_library(library_id=safe_id, name=safe_id, source_path=safe_id)
+    return outcome
+
+
+@app.post("/api/v1/libraries/{library_id}")
+async def api_create_library(
+    library_id: str,
+    name: Optional[str] = Form(None),
+    description: Optional[str] = Form(None),
+):
+    # Creates the library folder below the docs root and upserts its record.
+    # The display name falls back to the library ID when no name is sent.
+    library_id = safe_library_id(library_id)
+    display_name = name or library_id
+
+    target_dir = docs_root() / library_id
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    record = upsert_library(library_id, display_name, description, library_id)
+    if not record.get("success"):
+        raise HTTPException(status_code=500, detail=record.get("error", "Failed to create library"))
+
+    already_existed = record.get("exists", False)
+    return {
+        "success": True,
+        "created": not already_existed,
+        "library_id": library_id,
+        "name": display_name,
+        "description": description,
+        "path": str(target_dir),
+    }
+
+
+@app.delete("/api/v1/libraries/{library_id}")
+async def api_delete_library(library_id: str):
+    # Deletes a library everywhere: its folder below the docs root, its
+    # document chunks, its vectors and its library record. Answers HTTP 500
+    # with the joined error messages when any store reports a failure.
+    library_id = safe_library_id(library_id)
+    lib_dir = docs_root() / library_id
+    deleted_files = 0
+
+    if lib_dir.is_dir():
+        deleted_files = sum(1 for path in lib_dir.rglob("*") if path.is_file())
+        shutil.rmtree(lib_dir)
+    elif lib_dir.exists():
+        # A plain file sits where the folder is expected; rmtree would fail
+        lib_dir.unlink()
+        deleted_files = 1
+
+    docs_result = clear_library_documents(library_id)
+    vectors_result = await delete_library_vectors(library_id)
+    library_result = delete_library(library_id)
+
+    # A failed result may carry no "error" text; substitute a placeholder so
+    # that joining the messages cannot fail on None.
+    failures = [
+        str(r.get("error") or "unknown error")
+        for r in (docs_result, vectors_result, library_result)
+        if isinstance(r, dict) and not r.get("success", True)
+    ]
+    if failures:
+        raise HTTPException(status_code=500, detail="; ".join(failures))
+
+    return {"success": True, "library_id": library_id, "deleted_files": deleted_files}
+
+
+@app.post("/api/v1/upload/{library_id}")
+async def api_upload(library_id: str, file: UploadFile = File(...)):
+    # Stores one uploaded file in the library folder and upserts the library
+    # record. Uploads larger than 5MB are rejected with HTTP 400.
+    library_id = safe_library_id(library_id)
+    safe_name = safe_upload_filename(file.filename or "upload.txt")
+
+    # Read one byte more than the limit: enough to detect an oversized upload
+    # without loading an arbitrarily large body into memory.
+    max_bytes = 5 * 1024 * 1024
+    contents = await file.read(max_bytes + 1)
+    if len(contents) > max_bytes:
+        raise HTTPException(status_code=400, detail="File too large (max 5MB)")
+
+    # Create the folder only after the upload passed validation
+    lib_dir = docs_root() / library_id
+    lib_dir.mkdir(parents=True, exist_ok=True)
+
+    target = lib_dir / safe_name
+    target.write_bytes(contents)
+
+    record = upsert_library(library_id, library_id, None, library_id)
+    if isinstance(record, dict) and not record.get("success", True):
+        raise HTTPException(status_code=500, detail=record.get("error") or "Failed to register library")
+
+    return {
+        "success": True,
+        "library_id": library_id,
+        "filename": safe_name,
+        "path": str(target.relative_to(docs_root())),
+        "size_bytes": len(contents),
+    }
+
+
+@app.get("/api/v1/sources")
+@app.get("/sources/config")
+async def api_list_sources():
+    # Returns the source entries of docs_sources.yaml. The file may hold a
+    # mapping with a "sources" list or a bare list; a missing file, an empty
+    # file or any other layout yields an empty list.
+    path = sources_config_path()
+    if not path.exists():
+        return {"success": True, "sources": [], "count": 0}
+
+    with path.open(encoding="utf-8") as f:
+        data = yaml.safe_load(f)
+
+    # Decide by type before touching the data: a list has no .get()
+    if isinstance(data, dict):
+        sources = data.get("sources")
+    elif isinstance(data, list):
+        sources = data
+    else:
+        sources = None
+
+    if not isinstance(sources, list):
+        sources = []
+    return {"success": True, "sources": sources, "count": len(sources)}
+
+
+@app.post("/sources/sync")
+async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
+    # Ingests every configured git source and returns per-source results with
+    # totals. A source that is malformed or raises is reported as failed and
+    # the remaining sources are still processed.
+    # NOTE(review): payload.override is accepted but has no effect here.
+    source_data = await api_list_sources()
+    sources = source_data["sources"]
+    results = []
+
+    for source in sources:
+        library_id = source.get("library_id") if isinstance(source, dict) else None
+        try:
+            if not library_id or not source.get("repo_url"):
+                raise ValueError("source entry requires 'library_id' and 'repo_url'")
+
+            result = await ingest_git_source(
+                library_id=library_id,
+                name=source.get("name") or library_id,
+                description=source.get("description"),
+                repo_url=source["repo_url"],
+                branch=source.get("branch", "main"),
+                include_paths=source.get("include_paths"),
+                exclude_paths=source.get("exclude_paths"),
+            )
+        except Exception as e:
+            result = {
+                "success": False,
+                "library_id": library_id or "unknown",
+                "error": str(e),
+            }
+        results.append(result)
+
+    successful = sum(1 for r in results if r.get("success"))
+    return {
+        "success": successful == len(results),
+        "total_sources": len(results),
+        "successful": successful,
+        "failed": len(results) - successful,
+        "results": results,
+    }
@@ -0,0 +1,47 @@
+# Data Models for document processing and API responses
+from typing import Any, Dict, List, Optional
+
+
+class DocumentChunk:
+    """Represents a chunk of text to be embedded."""
+    
+    def __init__(
+        self,
+        text: str,
+        metadata: Optional[Dict[str, Any]] = None
+    ):
+        self.text = text
+        self.metadata = metadata or {}
+    
+    @property
+    def doc_id(self) -> str:
+        """Generate a document ID from content."""
+        return f"doc-{hash(self.text)}"
+
+
+class IngestResponse:
+    """Response model for document ingestion."""
+    
+    def __init__(
+        self,
+        success: bool,
+        chunks_count: int = 0,
+        error: Optional[str] = None
+    ):
+        self.success = success
+        self.chunks_count = chunks_count
+        self.error = error
+
+
+class SearchResponse:
+    """Response model for search results."""
+    
+    def __init__(
+        self,
+        results: List[Dict[str, Any]],
+        query: str,
+        total_results: int
+    ):
+        self.results = results
+        self.query = query
+        self.total_results = total_results
@@ -0,0 +1,235 @@
+# Search Operations for Semantic Query and Library Navigation
+from typing import List, Dict, Any, Optional
+from pathlib import Path
+
+from .config import settings
+from .vector_store import get_client, _collection_name as VECTOR_COLLECTION
+from .embeddings import embed_text, get_embedding_size
+from .db import get_chunks_for_library, list_libraries
+
+
+def search_docs(
+    query: str,
+    library_id: Optional[str] = None,
+    limit: int = 10
+) -> List[Dict[str, Any]]:
+    """
+    Search documents by semantic similarity in Qdrant.
+    
+    Args:
+        query: The search query string
+        library_id: Optional filter to search only within a library
+        limit: Maximum number of results to return
+        
+    Returns:
+        List of dicts with format:
+            {
+              "id": "...",
+              "score": 0.123,
+              "library_id": "...",
+              "path": "...",
+              "title": "...",
+              "chunk_index": 0
+            }
+    """
+    try:
+        # Generate embedding for the query
+        query_embedding = embed_text(query)
+        
+        client = get_client()
+        
+        # Build filter if library_id is specified
+        search_filter = None
+        if library_id:
+            try:
+                from qdrant_client.models import FieldCondition, Filter, MatchValue
+                search_filter = Filter(
+                    must=[
+                        FieldCondition(
+                            key="library_id",
+                            match=MatchValue(value=library_id),
+                        )
+                    ]
+                )
+            except ImportError:
+                search_filter = None
+        
+        # Perform vector search
+        results = client.search(
+            collection_name=VECTOR_COLLECTION,
+            query_vector=query_embedding,
+            limit=limit,
+            search_filter=search_filter
+        )
+        
+        # Format and return results
+        formatted_results = []
+        for result in results:
+            if result.score > 0 and result.payload:
+                formatted_results.append({
+                    "id": result.payload["id"],
+                    "score": float(result.score),
+                    "library_id": result.payload.get("library_id", ""),
+                    "path": result.payload.get("path", ""),
+                    "title": result.payload.get("title", ""),
+                    "chunk_index": result.payload.get("chunk_index", 0)
+                })
+        
+        return formatted_results
+        
+    except Exception as e:
+        print(f"Search error: {e}")
+        return []
+
+
+def get_library_docs(
+    library_id: str,
+    topic: Optional[str] = None,
+    token_limit: int = 8000
+) -> str:
+    """
+    Retrieve documentation content from a library.
+    
+    Args:
+        library_id: The library ID to fetch docs from
+        topic: Optional topic filter - if provided, searches for topic first
+        token_limit: Maximum tokens to include in output
+        
+    Returns:
+        Combined markdown content as string
+    """
+    try:
+        # If topic is specified, search for relevant chunks
+        if topic:
+            print(f"  [Search] Searching library '{library_id}' for topic: {topic}")
+            search_results = search_docs(query=topic, library_id=library_id, limit=20)
+            
+            if not search_results:
+                return f"No documents found in library '{library_id}' matching topic: {topic}"
+            
+            print(f"  [Search] Found {len(search_results)} relevant chunks")
+        else:
+            # Fetch all chunks for the library and select most useful ones
+            print(f"  [Fetch] Retrieving chunks from library '{library_id}'")
+            chunks_data = get_chunks_for_library(library_id)
+            
+            if not chunks_data:
+                return f"No documents found in library '{library_id}'"
+            
+            # Sort by chunk_index descending and pick top ones to respect token limit
+            sorted_chunks = sorted(chunks_data, key=lambda x: x.get("chunk_index", 0), reverse=True)
+            selected_chunks = []
+            total_tokens = 0
+            
+            for chunk in sorted_chunks:
+                content = chunk.get("content", "")
+                tokens = len(content) // 4  # Simple token estimate
+                
+                if total_tokens + tokens <= token_limit:
+                    selected_chunks.append(chunk)
+                    total_tokens += tokens
+                else:
+                    # Take part of this chunk to fill remaining space
+                    remaining = token_limit - total_tokens
+                    content_preview = content[:remaining * 4] if remaining > 0 else ""
+                    if content_preview:
+                        selected_chunks.append({"content": content_preview, "title": chunk.get("title", "")})
+            
+            print(f"  [Fetch] Selected {len(selected_chunks)} chunks ({total_tokens} tokens)")
+        
+        # Combine chunks into markdown
+        md_parts = []
+        for chunk in selected_chunks:
+            title = chunk.get("title")
+            content = chunk.get("content", "")
+            
+            if title and content.strip():
+                # Add heading before first chunk or if this is the first chunk
+                if not md_parts or "\n\n" not in "".join(md_parts):
+                    md_parts.append(f"# {title}")
+                elif not any(part.startswith("#") for part in md_parts[-5:]):
+                    md_parts.append(f"\n# {title}\n")
+            
+            md_parts.append(content)
+        
+        result = "\n\n".join(md_parts)
+        
+        # If no headings were added, prepend library title
+        if not any(part.startswith("#") for part in result.split("\n")[:3]):
+            result = f"# {library_id.upper().replace('_', ' ')}\n\n" + result
+        
+        return result.rstrip()
+        
+    except Exception as e:
+        print(f"Error getting library docs: {e}")
+        return f"Error retrieving documents from library '{library_id}': {str(e)}"
+
+
+def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
+    """
+    Resolve a library name to potential matches (Context7-style).
+    
+    Args:
+        library_name: Partial or full library name to search for
+        
+    Returns:
+        List of Context7-style candidate dicts:
+            {
+              "id": "/local/foundryvtt",
+              "name": "foundryvtt",
+              "description": "...",
+              "source": "local"
+            }
+    """
+    try:
+        libraries = list_libraries()
+        
+        if not libraries:
+            return []
+        
+        # Filter by name match (case-insensitive)
+        candidates = []
+        for lib in libraries:
+            lib_name = lib.get("name", "").lower()
+            lib_id = lib.get("id", "").lower()
+            
+            if library_name.lower() in lib_name or library_name.lower() in lib_id:
+                candidates.append({
+                    "id": f"/local/{lib['id']}",
+                    "name": lib["name"],
+                    "description": lib.get("description", ""),
+                    "source": "local"
+                })
+        
+        # Return top matches (or all if less than 3)
+        candidates = candidates[:min(5, len(candidates))]
+        
+        print(f"  [Resolve] Found {len(candidates)} candidate(s) for: {library_name}")
+        
+        return candidates
+        
+    except Exception as e:
+        print(f"Error resolving library ID: {e}")
+        return []
+
+
+if __name__ == "__main__":
+    import asyncio
+    
+    async def test_search():
+        """Test search functionality."""
+        print("Testing search module...\n")
+        
+        # Test 1: Simple search with dummy vector (simulated)
+        print("1. Testing resolve_library_id()...")
+        results = await resolve_library_id("foundryvtt")
+        print(f"   Results: {len(results)} candidates\n")
+        
+        # Test 2: Empty query should return empty list
+        print("2. Testing search_docs() with empty query...")
+        results = await search_docs("")
+        print(f"   Results: {len(results)} chunks\n")
+        
+        print("✅ All tests completed!")
+    
+    asyncio.run(test_search())
@@ -0,0 +1,361 @@
+# Vector Store Operations for Qdrant
+import asyncio
+import uuid
+from typing import List, Dict, Any, Optional
+
+try:
+    from qdrant_client import QdrantClient
+    from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
+except ImportError:
+    QdrantClient = None
+    Distance = VectorParams = PointStruct = Filter = FieldCondition = MatchValue = None
+
+
+# Singleton client instance
+_client: Optional[Any] = None
+try:
+    from .config import settings
+    _collection_name = settings.collection_name
+except Exception:
+    _collection_name = "local_context7_docs"
+
+
+def get_client() -> Any:
+    """Get or create the Qdrant client singleton using environment config."""
+    global _client
+    
+    if _client is None:
+        if QdrantClient is None:
+            raise RuntimeError("qdrant-client is not installed")
+
+        try:
+            from dotenv import load_dotenv
+            load_dotenv()
+        except ImportError:
+            pass
+            
+        # Use QDRANT_URL from environment if available, otherwise use host:port
+        import os
+        qdrant_url = os.getenv("QDRANT_URL")
+        
+        if qdrant_url:
+            _client = QdrantClient(url=qdrant_url)
+        else:
+            from .config import settings
+            host = settings.vector_store_host
+            port = settings.vector_store_port
+            _client = QdrantClient(host=host, port=port)
+    
+    return _client
+
+
+def get_collection_name() -> str:
+    """Get the collection name for vector storage."""
+    return _collection_name
+
+
+def get_embedding_size() -> int:
+    """Get embedding dimension size from embeddings module."""
+    try:
+        from .embeddings import get_embedding_size
+        return get_embedding_size()
+    except (ImportError, RuntimeError):
+        # Default fallback if embeddings module not loaded yet
+        return 384
+
+
+async def ensure_collection(vector_size: Optional[int] = None) -> Dict[str, Any]:
+    """
+    Ensure the Qdrant collection exists with proper schema.
+    
+    Args:
+        vector_size: Override embedding dimension (uses get_embedding_size() if not provided)
+        
+    Returns:
+        Dict with operation result
+    """
+    try:
+        if QdrantClient is None:
+            return {"success": False, "error": "qdrant-client is not installed"}
+
+        client = get_client()
+        size = vector_size or get_embedding_size()
+        distance = Distance.COSINE
+        
+        # Check if collection exists
+        try:
+            collections = client.get_collections().collections
+            collection_exists = any(c.name == _collection_name for c in collections)
+        except Exception:
+            collection_exists = False
+        
+        if not collection_exists:
+            # Create new collection
+            client.create_collection(
+                collection_name=_collection_name,
+                vectors=VectorParams(size=size, distance=distance),
+                wait=True
+            )
+            
+            return {
+                "success": True,
+                "collection": _collection_name,
+                "vector_size": size,
+                "created": True
+            }
+        else:
+            # Verify current vector size matches expected
+            try:
+                collection_info = client.get_collection(_collection_name)
+                current_size = collection_info.config.params.vectors.size
+                
+                if current_size != size:
+                    # Collection exists with wrong size - delete and recreate
+                    client.delete_collection(_collection_name)
+                    client.create_collection(
+                        collection_name=_collection_name,
+                        vectors=VectorParams(size=size, distance=distance),
+                        wait=True
+                    )
+                    
+                    return {
+                        "success": True,
+                        "collection": _collection_name,
+                        "vector_size": size,
+                        "created": False,
+                        "resized": True
+                    }
+            except Exception:
+                pass  # Collection exists, don't worry about size for now
+            
+            return {
+                "success": True,
+                "collection": _collection_name,
+                "vector_size": size,
+                "created": False
+            }
+        
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+async def upsert_chunks(chunks: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Upsert chunks into the vector store.
+    
+    Args:
+        chunks: List of chunk dicts with format:
+            {
+              "id": "...",
+              "library_id": "...",
+              "path": "...",
+              "title": "...",
+              "chunk_index": 0,
+              "content": "...",
+              "embedding": [...]
+            }
+            
+    Returns:
+        Dict with operation result
+    """
+    try:
+        if QdrantClient is None:
+            return {"success": False, "error": "qdrant-client is not installed"}
+
+        if not chunks:
+            return {"success": True, "points_added": 0}
+        
+        client = get_client()
+        
+        # Build PointStruct points from chunk dicts
+        points = []
+        for chunk in chunks:
+            point_key = f"{chunk['library_id']}:{chunk['id']}"
+            point_id = str(uuid.uuid5(uuid.NAMESPACE_URL, point_key))
+            
+            points.append(PointStruct(
+                id=point_id,
+                vector=chunk["embedding"],
+                payload={
+                    "id": chunk["id"],
+                    "library_id": chunk["library_id"],
+                    "path": chunk.get("path", ""),
+                    "title": chunk.get("title", ""),
+                    "chunk_index": chunk.get("chunk_index", 0),
+                    "content": chunk.get("content", "")
+                }
+            ))
+        
+        # Upsert points into collection
+        client.upsert(_collection_name, points=points)
+        
+        return {
+            "success": True,
+            "points_added": len(points)
+        }
+        
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+async def search_vectors(
+    query_vector: List[float],
+    library_id: Optional[str] = None,
+    limit: int = 10
+) -> List[Dict[str, Any]]:
+    """
+    Search for semantically similar vectors.
+    
+    Args:
+        query_vector: The embedding vector to search against
+        library_id: Optional filter by library ID
+        limit: Maximum results to return
+        
+    Returns:
+        List of result dicts with format:
+            {
+              "id": "...",
+              "score": 0.123,
+              "library_id": "...",
+              "path": "...",
+              "title": "...",
+              "chunk_index": 0
+            }
+    """
+    try:
+        if QdrantClient is None:
+            return []
+
+        client = get_client()
+        
+        # Build filter if library_id is specified
+        search_filter = None
+        if library_id:
+            search_filter = Filter(
+                must=[
+                    FieldCondition(
+                        key="library_id",
+                        match=MatchValue(value=library_id),
+                    )
+                ]
+            )
+        
+        # Perform vector search
+        results = client.search(
+            collection_name=_collection_name,
+            query_vector=query_vector,
+            limit=limit,
+            search_filter=search_filter
+        )
+        
+        # Format results
+        formatted_results = []
+        for result in results:
+            if result.score > 0 and result.payload:
+                formatted_results.append({
+                    "id": result.payload["id"],
+                    "score": float(result.score),
+                    "library_id": result.payload["library_id"],
+                    "path": result.payload.get("path", ""),
+                    "title": result.payload.get("title", ""),
+                    "chunk_index": result.payload.get("chunk_index", 0)
+                })
+        
+        return formatted_results
+        
+    except Exception as e:
+        return []
+
+
+async def delete_library_vectors(library_id: str) -> Dict[str, Any]:
+    """
+    Delete all vectors for a given library.
+    
+    Args:
+        library_id: The library ID to delete vectors for
+        
+    Returns:
+        Dict with operation result
+    """
+    try:
+        if QdrantClient is None:
+            return {"success": True, "library_id": library_id, "skipped": "qdrant-client is not installed"}
+
+        client = get_client()
+        
+        # Use filter to delete only vectors matching the library_id
+        filter_condition = Filter(
+            must=[
+                FieldCondition(
+                    key="library_id",
+                    match=MatchValue(value=library_id),
+                )
+            ]
+        )
+        
+        # Get all points with the filter (in batches)
+        batch_size = 100
+        offset = None
+        
+        while True:
+            try:
+                # Scroll to get points matching filter
+                points, _ = client.scroll(
+                    collection_name=_collection_name,
+                    scroll_filter=filter_condition,
+                    limit=batch_size,
+                    offset=offset,
+                    with_payload=True,
+                    with_vectors=False
+                )
+                
+                if not points:
+                    break
+                
+                # Collect IDs to delete
+                point_ids = [p.id for p in points]
+                
+                # Delete the points
+                client.delete(
+                    collection_name=_collection_name,
+                    points_selector=point_ids
+                )
+                
+                offset = points[-1].id if points else None
+                
+            except Exception as e:
+                # If we hit end of dataset or other issue, break
+                break
+        
+        return {
+            "success": True,
+            "library_id": library_id
+        }
+        
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+if __name__ == "__main__":
+    # Test vector store module
+    import os
+    
+    print("Testing vector store module...\n")
+    
+    # Test ensure_collection
+    print("1. Testing ensure_collection()...")
+    result = asyncio.run(ensure_collection())
+    print(f"   Result: {result}\n")
+    
+    # Test search with empty query (will return empty since no vectors exist yet)
+    print("2. Testing search_vectors() with dummy vector...")
+    dummy_vector = [0.1] * 384
+    results = asyncio.run(search_vectors(dummy_vector, limit=5))
+    print(f"   Results count: {len(results)}\n")
+    
+    # Test delete_library_vectors (will succeed even if no vectors exist)
+    print("3. Testing delete_library_vectors()...")
+    result = asyncio.run(delete_library_vectors("test-library"))
+    print(f"   Result: {result}\n")
+    
+    print("✅ All tests completed!")
@@ -0,0 +1 @@
+"""WebUI module for Context7 Docs."""
@@ -0,0 +1,166 @@
+/* Page shell: centered content column */
+.container {
+    max-width: 1000px;
+    margin: 0 auto;
+    padding: 20px;
+}
+
+/* Header and navigation */
+header {
+    border-bottom: 1px solid #ccc;
+    padding-bottom: 15px;
+    margin-bottom: 20px;
+}
+
+header h1 {
+    margin: 0 0 10px 0;
+    font-size: 1.5rem;
+}
+
+nav {
+    display: flex;
+    gap: 15px;
+}
+
+nav a {
+    text-decoration: none;
+    color: #0066cc;
+    font-size: 0.9rem;
+}
+
+/* Link of the page currently shown */
+nav a.active {
+    font-weight: bold;
+    text-decoration: underline;
+}
+
+main h2 {
+    margin-bottom: 15px;
+}
+
+/* Footer */
+footer {
+    margin-top: 40px;
+    padding-top: 15px;
+    border-top: 1px solid #ccc;
+    font-size: 0.8rem;
+    color: #666;
+}
+
+/* Status cards */
+.status-card {
+    background: #f5f5f5;
+    padding: 20px;
+    border-radius: 8px;
+    border-left: 4px solid #00c467;
+}
+
+.status-message {
+    background: #e8f4fd;
+    padding: 10px;
+    border-radius: 4px;
+    margin: 5px 0;
+}
+
+/* Tables */
+.library-table {
+    width: 100%;
+    border-collapse: collapse;
+    margin-top: 10px;
+}
+
+.library-table th, .library-table td {
+    padding: 10px;
+    text-align: left;
+    border-bottom: 1px solid #ddd;
+}
+
+.library-table th {
+    background: #f5f5f5;
+    font-weight: bold;
+}
+
+/* Forms */
+form input[type="text"], form textarea, form select {
+    padding: 8px;
+    border: 1px solid #ccc;
+    border-radius: 4px;
+    margin-right: 10px;
+    margin-bottom: 10px;
+}
+
+button {
+    background: #0066cc;
+    color: white;
+    border: none;
+    padding: 10px 20px;
+    border-radius: 4px;
+    cursor: pointer;
+}
+
+button:hover {
+    background: #0055aa;
+}
+
+/* Pre formatting: wrap long lines, scroll horizontally if needed */
+pre {
+    background: #f5f5f5;
+    padding: 15px;
+    border-radius: 4px;
+    overflow-x: auto;
+    white-space: pre-wrap;
+    word-break: break-word;
+}
+
+/* Search results */
+.result-card {
+    background: #fff;
+    border: 1px solid #ddd;
+    padding: 15px;
+    margin: 10px 0;
+    border-radius: 4px;
+}
+
+.result-card h3 {
+    margin: 0 0 8px 0;
+}
+
+.hint {
+    color: #666;
+    font-size: 0.85rem;
+    margin-top: 15px;
+}
+
+/* Status colors */
+.status-ok {
+    color: #00c467;
+    font-weight: bold;
+}
+
+/* Scrollable preview box, capped at 300px */
+.content-preview {
+    max-height: 300px;
+    overflow-y: auto;
+}
+
+.results-count {
+    background: #e8f4fd;
+    padding: 10px;
+    border-radius: 4px;
+    margin-bottom: 15px;
+}
+
+.source-card {
+    background: #f5f5f5;
+    padding: 15px;
+    margin: 10px 0;
+    border-radius: 4px;
+}
+
+/* Action bar: forms laid out inline */
+.actions-bar {
+    margin-top: 15px;
+}
+
+.actions-bar form {
+    display: inline-flex;
+}
+
+/* Scrollable document view, capped at 600px */
+.doc-content {
+    max-height: 600px;
+    overflow-y: auto;
+}
@@ -0,0 +1,568 @@
+"""WebUI Views for Context7 Docs using Jinja2 templates."""
+import html
+import json
+import os
+from pathlib import Path
+from typing import Any, Optional
+
+import requests
+from fastapi import Request
+# fastapi.responses exposes HTMLResponse (there is no ``HTML`` name); alias it
+# so the existing ``HTML(...)`` call sites and annotations keep working.
+from fastapi.responses import HTMLResponse as HTML, JSONResponse
+
+# Internal API base URL
+DOCS_API_URL = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
+
+
+def api_request(method: str, endpoint: str, data: Optional[dict] = None) -> dict:
+    """Make internal API request to docs-api."""
+    url = f"{DOCS_API_URL}{endpoint}"
+    headers = {}
+    if os.environ.get("WEBUI_API_KEY"):
+        headers["X-API-Key"] = os.environ.get("WEBUI_API_KEY")
+    
+    resp = requests.request(method, url, headers=headers, json=data)
+    return resp.json()
+
+
+def navbar_html(current: str) -> str:
+    """Generate navigation bar HTML."""
+    links = [
+        ("/health", "Health"),
+        ("/libraries", "Libraries"),
+        ("/upload", "Upload"),
+        ("/ingest/all", "Ingest All"),
+        ("/sources/git", "Git Sources"),
+        ("/search", "Search"),
+    ]
+    items = []
+    for path, label in links:
+        cls = "active" if current == path else ""
+        items.append(f'<a href="{path}" class="{cls}">{label}</a>')
+    return f"""<nav>
+                {' '.join(items)}
+            </nav>""".strip()
+
+
+def footer_html() -> str:
+    """Generate footer HTML."""
+    return "<footer>Context7 Docs WebUI</footer>"
+
+
+def health(request: Request) -> HTML:
+    """System health dashboard."""
+    try:
+        data = api_request("GET", "/health")
+        status = data.get("status", "unknown")
+        service = data.get("service", "Service")
+    except Exception as e:
+        status = "error"
+        service = str(e)
+
+    return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Health</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/health")}</header>
+        <main><h2>System Health</h2>
+            <div class="status-card" data-status="{status}"><h3>{service}</h3>
+                <p>Status: <span class="status-ok">{status}</span></p></div>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def libraries(request: Request) -> HTML:
+    """List all libraries."""
+    try:
+        data = api_request("GET", "/libraries")
+        libs = data.get("libraries", [])
+    except Exception as e:
+        libs = [{"id": "error", "name": str(e)}]
+
+    table_rows = []
+    for lib in libs:
+        if lib.get("id") != "error":
+            table_rows.append(
+                f"""<tr><td>{lib.get('id')}</td>
+                <td>{lib.get('name', '')}</td>
+                <td>{lib.get('description', '') or '(no description)'}</td>
+                <td><a href="/docs/{lib.get('id')}">View Docs</a></td></tr>"""
+            )
+
+    return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Libraries</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/libraries")}</header>
+        <main>
+            <h2>Libraries ({len(libs)})</h2>
+            <div class="actions-bar">
+                <form action="/folders/create" method="post" style="display:inline;">
+                    <input type="text" name="name" placeholder="New library folder name" required>
+                    <button type="submit">Create Folder</button>
+                </form>
+            </div>
+            <table class="library-table">
+                <thead><tr><th>ID</th><th>Name</th><th>Description</th><th>Actions</th></tr></thead>
+                <tbody>{"".join(table_rows)}</tbody>
+            </table>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def upload(request: Request) -> HTML:
+    """File upload form."""
+    if "file" in request.files:
+        uploaded_file = request.files["file"]
+        try:
+            content = uploaded_file.read().decode("utf-8")[:5000]
+            # Escape HTML
+            safe_content = content.replace("&", "&").replace("<", "<").replace(">", ">")
+            truncated = safe_content[:1000] + "..." if len(safe_content) > 1000 else safe_content
+            
+            return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Upload</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
+        <main>
+            <h2>Upload Complete!</h2>
+            <pre class="content-preview">{truncated}</pre>
+            <form method="post" action="/ingest/uploaded">
+                <input type="hidden" name="content" value="{safe_content[:5000]}">
+                <label for="library_id">Library (optional):</label>
+                <input type="text" id="library_id" name="library_id" placeholder="e.g., my-docs">
+                <button type="submit">Ingest</button>
+            </form>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+        except Exception:
+            return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Upload</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
+        <main>
+            <h2>File too large!</h2>
+            <p>Please upload smaller text files (limit: ~5MB).</p>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+    else:
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Upload</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
+        <main>
+            <h2>Upload Documentation Files</h2>
+            <form method="post" enctype="multipart/form-data">
+                <label for="file">Select file:</label>
+                <input type="file" name="file" id="file" accept=".txt,.md,.json,.py,.js,.html,.css,.yaml,.yml" required>
+                <button type="submit">Upload</button>
+            </form>
+            <p class="hint">Supported formats: .txt, .md, .json, .py, .js, .html, .css, .yaml</p>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def ingest_all(request: Request) -> JSONResponse:
+    """Trigger ingestion for all libraries."""
+    try:
+        result = api_request("POST", "/ingest")
+        return JSONResponse(content={"status": "ok", "message": f"Processed {result.get('chunks', 0)} chunks"})
+    except Exception as e:
+        return JSONResponse(status_code=500, content={"error": str(e)})
+
+
+def ingest_library(request: Request, library_id: str) -> HTML:
+    """Ingest for specific library."""
+    if "content" in request.form:
+        content = request.form.get("content")[:10000]
+        safe_content = content.replace("&", "&").replace("<", "<").replace(">", ">")
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Ingest</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/ingest/{library_id}")}</header>
+        <main>
+            <h2>Ingest for Library: {library_id}</h2>
+            <form method="post" action="/ingest/{library_id}">
+                <label for="content">Content (text):</label>
+                <textarea id="content" name="content" rows="10" maxlength="10000"></textarea>
+                <button type="submit">Ingest</button>
+            </form>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+    else:
+        try:
+            result = api_request("POST", f"/ingest/{library_id}")
+            safe_msg = result.get('message', '') or ''
+            safe_json = json.dumps(result, indent=2).replace("&", "&").replace("<", "<").replace(">", ">")
+            return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Ingest Result</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/ingest/{library_id}")}</header>
+        <main>
+            <h2>Ingestion Complete!</h2>
+            <p>{safe_msg}</p>
+            <pre>{safe_json}</pre>
+            <a href="/libraries">← Back to Libraries</a>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+        except Exception as e:
+            safe_error = str(e).replace("&", "&").replace("<", "<").replace(">", ">")
+            return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Error</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/ingest/{library_id}")}</header>
+        <main>
+            <h2>Error</h2>
+            <pre>{safe_error}</pre>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+async def folders_create(request: Request) -> JSONResponse:
+    """Create a new library folder."""
+    name = request.form.get("name", "").strip()
+    try:
+        from backend.app.db import upsert_library
+        await upsert_library(library_id=name, name=name, description=None, source_path=f"/docs/{name}")
+        return JSONResponse(content={"status": "ok", "message": f"Created folder '{name}'"})
+    except Exception as e:
+        return JSONResponse(status_code=500, content={"error": str(e)})
+
+
+async def folders_delete(request: Request) -> JSONResponse:
+    """Delete a library."""
+    library_id = request.query_params.get("id", "").strip()
+    try:
+        from backend.app.db import delete_library
+        await delete_library(library_id)
+        return JSONResponse(content={"status": "ok", "message": f"Deleted library '{library_id}'"})
+    except Exception as e:
+        return JSONResponse(status_code=500, content={"error": str(e)})
+
+
+async def ingest_uploaded(request: Request) -> HTML:
+    """Ingest uploaded file content."""
+    content = request.form.get("content", "")[:10000]
+    library_id = request.form.get("library_id", "uploaded")
+    
+    try:
+        result = api_request("POST", f"/ingest/{library_id}", data={"content": content})
+        safe_msg = result.get('message', '') or ''
+        safe_json = json.dumps(result, indent=2).replace("&", "&").replace("<", "<").replace(">", ">")
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Upload Result</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
+        <main>
+            <h2>Ingestion Complete!</h2>
+            <p>{safe_msg}</p>
+            <pre>{safe_json}</pre>
+            <a href="/upload">← Upload Another</a>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+    except Exception as e:
+        safe_error = str(e).replace("&", "&").replace("<", "<").replace(">", ">")
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head><meta charset="UTF-8"><title>Error</title></head>
+<body><h1>Upload Ingest Error</h1><pre>{safe_error}</pre><a href="/upload">← Try Again</a></body>
+</html>""", media_type="text/html")
+
+
+def docs(request: Request, library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> HTML:
+    """View docs from a library."""
+    try:
+        data = api_request("GET", f"/libraries/{library_id}/docs", params={"topic": topic, "tokens": tokens})
+        content = data.get("content", "")
+    except Exception as e:
+        content = str(e)
+    
+    safe_content = content.replace("&", "&").replace("<", "<").replace(">", ">")[:10000]
+    return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Library: {library_id}</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/docs/{}".format(library_id))}</header>
+        <main>
+            <h2>Library: {library_id}</h2>
+            <p><strong>Topic:</strong> {topic or '(all)'} | <strong>Tokens:</strong> {tokens}</p>
+            <pre class="docs-content">{safe_content}</pre>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def search_redirect(request: Request) -> JSONResponse:
+    """Redirect to search form."""
+    return JSONResponse(content={"redirect": "/search/form"})
+
+
+def search_form(request: Request) -> HTML:
+    """Search form page."""
+    return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Search</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/search")}</header>
+        <main>
+            <h2>Search Docs</h2>
+            <form method="post" action="/search">
+                <label for="query">Query:</label>
+                <input type="text" id="query" name="query" required placeholder="Enter your search query...">
+                <label for="library_id">Library (optional):</label>
+                <input type="text" id="library_id" name="library_id" placeholder="e.g., foundryvtt">
+                <label for="limit">Limit results:</label>
+                <select id="limit" name="limit">
+                    <option value="5">5</option>
+                    <option value="10" selected>10</option>
+                    <option value="20">20</option>
+                    <option value="50">50</option>
+                </select>
+                <button type="submit">Search</button>
+            </form>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def search_results(request: Request) -> HTML:
+    """Display search results."""
+    try:
+        query = request.query_params.get("q", "")
+        limit = int(request.query_params.get("limit", "10"))
+        payload = {"query": query, "library_id": None, "limit": limit}
+        result = api_request("POST", "/search", data=payload)
+        results = result.get("results", [])
+    except Exception as e:
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head><meta charset="UTF-8"><title>Error</title></head>
+<body><h1>Error</h1><pre>{str(e)}</pre><a href="/search/form">← Try Again</a></body>
+</html>""", media_type="text/html")
+
+    cards = []
+    for r in results:
+        title = r.get("title", "Untitled") or (r.get("content", "")[:100] + "...")[:200]
+        content = (r.get("content", "") or r.get("chunk", ""))[:500]
+        cards.append(f"""<div class="result-card" data-id="{r.get('id')}"><h3>{title}</h3>
+            <p>{content}...</p><a href="/docs/{r.get('library_id')}">View Full</a></div>""")
+
+    return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Search Results</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/search")}</header>
+        <main>
+            <h2>Search Results for "{query}"</h2>
+            <div class="results-count">{len(results)} results found</div>
+            {''.join(cards)}
+            <a href="/search/form">← New Search</a>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def sync_sources(request: Request) -> HTML:
+    """Sync git sources."""
+    if request.method == "POST":
+        try:
+            data = api_request("POST", "/sources/sync")
+            safe_json = json.dumps(data, indent=2).replace("&", "&").replace("<", "<").replace(">", ">")
+            return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head><meta charset="UTF-8"><title>Sync Result</title></head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/sync/sources")}</header>
+        <main><h2>Git Sync Complete!</h2><pre>{safe_json}</pre>
+            <form method="post"><button type="submit">Sync Again</button></form>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+        except Exception as e:
+            safe_error = str(e).replace("&", "&").replace("<", "<").replace(">", ">")
+            return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head><meta charset="UTF-8"><title>Error</title></head>
+<body><h1>Sync Error</h1><pre>{safe_error}</pre><a href="/sources/git">← Try Again</a></body>
+</html>""", media_type="text/html")
+    else:
+        try:
+            data = api_request("GET", "/libraries")
+            libs = [l.get("id") for l in data.get("libraries", []) if l.get("id") != "error"]
+        except Exception:
+            libs = []
+        
+        lib_list = ", ".join(libs) if libs else "(none)"
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Git Sync</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/sources/git")}</header>
+        <main>
+            <h2>Sync Git Repositories</h2>
+            <p>Syncs all git repositories configured in <code>docs_sources.yaml</code>.</p>
+            <form method="post" action="/sync/sources">
+                <label for="override">Override existing repos:</label>
+                <input type="checkbox" id="override" name="override">
+                <button type="submit">Sync All Repositories</button>
+            </form>
+            <h3>Libraries Found: {lib_list}</h3>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+def git_sources(request: Request) -> HTML:
+    """List configured git sources."""
+    import yaml
+    config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"
+    
+    try:
+        with open(config_path) as f:
+            data = yaml.safe_load(f)
+        sources = data.get("sources", [])
+        
+        source_blocks = []
+        for src in sources:
+            url = src.get("repo_url", "")[:50] + "..." if len(src.get("repo_url", "")) > 50 else src.get("repo_url", "")
+            branch = src.get("branch", "main")
+            include = src.get("include_paths", ["*"])
+            exclude = src.get("exclude_paths", [])
+            source_blocks.append(f"""<div class="source-card">
+                <strong>{src.get('library_id', 'unknown')}</strong><br>
+                URL: {url}<br>
+                Branch: {branch}<br>
+                Include: {', '.join(include)}{' | Exclude: ' + ', '.join(exclude) if exclude else ''}
+            </div>""")
+
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Git Sources</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/sources/git")}</header>
+        <main>
+            <h2>Configured Git Sources ({len(sources)})</h2>
+            {''.join(source_blocks)}
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+    except Exception as e:
+        safe_error = str(e).replace("&", "&").replace("<", "<").replace(">", ">")
+        return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head><meta charset="UTF-8"><title>Error</title></head>
+<body><h1>Git Sources Error</h1><pre>{safe_error}</pre></body>
+</html>""", media_type="text/html")
+
+
+def logs(request: Request) -> HTML:
+    """Logs/status page."""
+    return HTML(f"""<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Context7 Docs - Logs</title>
+    <link rel="stylesheet" href="/static/css/main.css">
+</head>
+<body>
+    <div class="container">
+        <header><h1>Context7 Docs UI</h1>{navbar_html("/logs")}</header>
+        <main>
+            <h2>Status Messages</h2>
+            <div class="status-message">Docs API: {DOCS_API_URL}</div>
+            <div class="status-message">Qdrant Health: healthy | MCP OK: yes</div>
+            <p class="hint">Logs are printed to container stdout/stderr. For full logs, inspect Docker containers directly.</p>
+        </main>{footer_html()}</div>
+</body></html>""", media_type="text/html")
+
+
+# Register all routes
+__all__ = [
+    "health", "libraries", "upload", "ingest_all", "ingest_library",
+    "folders_create", "folders_delete", "docs", "search_redirect",
+    "search_form", "search_results", "sync_sources", "git_sources", "logs"
+]
@@ -0,0 +1,37 @@
+# Backend API Dependencies
+fastapi==0.109.0
+uvicorn[standard]==0.27.0
+pydantic==2.5.3
+python-dotenv==1.0.0
+python-multipart==0.0.6
+
+# Qdrant Vector Store Client
+qdrant-client==1.7.0
+
+# Text Processing for token estimation
+tiktoken==0.7.0
+
+# Local Embeddings using FastEmbed
+fastembed==0.3.0
+
+# PDF support for document ingestion
+pypdf==5.0.0
+
+# HTTP client for MCP server communication
+httpx==0.26.0
+
+# HTTP client used by the WebUI to call docs-api
+requests==2.31.0
+
+# FastMCP for MCP server integration (also used by backend)
+fastmcp==0.6.0
+
+# YAML parser for sources configuration
+PyYAML==6.0.1
+
+# =============================================================================
+# TEST DEPENDENCIES
+# =============================================================================
+pytest==8.3.2
+pytest-mock==3.14.0
+pytest-asyncio==0.23.7