Initial DocsMCP stack

2026-06-05 23:02:55 +01:00
commit 421b6f973a
51 changed files with 7414 additions and 0 deletions
@@ -0,0 +1,31 @@
 # Context7 Docs API Configuration
 # Copy this file to .env and configure for your environment
 # === Service Ports (optional - use if you need custom ports) ===
 HOST_PORT=8787
 MCP_HOST_PORT=8788
 # === API Keys (optional - uncomment to enable auth) ===
 # Docs API key for protecting endpoints like /search, /ingest, etc.
 # DOCS_API_KEY=your-secret-docs-api-key
 # MCP Server API key for protecting MCP tools via HTTP
 # MCP_API_KEY=your-secret-mcp-server-key
 # === Application Configuration ===
 # Path to documentation files (absolute path inside the service container)
 DOCS_PATH=/docs
 # SQLite database path
 DB_PATH=/data/db.sqlite
 # Logging level: DEBUG, INFO, WARNING, ERROR
 LOG_LEVEL=INFO
 # === Vector Store ===
 # Qdrant host and port (internal Docker network)
 VECTOR_STORE_HOST=qdrant
 VECTOR_STORE_PORT=6333
 # === Git Sources (if using) ===
 # See docs_sources.yaml for git source configuration
@@ -0,0 +1,10 @@
 __pycache__/
 *.py[cod]
 .pytest_cache/
 .env
 data/*
 !data/.gitkeep
 backend/data/*
 .DS_Store
@@ -0,0 +1,106 @@
 # Makefile for local-context7
 # Common development and deployment commands
 # Every target in this file is a command rather than a file it produces, so
 # all of them are declared phony. Without this, a stray file or directory
 # named e.g. "reset", "logs" or "health" would make the target look up to date
 # and its recipe would silently be skipped.
 .PHONY: help install install-dev deps lint clean
 .PHONY: test test-unit test-coverage test-file
 .PHONY: docker-up docker-down logs log-backend health backup-db reset
 # Running plain `make` prints the command overview.
 .DEFAULT_GOAL := help
 ## Help - Show available commands
 # Keep this list in sync with the targets defined below; it is the only
 # user-facing index of what the Makefile offers.
 help:
 	@echo "Available commands:"
 	@echo "  make install       - Install all Python dependencies (backend + tests)"
 	@echo "  make install-dev   - Install dependencies plus linting/typing/coverage tools"
 	@echo "  make deps          - Upgrade all dependencies to latest versions"
 	@echo "  make test          - Run all tests with pytest"
 	@echo "  make test-unit     - Run only unit tests (no external dependencies)"
 	@echo "  make test-coverage - Run tests with a coverage report"
 	@echo "  make test-file     - Run a single test file (file=path/to/test.py)"
 	@echo "  make lint          - Run linters (if configured)"
 	@echo "  make docker-up     - Start Docker containers for development"
 	@echo "  make docker-down   - Stop Docker containers"
 	@echo "  make logs          - Follow logs of all services"
 	@echo "  make log-backend   - Follow logs of the docs-api service"
 	@echo "  make health        - Show container status"
 	@echo "  make backup-db     - Dump the SQLite database to a gzip file (BACKUP_PATH=...)"
 	@echo "  make reset         - Delete ALL Qdrant and SQLite data and restart"
 	@echo "  make clean         - Remove generated files, databases, and caches"
 ## install - backend runtime requirements followed by the pytest tooling
 install:
 	pip install --requirement backend/requirements.txt
 	pip install pytest pytest-mock pytest-asyncio
 ## deps - bring the packaging tools, backend requirements and test tools up to date
 deps:
 	pip install -U pip setuptools wheel
 	pip install --upgrade --requirement backend/requirements.txt
 	pip install --upgrade pytest pytest-mock pytest-asyncio
 ## test - run the complete pytest suite with verbose output and short tracebacks
 test:
 	@echo "Running all tests..."
 	pytest --verbose --tb=short
 ## test-unit - run only tests marked "unit" (no Qdrant / FastEmbed needed)
 # Usable without the Docker containers running; tests/test_search.py is
 # excluded from collection entirely.
 test-unit:
 	@echo "Running unit tests only..."
 	pytest --verbose --tb=short -m unit --ignore=tests/test_search.py
 ## Run linting (requires flake8, e.g. via `make install-dev`)
 # Both trees are checked in a single flake8 invocation. With two separate
 # recipe lines, a failure in backend/ aborted the recipe before tests/ was
 # ever linted, hiding its violations until backend/ was clean.
 lint:
 	flake8 backend/ tests/
 ## Start Docker containers for full development environment
 # Uses the Compose v2 plugin syntax (`docker compose`) like every other
 # target in this file, instead of the legacy standalone `docker-compose`.
 docker-up:
 	docker compose up -d
 ## Stop Docker containers
 docker-down:
 	docker compose down
 ## Clean generated files, databases, and caches
 # Removes local SQLite files under backend/data, the embedding cache, pytest
 # and coverage output, and all Python bytecode below the project root.
 clean:
 	@echo "Cleaning up..."
 	rm -f backend/data/*.sqlite
 	rm -rf .embed_cache .pytest_cache htmlcov .coverage
 	# -prune stops find from descending into a __pycache__ directory it is about
 	# to delete. Errors stay suppressed on purpose: unreadable directories
 	# (e.g. container-owned data volumes) must not make `clean` fail.
 	find . -type d -name "__pycache__" -prune -exec rm -rf {} + 2>/dev/null || true
 	find . -type f \( -name "*.pyc" -o -name "*.pyo" \) -delete 2>/dev/null || true
 ## Install development dependencies (linting, typing, coverage)
 # pytest-cov is included because the test-coverage target passes --cov to
 # pytest, which fails with "unrecognized arguments" when the plugin is missing.
 install-dev: install
 	pip install flake8 mypy black pytest-cov
 ## test-coverage - run the suite and report coverage of backend/app
 # Produces an HTML report and a terminal listing of the missing lines.
 test-coverage:
 	pytest --verbose --cov=backend/app --cov-report=html --cov-report=term-missing
 ## Run specific test file
 # Usage: make test-file file=tests/test_example.py
 # The guard rejects an empty `file`; without it pytest would receive no path
 # and silently run the entire suite instead of the requested file.
 test-file:
 	@test -n "$(file)" || { echo "Usage: make test-file file=path/to/test_file.py" >&2; exit 1; }
 	pytest -v $(file)
 ## Backup SQLite database
 # Usage: make backup-db [BACKUP_PATH=path/to/file.sql.gz]
 # Writes a gzip-compressed SQL dump to BACKUP_PATH on the host; the default is
 # backups/db-<timestamp>.sql.gz.
 #
 # Notes:
 #  - `$$` passes a literal `$` to the shell. With a single `$`, Make itself
 #    expands `${BACKUP_PATH:-...}` and `$(date ...)` to empty strings.
 #  - The target file name is computed once and reused, so the path that is
 #    written and the path that is reported are always identical.
 #  - The dump is streamed out of the container (-T: no TTY) and compressed on
 #    the host, so the file lands in the host-side backups/ directory.
 #  - Assumes the sqlite3 CLI is available inside the docs-api container.
 backup-db:
 	@echo "Backing up SQLite database..."
 	@out="$${BACKUP_PATH:-backups/db-$$(date +%Y%m%d-%H%M%S).sql.gz}" && \
 		mkdir -p "$$(dirname "$$out")" && \
 		docker compose exec -T docs-api sqlite3 /data/db.sqlite .dump | gzip > "$$out" && \
 		echo "Backup complete: $$out"
 ## Reset all data (Qdrant and SQLite)
 # Asks for confirmation, then removes the Compose volumes, the SQLite file and
 # the Qdrant storage directory, and rebuilds/restarts the stack.
 #
 #  - printf + read replaces `read -p`, which is a bashism and fails under the
 #    default /bin/sh on systems where sh is not bash.
 #  - `rm -f` tolerates a database file that does not exist yet.
 #  - Cancelling exits 0 with "Reset cancelled."; a real failure of any step
 #    now fails the target instead of being reported as a cancellation.
 reset:
 	@echo "WARNING: This will delete all data in Qdrant and the SQLite database!"
 	@printf "Type 'yes' to confirm: "; \
 	read confirm; \
 	if [ "$$confirm" != "yes" ]; then \
 		echo "Reset cancelled."; \
 		exit 0; \
 	fi; \
 	docker compose down -v && \
 	rm -f ./data/db.sqlite && \
 	rm -rf ./data/qdrant && \
 	docker compose up -d --build && \
 	echo "Reset complete. Services restarted."
 ## logs - follow the combined log output of every Compose service
 logs:
 	docker compose logs --follow
 ## log-backend - follow the log output of the docs-api service only
 log-backend:
 	docker compose logs --follow docs-api
 ## health - list the Compose services and their current state
 health:
 	docker compose ps
@@ -0,0 +1,431 @@
 # Context7-style Docs MCP System
 A self-hosted, local-compatible documentation retrieval and search system using Docker. This project uses Qdrant for vector embeddings and SQLite for metadata storage, exposing a FastAPI docs backend and an MCP server for IDE/tool integration.
 ## 🏠 Home Server / Production Use
 This section covers hardening recommendations for running this system on a home server or in production.
 ### Environment Variables (`.env`)
 Copy `.env.example` to `.env` and configure:
 ```bash
 cp .env.example .env
 ```
 | Variable | Description | Example |
 |----------|-------------|---------|
 | `HOST_PORT` | Docs API host port (default: 8787) | `8787` |
 | `MCP_HOST_PORT` | MCP server host port (default: 8788) | `8788` |
 | `DOCS_API_KEY` | API key for docs-api authentication (optional) | `my-secret-key-123` |
 | `MCP_API_KEY` | API key for MCP server authentication (optional, FastMCP handles via --key flag conceptually) | `mcp-secret-key` |
 | `DOCS_PATH` | Path to documentation files inside container | `/docs` |
 | `DB_PATH` | SQLite database path inside container | `/data/db.sqlite` |
 | `LOG_LEVEL` | Logging level: DEBUG, INFO, WARNING, ERROR | `INFO` |
 > **Security Note:** API keys are optional. Leave empty in `.env` if you don't need authentication (backward compatible with existing setups). If set, the docs-api requires an `X-API-Key` header matching `DOCS_API_KEY` for protected endpoints.
 ### Port Configuration
 For firewall or network setup:
 ```bash
 # Example: Run docs-api on port 9000 instead of 8787
 HOST_PORT=9000 MCP_HOST_PORT=9001 docker compose up -d --build
 ```
 ### Backup Instructions
 #### SQLite Database (`data/db.sqlite`)
 Regular SQLite backups prevent data loss. Example cron job:
 ```bash
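 # Add to crontab (run daily at 2am). In a crontab, % is special and must be
 # escaped as \%; the job also has to run from the project directory and
 # without a TTY (-T).
 0 2 * * * cd /path/to/local-context7 && docker compose exec -T docs-api sqlite3 /data/db.sqlite ".backup '/backups/db_$(date +\%Y\%m\%d).sqlite'"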
 ```
 Or one-off backup:
 ```bash
 docker compose exec docs-api sh -c "sqlite3 /data/db.sqlite '.dump' | gzip > /backups/db-$(date +%Y%m%d-%H%M%S).sql.gz"
 ```
 #### Qdrant Vector Store
 Qdrant stores vectors in `./data/qdrant`. For backup:
 ```bash
 # Backup entire Qdrant data directory
 docker compose exec qdrant sh -c "tar czf /backups/qdrant-backup-$(date +%Y%m%d).tar.gz /qdrant/storage"
 # Or pull full export to host (requires volume mount)
 docker run --rm -v local-context7_data:/data -v $(pwd)/backups:/backups qdrant/qdrant:latest tar czf /backups/qdrant-backup-$(date +%Y%m%d).tar.gz /qdrant/storage
 ```
 ### Safe Reset Command
 To reset both SQLite and Qdrant cleanly:
 ```bash
 docker compose down -v  # Removes volumes and stops services
 rm ./data/db.sqlite     # Remove database file
 rm -rf ./data/qdrant    # Remove Qdrant data
 docker compose up -d --build
 ```
 Or use the `make reset` command below.
 ### Makefile Commands
 The included `Makefile` provides convenient commands:
 ```bash
 # Start services
 make docker-up
 # Stop services
 make docker-down
 # Restart (run `docker compose up -d --build` instead to also rebuild images)
 make docker-down && make docker-up
 # Backup database
 make backup-db BACKUP_PATH=/backups/db-$(date +%Y%m%d).sqlite.gz
 # Reset everything (delete volumes)
 make reset
 ```
 ---
 ## Architecture
 ```
 ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
 │   Client    │────▶│ docs-api    │◀────│ docs-mcp    │
 │ (IDE/Tool)  │     │ (FastAPI)   │     │ (MCP Server)│
 └─────────────┘     └─────────────┘     └─────────────┘
                          │
                          ▼
                    ┌─────────────┐
                    │   Qdrant    │
                    │ (Vector DB) │
                    └─────────────┘
 ```
 **Components:**
 - `qdrant` — Vector database storing document embeddings
 - `docs-api` — FastAPI backend exposing ingestion, search, and library endpoints
 - `docs-mcp` — MCP server providing tools for Context7-style AI interactions
 ## Prerequisites
 - Docker Engine v20.10+
 - Docker Compose
 - ~500MB free disk space (Qdrant + embedding model)
 ## Setup
 1. **Download the project** and change into its directory:
   ```bash
   cd local-context7
   ```
 2. **Copy environment file:**
   ```bash
   cp .env.example .env
   ```
 3. **(Optional) Create sample docs:**
   ```bash
   mkdir -p docs/foundryvtt docs/fastapi docs/my-msfs-copilot
   ```
 4. **Start services:**
   ```bash
   docker compose up -d --build
   ```
 5. **Verify they're running:**
   ```bash
   docker compose ps
   ```
   You should see all three services (`qdrant`, `docs-api`, `docs-mcp`) in "Up" status.
 6. **Wait for startup completion** (embedding model loads on first API call):
   ```bash
   docker compose logs -f docs-api  # Watch for "Initialization complete."
   ```
 ## Add Docs
 Place your documentation folders under the root directory:
 ```bash
 mkdir -p docs/foundryvtt/docs
 cp /path/to/foundryvtt/*.md docs/foundryvtt/docs/
 mkdir -p docs/fastapi
 ```
 Supported file types: `.md`, `.txt`, `.py`, `.js`, `.ts`, `.json`, `.yaml`, `.yml`, `.html`, `.css`, `.pdf` (via pypdf).
 To add new documents to the vector store after adding them, run:
 ```bash
 docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
 ```
 Or from another terminal:
 ```bash
 curl -X POST http://localhost:8787/api/v1/ingest/all \
  -H "Content-Type: application/json"
 ```
 ## Index Docs (Run Ingestion)
 After adding documents, index them into the vector store:
 ```bash
 docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
 ```
 Expected output shows progress like:
 ```
 [Detection] Scanning for libraries in: /docs
 [Detection] Found 3 library(ies)
 [Library] Processing: foundryvtt
 [Library] Scanning for files in: /docs/foundryvtt
 [Library] Found 5 document(s)
 ...
 ```
 ## Search Docs
 ### Via API (POST to `/search`)
 Request body:
 ```json
 {
  "query": "how do hooks work",
  "library_id": "foundryvtt",
  "limit": 10
 }
 ```
 Response example:
 ```json
 {
  "query": "how do hooks work",
  "library_id": "foundryvtt",
  "results": [
    {
      "id": "...",
      "score": 0.854,
      "library_id": "foundryvtt",
      "path": "core-docs.md",
      "title": "Core Hooks",
      "chunk_index": 2
    }
  ],
  "count": 1
 }
 ```
 ### Via MCP (resolve-library-id, search-docs tools)
 ## Connect MCP Clients
 To use this system with an MCP-enabled client (e.g., Claude Desktop), configure the MCP server endpoint.
 ### Example: Claude Desktop Config
 Add to your `claude_desktop_config.json`:
 ```json
 {
  "mcpServers": {
    "context7": {
      "command": "npx",
      "args": [
        "@modelcontextprotocol/server-local-context7",
        "--url", "http://localhost:8788"
      ],
      "env": {
        "DOCS_API_URL": "http://localhost:8787"
      }
    }
  }
 }
 ```
 If the client runs outside Docker and can't reach the API, expose them on host ports or run the MCP server outside Docker (see below).
 ### Example: Cline/Cursor MCP Config
 For Cursor, or for editors using the Cline extension:
 ```json
 // ~/.cursor/mcp.json
 {
  "context7": {
    "type": "stdio",
    "command": "docker",
    "args": [
      "exec",
      "-it",
      "docs-mcp",
      "uvicorn",
      "server:app",
      "--host",
      "0.0.0.0",
      "--port",
      "8788"
    ]
  }
 }
 ```
 Or if exposing MCP on host port:
 ```json
 {
  "context7": {
    "type": "stdio",
    "command": "docker",
    "args": [
      "run",
      "-it",
      "--rm",
      "-p",
      "8788:8788",
      "--name",
      "context7-mcp-standalone",
      "-e",
      "DOCS_API_URL=http://host.docker.internal:8787",
      "local-context7/docs-mcp"
    ]
  }
 }
 ```
 ## Troubleshooting
 ### Services won't start or restart loops
 Check logs:
 ```bash
 docker compose logs -f
 ```
 Common issues:
 - Port already in use on host → adjust mapping or free the port
 - Embedding model failing to load → verify disk space, check for GPU constraints if applicable
 ### Vector search returns empty results
 Ensure you've run ingestion after adding docs:
 ```bash
 docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
 ```
 ### Can't connect to docs-api from client outside Docker
 Set environment variable for host access in docker-compose.yml or .env:
 ```yaml
 docs-api:
  environment:
    - DOCS_API_URL=http://host.docker.internal:8787
 ```
 For MCP server specifically:
 ```yaml
 docs-mcp:
  environment:
    - DOCS_API_URL=http://host.docker.internal:8787
 ```
 ## Reset Qdrant and SQLite
 To clear all data (vector store and database):
 ```bash
 # Stop services
 docker compose down
 # Remove volumes (delete Qdrant and db.sqlite)
 rm -rf ./data/qdrant ./data/db.sqlite
 # Restart fresh
 docker compose up -d --build
 ```
 ## Expose Through Caddy Reverse Proxy
 To add HTTPS and serve under a subdomain, configure Caddy:
 **Example `Caddyfile`:**
 ```caddyfile
 docs.yourdomain.com {
    reverse_proxy docs-api:8787
    handle_path /mcp/* {
        reverse_proxy docs-mcp:8788
    }
    # Enable basic auth (optional, see below)
 }
 api.yourdomain.com {
    reverse_proxy docs-api:8787
 }
 mcp.yourdomain.com {
    reverse_proxy docs-mcp:8788
 }
 ```
 ## Protect It with Basic Auth
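 Add authentication using Caddy's built-in `basic_auth` directive (named `basicauth` before Caddy 2.8). Generate the password hash with `caddy hash-password`:
 **Caddy example with basic auth:**
 ```caddyfile
 docs.yourdomain.com {
    basic_auth {
        # <username> <password hash produced by `caddy hash-password`>
        admin REPLACE_WITH_PASSWORD_HASH
    }
    reverse_proxy docs-api:8787
 }
 ```
 Alternatively, set `DOCS_API_KEY` so that docs-api itself requires a matching `X-API-Key` header on protected endpoints.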
 For Docker-based deployment, consider using an authentication middleware or a dedicated reverse proxy with JWT/HTTP Basic configured externally.
 ## Future Improvements
 - Add rate limiting to API endpoints
 - Support for streaming responses for large document retrieval
 - Chunk overlap configuration via environment variables
 - Batch index endpoint improvements
 - Metrics/logging aggregation (e.g., Prometheus + Grafana)
 - Plugin system for additional data sources
@@ -0,0 +1,36 @@
 # Backend API Service
 # Builds the docs-api image: a FastAPI app served by uvicorn on port 8787.
 FROM python:3.11-slim
 WORKDIR /app
 # Install system dependencies for PDF parsing and embeddings.
 # curl is required by the HEALTHCHECK further down; the apt package lists are
 # removed in the same layer to keep the image small.
 RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*
 # Create the embedding cache directory (intended as a persistent volume mount point)
 RUN mkdir -p /app/.embed_cache
 # Install Python dependencies before copying the code so that this layer is
 # only rebuilt when requirements.txt changes
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY app/ ./app/
 # Mount volumes at these paths (configured in docker-compose)
 # ./docs -> /docs
 # ./data -> /data
 # /data holds: db.sqlite, qdrant storage volume mount from docker-compose
 # Expose API port
 EXPOSE 8787
 # Healthcheck: poll /health every 30s (10s timeout, 5s start period); the
 # container is marked unhealthy after 3 consecutive failures
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8787/health || exit 1
 # Run the FastAPI application, listening on all interfaces
 CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8787"]
@@ -0,0 +1,30 @@
 # WebUI-specific Dockerfile
 # NOTE(review): this image is based on python:3.12-slim, whereas the docs-api
 # Dockerfile uses python:3.11-slim - confirm the version difference is intended.
 FROM python:3.12-slim
 # Set environment variables:
 #  - PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc files, unbuffered output
 #  - DOCS_API_URL: address of the docs-api service
 #  - WEBUI_PORT: port for the web UI. NOTE(review): the CMD below hard-codes
 #    8790 and does not reference this variable - confirm the app reads it.
 ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DOCS_API_URL=http://docs-api:8787 \
    WEBUI_PORT=8790
 # Install system dependencies (apt package lists are removed in the same layer)
 RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*
 WORKDIR /app
 # Copy requirements first for layer caching
 COPY backend/requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy backend code so that it is importable as backend.app
 COPY backend/app /app/backend/app
 # Create uploads directory
 RUN mkdir -p /app/backend/app/webui/uploads
 # Expose port
 EXPOSE 8790
 # Serve the same FastAPI application object on port 8790
 CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8790"]
@@ -0,0 +1,2 @@
 # Backend API Package - Contains all FastAPI application modules
 # The presence of this file makes the directory an importable Python package
@@ -0,0 +1,304 @@
 # Text Chunking Utilities with heading-aware splitting
 import re
 from typing import List
 def estimate_tokens(text: str) -> int:
    """
    Approximate how many tokens a piece of text contains.

    The heuristic assumes four characters per token and rounds down, so
    strings shorter than four characters count as zero tokens.

    Args:
        text: The text to measure
    Returns:
        The approximate token count (character length floor-divided by 4)
    """
    chars_per_token = 4
    char_count = len(text)
    return char_count // chars_per_token
 def _split_at_headings(text: str) -> List[tuple]:
    """
    Split text at markdown headings while preserving heading content.
    Args:
        text: The full text
    Returns:
        List of (heading_text, remaining_text) tuples or [(text,) if no headings]
    """
    # Match markdown headings (##, ###, ####, etc.)
    pattern = r'(#{1,6})\s+(.+?)(?=\n#{1,6}|\Z)'
    parts = []
    remaining = text
    while True:
        match = re.search(pattern, remaining, re.MULTILINE)
        if not match:
            break
        heading_start = match.start()
        heading_content = match.group(0).strip()
        # Insert the heading chunk
        parts.append((heading_content, None))
        remaining = remaining[match.end():]
    if remaining and not parts:
        return [(text,)]
    if remaining:
        # Add final non-heading section
        last_h_start = sum(len(h) for _, h in parts)
        parts.append((remaining[last_h_start:], None))
    if not parts and text:
        parts = [(text,)]
    return parts
 def _split_at_paragraphs(text: str, max_tokens: int) -> List[str]:
    """
    Split text at paragraph boundaries, packing paragraphs into chunks.

    Consecutive paragraphs are joined with a blank line for as long as the
    joined text stays within ``max_tokens`` (as measured by
    ``estimate_tokens``). A paragraph that is too large on its own is handed
    to ``_split_at_sentences``.

    Args:
        text: The text to split
        max_tokens: Maximum tokens per chunk
    Returns:
        List of non-empty chunks in document order; empty list for empty text
    """
    if not text:
        return []

    # Paragraphs are separated by one or more blank lines.
    paragraphs = [p for p in re.split(r'\n\s*\n', text.strip()) if p.strip()]

    chunks = []
    current = ""
    for para in paragraphs:
        candidate = current + "\n\n" + para if current else para
        if estimate_tokens(candidate) <= max_tokens:
            current = candidate
            continue

        # The paragraph does not fit: flush what has been collected and start
        # over. Resetting `current` here is essential - otherwise the flushed
        # text would be emitted a second time with the next paragraph.
        if current:
            chunks.append(current)
            current = ""

        if estimate_tokens(para) <= max_tokens:
            current = para
            continue

        # Oversized paragraph: fall back to sentence-level splitting. All
        # pieces but the last are final; the last one stays open so that a
        # following short paragraph can still be packed with it.
        pieces = _split_at_sentences(para, max_tokens)
        if pieces:
            chunks.extend(pieces[:-1])
            current = pieces[-1]

    if current:
        chunks.append(current)
    return chunks
 def _split_at_sentences(text: str, max_tokens: int) -> List[str]:
    """
    Split text at sentence boundaries, packing sentences into chunks.

    Sentences end at runs of ``.``, ``!`` or ``?``; the punctuation stays
    attached to its sentence. Sentences are joined with a single space for as
    long as the joined text stays within ``max_tokens`` (as measured by
    ``estimate_tokens``). A single sentence that is too large is cut into
    fixed-size character slices.

    Args:
        text: The text to split
        max_tokens: Maximum tokens per chunk
    Returns:
        List of non-empty chunks in document order; empty list for empty or
        whitespace-only text
    """
    if not text:
        return []

    # Character budget used only for hard-splitting an oversized sentence;
    # mirrors the 4-characters-per-token heuristic and is never below 1 so
    # the slicing loop always makes progress.
    max_chars = max(1, max_tokens * 4)

    # First alternative: sentence text plus its trailing punctuation run.
    # Second alternative: a punctuation run with no preceding text.
    sentences = [s.strip() for s in re.findall(r'[^.!?]+[.!?]*|[.!?]+', text)]

    chunks = []
    current = ""
    for sentence in sentences:
        if not sentence:
            continue

        candidate = current + " " + sentence if current else sentence
        if estimate_tokens(candidate) <= max_tokens:
            current = candidate
            continue

        # Flush and reset so the flushed text is never emitted twice.
        if current:
            chunks.append(current)
            current = ""

        if estimate_tokens(sentence) <= max_tokens:
            current = sentence
            continue

        # A single sentence exceeds the budget: slice it by characters. The
        # range step guarantees termination regardless of the input.
        pieces = [
            sentence[offset:offset + max_chars].strip()
            for offset in range(0, len(sentence), max_chars)
        ]
        pieces = [piece for piece in pieces if piece]
        if pieces:
            chunks.extend(pieces[:-1])
            current = pieces[-1]

    if current:
        chunks.append(current)
    return chunks
 def chunk_text(text: str, max_tokens: int = 500, overlap_tokens: int = 80) -> List[str]:
    """
    Cut text into overlapping chunks, preferring natural boundaries.

    The text is scanned with a sliding window of ``max_tokens * 4``
    characters. Inside each window the cut is placed at the last boundary
    found in the second half of the window, trying in order: the newline
    before a markdown heading, a blank line, whitespace after sentence
    punctuation, any whitespace. If none is found the window is cut at its
    full length. The next window starts ``overlap_tokens * 4`` characters
    before the cut (capped at half the window size).

    Special case: when ``max_tokens`` is at most 20 and the text consists of
    several paragraphs that each fit the budget, the stripped paragraphs are
    returned as the chunks.

    Args:
        text: The full text to chunk
        max_tokens: Maximum tokens per chunk (default 500)
        overlap_tokens: Number of overlapping tokens between chunks (default 80)
    Returns:
        List of stripped, non-empty chunk strings
    Raises:
        TypeError: if ``text`` is None
        ValueError: if ``max_tokens`` is not positive (only checked for
            non-empty text)
    """
    if text is None:
        raise TypeError("text must be a string")
    if not text:
        return []
    if max_tokens <= 0:
        raise ValueError("max_tokens must be greater than 0")

    window_size = max(1, max_tokens * 4)
    overlap = min(max(overlap_tokens, 0) * 4, window_size // 2)
    body = text.strip()
    total = len(body)

    # Small-budget shortcut: paragraphs that already fit are used verbatim.
    blocks = [b.strip() for b in re.split(r"\n\s*\n", body) if b.strip()]
    if len(blocks) > 1 and max_tokens <= 20:
        if all(estimate_tokens(b) <= max_tokens for b in blocks):
            return blocks

    # Boundary patterns from most to least preferred.
    boundary_patterns = (r"\n#{1,6}\s+", r"\n\s*\n", r"(?<=[.!?])\s+", r"\s+")

    pieces = []
    cursor = 0
    while cursor < total:
        limit = min(cursor + window_size, total)
        if limit == total:
            # The remainder fits into one window: emit it and finish.
            tail = body[cursor:].strip()
            if tail:
                pieces.append(tail)
            break

        window = body[cursor:limit]
        # Only boundaries in the second half of the window are eligible, so
        # every chunk covers at least half a window.
        floor = max(1, len(window) // 2)
        cut = len(window)
        for pattern in boundary_patterns:
            eligible = [m.start() for m in re.finditer(pattern, window) if m.start() >= floor]
            if eligible:
                cut = max(eligible)
                break

        stop = cursor + cut
        piece = body[cursor:stop].strip()
        if piece:
            pieces.append(piece)

        # Step back by the overlap, but never to or before the current
        # position, so the loop always advances.
        resume = stop - overlap if overlap else stop
        cursor = resume if resume > cursor else stop

    return [p for p in pieces if p.strip()]
 # Ad-hoc self-test: runs only when this module is executed as a script and
 # exercises estimate_tokens and chunk_text with plain assert statements.
 if __name__ == "__main__":
    # Test estimate_tokens: 400 characters are expected to count as 100 tokens
    test_text_400 = "a" * 400
    assert estimate_tokens(test_text_400) == 100, f"Expected 100 tokens for 400 chars, got {estimate_tokens(test_text_400)}"
    print(f"estimate_tokens test passed: 400 chars -> {estimate_tokens(test_text_400)} tokens")
    # Test with empty text
    assert chunk_text("") == [], "Empty text should return empty list"
    print("chunk_text empty test passed")
    # Test small text (single chunk): the input must come back unchanged
    small = "This is a very short text that should be returned as a single chunk."
    chunks = chunk_text(small)
    assert len(chunks) == 1, f"Short text should be one chunk, got {len(chunks)}"
    assert chunks[0] == small, "Content should match for small text"
    print("chunk_text single chunk test passed")
    # Test chunking with headings: a small markdown document with three heading levels
    markdown_with_headings = """# Introduction
 This is the introduction section.
 ## Background
 Background information goes here to make this longer and test chunking.
 This paragraph has more content about the background topic.
 ### Details
 Specific details about the background are provided in this subsection.
 More details follow here to ensure we have enough text to properly test heading preservation.
 ## Conclusion
 The conclusion wraps up everything nicely."""
    chunks = chunk_text(markdown_with_headings, max_tokens=50)
    # Report the chunks that begin with a heading marker (informational only;
    # their number is printed but not asserted)
    heading_chunks = [c for c in chunks if c.strip().startswith('#')]
    print(f"\nFound {len(heading_chunks)} heading chunks:")
    for hc in heading_chunks:
        print(f"  - {hc.strip()}")
    assert len(chunks) > 1, f"Should have multiple chunks, got {len(chunks)}"
    # Verify no chunk exceeds max_tokens by too much (20 tokens of slack)
    all_under = all(estimate_tokens(c) <= 50 + 20 for c in chunks)  # Allow some tolerance
    assert all_under, "Some chunks exceed token limit significantly"
    print("All chunks respect token limits")
    print("\nAll tests passed!")
@@ -0,0 +1,25 @@
 # Configuration Settings
 import os
 from dataclasses import dataclass
@dataclass(frozen=True)
class Settings:
    """
    Application settings loaded from environment variables.

    Each default is read from the environment once, when this class is
    defined (i.e. at import time); later changes to ``os.environ`` do not
    affect it. Instances are immutable (frozen dataclass).
    """
    # Qdrant connection
    vector_store_host: str = os.getenv("VECTOR_STORE_HOST", "qdrant")
    # `or` (rather than a getenv default) also covers a variable that is set
    # but empty, e.g. `VECTOR_STORE_PORT=` in .env, which would crash int().
    vector_store_port: int = int(os.getenv("VECTOR_STORE_PORT") or "6333")
    collection_name: str = os.getenv("COLLECTION_NAME", "local_context7_docs")
    embedding_model_name: str = os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")
    # Filesystem locations
    docs_path: str = os.getenv("DOCS_PATH", "./docs")
    db_path: str = os.getenv("DB_PATH", "./data/db.sqlite")
    log_level: str = os.getenv("LOG_LEVEL", "INFO")
    # API key for the docs API. API_KEY_DOCS_API keeps precedence so existing
    # deployments behave exactly as before; DOCS_API_KEY is the name used in
    # .env.example and the README and is accepted as a fallback.
    api_key_docs_api: str = os.getenv("API_KEY_DOCS_API") or os.getenv("DOCS_API_KEY", "")

    @property
    def is_auth_enabled(self) -> bool:
        """Return True if a non-empty docs API key is configured."""
        return bool(self.api_key_docs_api)


# Module-level singleton shared by the rest of the application.
settings = Settings()
@@ -0,0 +1,384 @@
 # SQLite Database Layer for local-context7
 import sqlite3
 from pathlib import Path
 from datetime import datetime, timezone
 from typing import List, Dict, Any, Optional
 from .config import settings
 try:
    from qdrant_client import QdrantClient
 except ImportError:
    QdrantClient = None
 def get_db_path() -> Path:
    """Return the configured SQLite database location as a ``Path``."""
    configured_location = settings.db_path
    return Path(configured_location)
 def ensure_db_dir():
    """Create the directory that holds the SQLite file if it is missing."""
    parent_dir = get_db_path().parent
    # parents/exist_ok make this a no-op when the directory already exists.
    parent_dir.mkdir(parents=True, exist_ok=True)
 # Runs on import so the directory exists before any connection is opened;
 # repeated imports are harmless.
 ensure_db_dir()
 def get_connection():
    """
    Open a new connection to the SQLite database.

    Returns:
        sqlite3.Connection whose rows are ``sqlite3.Row`` objects, i.e.
        columns can be read by name as well as by index. The caller is
        responsible for closing the connection.
    """
    database_file = str(get_db_path())
    connection = sqlite3.connect(database_file)
    connection.row_factory = sqlite3.Row
    return connection
 def init_db():
    """
    Create the database schema if it does not exist yet.

    Tables:
    - libraries (id, name, description, source_path, created_at, updated_at)
    - documents (id, library_id, path, title, content, chunk_index, token_estimate, created_at)

    Indexes:
    - documents(library_id)
    - libraries(updated_at)

    Returns:
        ``{"success": True}`` when all statements were committed, otherwise
        ``{"success": False, "error": <message>}`` after a rollback.
    """
    # Schema statements, executed in this order inside one transaction.
    schema_statements = (
        # libraries table
        """
            CREATE TABLE IF NOT EXISTS libraries (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                description TEXT,
                source_path TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            )
        """,
        # documents table
        """
            CREATE TABLE IF NOT EXISTS documents (
                id TEXT PRIMARY KEY,
                library_id TEXT NOT NULL,
                path TEXT NOT NULL,
                title TEXT,
                content TEXT,
                chunk_index INTEGER,
                token_estimate INTEGER,
                created_at TEXT NOT NULL,
                FOREIGN KEY (library_id) REFERENCES libraries(id) ON DELETE CASCADE
            )
        """,
        # lookup indexes
        """
            CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id)
        """,
        """
            CREATE INDEX IF NOT EXISTS idx_libraries_updated_at ON libraries(updated_at)
        """,
    )

    db = get_connection()
    try:
        # Legacy ALTER TABLE behaviour is switched on before the schema is created.
        db.execute("PRAGMA legacy_alter_table = ON")
        for statement in schema_statements:
            db.execute(statement)
        db.commit()
    except Exception as exc:
        db.rollback()
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True}
    finally:
        db.close()
 def upsert_library(
    library_id: str,
    name: str,
    description: Optional[str] = None,
    source_path: Optional[str] = None
 ) -> Dict[str, Any]:
    """
    Insert or update a library record.

    On update, ``name``, ``description``, ``source_path`` and ``updated_at``
    are overwritten while ``created_at`` is kept. On insert, both timestamps
    are set to the current time.

    Args:
        library_id: Unique identifier for the library
        name: Library name
        description: Optional description
        source_path: Path to library source files; falls back to
            ``library_id`` when omitted or empty
    Returns:
        ``{"success": True, "id": ..., "exists": <bool>}`` where ``exists``
        tells whether the row was already present, or
        ``{"success": False, "error": <message>}`` after a rollback.
    """
    conn = get_connection()
    try:
        # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
        # keeps the stored string in the same naive-UTC ISO format as before,
        # so existing rows remain comparable.
        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        source_path = source_path or library_id
        # Check if library exists
        cursor = conn.execute("SELECT id FROM libraries WHERE id = ?", (library_id,))
        exists = cursor.fetchone() is not None
        if exists:
            # Update existing library
            conn.execute("""
                UPDATE libraries SET
                    name = ?, description = ?, source_path = ?, updated_at = ?
                WHERE id = ?
            """, (name, description, source_path, now, library_id))
        else:
            # Insert new library
            conn.execute("""
                INSERT INTO libraries (id, name, description, source_path, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (library_id, name, description, source_path, now, now))
        conn.commit()
        return {"success": True, "id": library_id, "exists": exists}
    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
 def insert_document_chunk(
    doc_id: str,
    library_id: str,
    path: str,
    title: Optional[str] = None,
    content: Optional[str] = None,
    chunk_index: Optional[int] = None,
    token_estimate: int = 0,
 ) -> Dict[str, Any]:
    """
    Insert or update a document chunk record.

    When a row with ``doc_id`` already exists, all of its columns are
    overwritten, including ``created_at``, which is reset to the current time.

    Args:
        doc_id: Unique identifier for this chunk
        library_id: Foreign key to libraries table
        path: Relative file path within the library
        title: Optional document title
        content: Full text content of the chunk
        chunk_index: Index within the full document (NULL if not chunked)
        token_estimate: Estimated token count; a falsy value is stored as 0
    Returns:
        ``{"success": True, "id": ..., "exists": <bool>}`` where ``exists``
        tells whether the row was already present, or
        ``{"success": False, "error": <message>}`` after a rollback.
    """
    conn = get_connection()
    try:
        # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
        # keeps the stored string in the same naive-UTC ISO format as before.
        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        # Check if document chunk exists
        cursor = conn.execute(
            "SELECT id FROM documents WHERE id = ?", (doc_id,)
        )
        exists = cursor.fetchone() is not None
        if exists:
            conn.execute(
                """
                UPDATE documents
                SET library_id = ?, path = ?, title = ?, content = ?,
                    chunk_index = ?, token_estimate = ?, created_at = ?
                WHERE id = ?
                """,
                (library_id, path, title, content, chunk_index, token_estimate or 0, now, doc_id),
            )
        else:
            conn.execute(
                """
                INSERT INTO documents
                    (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (doc_id, library_id, path, title, content, chunk_index, token_estimate or 0, now),
            )
        conn.commit()
        return {"success": True, "id": doc_id, "exists": exists}
    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
 def clear_library_documents(library_id: str) -> Dict[str, Any]:
    """
    Remove every documents row that belongs to the given library.
    Args:
        library_id: Id of the library whose chunks are deleted
    Returns:
        {"success": True, "deleted": <row count>, "library_id": library_id} once
        committed, or {"success": False, "error": str} after a rollback.
    """
    db = get_connection()
    try:
        removed = db.execute(
            "DELETE FROM documents WHERE library_id = ?", (library_id,)
        ).rowcount
        db.commit()
    except Exception as exc:
        db.rollback()
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True, "deleted": removed, "library_id": library_id}
    finally:
        db.close()
 def delete_library(library_id: str) -> Dict[str, Any]:
    """
    Delete a library together with all of its document chunks.
    Args:
        library_id: Id of the library to remove
    Returns:
        {"success": True, "deleted": <libraries rows removed>, "library_id": library_id}
        once committed, or {"success": False, "error": str} after a rollback.
    """
    db = get_connection()
    try:
        # Chunks go first, then the library row itself; both in one transaction
        db.execute("DELETE FROM documents WHERE library_id = ?", (library_id,))
        removed_libraries = db.execute(
            "DELETE FROM libraries WHERE id = ?", (library_id,)
        ).rowcount
        db.commit()
    except Exception as exc:
        db.rollback()
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True, "deleted": removed_libraries, "library_id": library_id}
    finally:
        db.close()
 def list_libraries() -> List[Dict[str, Any]]:
    """
    Fetch every row of the libraries table, most recently updated first.
    Returns:
        One dict per library, keyed by column name. If the query raises,
        a {"success": False, "error": str} dict is returned instead of a list.
    """
    db = get_connection()
    try:
        rows = db.execute("SELECT * FROM libraries ORDER BY updated_at DESC")
        # Column names come from the cursor metadata so rows can become dicts
        field_names = [meta[0] for meta in rows.description]
        return [dict(zip(field_names, record)) for record in rows]
    except Exception as exc:
        return {"success": False, "error": str(exc)}
    finally:
        db.close()
 def search_libraries(query: str) -> List[Dict[str, Any]]:
    """
    Find libraries whose id, name or description contains the query text.
    This is a case-insensitive substring match (SQL LIKE), not a full-text
    search. LIKE wildcards in the query ("%" and "_") are escaped so they
    match literally.
    Args:
        query: Text to look for
    Returns:
        Matching library dicts ordered by updated_at descending (empty list if
        nothing matches). If the query raises, a
        {"success": False, "error": str} dict is returned instead of a list.
    """
    conn = get_connection()
    try:
        # Escape the escape character first, then the two LIKE wildcards, so a
        # search for "my_lib" does not also match "myxlib".
        escaped = (
            str(query)
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like_query = f"%{escaped}%"
        cursor = conn.execute("""
            SELECT * FROM libraries
            WHERE lower(id) LIKE lower(?) ESCAPE '\\'
               OR lower(name) LIKE lower(?) ESCAPE '\\'
               OR lower(coalesce(description, '')) LIKE lower(?) ESCAPE '\\'
            ORDER BY updated_at DESC
        """, (like_query, like_query, like_query))
        # Convert rows to dicts keyed by column name
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor]
    except Exception as e:
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
 def get_document_by_id(doc_id: str) -> Optional[Dict[str, Any]]:
    """
    Look up one documents row by its id.
    Args:
        doc_id: Id of the row to fetch
    Returns:
        The row as a dict keyed by column name, or None when no row has that
        id. If the query raises, a {"success": False, "error": str} dict is
        returned instead.
    """
    db = get_connection()
    try:
        lookup = db.execute("SELECT * FROM documents WHERE id = ?", (doc_id,))
        record = lookup.fetchone()
        if record is not None:
            # Pair each value with its column name from the cursor metadata
            field_names = [meta[0] for meta in lookup.description]
            return dict(zip(field_names, record))
        return None
    except Exception as exc:
        return {"success": False, "error": str(exc)}
    finally:
        db.close()
 def get_chunks_for_library(library_id: str) -> List[Dict[str, Any]]:
    """
    Fetch all documents rows that belong to one library.
    Args:
        library_id: Id of the library whose chunks are wanted
    Returns:
        One dict per chunk, keyed by column name, ordered by chunk_index
        descending (highest index first). If the query raises, a
        {"success": False, "error": str} dict is returned instead of a list.
    """
    db = get_connection()
    try:
        # NOTE(review): DESC yields chunks in reverse document order - confirm
        # that callers expect this.
        rows = db.execute(
            "SELECT * FROM documents WHERE library_id = ? ORDER BY chunk_index DESC",
            (library_id,)
        )
        field_names = [meta[0] for meta in rows.description]
        return [dict(zip(field_names, record)) for record in rows]
    except Exception as exc:
        return {"success": False, "error": str(exc)}
    finally:
        db.close()
@@ -0,0 +1,181 @@
 # Local Embedding Generation using FastEmbed
 import asyncio
 from typing import List
 from functools import lru_cache
 # Module-level singleton for cached model instance.
 # Stays None until _load_model() constructs the FastEmbed model on first use.
 _embedding_model = None
 # Value reported by get_embedding_size(); nothing in this module reassigns it.
 _embedding_size = 384  # BAAI/bge-small-en-v1.5 output dimension
 def _load_model():
    """
    Return the shared FastEmbed model, constructing it the first time through.
    Raises:
        ImportError: If the fastembed package cannot be imported
        RuntimeError: If model construction fails; disk-space failures are
            re-raised with a longer explanatory message
    """
    global _embedding_model, _embedding_size
    try:
        from fastembed import TextEmbedding
        if _embedding_model is not None:
            return _embedding_model
        print("Loading embedding model (this may take a few minutes on first run)...")
        # Use BAAI/bge-small-en-v1.5 - lightweight (~90MB), works offline
        _embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5", cache_dir=".embed_cache")
        print("Embedding model loaded successfully.")
        return _embedding_model
    except ImportError as import_err:
        raise ImportError(
            "FastEmbed is not installed. Please install with:\n"
            "  pip install fastembed\n\n"
            f"Import error details: {import_err}"
        ) from import_err
    except RuntimeError as runtime_err:
        # Only disk-space problems get the friendlier message; anything else
        # propagates unchanged.
        details = str(runtime_err)
        out_of_space = "No space left" in details or "disk quota exceeded" in details
        if not out_of_space:
            raise
        raise RuntimeError(
            "Failed to load embedding model due to disk space constraints.\n\n"
            "Please free up space on your system (at least 500MB required).\n"
            "Or specify a custom cache directory with available space:\n"
            "  from fastembed import TextEmbedding\n"
            "  model = TextEmbedding(model_name='...', cache_dir='/path/to/large/storage')\n\n"
            f"Error: {runtime_err}"
        ) from runtime_err
 def get_embedding_model():
    """
    Return the module-wide embedding model, loading it on the first call.
    Returns:
        The cached model object produced by _load_model()
    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model download/load failed
    """
    global _embedding_model
    if _embedding_model is not None:
        return _embedding_model
    _embedding_model = _load_model()
    return _embedding_model
 def embed_text(text: str) -> List[float]:
    """
    Generate the embedding for a single text.
    Args:
        text: The text string to embed
    Returns:
        List of floats representing the embedding vector. Empty or non-string
        input yields a zero vector of get_embedding_size() elements without
        loading the model.
    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model loading failed
    """
    if not text or not isinstance(text, str):
        return [0.0] * get_embedding_size()
    model = get_embedding_model()
    # TextEmbedding.embed() yields vectors lazily (it returns a generator), so
    # the result cannot be indexed with [0]; pull the single vector with next().
    vector = next(iter(model.embed([text])))
    return vector.tolist() if hasattr(vector, 'tolist') else list(vector)
 def embed_texts(texts: List[str]) -> List[List[float]]:
    """
    Embed a batch of texts in one call to the model.
    Args:
        texts: Text strings to embed
    Returns:
        One embedding vector per input text, in input order; an empty input
        gives an empty list without loading the model
    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model loading failed
    """
    if not texts:
        return []
    vectors = get_embedding_model().embed(texts)
    # Array-like results are converted with tolist(); anything else passes through
    return [vec.tolist() if hasattr(vec, 'tolist') else vec for vec in vectors]
 def get_embedding_size() -> int:
    """
    Get the embedding dimension size.
    Returns:
        Integer representing vector dimension (384 for bge-small-en-v1.5)
    Note:
        This returns the module-level constant _embedding_size; it does not
        load or query the model, so it is safe to call before any embedding
        has been generated.
    """
    return _embedding_size
 # Async wrapper for compatibility with existing code
 async def generate_embeddings(chunks: List[str]) -> List[List[float]]:
    """
    Async wrapper around embed_texts for compatibility.
    Args:
        chunks: List of text strings to embed
    Returns:
        List of embedding vectors
    Note:
        embed_texts is called directly, without await or an executor, so the
        embedding work runs synchronously inside the calling coroutine.
    """
    return embed_texts(chunks)
 # Manual smoke test: runs only when this module is executed as a script.
 if __name__ == "__main__":
    # Test the embeddings module
    print("Testing embeddings module...\n")
    # Test get_embedding_size
    size = get_embedding_size()
    print(f"Embedding dimension: {size}")
    # Test single text embedding (failures are printed, not raised)
    test_text = "Hello, world! This is a test of the embedding generation."
    try:
        emb = embed_text(test_text)
        print(f"\nSingle text embedding shape: ({len(emb)},)")
        print(f"First 5 values: {emb[:5]}")
        print("✓ Single embedding works")
    except Exception as e:
        print(f"✗ Single embedding failed: {e}")
    # Test batch embedding (failures are printed, not raised)
    test_texts = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is a subset of artificial intelligence.",
        "Natural language processing enables computers to understand human language."
    ]
    try:
        embeddings = embed_texts(test_texts)
        print(f"\nBatch embedding shape: ({len(embeddings)}, {len(embeddings[0])})")
        print("✓ Batch embeddings work")
    except Exception as e:
        print(f"✗ Batch embeddings failed: {e}")
    # Test empty inputs; these asserts sit outside any try, so a failure here
    # stops the script. The final "All tests passed" line is printed even when
    # one of the try blocks above reported a failure.
    assert embed_text("") == [0.0] * size, "Empty text should return zero vector"
    assert embed_texts([]) == [], "Empty list should return empty list"
    print("✓ Empty input handling works")
    print("\n✅ All tests passed!")
@@ -0,0 +1,389 @@
 # Git Source Operations for Repository Cloning and File Discovery
 import os
 import shutil
 from pathlib import Path
 from typing import List, Optional, Dict, Any
 def get_repos_dir() -> Path:
    """Return the directory under which cloned repositories are stored."""
    # data/repos, located three directory levels above this source file
    module_dir = Path(__file__).parent
    base_dir = module_dir.parent.parent
    return base_dir.joinpath("data", "repos")
 def ensure_repos_dir():
    """Create the repos directory (and any missing parents) and return its path."""
    target = get_repos_dir()
    # Safe to call repeatedly: an already existing directory is left alone
    if not target.is_dir():
        target.mkdir(parents=True, exist_ok=True)
    return target
 # Initialize repos directory at module load time (safe to run multiple times).
 # Note: this makes directory creation a side effect of importing the module.
 ensure_repos_dir()
 class GitCloneError(Exception):
    """Raised when cloning, fetching or checking out a git repository fails."""
 def clone_or_update_repo(
    repo_id: str,
    repo_url: str,
    branch: str,
    repos_base: Optional[Path] = None
 ) -> Dict[str, Any]:
    """
    Clone a git repository or update an existing clone.
    An existing clone (a directory containing .git) is fetched and hard-reset
    to origin/<branch>. Otherwise the repository is cloned into
    repos_base/repo_id; a leftover directory at that path that is not a git
    clone is removed first, because git cannot update it or clone over it.
    Args:
        repo_id: Unique identifier for this repository (used as directory name)
        repo_url: Git URL to clone from (required when no clone exists yet)
        branch: Branch name to checkout
        repos_base: Base directory for repos (defaults to get_repos_dir())
    Returns:
        Dict with keys "success" (True), "repo_path", "url" and "branch"
    Raises:
        GitCloneError: If any git command fails, git is unavailable, or
            repo_url is missing for a fresh clone
    """
    # Imported before branching so both the update and the clone path can use it
    import subprocess

    repos_base = repos_base or get_repos_dir()
    repo_path = repos_base / repo_id

    def _git(*args: str, failure: str) -> None:
        """Run one git command and raise GitCloneError if it exits non-zero."""
        completed = subprocess.run(["git", *args], capture_output=True, text=True)
        if completed.returncode != 0:
            raise GitCloneError(f"{failure}: {completed.stderr.strip()}")

    try:
        if (repo_path / ".git").exists():
            # Update existing clone
            print(f"  [Git] Updating existing clone at {repo_path}")
            # Explicit refspec: a --single-branch clone only tracks the branch
            # it was created with, so origin/<branch> must be updated by name.
            _git(
                "-C", str(repo_path), "fetch", "origin",
                f"+refs/heads/{branch}:refs/remotes/origin/{branch}",
                failure="Failed to fetch",
            )
            _git(
                "-C", str(repo_path), "reset", "--hard", "origin/" + branch,
                failure=f"Failed to reset to origin/{branch}",
            )
        else:
            if not repo_url:
                raise GitCloneError("repo_url is required to clone a new repository")
            if repo_path.exists():
                # Directory without git metadata: it cannot be updated and git
                # refuses to clone into a non-empty directory, so start clean.
                shutil.rmtree(repo_path)
            repos_base.mkdir(parents=True, exist_ok=True)
            # Clone new repository directly into repo_path
            print(f"  [Git] Cloning {repo_url} to {repo_path}")
            _git(
                "clone", "--branch", branch, "--single-branch",
                repo_url, str(repo_path),
                failure="Failed to clone",
            )
        print(f"  [Git] Checked out branch: {branch}")
        return {
            "success": True,
            "repo_path": str(repo_path),
            "url": repo_url,
            "branch": branch
        }
    except GitCloneError:
        # Already carries a specific message; do not wrap it a second time
        raise
    except Exception as e:
        # e.g. git executable missing, or filesystem errors
        raise GitCloneError(f"Failed to clone/update repo: {e}") from e
 def discover_files(
    repo_path: Path,
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None
 ) -> List[Dict[str, Any]]:
    """
    Discover files in a git repository respecting include/exclude paths.
    Directories are walked in name order. The repository's own .git directory
    is never descended into, and directories that cannot be read are skipped.
    Args:
        repo_path: Path to the cloned repository
        include_paths: Files or directories (relative to repo root) to include;
            None or an empty list means everything
        exclude_paths: Files or directories (relative to repo root) to exclude;
            exclusion wins over inclusion
    Returns:
        List of dicts with format:
            {
              "path": "docs/hooks.md",  # Relative to repo root, POSIX separators
              "full_path": "/full/path/to/repo/docs/hooks.md",
              "is_binary": False  # True when the file looks binary
            }
    """
    def _normalize(raw: str) -> str:
        """Reduce a user-supplied path to a clean repo-relative POSIX string."""
        return Path(str(raw).replace("\\", "/")).as_posix().strip("/")

    includes = [] if include_paths is None else [_normalize(p) for p in include_paths]
    excludes = set() if exclude_paths is None else {_normalize(p) for p in exclude_paths}
    discovered: List[Dict[str, Any]] = []

    def _is_within(rel: str, base: str) -> bool:
        """True when rel is base itself or lies below it ("." means repo root)."""
        return base == "." or rel == base or rel.startswith(base + "/")

    def _excluded(rel: str) -> bool:
        """True when rel matches an exclude path exactly or lies under one."""
        return any(_is_within(rel, exc) for exc in excludes)

    def _included(rel: str) -> bool:
        """True when no include filter is set or rel falls under an include path."""
        return not includes or any(_is_within(rel, inc) for inc in includes)

    def _leads_to_include(rel: str) -> bool:
        """True when directory rel is an ancestor of some include path."""
        return any(inc.startswith(rel + "/") for inc in includes)

    def is_probably_binary(filepath: str) -> bool:
        """Treat known text extensions as text; otherwise look for NUL bytes."""
        ext = Path(filepath).suffix.lower()
        text_extensions = {'.md', '.txt', '.py', '.js', '.ts', '.json',
                           '.yaml', '.yml', '.html', '.css', '.sh', '.sql'}
        if ext in text_extensions:
            return False
        # Check for null bytes in first 8KB
        try:
            with open(filepath, 'rb') as f:
                return b'\x00' in f.read(8192)
        except OSError:
            # Unreadable file: report it as non-binary rather than failing the scan
            return False

    def _walk(current: Path, rel_prefix: Path) -> None:
        """Collect matching files below current, recursing into subdirectories."""
        try:
            with os.scandir(current) as it:
                # DirEntry objects are not orderable, so sort by name explicitly
                entries = sorted(it, key=lambda e: e.name)
        except PermissionError:
            # Skip directories we can't read
            return
        for entry in entries:
            rel_path = rel_prefix / entry.name
            rel = rel_path.as_posix()
            # Filter by exclude paths first
            if _excluded(rel):
                continue
            if entry.is_dir():
                if entry.name == ".git":
                    # Git metadata is never documentation
                    continue
                # Descend when the directory is selected itself or is on the
                # way to a deeper include path (e.g. "docs" for "docs/api").
                if _included(rel) or _leads_to_include(rel):
                    _walk(current / entry.name, rel_path)
            elif entry.is_file() and _included(rel):
                full_path = current / entry.name
                discovered.append({
                    "path": rel,
                    "full_path": str(full_path),
                    "is_binary": is_probably_binary(str(full_path))
                })

    # Walk the repository starting from repo root
    _walk(Path(repo_path), Path("."))
    return discovered
 async def ingest_git_source(
    library_id: str,
    name: str,
    description: Optional[str] = None,
    repo_url: str = None,
    branch: str = "main",
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None,
    repos_base: Optional[Path] = None
 ) -> Dict[str, Any]:
    """
    Ingest a git repository as a library.
    Clones the repo (or updates an existing clone), counts the files matching
    the include/exclude paths, and hands the clone directory to ingest_library.
    Args:
        library_id: Unique identifier for this library
        name: Library display name
        description: Optional description
        repo_url: Git repository URL to clone from
        branch: Branch to checkout (default: main)
        include_paths: Paths relative to repo root to include (if None, all dirs considered)
        exclude_paths: Paths relative to repo root to exclude
        repos_base: Base directory for cloned repos (defaults to get_repos_dir())
    Returns:
        Dict with "success", "library_id", "name", "files_discovered",
        "chunks_created" and "vectors_added"; when no file matches, a shorter
        dict with "success", "library_id", "message" and "files_discovered"
    Raises:
        GitCloneError: If git operations fail
    """
    # Imported here rather than at module level because ingest.py imports this
    # module at its top level.
    from .ingest import ingest_library
    print(f"\n[Git Ingestion] Processing library: {library_id}")
    print(f"  Source: {repo_url or '(local)'}")
    # Ensure repos directory exists
    repos_base = repos_base or get_repos_dir()
    repos_base.mkdir(parents=True, exist_ok=True)
    repo_id = f"{library_id}-git"
    # Clone or update the repo
    clone_result = clone_or_update_repo(
        repo_id=repo_id,
        repo_url=repo_url,
        branch=branch,
        repos_base=repos_base
    )
    # Absolute path, so the location stays valid whatever ingest_library joins
    # it onto.
    repo_path = Path(clone_result["repo_path"]).resolve()
    print(f"  [Git] Found files in {repo_path}")
    # Discover files respecting include/exclude paths
    files = discover_files(
        repo_path=repo_path,
        include_paths=include_paths,
        exclude_paths=exclude_paths
    )
    print(f"  [Git] Discovered {len(files)} file(s)")
    if not files:
        return {
            "success": True,
            "library_id": library_id,
            "message": "No files found matching include/exclude criteria",
            "files_discovered": 0
        }
    # The .git directory is deliberately kept: clone_or_update_repo needs it to
    # fetch/reset on the next sync instead of failing on a directory that is no
    # longer a repository.
    # NOTE(review): ingest_library scans the whole directory it is given, so
    # include_paths/exclude_paths currently only affect files_discovered.
    result = await ingest_library(
        library_id=library_id,
        name=name,
        description=description,
        # ingest_library resolves source_path against DOCS_PATH; joining an
        # absolute path yields that absolute path, so the clone is found even
        # when the repos directory lives outside DOCS_PATH.
        source_path=str(repo_path)
    )
    return {
        "success": result.get("success", False),
        "library_id": library_id,
        "name": name,
        "files_discovered": len(files),
        "chunks_created": result.get("chunks_created", 0),
        "vectors_added": result.get("vectors_added", 0)
    }
 async def sync_sources(
    sources_config: Optional[List[Dict[str, Any]]] = None,
    repos_base: Optional[Path] = None
 ) -> List[Dict[str, Any]]:
    """
    Sync all git sources defined in config.
    Args:
        sources_config: List of source configs, or a mapping with a "sources"
            list (same format as docs_sources.yaml). When None, the list is
            loaded from docs_sources.yaml three directories above this file.
        repos_base: Base directory for repos
    Returns:
        List of results, one per source. A source that fails yields
        {"success": False, "library_id": ..., "error": ...} and does not stop
        the remaining sources.
    """
    if sources_config is None:
        # Load from default config file
        import yaml
        config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"
        if not config_path.exists():
            return [{"success": False, "error": f"Config not found: {config_path}"}]
        with open(config_path) as f:
            # safe_load returns None for an empty file
            sources_config = yaml.safe_load(f) or {}
    if isinstance(sources_config, dict):
        # Accept the whole parsed YAML document as well as the bare list
        sources_config = sources_config.get("sources") or []
    results = []
    for source in sources_config:
        # Resolved up front so the error handlers below can never fail on a
        # malformed (non-mapping) entry.
        library_id = source.get("library_id", "unknown") if isinstance(source, dict) else "unknown"
        try:
            result = await ingest_git_source(
                library_id=source.get("library_id"),
                name=source.get("name"),
                description=source.get("description"),
                repo_url=source.get("repo_url"),
                branch=source.get("branch", "main"),
                include_paths=source.get("include_paths"),
                exclude_paths=source.get("exclude_paths"),
                repos_base=repos_base
            )
        except GitCloneError as e:
            result = {
                "success": False,
                "library_id": library_id,
                "error": str(e)
            }
        except Exception as e:
            result = {
                "success": False,
                "library_id": library_id,
                "error": f"Unexpected error: {e}"
            }
        results.append(result)
    return results
@@ -0,0 +1,387 @@
 # Document Ingestion Logic
 import asyncio
 import os
 from pathlib import Path
 from typing import List, Dict, Any, Optional, BinaryIO
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 # Import local modules
 from .config import settings
 from .chunking import chunk_text, estimate_tokens
 from .embeddings import embed_texts
 from .vector_store import upsert_chunks
 from .db import insert_document_chunk, upsert_library, clear_library_documents
 from .git_source import ingest_git_source
 # File suffixes (lower-case, with leading dot) that ingestion will read.
 # '.pdf' is extracted with pypdf; every other suffix is read as UTF-8 text.
 SUPPORTED_EXTENSIONS = {'.md', '.txt', '.py', '.js', '.ts', '.json', 
                        '.yaml', '.yml', '.html', '.css', '.pdf'}
 # Default documents path from environment or fallback
 DOCS_PATH = Path(os.getenv("DOCS_PATH", "./docs"))
 def get_file_size(path: Path) -> int:
    """Return the size of ``path`` in bytes, or -1 if it cannot be stat'ed."""
    try:
        size = path.stat().st_size
    except OSError:
        size = -1
    return size
 async def read_document_file(path: Path) -> str:
    """
    Read document content from a file.
    PDF files are extracted page by page with pypdf; every other supported
    extension is read as UTF-8 text.
    Args:
        path: Path to the file
    Returns:
        Content as string. An empty string is returned when the file does not
        exist, has an unsupported extension, contains only whitespace, or
        cannot be read/parsed.
    Raises:
        ImportError: If a PDF is requested but pypdf is not installed
    """
    if not path.exists():
        return ""
    # Check extension
    suffix = path.suffix.lower()
    if suffix == '.pdf':
        # The import must sit inside the try for the helpful message below to
        # be reachable when pypdf is missing.
        try:
            from pypdf import PdfReader
        except ImportError as e:
            raise ImportError("pypdf is required for PDF files. Install with: pip install pypdf") from e
        try:
            reader = PdfReader(str(path))
            # Pages without extractable text are skipped
            pages = [text for text in (page.extract_text() for page in reader.pages) if text]
            return "\n\n".join(pages)
        except Exception as e:
            print(f"  Warning: Could not read PDF {path}: {e}")
            return ""
    if suffix not in SUPPORTED_EXTENSIONS:
        print(f"  Unsupported file type: {suffix}")
        return ""
    # Read text-based files
    try:
        content = path.read_text(encoding='utf-8')
        return content if content.strip() else ""
    except Exception as e:
        print(f"  Warning: Could not read {path}: {e}")
        return ""
 async def ingest_library(library_id: str, name: str, description: Optional[str] = None, source_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Ingest all documents for a library.
    Upserts the library record, replaces the library's chunk rows in SQLite
    with freshly chunked content, and sends the embedded chunks to the vector
    store.
    Args:
        library_id: Unique identifier for the library
        name: Library name
        description: Optional description
        source_path: Path to library folder (relative to DOCS_PATH; an absolute
            path is used as-is). Required.
    Returns:
        Summary dict with "success", "library_id", "files_processed",
        "chunks_created" and "vectors_added"; or {"success": False, "error": str}
        when source_path is missing or the directory does not exist. "success"
        is False when any chunk could not be saved or the vector store reports
        failure.
    """
    import hashlib

    print(f"\n[Library] Processing: {library_id}")
    if source_path is None:
        # DOCS_PATH / None would raise TypeError; report the problem instead
        print("  Error: source_path is required")
        return {"success": False, "error": "source_path is required"}
    if source_path:
        print(f"  Source: {source_path}")
    # Ensure library record exists
    result = upsert_library(library_id, name, description, source_path)
    print(f"  [{result.get('success', False)}] Library record: {'created' if not result.get('exists') else 'updated'}")
    # Get the library folder path
    library_dir = DOCS_PATH / source_path
    if not library_dir.exists():
        print(f"  Error: Directory does not exist: {library_dir}")
        return {"success": False, "error": f"Directory not found: {library_dir}"}
    # Find all supported files (recursive); sorted so runs are reproducible
    print(f"  [Library] Scanning for files in: {library_dir}")
    doc_files = sorted(
        file_path
        for file_path in library_dir.rglob('*')
        if file_path.is_file() and file_path.suffix.lower() in SUPPORTED_EXTENSIONS
    )
    print(f"  [Library] Found {len(doc_files)} document(s)")
    # Clear old chunks for this library
    print(f"  [Library] Clearing existing chunks...")
    clear_result = clear_library_documents(library_id)
    if not clear_result.get('success'):
        print(f"  Warning: Could not clear library docs: {clear_result}")
    else:
        print(f"  [Library] Cleared {clear_result.get('deleted', 0)} existing chunks")
    # Process documents
    all_chunks = []
    processed_files = 0
    for file_path in doc_files:
        # Read file content
        print(f"  [File] Reading: {file_path.relative_to(library_dir)}")
        content = await read_document_file(file_path)
        if not content:
            continue
        # Estimate tokens and chunk
        num_tokens = estimate_tokens(content)
        chunks = chunk_text(content, max_tokens=500, overlap_tokens=80)
        if not chunks:
            print(f"  [File] No valid chunks from {file_path.name}")
            continue
        # Embed chunks and prepare for storage
        print(f"    Chunked into {len(chunks)} pieces (approx. {num_tokens} tokens)")
        embeddings = embed_texts(chunks)
        base_path = file_path.relative_to(library_dir).as_posix()
        # The file stem alone is not unique (docs/a/index.md vs docs/b/index.md,
        # or the same file name in two libraries), and a reused id overwrites
        # the earlier chunk row. A short digest of library id + relative path
        # keeps ids unique while staying free of path separators.
        path_digest = hashlib.sha1(f"{library_id}/{base_path}".encode("utf-8")).hexdigest()[:10]
        for i, chunk in enumerate(chunks):
            all_chunks.append({
                "id": f"{file_path.stem}-{path_digest}-{i}",
                "library_id": library_id,
                "path": base_path,
                "title": Path(base_path).stem,
                "content": chunk,
                "chunk_index": i,
                "embedding": embeddings[i]
            })
        processed_files += 1
    print(f"  [Library] Processed {processed_files} file(s), {len(all_chunks)} total chunks")
    # Save chunks to SQLite
    saved_chunks = 0
    if all_chunks:
        for chunk in all_chunks:
            insert_result = insert_document_chunk(
                doc_id=chunk["id"],
                library_id=chunk["library_id"],
                path=chunk["path"],
                title=chunk.get("title"),
                content=chunk["content"],
                chunk_index=chunk["chunk_index"],
                token_estimate=estimate_tokens(chunk["content"])
            )
            if insert_result.get('success'):
                saved_chunks += 1
            else:
                # Surface failures instead of silently reporting a full save
                print(f"  Warning: Could not save chunk {chunk['id']}: {insert_result.get('error')}")
        print(f"  [Library] Saved {saved_chunks} chunks to SQLite")
    else:
        print(f"  [Library] No chunks to save to SQLite")
    # Save vectors to Qdrant
    upsert_result = None
    if all_chunks:
        upsert_result = await upsert_chunks(all_chunks)
        print(f"  [Library] Vector store: {upsert_result.get('success', False)} ({upsert_result.get('points_added', 0)} added)")
    else:
        print(f"  [Library] No vectors to add to Qdrant")
    # A vector store result without an explicit "success" key is not treated
    # as a failure.
    vectors_ok = upsert_result is None or bool(upsert_result.get('success', True))
    return {
        "success": saved_chunks == len(all_chunks) and vectors_ok,
        "library_id": library_id,
        "files_processed": processed_files,
        "chunks_created": len(all_chunks),
        "vectors_added": upsert_result.get('points_added', 0) if upsert_result is not None else 0
    }
 async def ingest_git_source_from_config(
    repo_url: str,
    branch: str = "main",
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None,
    repos_base: Optional[Path] = None
 ) -> Dict[str, Any]:
    """
    Ingest a git repository, deriving the library identity from its URL.
    The last path segment of the URL (without a trailing ".git") becomes both
    the library id and the display name, e.g.
    "https://github.com/org/toolkit.git" -> "toolkit".
    Args:
        repo_url: Git repository URL to clone from
        branch: Branch to checkout (default: main)
        include_paths: Paths relative to repo root to include (if None, all dirs considered)
        exclude_paths: Paths relative to repo root to exclude
        repos_base: Base directory for cloned repos (defaults to ./data/repos)
    Returns:
        Dict with operation result, as returned by ingest_git_source
    Raises:
        GitCloneError: If git operations fail
    """
    import urllib.parse
    parsed = urllib.parse.urlparse(repo_url)
    path_part = parsed.path.rstrip('/')
    # str.rstrip('.git') strips any trailing '.', 'g', 'i', 't' characters
    # ("toolkit.git" -> "toolk"); remove the literal suffix instead.
    if path_part.endswith('.git'):
        path_part = path_part[:-len('.git')]
    library_id = Path(path_part).name or "unknown"
    # Use the repository name as display name; deriving it from the hostname
    # would label every GitHub repository "github".
    name = library_id
    description = f"Documentation from {path_part}"
    result = await ingest_git_source(
        library_id=library_id,
        name=name,
        description=description,
        repo_url=repo_url,
        branch=branch,
        include_paths=include_paths,
        exclude_paths=exclude_paths,
        repos_base=repos_base
    )
    return result
 async def detect_libraries() -> List[Dict[str, Any]]:
    """
    Register every top-level folder under DOCS_PATH as a library.
    Each folder is upserted into the libraries table with the lower-cased
    folder name as id and the folder name as both name and source path.
    Returns:
        List of {"id", "name", "source_path"} dicts, one per folder (empty when
        DOCS_PATH does not exist)
    """
    print(f"\n[Detection] Scanning for libraries in: {DOCS_PATH}")
    if not DOCS_PATH.exists():
        print(f"  [Detection] Directory does not exist: {DOCS_PATH}")
        return []
    # Only directories directly below DOCS_PATH count as libraries
    folders = [entry for entry in list(DOCS_PATH.iterdir()) if entry.is_dir()]
    total = len(folders)
    found = []
    for position, folder in enumerate(folders, start=1):
        folder_name = folder.name
        lib_id = folder_name.lower()
        # Create library record with defaults
        upsert_library(
            library_id=lib_id,
            name=folder_name,
            description=None,
            source_path=folder_name
        )
        found.append({"id": lib_id, "name": folder_name, "source_path": folder_name})
        print(f"  [{position}/{total}] Library detected: {folder_name} (id: {lib_id})")
    print(f"\n[Detection] Found {len(found)} library(ies)")
    return found
 async def ingest_all(verbose: bool = True) -> Dict[str, Any]:
    """
    Ingest all discovered libraries.
    Args:
        verbose: Whether to print the banner and summary produced by this
            function (the functions it calls print regardless)
    Returns:
        Summary dict with integer "total_libraries", "successful", "failed"
        and "total_chunks"; the same keys are present when no library is found
    """
    if verbose:
        print("\n" + "=" * 60)
        print("DOCUMENT INGESTION STARTED")
        print("=" * 60)
    # Detect libraries
    libraries = await detect_libraries()
    if not libraries:
        # Same keys and value types as the regular summary below, so callers
        # can treat both cases alike.
        result = {"total_libraries": 0, "successful": 0, "failed": 0, "total_chunks": 0}
        if verbose:
            print("\n[Summary] No libraries to ingest")
        return result
    # Ingest each library
    results = []
    for lib in libraries:
        lib_id = lib["id"]
        try:
            result = await ingest_library(
                library_id=lib_id,
                name=lib["name"],
                description=None,
                source_path=lib.get("source_path")
            )
        except Exception as e:
            # One broken library must not abort the remaining ones; it is
            # counted as failed in the summary.
            result = {"success": False, "library_id": lib_id, "error": str(e)}
            if verbose:
                print(f"  [Library] Failed: {lib_id} - {e}")
        if verbose and result.get('success'):
            print(f"  [Library] Done: {result.get('library_id')} - {result.get('chunks_created', 0)} chunks")
        results.append(result)
    # Calculate totals
    total_chunks = sum(r.get('chunks_created', 0) for r in results)
    successful = sum(1 for r in results if r.get('success'))
    result = {
        "total_libraries": len(libraries),
        "successful": successful,
        "failed": len(results) - successful,
        "total_chunks": total_chunks
    }
    if verbose:
        print("\n" + "=" * 60)
        print("INGESTION COMPLETE")
        print("=" * 60)
        print(f"  Libraries processed: {result['total_libraries']}")
        print(f"  Successful: {result['successful']}")
        print(f"  Failed: {result['failed']}")
        print(f"  Total chunks created: {result['total_chunks']}")
    return result
 # Manual smoke test: runs only when this module is executed as a script.
 if __name__ == "__main__":
    # Run ingestion tests
    import asyncio
    async def test_run():
        """Detect libraries, then ingest the first one and print the result."""
        print("Testing ingestion module...\n")
        # Test detect_libraries (this also upserts a record for each folder found)
        libs = await detect_libraries()
        print(f"\nDetected libraries: {len(libs)}")
        if libs:
            # Try to ingest the first library (may fail if no docs exist, which is ok for test)
            print("\nAttempting sample ingestion...")
            result = await ingest_library(
                library_id=libs[0]["id"],
                name=libs[0]["name"],
                source_path=libs[0].get("source_path")
            )
            print(f"Result: {result}")
        print("\n✅ Tests completed!")
    asyncio.run(test_run())
@@ -0,0 +1,299 @@
 """Context7 Docs API."""
 import asyncio
 import shutil
 import yaml
 from pathlib import Path
 from typing import Optional
 from fastapi import FastAPI, File, Form, HTTPException, Query, Request, UploadFile
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel, Field
 from .config import settings
 from .db import (
    clear_library_documents,
    delete_library,
    init_db,
    list_libraries,
    search_libraries,
    upsert_library,
 )
 from .git_source import ingest_git_source
 from .ingest import ingest_all, ingest_library
 from .search import get_library_docs, resolve_library_id, search_docs
 from .vector_store import delete_library_vectors, ensure_collection, get_client, get_collection_name
 # ASGI application object; the middleware, startup hook and route decorators
 # in this module all register against it.
 app = FastAPI(
    title="Context7 Docs API",
    description="Document ingestion and semantic search API for local-context7",
    version="1.0.0",
 )
 class SearchRequest(BaseModel):
    """Request body accepted by ``POST /search``."""
    # Search text; at least one character is required.
    query: str = Field(..., min_length=1)
    # Optional library ID forwarded to search_docs() as a filter.
    library_id: Optional[str] = None
    # Maximum number of results, constrained to 1..50 (default 10).
    limit: int = Field(10, ge=1, le=50)
 class SyncSourcesRequest(BaseModel):
    """Optional request body accepted by ``POST /sources/sync``."""
    # Defaults to False when the field (or the whole body) is omitted.
    override: bool = False
 # Lower-case file suffixes accepted by safe_upload_filename(); an upload whose
 # suffix is not in this set is rejected with HTTP 400.
 ALLOWED_EXTENSIONS = {
    ".md",
    ".txt",
    ".py",
    ".js",
    ".ts",
    ".json",
    ".yaml",
    ".yml",
    ".html",
    ".css",
    ".pdf",
 }
@app.middleware("http")
 async def auth_middleware(request: Request, call_next):
    """Require X-API-Key for mutating endpoints when API_KEY_DOCS_API is set."""
    if not settings.is_auth_enabled:
        return await call_next(request)
    public_prefixes = ("/health", "/libraries", "/docs/")
    if request.method == "GET" and request.url.path.startswith(public_prefixes):
        return await call_next(request)
    if request.headers.get("X-API-Key") != settings.api_key_docs_api:
        return JSONResponse(status_code=401, content={"detail": "Invalid or missing API key"})
    return await call_next(request)
@app.on_event("startup")
 async def startup() -> None:
    init_result = init_db()
    if not init_result.get("success"):
        raise RuntimeError(f"Failed to initialize SQLite database: {init_result.get('error')}")
    last_error = None
    for _ in range(20):
        collection_result = await ensure_collection()
        if collection_result.get("success"):
            return
        last_error = collection_result.get("error")
        await asyncio.sleep(1)
    raise RuntimeError(f"Failed to initialize Qdrant collection: {last_error}")
 def safe_library_id(library_id: str) -> str:
    """Validate a caller-supplied library ID and return it as one path segment.

    Args:
        library_id: Raw ID taken from the request.

    Returns:
        The final path component of ``library_id`` with surrounding
        whitespace stripped.

    Raises:
        HTTPException: 400 when the ID contains ``..``, ``/`` or a backslash,
            or when the resulting segment is empty, ``.`` or ``..``.
    """
    candidate = Path(library_id).name.strip()
    has_forbidden_token = any(token in library_id for token in ("..", "/", "\\"))
    if has_forbidden_token or candidate in {"", ".", ".."}:
        raise HTTPException(status_code=400, detail="Invalid library ID")
    return candidate
 def safe_upload_filename(filename: str) -> str:
    """Reduce an uploaded file name to a safe ``<stem><ext>`` form.

    Args:
        filename: File name supplied by the client.

    Returns:
        The stem restricted to alphanumerics, ``-``, ``_`` and spaces (then
        stripped), followed by the lower-cased extension.

    Raises:
        HTTPException: 400 when the extension is not in
            ``ALLOWED_EXTENSIONS`` or no stem characters survive filtering.
    """
    parsed = Path(filename)
    ext = parsed.suffix.lower()
    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsafe extension: {ext}. Allowed extensions: {', '.join(sorted(ALLOWED_EXTENSIONS))}",
        )
    kept = [ch for ch in parsed.stem if ch.isalnum() or ch in "-_ "]
    stem = "".join(kept).strip()
    if stem:
        return f"{stem}{ext}"
    raise HTTPException(status_code=400, detail="Filename contains only unsafe characters")
 def docs_root() -> Path:
    """Return ``settings.docs_path`` wrapped in a ``Path``."""
    configured = settings.docs_path
    return Path(configured)
 def sources_config_path() -> Path:
    """Return the path of ``docs_sources.yaml``, located three directory levels above this file."""
    this_file = Path(__file__).resolve()
    return this_file.parents[2] / "docs_sources.yaml"
@app.get("/health")
 async def health_check():
    return {"status": "ok", "service": "docs-api"}
@app.get("/collections")
 async def collections():
    try:
        client = get_client()
        info = client.get_collection(get_collection_name())
        vectors = getattr(info, "vectors_count", None) or getattr(info, "points_count", 0) or 0
        return {"collections": {get_collection_name(): {"vectors": vectors}}}
    except Exception as e:
        return {"collections": {}, "warning": str(e)}
@app.get("/libraries")
 async def list_libraries_api():
    libs = list_libraries()
    if isinstance(libs, dict) and not libs.get("success", True):
        raise HTTPException(status_code=500, detail=libs.get("error", "Failed to list libraries"))
    return {"libraries": libs, "count": len(libs)}
@app.get("/libraries/search")
 async def search_libraries_api(q: str = Query(..., min_length=1)):
    matches = resolve_library_id(q)
    return {"matches": matches, "count": len(matches)}
@app.post("/search")
 async def search_docs_api(payload: SearchRequest):
    results = search_docs(payload.query, library_id=payload.library_id, limit=payload.limit)
    return {
        "query": payload.query,
        "library_id": payload.library_id,
        "results": results,
        "count": len(results),
    }
@app.get("/docs/{library_id}")
@app.get("/libraries/{library_id}/docs")
 async def get_library_docs_api(
    library_id: str,
    topic: Optional[str] = Query(None),
    tokens: int = Query(8000, ge=1),
 ):
    docs = get_library_docs(library_id=library_id, topic=topic, token_limit=tokens)
    return {"library_id": library_id, "content": docs}
@app.post("/ingest/all")
 async def ingest_all_api():
    return await ingest_all()
@app.post("/ingest/{library_id}")
 async def ingest_library_api(library_id: str):
    library_id = safe_library_id(library_id)
    source_path = library_id
    return await ingest_library(library_id=library_id, name=library_id, source_path=source_path)
@app.post("/api/v1/libraries/{library_id}")
 async def api_create_library(
    library_id: str,
    name: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
 ):
    library_id = safe_library_id(library_id)
    lib_dir = docs_root() / library_id
    lib_dir.mkdir(parents=True, exist_ok=True)
    result = upsert_library(library_id, name or library_id, description, library_id)
    if not result.get("success"):
        raise HTTPException(status_code=500, detail=result.get("error", "Failed to create library"))
    return {
        "success": True,
        "created": not result.get("exists", False),
        "library_id": library_id,
        "name": name or library_id,
        "description": description,
        "path": str(lib_dir),
    }
@app.delete("/api/v1/libraries/{library_id}")
 async def api_delete_library(library_id: str):
    library_id = safe_library_id(library_id)
    lib_dir = docs_root() / library_id
    deleted_files = 0
    if lib_dir.exists():
        for path in lib_dir.rglob("*"):
            if path.is_file():
                deleted_files += 1
        shutil.rmtree(lib_dir)
    docs_result = clear_library_documents(library_id)
    vectors_result = await delete_library_vectors(library_id)
    library_result = delete_library(library_id)
    failures = [
        r.get("error")
        for r in (docs_result, vectors_result, library_result)
        if isinstance(r, dict) and not r.get("success", True)
    ]
    if failures:
        raise HTTPException(status_code=500, detail="; ".join(failures))
    return {"success": True, "library_id": library_id, "deleted_files": deleted_files}
@app.post("/api/v1/upload/{library_id}")
 async def api_upload(library_id: str, file: UploadFile = File(...)):
    library_id = safe_library_id(library_id)
    safe_name = safe_upload_filename(file.filename or "upload.txt")
    lib_dir = docs_root() / library_id
    lib_dir.mkdir(parents=True, exist_ok=True)
    contents = await file.read()
    if len(contents) > 5 * 1024 * 1024:
        raise HTTPException(status_code=400, detail="File too large (max 5MB)")
    target = lib_dir / safe_name
    target.write_bytes(contents)
    upsert_library(library_id, library_id, None, library_id)
    return {
        "success": True,
        "library_id": library_id,
        "filename": safe_name,
        "path": str(target.relative_to(docs_root())),
        "size_bytes": len(contents),
    }
@app.get("/api/v1/sources")
@app.get("/sources/config")
 async def api_list_sources():
    path = sources_config_path()
    if not path.exists():
        return {"success": True, "sources": [], "count": 0}
    with path.open() as f:
        data = yaml.safe_load(f) or {}
    sources = data.get("sources", data if isinstance(data, list) else [])
    if not isinstance(sources, list):
        sources = []
    return {"success": True, "sources": sources, "count": len(sources)}
@app.post("/sources/sync")
 async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
    source_data = await api_list_sources()
    sources = source_data["sources"]
    override = payload.override if payload else False
    results = []
    for source in sources:
        result = await ingest_git_source(
            library_id=source["library_id"],
            name=source.get("name") or source["library_id"],
            description=source.get("description"),
            repo_url=source["repo_url"],
            branch=source.get("branch", "main"),
            include_paths=source.get("include_paths"),
            exclude_paths=source.get("exclude_paths"),
        )
        results.append(result)
    successful = len([r for r in results if r.get("success")])
    return {
        "success": successful == len(results),
        "total_sources": len(results),
        "successful": successful,
        "failed": len(results) - successful,
        "results": results,
    }
@@ -0,0 +1,47 @@
 # Data Models for document processing and API responses
 from typing import Any, Dict, List, Optional
 class DocumentChunk:
    """A piece of text to be embedded, together with free-form metadata."""
    def __init__(
        self,
        text: str,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """Store the chunk text and its metadata.

        Args:
            text: The chunk's text.
            metadata: Optional metadata mapping; a new empty dict is used
                when omitted (or when a falsy value is passed).
        """
        self.text = text
        self.metadata = metadata or {}
    @property
    def doc_id(self) -> str:
        """Return a stable ID of the form ``doc-<sha256 hex digest of the text>``.

        A content digest is used instead of the built-in ``hash()`` because
        string hashes are randomized per interpreter process, which made the
        ID differ between runs for the same text.
        """
        import hashlib
        digest = hashlib.sha256(self.text.encode("utf-8")).hexdigest()
        return f"doc-{digest}"
 class IngestResponse:
    """Outcome of a document ingestion run."""
    def __init__(self, success: bool, chunks_count: int = 0, error: Optional[str] = None):
        """Record whether ingestion succeeded, how many chunks it produced and any error text."""
        self.success, self.chunks_count, self.error = success, chunks_count, error
 class SearchResponse:
    """Container for the results of one search query."""
    def __init__(self, results: List[Dict[str, Any]], query: str, total_results: int):
        """Keep the result rows, the query that produced them and the reported total."""
        self.total_results = total_results
        self.query = query
        self.results = results
@@ -0,0 +1,235 @@
 # Search Operations for Semantic Query and Library Navigation
 from typing import List, Dict, Any, Optional
 from pathlib import Path
 from .config import settings
 from .vector_store import get_client, _collection_name as VECTOR_COLLECTION
 from .embeddings import embed_text, get_embedding_size
 from .db import get_chunks_for_library, list_libraries
 def _query_points(query: str, library_id: Optional[str], limit: int) -> list:
    """Embed ``query`` and return the raw scored points from Qdrant; errors propagate to the caller."""
    query_embedding = embed_text(query)
    client = get_client()
    search_filter = None
    if library_id:
        # Deliberately no ImportError fallback here: a library-scoped search
        # must never silently turn into an unfiltered one.
        from qdrant_client.models import FieldCondition, Filter, MatchValue
        search_filter = Filter(
            must=[
                FieldCondition(
                    key="library_id",
                    match=MatchValue(value=library_id),
                )
            ]
        )
    # qdrant-client names this parameter `query_filter`; an unknown keyword
    # such as `search_filter` is rejected by the client.
    return client.search(
        collection_name=VECTOR_COLLECTION,
        query_vector=query_embedding,
        limit=limit,
        query_filter=search_filter,
    )
 def _select_within_budget(chunks: list, token_limit: int) -> tuple:
    """Take chunks in order until the estimated token budget (4 characters per token) is used up."""
    selected = []
    total_tokens = 0
    for chunk in chunks:
        content = chunk.get("content", "") or ""
        tokens = len(content) // 4  # Simple token estimate
        if total_tokens + tokens <= token_limit:
            selected.append(chunk)
            total_tokens += tokens
            continue
        # The chunk does not fit: keep as much of it as the budget allows,
        # then stop so the limit is not exceeded by later chunks.
        remaining = token_limit - total_tokens
        if remaining > 0:
            selected.append({"content": content[:remaining * 4], "title": chunk.get("title", "")})
            total_tokens += remaining
        break
    return selected, total_tokens
 def search_docs(
    query: str,
    library_id: Optional[str] = None,
    limit: int = 10
 ) -> List[Dict[str, Any]]:
    """
    Search documents by semantic similarity in Qdrant.
    Args:
        query: The search query string
        library_id: Optional filter to search only within a library
        limit: Maximum number of results to return
    Returns:
        List of dicts with format:
            {
              "id": "...",
              "score": 0.123,
              "library_id": "...",
              "path": "...",
              "title": "...",
              "chunk_index": 0
            }
        Points with a non-positive score or without a payload are dropped.
        A blank query, or any error during the search, yields an empty list
        (errors are printed, not raised).
    """
    if not query or not query.strip():
        return []
    try:
        formatted_results = []
        for point in _query_points(query, library_id, limit):
            if point.score > 0 and point.payload:
                formatted_results.append({
                    "id": point.payload.get("id", ""),
                    "score": float(point.score),
                    "library_id": point.payload.get("library_id", ""),
                    "path": point.payload.get("path", ""),
                    "title": point.payload.get("title", ""),
                    "chunk_index": point.payload.get("chunk_index", 0)
                })
        return formatted_results
    except Exception as e:
        print(f"Search error: {e}")
        return []
 def get_library_docs(
    library_id: str,
    topic: Optional[str] = None,
    token_limit: int = 8000
 ) -> str:
    """
    Retrieve documentation content from a library.
    Args:
        library_id: The library ID to fetch docs from
        topic: Optional topic filter - if provided, the most similar chunks
            are taken from the vector store (best match first); otherwise
            all stored chunks are used, highest chunk_index first
        token_limit: Maximum estimated tokens to include in output
    Returns:
        Combined markdown content as string. When nothing is found, or when
        an error occurs, a human-readable message is returned instead of
        raising.
    """
    try:
        if topic:
            print(f"  [Search] Searching library '{library_id}' for topic: {topic}")
            # Query the store directly: the chunk text lives in the point
            # payload, which search_docs() does not include in its results.
            candidates = [
                {
                    "title": point.payload.get("title", ""),
                    "content": point.payload.get("content", ""),
                    "chunk_index": point.payload.get("chunk_index", 0),
                }
                for point in _query_points(topic, library_id, 20)
                if point.score > 0 and point.payload
            ]
            if not candidates:
                return f"No documents found in library '{library_id}' matching topic: {topic}"
            print(f"  [Search] Found {len(candidates)} relevant chunks")
        else:
            print(f"  [Fetch] Retrieving chunks from library '{library_id}'")
            chunks_data = get_chunks_for_library(library_id)
            if not chunks_data:
                return f"No documents found in library '{library_id}'"
            # Sort by chunk_index descending and pick top ones to respect token limit
            candidates = sorted(chunks_data, key=lambda x: x.get("chunk_index", 0), reverse=True)
        selected_chunks, total_tokens = _select_within_budget(candidates, token_limit)
        print(f"  [Fetch] Selected {len(selected_chunks)} chunks ({total_tokens} tokens)")
        # Combine chunks into markdown
        md_parts = []
        for chunk in selected_chunks:
            title = chunk.get("title")
            content = chunk.get("content", "") or ""
            if title and content.strip():
                # Heading heuristic: always title the output while it has no
                # paragraph break yet; afterwards only when none of the last
                # five parts is already a heading.
                if not md_parts or "\n\n" not in "".join(md_parts):
                    md_parts.append(f"# {title}")
                elif not any(part.startswith("#") for part in md_parts[-5:]):
                    md_parts.append(f"\n# {title}\n")
            md_parts.append(content)
        result = "\n\n".join(md_parts)
        # If no headings were added, prepend library title
        if not any(part.startswith("#") for part in result.split("\n")[:3]):
            result = f"# {library_id.upper().replace('_', ' ')}\n\n" + result
        return result.rstrip()
    except Exception as e:
        print(f"Error getting library docs: {e}")
        return f"Error retrieving documents from library '{library_id}': {str(e)}"
 def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Resolve a library name to potential matches (Context7-style).
    Matching is a case-insensitive substring test against each library's
    name and ID; at most five candidates are returned.
    Args:
        library_name: Partial or full library name to search for
    Returns:
        List of Context7-style candidate dicts:
            {
              "id": "/local/foundryvtt",
              "name": "foundryvtt",
              "description": "...",
              "source": "local"
            }
        An empty list is returned when nothing matches, when
        list_libraries() returns a dict instead of a list, or on any error.
    """
    max_candidates = 5
    try:
        libraries = list_libraries()
        # A dict is not a list of library rows (presumably an error result - verify
        # against list_libraries); iterating it would only yield its keys.
        if not libraries or isinstance(libraries, dict):
            return []
        needle = library_name.lower()
        candidates = []
        for lib in libraries:
            lib_id = lib.get("id")
            if lib_id is None:
                # Without an ID there is nothing to resolve to.
                continue
            # Name may be missing or None; treat it as empty for matching.
            lib_name = lib.get("name") or ""
            if needle in str(lib_name).lower() or needle in str(lib_id).lower():
                candidates.append({
                    "id": f"/local/{lib_id}",
                    "name": lib_name or lib_id,
                    "description": lib.get("description", ""),
                    "source": "local"
                })
                if len(candidates) == max_candidates:
                    break
        print(f"  [Resolve] Found {len(candidates)} candidate(s) for: {library_name}")
        return candidates
    except Exception as e:
        print(f"Error resolving library ID: {e}")
        return []
 if __name__ == "__main__":
    # Manual smoke test; this branch only executes when the module is run as a script.
    def test_search():
        """Test search functionality."""
        print("Testing search module...\n")
        # resolve_library_id() and search_docs() are plain synchronous
        # functions returning lists, so they must be called without `await`.
        # Test 1: Resolve a library name against the stored libraries
        print("1. Testing resolve_library_id()...")
        results = resolve_library_id("foundryvtt")
        print(f"   Results: {len(results)} candidates\n")
        # Test 2: Empty query should return empty list
        print("2. Testing search_docs() with empty query...")
        results = search_docs("")
        print(f"   Results: {len(results)} chunks\n")
        print("✅ All tests completed!")
    test_search()
@@ -0,0 +1,361 @@
 # Vector Store Operations for Qdrant
 import asyncio
 import uuid
 from typing import List, Dict, Any, Optional
 try:
    from qdrant_client import QdrantClient
    from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 except ImportError:
    QdrantClient = None
    Distance = VectorParams = PointStruct = Filter = FieldCondition = MatchValue = None
 # Singleton client instance; stays None until get_client() creates the client.
 _client: Optional[Any] = None
 # Collection name comes from settings.collection_name; if importing or reading
 # the settings raises anything, a fixed default name is used instead.
 try:
    from .config import settings
    _collection_name = settings.collection_name
 except Exception:
    _collection_name = "local_context7_docs"
 def get_client() -> Any:
    """Return the shared Qdrant client, building it on the first call.

    The ``QDRANT_URL`` environment variable takes precedence; without it the
    host and port from the settings are used. A ``.env`` file is loaded
    first when python-dotenv is available.

    Raises:
        RuntimeError: If qdrant-client is not installed.
    """
    global _client
    if _client is not None:
        return _client
    if QdrantClient is None:
        raise RuntimeError("qdrant-client is not installed")
    try:
        from dotenv import load_dotenv
        load_dotenv()
    except ImportError:
        # python-dotenv is optional.
        pass
    import os
    url = os.getenv("QDRANT_URL")
    if url:
        _client = QdrantClient(url=url)
        return _client
    # No URL configured: connect by host and port from the settings.
    from .config import settings
    _client = QdrantClient(host=settings.vector_store_host, port=settings.vector_store_port)
    return _client
 def get_collection_name() -> str:
    """Return the name of the Qdrant collection used by this module."""
    name = _collection_name
    return name
 def get_embedding_size() -> int:
    """Return the embedding dimension reported by the embeddings module, or 384 if it is unavailable."""
    try:
        # Imported lazily under an alias so it cannot be confused with this wrapper.
        from .embeddings import get_embedding_size as _embedding_size
        return _embedding_size()
    except (ImportError, RuntimeError):
        # Embeddings module not importable or not ready: use the fallback size.
        return 384
 async def ensure_collection(vector_size: Optional[int] = None) -> Dict[str, Any]:
    """
    Ensure the Qdrant collection exists with proper schema.
    The collection uses cosine distance. If it already exists with a
    different vector size it is DELETED and recreated, which discards every
    stored vector.
    Args:
        vector_size: Override embedding dimension (uses get_embedding_size() if not provided)
    Returns:
        Dict with operation result: ``success`` plus, on success,
        ``collection``, ``vector_size``, ``created`` and (only after a
        recreate) ``resized``; on failure, ``error``.
    """
    try:
        if QdrantClient is None:
            return {"success": False, "error": "qdrant-client is not installed"}
        client = get_client()
        size = vector_size or get_embedding_size()
        params = VectorParams(size=size, distance=Distance.COSINE)
        existing = client.get_collections().collections
        collection_exists = any(c.name == _collection_name for c in existing)
        if not collection_exists:
            # qdrant-client names this parameter `vectors_config`; `vectors=` and
            # `wait=` are not accepted by create_collection.
            client.create_collection(
                collection_name=_collection_name,
                vectors_config=params,
            )
            return {
                "success": True,
                "collection": _collection_name,
                "vector_size": size,
                "created": True
            }
        result = {
            "success": True,
            "collection": _collection_name,
            "vector_size": size,
            "created": False
        }
        try:
            collection_info = client.get_collection(_collection_name)
            current_size = collection_info.config.params.vectors.size
        except Exception:
            # The size could not be read from the collection info; keep the
            # existing collection as it is.
            return result
        if current_size != size:
            # Wrong dimension: recreate. Failures here are NOT swallowed, so a
            # half-finished recreate is reported instead of claimed as success.
            client.delete_collection(_collection_name)
            client.create_collection(
                collection_name=_collection_name,
                vectors_config=params,
            )
            result["resized"] = True
        return result
    except Exception as e:
        return {"success": False, "error": str(e)}
 async def upsert_chunks(chunks: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Write chunks to the vector collection, inserting or replacing points.
    Each point ID is a UUIDv5 derived from ``"<library_id>:<id>"``, so the
    same chunk always maps to the same point.
    Args:
        chunks: List of chunk dicts with format:
            {
              "id": "...",
              "library_id": "...",
              "path": "...",
              "title": "...",
              "chunk_index": 0,
              "content": "...",
              "embedding": [...]
            }
            ``id``, ``library_id`` and ``embedding`` are required; the other
            keys default to ``""`` (``0`` for ``chunk_index``).
    Returns:
        ``{"success": True, "points_added": <n>}`` on success, otherwise
        ``{"success": False, "error": "..."}``.
    """
    try:
        if QdrantClient is None:
            return {"success": False, "error": "qdrant-client is not installed"}
        if not chunks:
            return {"success": True, "points_added": 0}
        client = get_client()
        # One PointStruct per chunk; the payload mirrors the chunk minus its embedding.
        points = [
            PointStruct(
                id=str(uuid.uuid5(uuid.NAMESPACE_URL, f"{item['library_id']}:{item['id']}")),
                vector=item["embedding"],
                payload={
                    "id": item["id"],
                    "library_id": item["library_id"],
                    "path": item.get("path", ""),
                    "title": item.get("title", ""),
                    "chunk_index": item.get("chunk_index", 0),
                    "content": item.get("content", "")
                }
            )
            for item in chunks
        ]
        client.upsert(_collection_name, points=points)
        return {"success": True, "points_added": len(points)}
    except Exception as exc:
        return {"success": False, "error": str(exc)}
 async def search_vectors(
    query_vector: List[float],
    library_id: Optional[str] = None,
    limit: int = 10
 ) -> List[Dict[str, Any]]:
    """
    Search for semantically similar vectors.
    Args:
        query_vector: The embedding vector to search against
        library_id: Optional filter by library ID
        limit: Maximum results to return
    Returns:
        List of result dicts with format:
            {
              "id": "...",
              "score": 0.123,
              "library_id": "...",
              "path": "...",
              "title": "...",
              "chunk_index": 0
            }
        Points with a non-positive score or without a payload are dropped.
        An empty list is returned when qdrant-client is missing or when the
        search fails (the error is printed, not raised).
    """
    try:
        if QdrantClient is None:
            return []
        client = get_client()
        # Build filter if library_id is specified
        search_filter = None
        if library_id:
            search_filter = Filter(
                must=[
                    FieldCondition(
                        key="library_id",
                        match=MatchValue(value=library_id),
                    )
                ]
            )
        # qdrant-client names this parameter `query_filter`; an unknown keyword
        # such as `search_filter` is rejected by the client.
        results = client.search(
            collection_name=_collection_name,
            query_vector=query_vector,
            limit=limit,
            query_filter=search_filter
        )
        # Format results
        formatted_results = []
        for result in results:
            if result.score > 0 and result.payload:
                formatted_results.append({
                    "id": result.payload["id"],
                    "score": float(result.score),
                    "library_id": result.payload["library_id"],
                    "path": result.payload.get("path", ""),
                    "title": result.payload.get("title", ""),
                    "chunk_index": result.payload.get("chunk_index", 0)
                })
        return formatted_results
    except Exception as e:
        # Keep the "empty list on failure" contract, but leave a trace of the cause.
        print(f"Vector search error: {e}")
        return []
 async def delete_library_vectors(library_id: str) -> Dict[str, Any]:
    """
    Delete all vectors for a given library.
    The deletion is a single filtered delete on the ``library_id`` payload
    field, so a failure is reported rather than hidden behind a partially
    completed scroll loop.
    Args:
        library_id: The library ID to delete vectors for
    Returns:
        ``{"success": True, "library_id": ...}`` on success (with an extra
        ``skipped`` note when qdrant-client is not installed), otherwise
        ``{"success": False, "error": "..."}``.
    """
    try:
        if QdrantClient is None:
            return {"success": True, "library_id": library_id, "skipped": "qdrant-client is not installed"}
        client = get_client()
        # Use filter to delete only vectors matching the library_id
        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="library_id",
                    match=MatchValue(value=library_id),
                )
            ]
        )
        # qdrant-client accepts a Filter as the points selector and removes
        # every matching point server-side; no scrolling over IDs is needed.
        client.delete(
            collection_name=_collection_name,
            points_selector=filter_condition
        )
        return {
            "success": True,
            "library_id": library_id
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
 if __name__ == "__main__":
    # Manual smoke test; this branch only executes when the module is run as a script.
    # Test vector store module
    # NOTE(review): `os` is imported here but not used anywhere in this block.
    import os
    print("Testing vector store module...\n")
    # Test ensure_collection
    print("1. Testing ensure_collection()...")
    result = asyncio.run(ensure_collection())
    print(f"   Result: {result}\n")
    # Test search with empty query (will return empty since no vectors exist yet)
    print("2. Testing search_vectors() with dummy vector...")
    # 384 matches the fallback size returned by get_embedding_size().
    dummy_vector = [0.1] * 384
    results = asyncio.run(search_vectors(dummy_vector, limit=5))
    print(f"   Results count: {len(results)}\n")
    # Test delete_library_vectors (will succeed even if no vectors exist)
    print("3. Testing delete_library_vectors()...")
    result = asyncio.run(delete_library_vectors("test-library"))
    print(f"   Result: {result}\n")
    print("✅ All tests completed!")
@@ -0,0 +1 @@
 """WebUI module for Context7 Docs."""
@@ -0,0 +1,166 @@
 /* Page layout: centered column, at most 1000px wide */
 .container {
    max-width: 1000px;
    margin: 0 auto;
    padding: 20px;
 }
 /* Header and navigation */
 header {
    border-bottom: 1px solid #ccc;
    padding-bottom: 15px;
    margin-bottom: 20px;
 }
 header h1 {
    margin: 0 0 10px 0;
    font-size: 1.5rem;
 }
 nav {
    display: flex;
    gap: 15px;
 }
 nav a {
    text-decoration: none;
    color: #0066cc;
    font-size: 0.9rem;
 }
 /* Link marked with the "active" class: bold and underlined */
 nav a.active {
    font-weight: bold;
    text-decoration: underline;
 }
 main h2 {
    margin-bottom: 15px;
 }
 /* Footer */
 footer {
    margin-top: 40px;
    padding-top: 15px;
    border-top: 1px solid #ccc;
    font-size: 0.8rem;
    color: #666;
 }
 /* Status cards */
 .status-card {
    background: #f5f5f5;
    padding: 20px;
    border-radius: 8px;
    border-left: 4px solid #00c467;
 }
 .status-message {
    background: #e8f4fd;
    padding: 10px;
    border-radius: 4px;
    margin: 5px 0;
 }
 /* Tables */
 .library-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 10px;
 }
 .library-table th, .library-table td {
    padding: 10px;
    text-align: left;
    border-bottom: 1px solid #ddd;
 }
 .library-table th {
    background: #f5f5f5;
    font-weight: bold;
 }
 /* Forms */
 form input[type="text"], form textarea, form select {
    padding: 8px;
    border: 1px solid #ccc;
    border-radius: 4px;
    margin-right: 10px;
    margin-bottom: 10px;
 }
 /* Buttons (applies to every button element) */
 button {
    background: #0066cc;
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 4px;
    cursor: pointer;
 }
 button:hover {
    background: #0055aa;
 }
 /* Pre formatting: long lines wrap; horizontal scroll is the fallback */
 pre {
    background: #f5f5f5;
    padding: 15px;
    border-radius: 4px;
    overflow-x: auto;
    white-space: pre-wrap;
    word-break: break-word;
 }
 /* Search results */
 .result-card {
    background: #fff;
    border: 1px solid #ddd;
    padding: 15px;
    margin: 10px 0;
    border-radius: 4px;
 }
 .result-card h3 {
    margin: 0 0 8px 0;
 }
 .hint {
    color: #666;
    font-size: 0.85rem;
    margin-top: 15px;
 }
 /* Status colors */
 .status-ok {
    color: #00c467;
    font-weight: bold;
 }
 /* Scrollable preview box, capped at 300px tall */
 .content-preview {
    max-height: 300px;
    overflow-y: auto;
 }
 .results-count {
    background: #e8f4fd;
    padding: 10px;
    border-radius: 4px;
    margin-bottom: 15px;
 }
 .source-card {
    background: #f5f5f5;
    padding: 15px;
    margin: 10px 0;
    border-radius: 4px;
 }
 .actions-bar {
    margin-top: 15px;
 }
 .actions-bar form {
    display: inline-flex;
 }
 /* Scrollable document box, capped at 600px tall */
 .doc-content {
    max-height: 600px;
    overflow-y: auto;
 }
@@ -0,0 +1,568 @@
 """WebUI Views for Context7 Docs using Jinja2 templates."""
 import html
 import json
 import os
 from pathlib import Path
 from typing import Any, Optional
 from urllib.parse import urlencode
 import requests
 from fastapi import Request
 # fastapi.responses exports HTMLResponse (there is no `HTML` class); alias it so
 # the view functions can keep using the short `HTML` name.
 from fastapi.responses import HTMLResponse as HTML, JSONResponse
 # Internal API base URL
 DOCS_API_URL = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
 def api_request(method: str, endpoint: str, data: Optional[dict] = None,
                 params: Optional[dict] = None,
                 timeout: Any = (5.0, 300.0)) -> dict:
    """Call the docs-api service and return its decoded JSON body.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        endpoint: Path appended verbatim to DOCS_API_URL (should start with "/").
        data: Optional JSON-serialisable request body.
        params: Optional query-string parameters (entries whose value is None
            are dropped by requests).
        timeout: Passed to requests; either a single number of seconds or a
            (connect, read) tuple. The read timeout is generous because
            ingestion calls can be slow.

    Returns:
        The parsed JSON response.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        ValueError: if the response body is not valid JSON.
    """
    url = f"{DOCS_API_URL}{endpoint}"
    headers = {}
    api_key = os.environ.get("WEBUI_API_KEY")
    if api_key:
        headers["X-API-Key"] = api_key
    # Without a timeout a stalled docs-api would hang the WebUI worker forever.
    resp = requests.request(
        method, url, headers=headers, json=data, params=params, timeout=timeout
    )
    # Surface HTTP errors instead of handing an error payload to callers that
    # expect a success-shaped dict.
    resp.raise_for_status()
    return resp.json()
 def navbar_html(current: str) -> str:
    """Build the <nav> markup for the WebUI.

    Args:
        current: Path of the page being rendered; the link whose path equals
            it gets class "active", every other link gets an empty class.

    Returns:
        The <nav> element as a string.
    """
    nav_entries = (
        ("/health", "Health"),
        ("/libraries", "Libraries"),
        ("/upload", "Upload"),
        ("/ingest/all", "Ingest All"),
        ("/sources/git", "Git Sources"),
        ("/search", "Search"),
    )

    def anchor(path: str, label: str) -> str:
        # One <a> element per navigation entry.
        marker = "active" if current == path else ""
        return f'<a href="{path}" class="{marker}">{label}</a>'

    anchors = [anchor(path, label) for path, label in nav_entries]
    return f"""<nav>
                {' '.join(anchors)}
            </nav>""".strip()
 def footer_html() -> str:
    """Return the static <footer> element shared by the WebUI pages."""
    footer_text = "Context7 Docs WebUI"
    return "<footer>" + footer_text + "</footer>"
 def health(request: Request) -> HTML:
    """Render the system health dashboard.

    Queries GET /health on the docs API and shows the reported service name
    and status. If the call fails, the status is "error" and the exception
    text is shown in place of the service name.

    Args:
        request: Incoming request (unused).

    Returns:
        An HTML response containing the health page.
    """
    try:
        data = api_request("GET", "/health")
        status = data.get("status", "unknown")
        service = data.get("service", "Service")
    except Exception as e:
        status = "error"
        service = str(e)
    # Both values originate outside this process (API payload or exception
    # text), so escape them before placing them in markup and attributes.
    status = html.escape(str(status), quote=True)
    service = html.escape(str(service))
    return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Health</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/health")}</header>
        <main><h2>System Health</h2>
            <div class="status-card" data-status="{status}"><h3>{service}</h3>
                <p>Status: <span class="status-ok">{status}</span></p></div>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 def libraries(request: Request) -> HTML:
    """Render the library list with a "create folder" form.

    Fetches GET /libraries from the docs API and renders one table row per
    library. If the call fails, the table is empty and the error is shown
    above it (previously the failure was counted as a library and hidden).

    Args:
        request: Incoming request (unused).

    Returns:
        An HTML response containing the libraries page.
    """
    error = None
    try:
        data = api_request("GET", "/libraries")
        libs = data.get("libraries", []) or []
    except Exception as e:
        libs = []
        error = str(e)
    table_rows = []
    for lib in libs:
        # Library metadata is user-supplied; escape it for text and attributes.
        lib_id = html.escape(str(lib.get("id")), quote=True)
        name = html.escape(str(lib.get("name", "") or ""))
        description = html.escape(str(lib.get("description", "") or "(no description)"))
        table_rows.append(
            f"""<tr><td>{lib_id}</td>
                <td>{name}</td>
                <td>{description}</td>
                <td><a href="/docs/{lib_id}">View Docs</a></td></tr>"""
        )
    error_html = ""
    if error is not None:
        error_html = (
            '<div class="status-message">Could not load libraries: '
            + html.escape(error)
            + "</div>"
        )
    return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Libraries</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/libraries")}</header>
        <main>
            <h2>Libraries ({len(table_rows)})</h2>
            {error_html}
            <div class="actions-bar">
                <form action="/folders/create" method="post" style="display:inline;">
                    <input type="text" name="name" placeholder="New library folder name" required>
                    <button type="submit">Create Folder</button>
                </form>
            </div>
            <table class="library-table">
                <thead><tr><th>ID</th><th>Name</th><th>Description</th><th>Actions</th></tr></thead>
                <tbody>{"".join(table_rows)}</tbody>
            </table>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 def upload(request: Request) -> HTML:
    """Render the upload form, or a preview of an uploaded text file.

    When the request carries a "file" upload, the first 5000 characters are
    decoded as UTF-8, the first 1000 are previewed, and a follow-up form posts
    the text to /ingest/uploaded. Otherwise the plain upload form is shown.

    Args:
        request: Incoming request.

    Returns:
        An HTML response containing the upload page.
    """
    # NOTE(review): Starlette/FastAPI requests have no synchronous `.files`
    # mapping (uploads are read via `await request.form()`), and this handler
    # is synchronous. Fall back to "no upload" instead of raising
    # AttributeError; confirm how the route supplies uploads.
    files = getattr(request, "files", None) or {}
    if "file" in files:
        try:
            content = files["file"].read().decode("utf-8")[:5000]
        except Exception:
            # No size check exists here; the realistic failure is a file that
            # is not UTF-8 text, so say that instead of "File too large".
            main = """
            <h2>Upload failed</h2>
            <p>The file could not be read as UTF-8 text. Please upload a plain-text file.</p>"""
        else:
            # Truncate the raw text first and escape afterwards, so an HTML
            # entity is never cut in half and the hidden field round-trips.
            preview = html.escape(content[:1000]) + ("..." if len(content) > 1000 else "")
            hidden_value = html.escape(content, quote=True)
            main = f"""
            <h2>Upload Complete!</h2>
            <pre class="content-preview">{preview}</pre>
            <form method="post" action="/ingest/uploaded">
                <input type="hidden" name="content" value="{hidden_value}">
                <label for="library_id">Library (optional):</label>
                <input type="text" id="library_id" name="library_id" placeholder="e.g., my-docs">
                <button type="submit">Ingest</button>
            </form>"""
    else:
        main = """
            <h2>Upload Documentation Files</h2>
            <form method="post" enctype="multipart/form-data">
                <label for="file">Select file:</label>
                <input type="file" name="file" id="file" accept=".txt,.md,.json,.py,.js,.html,.css,.yaml,.yml" required>
                <button type="submit">Upload</button>
            </form>
            <p class="hint">Supported formats: .txt, .md, .json, .py, .js, .html, .css, .yaml</p>"""
    return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Upload</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
        <main>{main}
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 def ingest_all(request: Request) -> JSONResponse:
    """Ask the docs API to ingest everything and report the chunk count.

    Args:
        request: Incoming request (unused).

    Returns:
        A JSON response: {"status": "ok", "message": ...} on success, or a
        500 response with {"error": ...} if anything raised.
    """
    try:
        outcome = api_request("POST", "/ingest")
        chunk_count = outcome.get('chunks', 0)
        payload = {"status": "ok", "message": f"Processed {chunk_count} chunks"}
        response = JSONResponse(content=payload)
    except Exception as exc:
        response = JSONResponse(status_code=500, content={"error": str(exc)})
    return response
 def ingest_library(request: Request, library_id: str) -> HTML:
    """Trigger ingestion for one library, or show its ingest form.

    If the request's form data contains "content", the content-entry form for
    the library is rendered. Otherwise POST /ingest/{library_id} is sent to
    the docs API and the result (or the error) is rendered.

    Args:
        request: Incoming request.
        library_id: Identifier of the library to ingest.

    Returns:
        An HTML response with the form, the ingestion result, or the error.
    """
    safe_id = html.escape(library_id, quote=True)
    # The original passed the literal text "/ingest/{library_id}" to the navbar.
    nav = navbar_html(f"/ingest/{library_id}")
    form = request.form
    if callable(form):
        # On Starlette/FastAPI `request.form` is an awaitable method, which a
        # synchronous handler cannot read; treat it as "no form data" rather
        # than failing on the membership test below.
        form = {}
    if "content" in form:
        # NOTE(review): submitted content is not ingested here, only the empty
        # form is shown again. Confirm whether it should be forwarded to the API.
        title = "Ingest"
        main = f"""
            <h2>Ingest for Library: {safe_id}</h2>
            <form method="post" action="/ingest/{safe_id}">
                <label for="content">Content (text):</label>
                <textarea id="content" name="content" rows="10" maxlength="10000"></textarea>
                <button type="submit">Ingest</button>
            </form>"""
    else:
        try:
            result = api_request("POST", f"/ingest/{library_id}")
            safe_msg = html.escape(str(result.get('message', '') or ''))
            safe_json = html.escape(json.dumps(result, indent=2))
            title = "Ingest Result"
            main = f"""
            <h2>Ingestion Complete!</h2>
            <p>{safe_msg}</p>
            <pre>{safe_json}</pre>
            <a href="/libraries">← Back to Libraries</a>"""
        except Exception as e:
            safe_error = html.escape(str(e))
            title = "Error"
            main = f"""
            <h2>Error</h2>
            <pre>{safe_error}</pre>"""
    return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - {title}</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{nav}</header>
        <main>{main}
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 async def folders_create(request: Request) -> JSONResponse:
    """Create a new library folder from the posted "name" form field.

    Args:
        request: Incoming request carrying form data with a "name" field.

    Returns:
        A JSON response: 200 with a status message on success, 400 if the
        name is empty or not a plain folder name, 500 with the error text if
        the database call fails.
    """
    # request.form is a coroutine method on Starlette requests; it has to be
    # awaited before the fields can be read.
    form = await request.form()
    name = str(form.get("name", "") or "").strip()
    # The name ends up in a path under /docs, so reject anything that could
    # escape that directory.
    if not name or name in {".", ".."} or "/" in name or "\\" in name:
        return JSONResponse(status_code=400, content={"error": "Invalid folder name"})
    try:
        from backend.app.db import upsert_library
        await upsert_library(library_id=name, name=name, description=None, source_path=f"/docs/{name}")
        return JSONResponse(content={"status": "ok", "message": f"Created folder '{name}'"})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
 async def folders_delete(request: Request) -> JSONResponse:
    """Delete the library named by the "id" query parameter.

    Args:
        request: Incoming request; the library id is read from ?id=...

    Returns:
        A JSON response: 200 with a status message on success, 400 if no id
        was supplied, 500 with the error text if the database call fails.
    """
    library_id = request.query_params.get("id", "").strip()
    if not library_id:
        # Without this guard an empty id was passed to delete_library and a
        # "Deleted library ''" success was reported.
        return JSONResponse(status_code=400, content={"error": "Missing library id"})
    try:
        from backend.app.db import delete_library
        await delete_library(library_id)
        return JSONResponse(content={"status": "ok", "message": f"Deleted library '{library_id}'"})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
 async def ingest_uploaded(request: Request) -> HTML:
    """Send previously uploaded text to the docs API for ingestion.

    Reads "content" (capped at 10000 characters) and "library_id" from the
    posted form, POSTs the content to /ingest/{library_id}, and renders the
    result or the error.

    Args:
        request: Incoming request carrying the form data.

    Returns:
        An HTML response with the ingestion result or an error page.
    """
    try:
        # request.form is a coroutine method on Starlette requests.
        form = await request.form()
        content = str(form.get("content", "") or "")[:10000]
        # A blank text input is submitted as "", which a .get() default does
        # not cover; fall back explicitly so the endpoint is never "/ingest/".
        library_id = str(form.get("library_id", "") or "").strip() or "uploaded"
        result = api_request("POST", f"/ingest/{library_id}", data={"content": content})
        safe_msg = html.escape(str(result.get('message', '') or ''))
        safe_json = html.escape(json.dumps(result, indent=2))
        return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Upload Result</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
        <main>
            <h2>Ingestion Complete!</h2>
            <p>{safe_msg}</p>
            <pre>{safe_json}</pre>
            <a href="/upload">← Upload Another</a>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
    except Exception as e:
        safe_error = html.escape(str(e))
        return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head><meta charset="UTF-8"><title>Error</title></head>
 <body><h1>Upload Ingest Error</h1><pre>{safe_error}</pre><a href="/upload">← Try Again</a></body>
 </html>""", media_type="text/html")
 def docs(request: Request, library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> HTML:
    """Render documentation content for one library.

    Requests GET /libraries/{library_id}/docs from the docs API, passing
    `topic` (when given) and `tokens` as query parameters, and shows up to
    10000 characters of the returned content. On failure the exception text
    is shown in place of the content.

    Args:
        request: Incoming request (unused).
        library_id: Identifier of the library to display.
        topic: Optional topic filter forwarded to the API.
        tokens: Token budget forwarded to the API.

    Returns:
        An HTML response containing the documentation page.
    """
    # The query string is built here; the original passed an unsupported
    # `params=` keyword, so every call failed with TypeError and the page
    # always showed that error.
    query_args = {"topic": topic, "tokens": tokens}
    query = urlencode({k: v for k, v in query_args.items() if v is not None})
    try:
        data = api_request("GET", f"/libraries/{library_id}/docs?{query}")
        content = data.get("content", "") or ""
    except Exception as e:
        content = str(e)
    # Truncate before escaping so an HTML entity is never cut in half.
    safe_content = html.escape(str(content)[:10000])
    safe_id = html.escape(library_id)
    safe_topic = html.escape(topic) if topic else '(all)'
    safe_tokens = html.escape(str(tokens))
    nav = navbar_html("/docs/{}".format(library_id))
    return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Library: {safe_id}</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{nav}</header>
        <main>
            <h2>Library: {safe_id}</h2>
            <p><strong>Topic:</strong> {safe_topic} | <strong>Tokens:</strong> {safe_tokens}</p>
            <pre class="docs-content">{safe_content}</pre>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 def search_redirect(request: Request) -> JSONResponse:
    """Return a JSON body naming the search form path.

    Note: this sends a JSON document with a "redirect" key, not an HTTP
    redirect response.
    """
    target = {"redirect": "/search/form"}
    return JSONResponse(content=target)
 def search_form(request: Request) -> HTML:
    """Render the search form (query, optional library, result limit).

    The form posts the fields "query", "library_id" and "limit" to /search.

    Args:
        request: Incoming request (unused).

    Returns:
        An HTML response containing the search form page.
    """
    nav = navbar_html("/search")
    page = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Search</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{nav}</header>
        <main>
            <h2>Search Docs</h2>
            <form method="post" action="/search">
                <label for="query">Query:</label>
                <input type="text" id="query" name="query" required placeholder="Enter your search query...">
                <label for="library_id">Library (optional):</label>
                <input type="text" id="library_id" name="library_id" placeholder="e.g., foundryvtt">
                <label for="limit">Limit results:</label>
                <select id="limit" name="limit">
                    <option value="5">5</option>
                    <option value="10" selected>10</option>
                    <option value="20">20</option>
                    <option value="50">50</option>
                </select>
                <button type="submit">Search</button>
            </form>
        </main>{footer_html()}</div>
 </body></html>"""
    return HTML(page, media_type="text/html")
 def search_results(request: Request) -> HTML:
    """Run a search against the docs API and render the hits.

    Reads the query from the "q" (or "query") query parameter, plus optional
    "library_id" and "limit", POSTs them to /search, and renders one card per
    result. Any failure (including a non-numeric limit) renders an error page.

    Args:
        request: Incoming request; search inputs are read from the query string.

    Returns:
        An HTML response with the result list or an error page.
    """
    params = request.query_params
    # The search form names its text field "query" while this view
    # historically read "q"; accept both so either link style works.
    query = params.get("q") or params.get("query") or ""
    # Previously hard-coded to None, which ignored the form's library filter.
    library_id = params.get("library_id") or None
    try:
        limit = int(params.get("limit", "10"))
        payload = {"query": query, "library_id": library_id, "limit": limit}
        result = api_request("POST", "/search", data=payload)
        results = result.get("results", []) or []
    except Exception as e:
        safe_error = html.escape(str(e))
        return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head><meta charset="UTF-8"><title>Error</title></head>
 <body><h1>Error</h1><pre>{safe_error}</pre><a href="/search/form">← Try Again</a></body>
 </html>""", media_type="text/html")
    cards = []
    for r in results:
        # Indexed document text is untrusted: escape everything interpolated.
        text = str(r.get("content") or r.get("chunk") or "")
        title = html.escape(str(r.get("title", "Untitled") or (text[:100] + "...")))
        snippet = html.escape(text[:500])
        result_id = html.escape(str(r.get('id')), quote=True)
        result_lib = html.escape(str(r.get('library_id')), quote=True)
        cards.append(f"""<div class="result-card" data-id="{result_id}"><h3>{title}</h3>
            <p>{snippet}...</p><a href="/docs/{result_lib}">View Full</a></div>""")
    safe_query = html.escape(query)
    return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Search Results</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/search")}</header>
        <main>
            <h2>Search Results for "{safe_query}"</h2>
            <div class="results-count">{len(results)} results found</div>
            {''.join(cards)}
            <a href="/search/form">← New Search</a>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 def sync_sources(request: Request) -> HTML:
    """Show the git-sync page, or run a sync when called with POST.

    POST: sends POST /sources/sync to the docs API and renders the JSON
    result (or an error page). Any other method: renders the sync form
    together with the ids of the libraries returned by GET /libraries.

    Args:
        request: Incoming request; only its HTTP method is inspected.

    Returns:
        An HTML response for the form, the sync result, or the error.
    """
    if request.method == "POST":
        # NOTE(review): the form's "override" checkbox is not read or
        # forwarded to the API here — confirm whether it should be.
        try:
            data = api_request("POST", "/sources/sync")
            safe_json = html.escape(json.dumps(data, indent=2))
            return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head><meta charset="UTF-8"><title>Sync Result</title></head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/sync/sources")}</header>
        <main><h2>Git Sync Complete!</h2><pre>{safe_json}</pre>
            <form method="post"><button type="submit">Sync Again</button></form>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
        except Exception as e:
            safe_error = html.escape(str(e))
            return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head><meta charset="UTF-8"><title>Error</title></head>
 <body><h1>Sync Error</h1><pre>{safe_error}</pre><a href="/sources/git">← Try Again</a></body>
 </html>""", media_type="text/html")
    else:
        try:
            data = api_request("GET", "/libraries")
            # str() keeps the join below from failing on non-string ids.
            libs = [str(lib.get("id")) for lib in data.get("libraries", []) if lib.get("id") != "error"]
        except Exception:
            libs = []
        lib_list = html.escape(", ".join(libs)) if libs else "(none)"
        return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Git Sync</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/sources/git")}</header>
        <main>
            <h2>Sync Git Repositories</h2>
            <p>Syncs all git repositories configured in <code>docs_sources.yaml</code>.</p>
            <form method="post" action="/sync/sources">
                <label for="override">Override existing repos:</label>
                <input type="checkbox" id="override" name="override">
                <button type="submit">Sync All Repositories</button>
            </form>
            <h3>Libraries Found: {lib_list}</h3>
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
 def git_sources(request: Request) -> HTML:
    """List the git sources configured in docs_sources.yaml.

    Loads the YAML file located three directories above this module and
    renders one card per entry under its "sources" key (library id, URL
    truncated to 50 characters, branch, include/exclude paths). Any failure
    while reading or rendering produces an error page.

    Args:
        request: Incoming request (unused).

    Returns:
        An HTML response with the source list or an error page.
    """
    import yaml
    config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"
    try:
        with open(config_path) as f:
            data = yaml.safe_load(f)
        # safe_load returns None for an empty file, and "sources:" with no
        # entries is None too; treat both as "no sources".
        sources = (data or {}).get("sources", []) or []
        source_blocks = []
        for src in sources:
            repo_url = str(src.get("repo_url", "") or "")
            url = repo_url[:50] + "..." if len(repo_url) > 50 else repo_url
            branch = src.get("branch", "main")
            include = src.get("include_paths", ["*"]) or []
            exclude = src.get("exclude_paths", []) or []
            # Config values are rendered as text, so escape them; str() guards
            # the joins against non-string list items.
            paths = "Include: " + html.escape(", ".join(str(p) for p in include))
            if exclude:
                paths += " | Exclude: " + html.escape(", ".join(str(p) for p in exclude))
            lib_label = html.escape(str(src.get('library_id', 'unknown')))
            source_blocks.append(f"""<div class="source-card">
                <strong>{lib_label}</strong><br>
                URL: {html.escape(url)}<br>
                Branch: {html.escape(str(branch))}<br>
                {paths}
            </div>""")
        return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Git Sources</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{navbar_html("/sources/git")}</header>
        <main>
            <h2>Configured Git Sources ({len(sources)})</h2>
            {''.join(source_blocks)}
        </main>{footer_html()}</div>
 </body></html>""", media_type="text/html")
    except Exception as e:
        safe_error = html.escape(str(e))
        return HTML(f"""<!DOCTYPE html>
 <html lang="en">
 <head><meta charset="UTF-8"><title>Error</title></head>
 <body><h1>Git Sources Error</h1><pre>{safe_error}</pre></body>
 </html>""", media_type="text/html")
 def logs(request: Request) -> HTML:
    """Render the static status page.

    Shows the configured docs API URL. The Qdrant/MCP line is fixed text in
    the template; no health probe is performed here.

    Args:
        request: Incoming request (unused).

    Returns:
        An HTML response containing the status page.
    """
    nav = navbar_html("/logs")
    page = f"""<!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Context7 Docs - Logs</title>
    <link rel="stylesheet" href="/static/css/main.css">
 </head>
 <body>
    <div class="container">
        <header><h1>Context7 Docs UI</h1>{nav}</header>
        <main>
            <h2>Status Messages</h2>
            <div class="status-message">Docs API: {DOCS_API_URL}</div>
            <div class="status-message">Qdrant Health: healthy | MCP OK: yes</div>
            <p class="hint">Logs are printed to container stdout/stderr. For full logs, inspect Docker containers directly.</p>
        </main>{footer_html()}</div>
 </body></html>"""
    return HTML(page, media_type="text/html")
 # Register all routes
 __all__ = [
    "health", "libraries", "upload", "ingest_all", "ingest_library",
    "folders_create", "folders_delete", "docs", "search_redirect",
    "search_form", "search_results", "sync_sources", "git_sources", "logs"
 ]
@@ -0,0 +1,37 @@
 # Backend API Dependencies
 fastapi==0.109.0
 uvicorn[standard]==0.27.0
 pydantic==2.5.3
 python-dotenv==1.0.0
 python-multipart==0.0.6
 # Qdrant Vector Store Client
 qdrant-client==1.7.0
 # Text Processing for token estimation
 tiktoken==0.7.0
 # Local Embeddings using FastEmbed
 fastembed==0.3.0
 # PDF support for document ingestion
 pypdf==5.0.0
 # HTTP client for MCP server communication
 httpx==0.26.0
 # HTTP client for WebUI (used to call docs-api from WebUI)
 requests==2.31.0
 # FastMCP for MCP server integration (also used by backend)
 fastmcp==0.6.0
 # YAML parser for sources configuration
 PyYAML==6.0.1
 # =============================================================================
 # TEST DEPENDENCIES
 # =============================================================================
 pytest==8.3.2
 pytest-mock==3.14.0
 pytest-asyncio==0.23.7
@@ -0,0 +1,2 @@
 # This directory is intentionally left empty to preserve the folder structure for Docker volumes.
 # Data from Qdrant will be mounted here via docker-compose.yml.
@@ -0,0 +1,99 @@
 # Context7-style MCP System - Docker Compose (Production/Home Server Hardened)
 services:
  # Qdrant Vector Database Service
  qdrant:
    image: qdrant/qdrant:latest
    container_name: qdrant
    ports:
      - "${QDRANT_PORT:-6333}:6333"
    volumes:
      - ./data/qdrant:/qdrant/storage
    environment:
      - QDRANT__MEMORY_MAPPED_INDEXES=1
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    networks:
      - context7-network
  # Docs API Backend Service (FastAPI)
  docs-api:
    build:
      context: ./backend
      dockerfile: Dockerfile
    container_name: docs-api
    ports:
      # HOST_PORT only changes the port published on the host; the container
      # side is always 8787.
      - "${HOST_PORT:-8787}:8787"
    environment:
      - VECTOR_STORE_HOST=qdrant
      - VECTOR_STORE_PORT=6333
      - DOCS_PATH=/docs
      - DB_PATH=/data/db.sqlite
      - LOG_LEVEL=INFO
      - API_KEY_DOCS_API=${DOCS_API_KEY:-}
    volumes:
      - ./docs:/docs
      - ./data:/data
    # Same restart and log-rotation policy as the qdrant and docs-mcp services.
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    depends_on:
      - qdrant
    networks:
      - context7-network
    healthcheck:
      # The probe runs inside the container, so it must use the container port
      # (8787). Interpolating HOST_PORT here made the check fail, and blocked
      # the services that wait for service_healthy, whenever a custom host
      # port was configured.
      test: ["CMD", "curl", "-f", "http://localhost:8787/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 15s
  # MCP Server Service (FastMCP via streamable HTTP)
  docs-mcp:
    build:
      context: ./mcp-server
      dockerfile: Dockerfile
    container_name: docs-mcp
    ports:
      - "${MCP_HOST_PORT:-8788}:8788"
    environment:
      - DOCS_API_URL=http://docs-api:8787
      - MCP_API_KEY=${MCP_API_KEY:-}
    volumes:
      - ./docs:/docs:ro
      - ./data:/data
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    depends_on:
      docs-api:
        condition: service_healthy
    networks:
      - context7-network
  # WebUI Service (HTML interface)
  webui:
    build:
      context: ./webui
      dockerfile: Dockerfile
    container_name: webui
    ports:
      - "${WEBUI_PORT:-8790}:8790"
    environment:
      - DOCS_API_URL=http://docs-api:8787
      # The WebUI sends this value as X-API-Key to docs-api, so it falls back
      # to DOCS_API_KEY when no WebUI-specific key is set; otherwise enabling
      # DOCS_API_KEY alone would lock the WebUI out of the API.
      - WEBUI_API_KEY=${DOCS_WEBUI_API_KEY:-${DOCS_API_KEY:-}}
    volumes:
      - ./docs:/docs
      - ./data:/data
    # Same restart and log-rotation policy as the qdrant and docs-mcp services.
    restart: unless-stopped
    logging:
      driver: json-file
      options:
        max-size: "10m"
        max-file: "3"
    depends_on:
      docs-api:
        condition: service_healthy
    networks:
      - context7-network
 networks:
  context7-network:
    driver: bridge
@@ -0,0 +1,143 @@
 # Getting Started
 Welcome to the Context7-style MCP System documentation!
 ## Overview
 This system provides a self-hosted, local context7-compatible MCP (Model Context Protocol) solution using Docker containers. It enables you to:
 - Ingest and index your own documents
 - Perform semantic search on vector embeddings
 - Integrate with MCP-enabled IDEs for intelligent tool interactions
 ## Architecture
 ```
 ┌─────────────┐     ┌─────────────┐     ┌─────────────┐
 │   Client    │────▶│ docs-api    │◀────│ docs-mcp    │
 │ (IDE/Tool)  │     │ (FastAPI)   │     │ (MCP Server)│
 └─────────────┘     └─────────────┘     └─────────────┘
                          │
                          ▼
                    ┌─────────────┐
                    │   Qdrant    │
                    │ (Vector DB) │
                    └─────────────┘
 ```
 ## Quick Start
 ### 1. Start All Services
 ```bash
 docker compose up -d --build
 ```
 ### 2. Verify Services Are Running
 ```bash
 docker compose ps
 ```
 You should see all four services in "Up" status:
 - `qdrant` (port 6333)
 - `docs-api` (port 8787)
 - `docs-mcp` (port 8788)
 - `webui` (port 8790)
 ### 3. Access the API
 Open your browser and navigate to:
 ```
 http://localhost:8787/docs
 ```
 You should see the FastAPI documentation page.
 ## API Endpoints
 ### Health Check
 ```bash
 curl http://localhost:8787/health
 ```
 Expected response:
 ```json
 {"status":"ok"}
 ```
 ### Ingest Document
 Upload a text document to be processed and indexed:
 ```bash
 curl -X POST "http://localhost:8787/api/v1/ingest" \
  -H "Content-Type: application/json" \
  -d '{
    "content": "This is sample document content for semantic search testing.",
    "metadata": {"source": "example", "type": "text"}
  }'
 ```
 ### Search Documents
 Perform a similarity search on ingested documents:
 ```bash
 curl "http://localhost:8787/api/v1/search" \
  -H "Content-Type: application/json" \
  -d '{
    "query": "semantic search",
    "top_k": 5,
    "threshold": 0.7
  }'
 ```
 ## Configuration
 ### Environment Variables
 Copy the example environment file and customize:
 ```bash
 cp .env.example .env
 ```
 Key variables:
 - `VECTOR_STORE_HOST`: Internal hostname of Qdrant (default: qdrant)
 - `VECTOR_STORE_PORT`: Qdrant port (default: 6333)
 ### Docker Compose
 All services are defined in `docker-compose.yml`. Key networking details:
 - Services communicate internally via `context7-network`
 - Qdrant uses service name `qdrant` for internal connections
 - Vector store is exposed externally on port 6333 for debugging
 ## Next Steps
 1. Review the project structure to understand component roles
 2. Customize the backend API endpoints in `backend/app/main.py`
 3. Implement MCP tools in `mcp-server/server.py`
 4. Add more example documents in the `docs/` directory
 ## Troubleshooting
 ### Check Logs
 ```bash
 docker compose logs -f docs-api
 docker compose logs -f qdrant
 docker compose logs -f docs-mcp
 ```
 ### Reset All Services
 ```bash
 docker compose down -v
 docker compose up -d --build
 ```
 ## Support
 For issues, refer to the `README.md` or check the Qdrant documentation.
@@ -0,0 +1,27 @@
 # Git Repository Sources Configuration
 # Each source defines a library to ingest from a git repository
 # Paths are relative to the cloned repo root
 sources:
  - library_id: foundryvtt
    name: Foundry VTT
    description: Foundry Virtual Tabletop system documentation
    repo_url: https://github.com/foundryvtt/foundryvtt.git
    branch: main
    include_paths:
      - docs
      - src
    exclude_paths:
      - node_modules
      - .git
  # Add more sources here following the same structure:
  #   - library_id: my-repo
  #     name: My Repository
  #     description: My documentation
  #     repo_url: https://github.com/user/my-repo.git
  #     branch: main
  #     include_paths:
  #       - docs
  #     exclude_paths:
  #       - node_modules
@@ -0,0 +1,30 @@
 # MCP Server Service
 FROM python:3.11-slim
 WORKDIR /app
 RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    && rm -rf /var/lib/apt/lists/*
 # Install Python dependencies cleanly
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # Copy server code
 COPY server.py .
 # Mount volumes at these paths (configured in docker-compose)
 # ./docs -> /docs  
 # ./data -> /data
 # /data holds the SQLite database file (db.sqlite)
 # Expose MCP port
 EXPOSE 8788
 # Healthcheck
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import socket; s=socket.create_connection(('127.0.0.1', 8788), 5); s.close()"
 # Run the MCP server using streamable HTTP transport
 CMD ["python", "server.py"]
@@ -0,0 +1,21 @@
 # MCP Server Dependencies
 fastmcp==0.6.0
 httpx==0.26.0
 # For Qdrant vector store operations
 qdrant-client==1.7.0
 # Text processing for token estimation
 tiktoken==0.7.0
 # Local embeddings using FastEmbed
 fastembed==0.3.0
 # PDF support for document ingestion
 pypdf==5.0.0
 # Environment variables loader
 python-dotenv==1.0.0
 # YAML parser for sources configuration
 PyYAML==6.0.1
@@ -0,0 +1,337 @@
 # MCP Server for local-context7 Docs API with Git Sources Support
 """
 MCP server providing Context7-style tools for interacting with the local docs API.
 This server exposes 6 tools:
 - resolve-library-id: Find libraries matching a name (with /local/ prefix)
 - get-library-docs: Retrieve documentation from a library
 - list-libraries: List all discovered libraries
 - search-docs: Semantic search across documents
 - refresh-library: Re-ingest documents for a library or all libraries
 - sync-sources: Sync git repositories from configuration file
 """
 import asyncio
 import os
 from typing import Optional, List, Dict, Any
 try:
    import httpx
 except ImportError:
    httpx = None
 try:
    from fastmcp import FastMCP
 except ImportError:
    class _Tool:
        """Minimal record of a registered tool: just its function name."""
        def __init__(self, name: str):
            self.name = name
    class FastMCP:
        """Import-time fallback used by tests when fastmcp is not installed."""
        def __init__(self, *args, **kwargs):
            # Constructor arguments are accepted and ignored.
            self.tools = []
        def tool(self):
            # Decorator factory: records the function's name and returns the
            # function itself unchanged.
            def register(fn):
                record = _Tool(fn.__name__)
                self.tools.append(record)
                return fn
            return register
        def run(self, *args, **kwargs):
            # The stub cannot serve anything; fail loudly if it is started.
            raise RuntimeError("fastmcp is not installed")
 # Environment configuration
 # The default must be a literal URL: Python does not expand shell-style
 # "${HOST_PORT:-8787}" placeholders. Inside the Docker network docs-api
 # listens on container port 8787 regardless of the published host port.
 DOCS_API_URL = os.getenv("DOCS_API_URL", "http://docs-api:8787")
 MCP_API_KEY = os.getenv("MCP_API_KEY", "")
 def strip_local_prefix(lib_id: str) -> str:
    """Return ``lib_id`` without a leading "/local/", if it has one.

    Ids that do not start with "/local/" are returned unchanged.
    """
    prefix = "/local/"
    has_prefix = lib_id.startswith(prefix)
    return lib_id[len(prefix):] if has_prefix else lib_id
 # Create FastMCP instance with tools
 mcp = FastMCP("context7-docs", root_path="/app")
@mcp.tool()
 async def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Look up libraries whose name matches library_name.
    Sends GET /libraries/search?q=<library_name> to the docs API and returns the
    "matches" list from the JSON response.
    Args:
        library_name: The library name to search for (e.g., "foundryvtt")
    Returns:
        The candidate list reported by the API ([] when the response has no
        "matches" key). Entries are expected to look like
        {"id": "/local/foundryvtt", "name": "Foundry VTT", "description": "...", "source": "local"}
        (shape is defined by the docs API, not by this function).
        Any failure -- httpx missing, transport error, non-200 status -- is
        printed and an empty list is returned instead of raising.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")
        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            reply = await client.get("/libraries/search", params={"q": library_name})
            # Guard clause: anything other than 200 is routed to the handler below
            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")
            return reply.json().get("matches", [])
    except Exception as exc:
        print(f"Error resolving library '{library_name}': {exc}")
        return []
@mcp.tool()
 async def get_library_docs(context7_compatible_library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> str:
    """
    Fetch documentation text for one library from the docs API.
    Issues GET /libraries/<id>/docs, where <id> is the given library ID with a
    leading "/local/" removed, and returns the "content" field of the JSON reply.
    Args:
        context7_compatible_library_id: Library ID, with or without the /local/ prefix
        topic: Optional topic, forwarded as the "topic" query parameter (omitted when empty or None)
        tokens: Forwarded as the "tokens" query parameter (default: 8000)
    Returns:
        The "content" string from the response ("" when the key is absent). On any
        failure the error is printed and a string starting with
        "Error retrieving documentation: " is returned instead of raising.
    Example:
        get_library_docs("/local/foundryvtt", topic="hooks", tokens=8000)
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")
        # The API expects the bare ID, without the /local/ prefix
        bare_id = strip_local_prefix(context7_compatible_library_id)
        query = {"tokens": tokens, "topic": topic} if topic else {"tokens": tokens}
        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            reply = await client.get(f"/libraries/{bare_id}/docs", params=query)
            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")
            return reply.json().get("content", "")
    except Exception as exc:
        print(f"Error getting library docs for '{context7_compatible_library_id}': {exc}")
        return f"Error retrieving documentation: {str(exc)}"
@mcp.tool()
 async def list_libraries() -> List[Dict[str, Any]]:
    """
    Return every library reported by the docs API.
    Sends GET /libraries and returns the "libraries" list from the JSON reply.
    Returns:
        The list reported by the API ([] when the key is absent). Entries are
        expected to look like
        {"id": "/local/foundryvtt", "name": "Foundry VTT", "description": "...", "source": "local"}
        (shape is defined by the docs API). Any failure is printed and an empty
        list is returned instead of raising.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")
        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            reply = await client.get("/libraries")
            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")
            return reply.json().get("libraries", [])
    except Exception as exc:
        print(f"Error listing libraries: {exc}")
        return []
@mcp.tool()
 async def search_docs(query: str, library_id: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Run a search against the docs API.
    Sends POST /search with a JSON body of {"query", "limit"} plus "library_id"
    (with any /local/ prefix stripped) when a library filter is given.
    Args:
        query: The search query string
        library_id: Optional library ID filter (with or without /local/ prefix).
                    When empty or None no filter is sent.
        limit: Maximum number of results requested (default: 10)
    Returns:
        The "results" list from the response ([] when the key is absent). Entries
        are expected to carry fields such as "id", "score", "library_id", "path",
        "title" and "chunk_index" (shape is defined by the docs API). Any failure
        is printed and an empty list is returned instead of raising.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")
        body = {"query": query, "limit": limit}
        if library_id:
            body["library_id"] = strip_local_prefix(library_id)
        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            reply = await client.post("/search", json=body)
            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")
            return reply.json().get("results", [])
    except Exception as exc:
        print(f"Error searching for query '{query}': {exc}")
        return []
@mcp.tool()
 async def refresh_library(library_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Re-ingest documents by calling POST /ingest/all on the docs API.
    NOTE(review): library_id is currently NOT sent to the API -- every call
    triggers /ingest/all regardless of the argument; it is only used in the
    error message. Confirm the per-library ingest endpoint and wire it in.
    Args:
        library_id: Intended to restrict re-ingestion to one library (with
                    /local/ prefix); see the note above. If None, ingests all libraries.
    Returns:
        On HTTP 200, the ingestion summary taken from the response (missing
        counters default to 0):
        {
          "success": True,
          "total_libraries": 2,
          "successful": 2,
          "failed": 0,
          "total_chunks": 150
        }
        On any failure (httpx missing, transport error, non-200 status) the
        error is printed and {"success": False, "error": "<message>"} is returned.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")
        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            # Always the "all libraries" endpoint; library_id is not consulted here
            response = await client.post("/ingest/all")
            if response.status_code == 200:
                data = response.json()
                return {
                    "success": True,
                    "total_libraries": data.get("total_libraries", 0),
                    "successful": data.get("successful", 0),
                    "failed": data.get("failed", 0),
                    "total_chunks": data.get("total_chunks", 0)
                }
            else:
                raise Exception(f"API error: {response.status_code} - {response.text}")
    except Exception as e:
        print(f"Error refreshing library '{library_id or 'all'}': {e}")
        return {"success": False, "error": str(e)}
@mcp.tool()
 async def sync_sources(override: bool = False) -> Dict[str, Any]:
    """
    Ask the docs API to sync the git repositories from its sources configuration.
    Sends POST /sources/sync. The JSON body is {"override": <value>} when
    override is truthy and an empty object otherwise.
    Args:
        override: If true, clears existing repo before cloning (as described by
                  the API; the flag is only forwarded here). Default: false
    Returns:
        On HTTP 200, a summary built from the response (missing counters default
        to 0, missing "results" to []):
        {
          "success": True,
          "total_sources": 2,
          "successful": 1,
          "failed": 1,
          "results": [
            {
              "library_id": "foundryvtt",
              "success": true,
              "message": "...",
              "files_discovered": 450,
              "chunks_created": 2340,
              "vectors_added": 2340
            },
            ...
          ]
        }
        On any failure the error is printed and
        {"success": False, "error": "<message>"} is returned.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")
        body = {}
        if override:
            body["override"] = override
        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            reply = await client.post("/sources/sync", json=body)
            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")
            data = reply.json()
            summary = {"success": True}
            # Copy the known fields in a fixed order, applying per-field fallbacks
            for field, fallback in (("total_sources", 0), ("successful", 0), ("failed", 0), ("results", [])):
                summary[field] = data.get(field, fallback)
            return summary
    except Exception as exc:
        print(f"Error syncing git sources: {exc}")
        return {"success": False, "error": str(exc)}
 if __name__ == "__main__":
    # Run MCP server using streamable HTTP transport.
    # Bind address and port come from MCP_HOST / MCP_PORT (defaults: 0.0.0.0 and 8788).
    host = os.getenv("MCP_HOST", "0.0.0.0")
    port = int(os.getenv("MCP_PORT", 8788))
    print(f"Starting MCP server on http://{host}:{port}")
    # The banner lists each tool by the name of its decorated function and with
    # its real parameter names, so it matches the signatures defined above.
    print("Tools available:")
    print("  - resolve_library_id(library_name)")
    print("  - get_library_docs(context7_compatible_library_id, topic=None, tokens=8000)")
    print("  - list_libraries()")
    print("  - search_docs(query, library_id=None, limit=10)")
    print("  - refresh_library(library_id=None)")
    print("  - sync_sources(override=False)")
    if hasattr(mcp, "run"):
        mcp.run(transport="streamable-http", host=host, port=port)
    else:
        # Fallback for server objects without run(): serve the object via uvicorn
        import uvicorn
        uvicorn.run(mcp, host=host, port=port)
@@ -0,0 +1 @@
 """Compatibility package for importing the mcp-server source tree in tests."""
@@ -0,0 +1,13 @@
 """Import wrapper for ../mcp-server/server.py."""
 import importlib.util
 from pathlib import Path
 # Load ../mcp-server/server.py by file path; the "mcp-server" directory name
 # contains a hyphen, so it cannot be named in a regular import statement.
 _source = Path(__file__).resolve().parents[1] / "mcp-server" / "server.py"
 _spec = importlib.util.spec_from_file_location("_local_context7_mcp_server", _source)
 # Validate the spec BEFORE it is used (module_from_spec would otherwise fail
 # with an unrelated AttributeError), and do it with a real exception so the
 # check is not stripped when Python runs with -O.
 if _spec is None or _spec.loader is None:
    raise ImportError(f"Cannot load MCP server module from {_source}")
 _module = importlib.util.module_from_spec(_spec)
 _spec.loader.exec_module(_module)
 # Re-export every non-dunder name so `from mcp_server.server import X` works
 for _name, _value in vars(_module).items():
    if not _name.startswith("__"):
        globals()[_name] = _value
@@ -0,0 +1,35 @@
 # Pytest configuration for local-context7 tests
 [pytest]
 # Test discovery pattern (where to look for tests)
 testpaths = tests
 # Pattern of test files to discover
 python_files = test_*.py
 # Pattern of test functions to run
 python_functions = test_*
 # Markers for test categorization
 markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
    integration: marks tests as integration tests requiring external services
    unit: marks tests as pure unit tests
 # Terminal progress reporting style (classic pytest output)
 console_output_style = classic
 # Test execution options
 # asyncio_mode is provided by the pytest-asyncio plugin
 asyncio_mode = auto
 # The former "testsessionstartfixturesscope = function" entry was dropped: it is
 # not an option pytest recognises and only produced an unknown-option warning.
 # Logging configuration
 log_cli = true
 log_cli_level = INFO
 log_cli_format = %(asctime)s [%(levelname)s] %(name)s: %(message)s
 log_cli_date_format = %Y-%m-%d %H:%M:%S
 # Ignore specific warnings during tests
 filterwarnings =
    ignore::DeprecationWarning
    ignore::PendingDeprecationWarning
@@ -0,0 +1,2 @@
 # Tests package for local-context7
 # Contains unit tests for chunking, database operations, search, and MCP server modules
@@ -0,0 +1,191 @@
 """
 Pytest configuration and fixtures for local-context7 tests.
 This module provides:
 - Mocks for external dependencies (Qdrant, FastEmbed)
 - Database fixtures for SQLite operations
 - Common test utilities
 """
 from unittest.mock import MagicMock, patch
 import pytest
 import os
 import json
 from pathlib import Path
 from backend.app.db import init_db, upsert_library, insert_document_chunk, get_chunks_for_library, list_libraries, clear_library_documents, get_connection
 # =============================================================================
 # FIXTURES
 # =============================================================================
@pytest.fixture(scope="function")
 def test_database(monkeypatch):
    """
    Create a fresh SQLite database for testing.
    The database file lives at backend/data/test_db.sqlite; any leftover file
    is removed first and the file is removed again after the test.
    Yields:
        Nothing; the fixture exists for its setup/teardown side effects.
    """
    # Dedicated test database file (kept out of the way of any real database)
    db_path = Path(__file__).parent.parent / "backend" / "data" / "test_db.sqlite"
    # Ensure data directory exists
    db_path.parent.mkdir(parents=True, exist_ok=True)
    # Remove existing test DB if present
    if db_path.exists():
        db_path.unlink()
    # Point the DB layer at the test file. Previously db_path was computed and
    # cleaned up but never handed to init_db(), so the fixture did not control
    # which database the tests used.
    # NOTE(review): assumes backend.app.db resolves DB_PATH from the environment
    # at call time -- confirm against get_db_path().
    monkeypatch.setenv("DB_PATH", str(db_path))
    # Initialize database with tables
    result = init_db()
    assert result["success"], f"Failed to initialize test DB: {result.get('error')}"
    yield
    # Cleanup: remove test database after tests
    if db_path.exists():
        db_path.unlink()
@pytest.fixture(scope="function")
 def sample_text():
    """Sample text for chunking tests.
    Returns a markdown string with an H1 ("Introduction"), two H2 sections
    ("Background", "Conclusion") and one H3 ("Details").
    """
    return """# Introduction
 This is the introduction section.
 ## Background
 Background information goes here to make this longer and test chunking.
 This paragraph has more content about the background topic.
 ### Details
 Specific details about the background are provided in this subsection.
 More details follow here to ensure we have enough text to properly test heading preservation.
 ## Conclusion
 The conclusion wraps up everything nicely."""
 # =============================================================================
 # MOCKS
 # =============================================================================
@pytest.fixture
 def mock_embedding_model():
    """
    Stand-in for the FastEmbed model that needs no model download.
    The returned MagicMock has an embed(texts) function that yields one
    384-dimensional all-zero vector per input text.
    """
    dimension = 384
    def zero_vectors(texts):
        # One zero vector per text, regardless of content
        return [[0.0] * dimension for _ in texts]
    model = MagicMock()
    model.embed = zero_vectors
    return model
@pytest.fixture
 def mock_qdrant_client():
    """
    Stand-in for the Qdrant client so search logic can run without a server.
    search(...) always reports no hits and delete_collection(...) returns True.
    """
    client = MagicMock()
    # delete_collection is used for cleanup and always reports success
    client.delete_collection = MagicMock(return_value=True)
    def search_stub(collection_name, query_vector, limit=10, search_filter=None):
        # Simulate "no results" for every query
        return list()
    client.search = search_stub
    return client
@pytest.fixture
 def mock_embedding_model_batch():
    """
    Batch embedding model mock that returns deterministic fake vectors.
    Each text maps to a 384-dimensional vector: a base value in [0, 1) derived
    from a CRC32 of the text, plus 0.001 per component index. Different texts
    therefore get different vectors, and the same text always gets the same
    vector -- also across interpreter runs, which builtin hash() would not
    guarantee for strings.
    """
    import zlib
    def hash_text(text):
        # Stable pseudo-random base value. The previous version called
        # hash_text() recursively here instead of using the computed hash,
        # which ended in a RecursionError on first use.
        base = (zlib.crc32(text.encode("utf-8")) % 1000000) / 1000000
        return [base + (i * 0.001) for i in range(384)]
    mock_model = MagicMock()
    mock_model.embed = lambda texts: [hash_text(t) for t in texts]
    return mock_model
 # =============================================================================
 # SETUP TEARDOWN FIXTURES
 # =============================================================================
@pytest.fixture(autouse=True)
 def clear_test_database(test_database):
    """
    Autouse hook that makes every test depend on the test_database fixture.
    This fixture does no work of its own: requesting test_database is what
    triggers database setup before the test and file removal afterwards.
    """
    return None  # Cleanup handled in test_database fixture
@pytest.fixture
 def empty_vector():
    """A 384-element list of 0.0 used as a dummy embedding vector."""
    return [0.0 for _ in range(384)]
@pytest.fixture
 def fake_embeddings(sample_text):
    """
    Build one fake 384-dimensional vector per non-blank paragraph of sample_text.
    Paragraphs are separated by blank lines ("\\n\\n"). Every component of a
    paragraph's vector has the same value, derived from the paragraph's hash
    and length.
    """
    vectors = []
    for paragraph in sample_text.split("\n\n"):
        if not paragraph.strip():
            continue
        component = (hash(paragraph) + len(paragraph)) % 1000 / 10000
        vectors.append([component] * 384)
    return vectors
 # =============================================================================
 # UTILITY FUNCTIONS
 # =============================================================================
@pytest.fixture
 def temp_file(tmp_path):
    """Return the path "test.txt" inside tmp_path (the file itself is not created)."""
    return tmp_path / "test.txt"
 # Register custom marker for slow tests (if needed)
 def pytest_configure(config):
    """Add the "slow" marker description to the pytest configuration."""
    marker_line = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    config.addinivalue_line("markers", marker_line)
 def pytest_runtest_setup(item):
    """Per-test setup hook; intentionally a no-op placeholder."""
    return None
@@ -0,0 +1,238 @@
 """
 Tests for backend/app/chunking.py
 These are pure unit tests that don't require any external dependencies.
 They test text chunking logic, token estimation, and heading-aware splitting.
 """
 import pytest
 class TestEstimateTokens:
    """Tests for the estimate_tokens() function (4 characters per token, rounded down)."""
    def test_empty_text(self):
        """Empty text should return 0 tokens."""
        from backend.app.chunking import estimate_tokens
        tokens = estimate_tokens("")
        assert tokens == 0
    def test_single_char(self):
        """A single character rounds down to 0 tokens (1 // 4)."""
        from backend.app.chunking import estimate_tokens
        tokens = estimate_tokens("a")
        assert tokens == 0  # 1 char // 4 = 0 tokens
    def test_4_chars(self):
        """4 characters = 1 token."""
        from backend.app.chunking import estimate_tokens
        tokens = estimate_tokens("abcd")
        assert tokens == 1
    def test_400_chars(self):
        """400 characters = 100 tokens."""
        from backend.app.chunking import estimate_tokens
        tokens = estimate_tokens("a" * 400)
        assert tokens == 100
    def test_whitespace_only(self):
        """Whitespace counts like any other character: 3 spaces round down to 0 tokens."""
        from backend.app.chunking import estimate_tokens
        tokens = estimate_tokens("   ")
        assert tokens == 0  # 3 chars // 4 = 0
 class TestChunkText:
    """Tests for the chunk_text() function."""
    # Several tests below request the sample_text fixture without using its value.
    def test_empty_input(self, sample_text):
        """Empty input should return empty list."""
        from backend.app.chunking import chunk_text
        assert chunk_text("") == []
    def test_small_text_single_chunk(self, sample_text):
        """Small text under limit should be single chunk."""
        from backend.app.chunking import chunk_text
        small = "This is a very short text that should be returned as a single chunk."
        chunks = chunk_text(small, max_tokens=500)
        assert len(chunks) == 1
        assert chunks[0] == small
    def test_exact_token_limit(self, sample_text):
        """Text exactly at limit should be one chunk."""
        from backend.app.chunking import chunk_text, estimate_tokens
        # Create text that is exactly 500 tokens (2000 chars)
        text = "a" * 2000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) == 1
        assert estimate_tokens(chunks[0]) == 500
    def test_over_limit_splits(self, sample_text):
        """Text over limit should be split into multiple chunks."""
        from backend.app.chunking import chunk_text, estimate_tokens
        # Create text that is 2500 tokens (10000 chars)
        text = "b" * 10000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) >= 2  # Should be split
    def test_preserves_content(self, sample_text):
        """All content should be preserved in chunks (combined)."""
        from backend.app.chunking import chunk_text
        original = "Hello world! This is a test of chunking functionality."
        chunks = chunk_text(original, max_tokens=100)
        combined = "".join(chunks)
        assert len(chunks) == 1
        assert combined == original
    def test_headings_split(self, sample_text):
        """Heading-aware splitting should preserve heading boundaries."""
        from backend.app.chunking import chunk_text
        markdown_with_headings = """# Introduction
 This is the introduction section.
 ## Background
 Background information goes here."""
        # With very small token limit, headings should cause splits
        chunks = chunk_text(markdown_with_headings, max_tokens=20)
        # Only requires that at least one chunk begins with a heading marker
        heading_chunks = [c for c in chunks if c.strip().startswith('#')]
        assert len(heading_chunks) >= 1  # At least some heading preserved
    def test_paragraph_split(self):
        """Paragraph splitting should respect paragraph boundaries."""
        from backend.app.chunking import chunk_text
        # NOTE(review): this text is 53 characters (~13 tokens at 4 chars/token),
        # i.e. below max_tokens=15, while test_small_text_single_chunk expects
        # under-limit text to come back as ONE chunk -- confirm which contract
        # chunk_text really implements.
        text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
        chunks = chunk_text(text, max_tokens=15)  # Small limit forces splits
        assert len(chunks) >= 3  # At least as many paragraphs
    def test_no_empty_chunks(self):
        """Should not return empty chunks."""
        from backend.app.chunking import chunk_text
        text = "Hello world"
        chunks = chunk_text(text, max_tokens=10)
        # Passes vacuously if chunk_text returns an empty list
        for chunk in chunks:
            assert chunk.strip() != ""
 class TestTokenEstimationBoundaries:
    """Tests for token estimation boundaries."""
    def test_boundary_precision(self):
        """Check the estimate on both sides of the 4- and 8-character boundaries."""
        from backend.app.chunking import estimate_tokens
        # (input, expected tokens): 4..7 chars stay at 1 token, 8 chars reach 2
        expectations = [
            ("abcd", 1),
            ("abcde", 1),
            ("abcdef", 1),
            ("abcdefg", 1),
            ("abcdefgh", 2),
        ]
        for text, expected in expectations:
            assert estimate_tokens(text) == expected
    def test_various_languages_chars(self):
        """Token estimation uses character count, not unicode complexity."""
        from backend.app.chunking import estimate_tokens
        # Four Chinese characters are expected to count as four characters
        assert estimate_tokens("你好世界") == 1
        # For text with an emoji only the return type is checked
        emoji_estimate = estimate_tokens("Hello 🎉 world")
        assert isinstance(emoji_estimate, int)
 class TestChunkOverlapBehavior:
    """Tests for overlap handling between chunks."""
    def test_overlap_not_exceeded(self):
        """When the text is split, the first chunk must not exceed half of the combined chunk length."""
        from backend.app.chunking import chunk_text
        sentence = "The quick brown fox jumps over the lazy dog. "
        pieces = chunk_text(sentence * 10, max_tokens=30, overlap_tokens=5)
        # Nothing to compare when the text was not split
        if len(pieces) <= 1:
            return
        combined_length = len("".join(pieces))
        assert len(pieces[0]) <= combined_length // 2  # Rough check
 class TestChunkEdgeCases:
    """Tests for edge cases and error conditions."""
    def test_whitespace_only_text(self):
        """Whitespace-only text must not crash; any list result is accepted."""
        from backend.app.chunking import chunk_text
        result = chunk_text("   \n\n   ", max_tokens=100)
        assert isinstance(result, list)
    def test_very_long_paragraph(self):
        """Long paragraph without breaks should be split."""
        from backend.app.chunking import chunk_text
        sentence = "The quick brown fox jumps over the lazy dog. "
        result = chunk_text(sentence * 100, max_tokens=50)
        assert len(result) > 1  # Should be split
    def test_none_input_raises(self):
        """None input must raise TypeError or AssertionError."""
        from backend.app.chunking import chunk_text
        accepted_errors = (TypeError, AssertionError)
        with pytest.raises(accepted_errors):
            chunk_text(None, max_tokens=100)
    def test_unicode_text(self):
        """Short mixed-script text should come back as a single chunk."""
        from backend.app.chunking import chunk_text
        result = chunk_text("Hello 世界 مرحبا 🎉", max_tokens=50)
        assert len(result) == 1  # Small enough to be single chunk
 # =============================================================================
 # SAMPLE TEXT FIXTURE
 # =============================================================================
@pytest.fixture
 def heading_markdown():
    """Sample markdown with headings for chunking tests.
    Contains an H1 ("Introduction"), two H2 sections ("Background",
    "Conclusion") and one H3 ("Details"), each followed by body text.
    """
    return """# Introduction
 This is the introduction section. It contains some introductory text here.
 ## Background
 Background information goes here to make this longer and test chunking. This paragraph has more content about the background topic. It provides context.
 ### Details
 Specific details about the background are provided in this subsection. More details follow here to ensure we have enough text to properly test heading preservation.
 ## Conclusion
 The conclusion wraps up everything nicely."""
 class TestHeadingPreservation:
    """Tests for heading-aware chunking with sample text."""
    def test_headings_in_separate_chunks(self, heading_markdown):
        """With a small token limit at least one chunk should start with a heading."""
        from backend.app.chunking import chunk_text
        pieces = chunk_text(heading_markdown, max_tokens=30)
        starts_with_heading = [p for p in pieces if p.strip().startswith('#')]
        assert len(starts_with_heading) >= 1
    def test_all_content_present(self, heading_markdown):
        """The section titles must still be present once the chunks are joined."""
        from backend.app.chunking import chunk_text
        joined = "".join(chunk_text(heading_markdown, max_tokens=500))
        # Content shouldn't be truncated or corrupted
        for title in ("Introduction", "Background", "Conclusion"):
            assert title in joined
@@ -0,0 +1,316 @@
 """
 Tests for backend/app/db.py
 These tests verify SQLite database operations including:
 - Table creation (init_db)
 - Library CRUD operations
 - Document chunk storage and retrieval
 - Full-text search functionality
 All tests use a temporary test database file.
 """
 import pytest
 from datetime import datetime
 class TestInitDatabase:
    """Tests for init_db() - table creation."""
    def test_init_db_creates_tables(self, test_database):
        """A table whose name contains "libraries" should exist after init."""
        from backend.app.db import get_connection
        conn = get_connection()
        # try/finally so the connection is released even if the query fails;
        # the assertion runs afterwards on the already-fetched names.
        try:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
            tables = [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()
        # Accepts an exact "libraries" table or any table name containing it
        assert "libraries" in tables or any("libraries" in t.lower() for t in tables)
    def test_init_db_returns_success(self, test_database):
        """init_db should return success indicator (also when called a second time)."""
        from backend.app.db import init_db
        result = init_db()
        assert result["success"] is True
 class TestLibraryOperations:
    """Tests for library CRUD operations (upsert, list, search)."""
    def test_upsert_library_new(self, test_database):
        """Upsert should create new library and echo its ID back."""
        from backend.app.db import upsert_library
        result = upsert_library(
            library_id="/local/testlib",
            name="Test Library",
            description="A test library for unit tests"
        )
        assert result["success"] is True
        assert result["id"] == "/local/testlib"
    def test_upsert_library_update(self, test_database):
        """Upsert should update existing library."""
        from backend.app.db import upsert_library
        # Insert first library
        upsert_library(
            library_id="/local/upsertlib",
            name="Original Name",
            description="Original description"
        )
        # Update it
        result = upsert_library(
            library_id="/local/upsertlib",
            name="Updated Name",
            description="Updated description"
        )
        # NOTE(review): only the success flag is checked; the stored name and
        # description are never read back to confirm the update took effect.
        assert result["success"] is True
    def test_upsert_library_id_normalization(self, test_database):
        """Library ID normalization - /local/ prefix should be preserved."""
        from backend.app.db import upsert_library
        # Test various ID formats
        test_ids = [
            "/local/foundryvtt",
            "foundryvtt",
            "/local/mydocs",
        ]
        for lib_id in test_ids:
            result = upsert_library(library_id=lib_id, name="Test", description="Desc")
            assert result["success"] is True
            # Verify we can retrieve it back
            # NOTE(review): get_chunks_for_library is imported but never called,
            # so nothing is actually retrieved here.
            from backend.app.db import get_chunks_for_library
            # Just ensure no errors occur
    def test_list_libraries(self, test_database):
        """list_libraries should return list of libraries."""
        from backend.app.db import upsert_library, list_libraries
        # Create some libraries
        for i in range(3):
            upsert_library(
                library_id=f"/local/lib{i}",
                name=f"Library {i}",
                description=f"Description {i}"
            )
        libs = list_libraries()
        assert isinstance(libs, list)
        # ">=" rather than "==" tolerates libraries that already existed
        assert len(libs) >= 3
    def test_search_libraries(self, test_database):
        """search_libraries should find libraries by name/description."""
        from backend.app.db import upsert_library, search_libraries
        # Create libraries with searchable names
        upsert_library(library_id="/local/foo1", name="Foo Library", description="Bar baz")
        upsert_library(library_id="/local/foo2", name="Other Library", description="Different content")
        results = search_libraries("foo")
        # NOTE(review): only the return type is checked, not which libraries matched.
        assert isinstance(results, list)
 class TestDocumentChunkOperations:
    """Tests for document chunk storage and retrieval."""
    def test_insert_document_chunk_new(self, test_database):
        """insert_document_chunk should create new chunk record."""
        from backend.app.db import insert_document_chunk
        result = insert_document_chunk(
            doc_id="doc-1",
            library_id="/local/testlib",
            path="docs/example.md",
            title="Example Document",
            content="# Example\n\nThis is the content.",
            chunk_index=0,
            token_estimate=100
        )
        assert result["success"] is True
    def test_insert_document_chunk_update(self, test_database):
        """insert_document_chunk should update existing record."""
        from backend.app.db import insert_document_chunk
        # Insert first
        insert_document_chunk(
            doc_id="doc-update-test",
            library_id="/local/uplib",
            path="old-path.md",
            title="Old Title",
            content="# Old\nContent here.",
            chunk_index=0,
            token_estimate=50
        )
        # Update it (same doc_id, every other field changed)
        result = insert_document_chunk(
            doc_id="doc-update-test",
            library_id="/local/uplib",
            path="new-path.md",
            title="New Title",
            content="# New\nUpdated content.",
            chunk_index=1,
            token_estimate=75
        )
        # NOTE(review): the stored row is not read back, so only the success
        # flag of the second call is verified.
        assert result["success"] is True
    def test_get_document_by_id(self, test_database):
        """get_document_by_id should retrieve document by ID."""
        from backend.app.db import insert_document_chunk, get_document_by_id
        # Insert document
        doc_id = "unique-doc-id-12345"
        insert_document_chunk(
            doc_id=doc_id,
            library_id="/local/testlib",
            path="docs/test.md",
            title="Test Document",
            content="# Test\n\nTest content here.",
            # chunk_index is passed as None here, unlike the other tests
            chunk_index=None,
            token_estimate=200
        )
        # Retrieve it
        doc = get_document_by_id(doc_id)
        assert doc is not None
        assert doc["id"] == doc_id
    def test_get_chunks_for_library(self, test_database):
        """get_chunks_for_library should return all chunks for a library."""
        from backend.app.db import upsert_library, insert_document_chunk, get_chunks_for_library
        # Create library
        upsert_library(library_id="/local/chunktest", name="Chunk Test", description="Test")
        # Add some chunks
        for i in range(3):
            insert_document_chunk(
                doc_id=f"chunk-{i}",
                library_id="/local/chunktest",
                path=f"path{i}.md",
                title=f"Section {i}",
                content=f"Content section {i}.",
                chunk_index=i,
                token_estimate=50
            )
        chunks = get_chunks_for_library("/local/chunktest")
        assert isinstance(chunks, list)
        # ">=" rather than "==" tolerates chunks that already existed
        assert len(chunks) >= 3
    def test_clear_library_documents(self, test_database):
        """clear_library_documents should delete all docs for a library."""
        from backend.app.db import upsert_library, insert_document_chunk, clear_library_documents, get_chunks_for_library
        # Create and populate library
        upsert_library(library_id="/local/cleartest", name="Clear Test", description="Test")
        for i in range(5):
            # title and chunk_index are omitted here -- presumably optional
            # parameters of insert_document_chunk; verify against its signature.
            insert_document_chunk(
                doc_id=f"clear-{i}",
                library_id="/local/cleartest",
                path=f"path{i}.md",
                content=f"Content {i}.",
                token_estimate=20
            )
        # Clear it
        result = clear_library_documents("/local/cleartest")
        assert result["success"] is True
        # Verify cleared
        remaining = get_chunks_for_library("/local/cleartest")
        assert len(remaining) == 0
 class TestDatabaseEdgeCases:
    """Tests for edge cases and error handling."""
    def test_empty_library_id(self, test_database):
        """Operations with empty ID should handle gracefully."""
        from backend.app.db import upsert_library
        # No assertion: the test only requires that the call does not raise
        result = upsert_library(library_id="", name="Test", description="Desc")
        # Should not crash, though may not be a valid operation
    def test_special_characters_in_content(self, test_database):
        """Content with special characters should be stored."""
        from backend.app.db import insert_document_chunk
        # Mix of double quotes, angle brackets, ampersand and an apostrophe
        content = "Hello \"world\" <tag /> & amp; 'apostrophe'"
        result = insert_document_chunk(
            doc_id="special-test",
            library_id="/local/speciallib",
            path="special.md",
            content=content,
            token_estimate=100
        )
        assert result["success"] is True
    def test_very_long_content(self, test_database):
        """Long content should be stored."""
        from backend.app.db import insert_document_chunk
        # 5000 characters of content
        long_content = "a" * 5000
        result = insert_document_chunk(
            doc_id="long-test",
            library_id="/local/longlib",
            path="long.md",
            content=long_content,
            token_estimate=1000
        )
        assert result["success"] is True
    def test_none_description(self, test_database):
        """Library with None description should work."""
        from backend.app.db import upsert_library
        result = upsert_library(
            library_id="/local/nonedesc",
            name="No Description Lib",
            description=None
        )
        assert result["success"] is True
 class TestDatabaseInitialization:
    """Tests for database initialization state."""
    def test_database_is_empty_after_init(self, test_database):
        """list_libraries should return a list right after init (emptiness itself is not asserted)."""
        from backend.app.db import list_libraries
        assert isinstance(list_libraries(), list)
 # =============================================================================
 # FIXTURES
 # =============================================================================
@pytest.fixture
def sample_doc():
    """Provide the fields of one sample document chunk as a dict."""
    chunk = {
        "doc_id": "sample-doc-1",
        "library_id": "/local/samplelib",
        "path": "docs/guide.md",
        "title": "Getting Started Guide",
        "content": "# Getting Started\n\nWelcome to the guide. This is a sample document for testing.\n\n## Installation\n\nInstall with pip.",
        "chunk_index": 0,
        "token_estimate": 500,
    }
    return chunk
@@ -0,0 +1,262 @@
 """
 Tests for mcp-server/server.py
 These are pure unit tests that don't require any external dependencies.
 They test:
 - The strip_local_prefix() function directly (no network)
 - MCP server tool definitions and structure
 """
 import pytest
 class TestStripLocalPrefix:
    """Unit tests that call strip_local_prefix() directly."""

    def test_strips_prefix_from_full_id(self):
        """A full "/local/<name>" ID is reduced to "<name>"."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/foundryvtt") == "foundryvtt"

    def test_preserves_id_without_prefix(self):
        """An ID lacking the /local/ prefix comes back untouched."""
        from mcp_server.server import strip_local_prefix

        bare_id = "foundryvtt"
        assert strip_local_prefix(bare_id) == bare_id

    def test_strips_from_multiple_local_prefixes(self):
        """With a doubled prefix, only the leading occurrence is removed."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local//local/foundryvtt")
        assert stripped == "/local/foundryvtt"

    def test_empty_string(self):
        """The empty string is returned as-is."""
        from mcp_server.server import strip_local_prefix

        empty = ""
        assert strip_local_prefix(empty) == empty

    def test_whitespace_only(self):
        """A whitespace-only ID contains no /local/ prefix and is returned as-is."""
        from mcp_server.server import strip_local_prefix

        blank = "   \t\n"
        assert strip_local_prefix(blank) == blank

    def test_case_sensitive_prefix(self):
        """Only the lowercase prefix matches; an uppercase one is left alone."""
        from mcp_server.server import strip_local_prefix

        # Lowercase prefix is removed.
        assert strip_local_prefix("/local/test") == "test"
        # Uppercase prefix is not a match, so nothing changes.
        assert strip_local_prefix("/LOCAL/test") == "/LOCAL/test"

    def test_partial_match_does_not_strip(self):
        """Look-alike prefixes that are not exactly /local/ are not stripped."""
        from mcp_server.server import strip_local_prefix

        # First a dash-separated look-alike, then one with no separator at all.
        for lookalike in ("/local-docs/test", "/localdocs/test"):
            assert strip_local_prefix(lookalike) == lookalike

    def test_prefix_with_trailing_slash(self):
        """A trailing slash after the library name is preserved."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/foundryvtt/") == "foundryvtt/"
 class TestMcpServerStructure:
    """Structural checks on the MCP server that do not start the server."""

    def test_import_fastmcp(self):
        """FastMCP should be importable; the test is skipped when it is not installed."""
        try:
            from fastmcp import FastMCP  # noqa: F401 - the import itself is the check
        except ImportError as exc:
            pytest.skip(f"fastmcp not installed: {exc}")
 class TestMcpServerToolsExistence:
    """Tests to verify MCP server has expected tools defined."""

    @staticmethod
    def _registered_tool_names():
        """Return the names of the tools exposed via ``mcp.tools``.

        When the ``mcp`` object has no ``tools`` attribute the calling test is
        skipped explicitly. Previously each test wrapped its assertion in
        ``if hasattr(mcp, 'tools')`` and therefore passed without checking
        anything whenever the attribute was missing.
        """
        from mcp_server.server import mcp

        if not hasattr(mcp, 'tools'):
            pytest.skip("mcp object has no 'tools' attribute; tool registration cannot be verified")
        return [t.name for t in mcp.tools]

    def test_mcp_instance_created(self):
        """MCP instance should be created with tools."""
        from mcp_server.server import mcp

        assert mcp is not None

    def test_resolve_library_id_tool_exists(self):
        """resolve-library-id tool should be registered."""
        assert "resolve_library_id" in self._registered_tool_names()

    def test_get_library_docs_tool_exists(self):
        """get-library-docs tool should be registered."""
        assert "get_library_docs" in self._registered_tool_names()

    def test_list_libraries_tool_exists(self):
        """list-libraries tool should be registered."""
        assert "list_libraries" in self._registered_tool_names()

    def test_search_docs_tool_exists(self):
        """search-docs tool should be registered."""
        assert "search_docs" in self._registered_tool_names()

    def test_refresh_library_tool_exists(self):
        """refresh-library tool should be registered."""
        assert "refresh_library" in self._registered_tool_names()

    def test_sync_sources_tool_exists(self):
        """sync-sources tool should be registered."""
        assert "sync_sources" in self._registered_tool_names()
 class TestMcpServerStripPrefixIntegration:
    """Checks on strip_local_prefix as used by the MCP server functions."""

    def test_resolve_library_id_calls_strip_prefix(self):
        """strip_local_prefix is callable and removes /local/ from sample IDs."""
        from mcp_server.server import strip_local_prefix

        # The helper must exist as a callable.
        assert callable(strip_local_prefix)

        # None of the sample IDs may still start with the prefix afterwards.
        prefixed_ids = (
            "/local/foundryvtt",
            "/local/pytest",
            "/local/mydocs/reference",
        )
        for prefixed in prefixed_ids:
            assert not strip_local_prefix(prefixed).startswith("/local/")
 class TestMcpServerPrefixHandlingVariations:
    """Further strip_local_prefix cases covering varied library names."""

    def test_long_library_id(self):
        """A long name after the prefix is returned in full."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/very-long-library-id-with-many-chars-in-name")
        assert stripped == "very-long-library-id-with-many-chars-in-name"

    def test_special_characters_in_id(self):
        """Underscores, dashes and digits in the name are preserved."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/my-doc_v2-3_test") == "my-doc_v2-3_test"

    def test_unicode_in_stripped_name(self):
        """Non-ASCII characters in the name are preserved."""
        from mcp_server.server import strip_local_prefix

        # Chinese characters after the prefix.
        assert strip_local_prefix("/local/世界文档") == "世界文档"

    def test_mixed_case_stripped_name(self):
        """Letter case of the name is preserved."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/FoundryVTT") == "FoundryVTT"
 # =============================================================================
 # FIXTURES
 # =============================================================================
@pytest.fixture
def sample_library_ids():
    """Provide library IDs that all carry the /local/ prefix."""
    prefixed_ids = [
        "/local/foundryvtt",
        "/local/pytest",
        "/local/mydocs/reference/guide.md",
        "/local/my-app",
        "/local/documentation/tutorial/getting-started",
    ]
    return prefixed_ids
@pytest.fixture
def expected_stripped_ids(sample_library_ids):
    """Provide the prefix-free counterparts of the sample library IDs.

    The list is hard-coded; the ``sample_library_ids`` argument is requested
    but not read.
    """
    stripped_ids = [
        "foundryvtt",
        "pytest",
        "mydocs/reference/guide.md",
        "my-app",
        "documentation/tutorial/getting-started",
    ]
    return stripped_ids
@@ -0,0 +1,368 @@
 """
 Tests for backend/app/search.py
 These tests verify search functionality without requiring:
 - A running Qdrant vector database (mocked)
 - Loaded embedding models (mocked)
 The tests focus on:
 - Response shape validation
 - Library filtering
 - Error handling
 - Async function behavior
 """
 import pytest
 class TestResolveLibraryId:
    """Tests for resolve_library_id(), the Context7-style library lookup."""

    def test_returns_candidates_list(self, test_database):
        """With several libraries stored, the lookup returns a list."""
        from backend.app.search import resolve_library_id
        from backend.app.db import upsert_library

        # Seed three libraries before querying.
        for n in range(3):
            upsert_library(
                library_id=f"/local/searchtest{n}",
                name=f"Search Test Library {n}",
                description=f"Description for search test {n}",
            )

        found = resolve_library_id("search")
        assert isinstance(found, list)

    def test_captures_matching_names(self, test_database):
        """Querying a word present in a library name returns a list."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        # This library's name contains the query word "search".
        library_fields = {
            "library_id": "/local/searchlib",
            "name": "Search Library",
            "description": "Main search documentation",
        }
        upsert_library(**library_fields)

        found = resolve_library_id("search")
        assert isinstance(found, list)

    def test_context7_style_prefix(self, test_database):
        """Every returned candidate reports "local" as its source."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        # The stored ID deliberately has no /local/ prefix.
        upsert_library(
            library_id="foundryvtt",
            name="Foundry VTT",
            description="Fantasy tabletop virtual table",
        )

        found = resolve_library_id("foundry")
        for entry in found:
            assert entry.get("source") == "local"

    def test_partial_name_match(self, test_database):
        """Querying part of a library name returns a list."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        library_fields = {
            "library_id": "/local/gamefoundry",
            "name": "Foundry Game Module",
            "description": "Module for foundry games",
        }
        upsert_library(**library_fields)

        found = resolve_library_id("game")
        assert isinstance(found, list)

    def test_empty_result_on_no_matches(self, test_database):
        """A query that matches nothing still returns a list."""
        from backend.app.search import resolve_library_id

        # No library is expected to match this string.
        found = resolve_library_id("xyznonexistent123")
        assert isinstance(found, list)
 class TestSearchDocs:
    """Tests for search_docs() - semantic search with mocked vector store."""

    def test_returns_results_list(self, mock_qdrant_client, test_database):
        """search_docs should return a list of results."""
        from backend.app.search import search_docs
        from backend.app.db import upsert_library, insert_document_chunk

        # Create a library with some chunks first.
        upsert_library(library_id="/local/searchdocslib", name="Search Docs Lib", description="Test")
        for i in range(5):
            insert_document_chunk(
                doc_id=f"searchdoc-{i}",
                library_id="/local/searchdocslib",
                path=f"path{i}.md",
                title=f"Section {i}",
                content=f"# Section {i}\n\nContent about section {i} that matches search queries.",
                chunk_index=i,
                token_estimate=100
            )

        results = search_docs("section")
        assert isinstance(results, list)

    def test_empty_query_returns_empty_list(self):
        """An empty query should return a list (only the type is asserted)."""
        from backend.app.search import search_docs

        results = search_docs("")
        assert isinstance(results, list)

    def test_limit_parameter(self, mock_qdrant_client):
        """search_docs should accept a limit argument and still return a list."""
        from backend.app.search import search_docs

        results_10 = search_docs("test", limit=10)
        results_5 = search_docs("test", limit=5)
        assert isinstance(results_10, list)
        assert isinstance(results_5, list)

    def test_response_shape_matches_spec(self):
        """Verify response shape when the mocked client returns data."""
        from unittest.mock import patch
        from backend.app.search import search_docs

        with patch('backend.app.vector_store.get_client') as mock_get_client:
            # Make the patched client's search() return one scored point.
            mock_client = mock_get_client.return_value
            mock_point = type('ScoredPoint', (), {
                'score': 0.95,
                'payload': {
                    "id": "test-id-1",
                    "library_id": "/local/testlib",
                    "path": "docs/example.md",
                    "title": "Example Document",
                    "chunk_index": 0
                }
            })()
            mock_client.search.return_value = [mock_point]

            results = search_docs("test query")
            assert isinstance(results, list)
            if results:
                # The first result must expose every expected field.
                result = results[0]
                for field in ("id", "score", "library_id", "path", "title", "chunk_index"):
                    assert field in result
 class TestGetLibraryDocs:
    """Tests for get_library_docs(), which retrieves a library's documentation text."""

    def test_returns_empty_string_when_no_documents(self, mock_qdrant_client):
        """For an unknown library the result is still a string."""
        from backend.app.search import get_library_docs

        # The string may be empty or carry an error message; only the type is checked.
        text = get_library_docs("/local/nonexistent")
        assert isinstance(text, str)

    def test_returns_content_when_documents_exist(self, mock_qdrant_client):
        """For a library with one stored chunk the result is a string."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        # Store a library with a single chunk.
        upsert_library(library_id="/local/docretrievetest", name="Doc Retrieve", description="Test")
        chunk_fields = {
            "doc_id": "doc-retrieve-1",
            "library_id": "/local/docretrievetest",
            "path": "docs/getting-started.md",
            "title": "Getting Started",
            "content": "# Getting Started\n\nWelcome to the documentation. This is a test document.",
            "chunk_index": 0,
            "token_estimate": 200,
        }
        insert_document_chunk(**chunk_fields)

        # The content itself is not inspected, only the return type.
        text = get_library_docs("/local/docretrievetest")
        assert isinstance(text, str)

    def test_topic_filter_searches(self, mock_qdrant_client):
        """Passing a topic still yields a string result."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        upsert_library(library_id="/local/topicsearchlib", name="Topic Search", description="Test")

        # Two chunks on different topics.
        install_chunk = {
            "doc_id": "topic-install",
            "library_id": "/local/topicsearchlib",
            "path": "docs/install.md",
            "title": "Installation Guide",
            "content": "# Installation\n\nInstall with pip install mypackage.",
            "chunk_index": 0,
            "token_estimate": 150,
        }
        usage_chunk = {
            "doc_id": "topic-usage",
            "library_id": "/local/topicsearchlib",
            "path": "docs/usage.md",
            "title": "Usage Guide",
            "content": "# Usage\n\nUse mycommand --help for help.",
            "chunk_index": 0,
            "token_estimate": 150,
        }
        insert_document_chunk(**install_chunk)
        insert_document_chunk(**usage_chunk)

        # Ask for the "install" topic.
        text = get_library_docs("/local/topicsearchlib", topic="install")
        assert isinstance(text, str)

    def test_token_limit_respected(self):
        """Passing a small token_limit still yields a string result."""
        from backend.app.search import get_library_docs
        from backend.app.db import upsert_library, insert_document_chunk

        # Store one chunk with a long body.
        upsert_library(library_id="/local/tokenlimittest", name="Token Limit", description="Test")
        filler = " ".join(["word"] * 500)
        chunk_fields = {
            "doc_id": "long-doc",
            "library_id": "/local/tokenlimittest",
            "path": "docs/long.md",
            "title": "Long Document",
            "content": "# Long Content\n\n" + filler,
            "chunk_index": 0,
            "token_estimate": 2000,
        }
        insert_document_chunk(**chunk_fields)

        # Request the docs with a limit far below the chunk's estimate.
        text = get_library_docs("/local/tokenlimittest", token_limit=100)
        assert isinstance(text, str)
 class TestGetLibraryDocsWithMock:
    """Retrieval checks run with the mock_qdrant_client fixture active."""

    def test_retrieves_chunks_by_library_id(self, mock_qdrant_client):
        """Without a topic, get_library_docs returns a string for a library with three chunks."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        upsert_library(library_id="/local/mockretrievetest", name="Mock Retrieve", description="Test")
        for n in range(3):
            insert_document_chunk(
                doc_id=f"mock-retrieve-{n}",
                library_id="/local/mockretrievetest",
                path=f"path{n}.md",
                title=f"Path {n}",
                content=f"Content for path {n}.",
                chunk_index=n,
                token_estimate=50,
            )

        text = get_library_docs("/local/mockretrievetest")
        assert isinstance(text, str)
 class TestSearchErrorHandling:
    """Tests for error handling in search functions."""

    def test_search_handles_missing_library(self):
        """Should handle missing library gracefully."""
        from backend.app.search import search_docs

        results = search_docs("test", library_id="/local/missing_lib_xyz123")
        assert isinstance(results, list)

    def test_resolve_handles_no_libraries_in_db(self, test_database):
        """Should handle a database without matching libraries gracefully.

        The original body only imported names and asserted nothing, and its
        comment referred to the ``test_database`` fixture without requesting
        it. The fixture is now requested and the lookup is actually run.
        """
        from backend.app.search import resolve_library_id

        # NOTE(review): assumes test_database provides an initialized database - confirm in conftest.
        candidates = resolve_library_id("anything")
        assert isinstance(candidates, list)

    def test_get_library_docs_handles_empty_library(self):
        """Should handle library with no chunks."""
        from backend.app.search import get_library_docs

        result = get_library_docs("/local/emptylib")
        assert isinstance(result, str)
 # =============================================================================
 # FIXTURES FOR SEARCH TESTS
 # =============================================================================
@pytest.fixture
 def search_sample_text():
    """Sample Markdown text with headings for search chunking tests.

    The text opens with an "Installation Guide" heading, followed by
    "Configuration", "Usage Examples" and "Troubleshooting" sections, and
    contains fenced bash and python code blocks.
    """
    return """# Installation Guide
 To install the package:
 ```bash
 pip install mypackage
 ```
 ## Configuration
 Configure your environment by setting these variables:
 - MY_VAR=123
 - DEBUG=true
 ## Usage Examples
 Example 1: Basic usage
 ```python
 import mymodule
 module = mymodule.Module()
 result = module.run()
 print(result)
 ```
 Example 2: Advanced usage with options
 ```python
 options = {"verbose": True, "output": "stdout"}
 result = module.run(options=options)
 ```
 ## Troubleshooting
 Common issues and their solutions:
 - ImportError: Ensure package is installed
 - AttributeError: Check that attributes exist on object"""
@@ -0,0 +1,29 @@
 # Context7 Docs WebUI Configuration
 # Copy this file to .env and configure for your environment
 # === Ports (optional - use if you need custom ports) ===
 HOST_PORT=8787          # docs-api port (default: 8787)
 MCP_HOST_PORT=8788      # docs-mcp port (default: 8788)
 WEBUI_PORT=8790         # WebUI port (default: 8790)
 # === API Keys (optional - uncomment to enable auth) ===
 # Docs API key for protecting endpoints like /search, /ingest, etc.
 # DOCS_API_KEY=your-secret-docs-api-key
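 # WebUI API key (optional - separate from docs-api for UI authentication)
 # NOTE: the WebUI client code reads the WEBUI_API_KEY environment variable; verify this name is mapped to it.
 # DOCS_WEBUI_API_KEY=your-webui-api-key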
 # === Application Configuration ===
 # Path to documentation files (absolute path inside the service container)
 DOCS_PATH=/docs
 # SQLite database path
 DB_PATH=/data/db.sqlite
 # Logging level: DEBUG, INFO, WARNING, ERROR
 LOG_LEVEL=INFO
 # === Vector Store ===
 # Qdrant host and port (internal Docker network)
 VECTOR_STORE_HOST=qdrant
 VECTOR_STORE_PORT=6333
@@ -0,0 +1,19 @@
 # WebUI Dockerfile
 # Builds the image that serves the WebUI FastAPI app with uvicorn.
 FROM python:3.12-slim
 # Skip .pyc generation, keep Python output unbuffered, and set the default docs-api URL.
 ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DOCS_API_URL=http://docs-api:8787
 WORKDIR /app
 # Install dependencies before copying the application code.
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 # The app directory is copied to /app/webui so that CMD can load it as `webui.main`.
 COPY app /app/webui
 RUN mkdir -p /app/webui/templates/uploads
 # Port matches the --port passed to uvicorn below.
 EXPOSE 8790
 CMD ["uvicorn", "webui.main:app", "--host", "0.0.0.0", "--port", "8790"]
@@ -0,0 +1,72 @@
 """Async docs-api client for the WebUI."""
 import os
 from typing import Any, Dict, Optional
 from httpx import AsyncClient, Timeout
 class DocsAPIClient:
    """Thin asynchronous HTTP wrapper for talking to the docs-api backend."""

    def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None):
        """Resolve the base URL and API key, falling back to environment variables.

        Args:
            base_url: Backend URL; when falsy, DOCS_API_URL (or the built-in
                default) is used. Trailing slashes are removed.
            api_key: Key sent as the X-API-Key header; when None,
                WEBUI_API_KEY is read from the environment.
        """
        resolved_url = base_url or os.environ.get("DOCS_API_URL", "http://docs-api:8787")
        self.base_url = resolved_url.rstrip("/")
        if api_key is None:
            api_key = os.environ.get("WEBUI_API_KEY")
        self.api_key = api_key
        # No header is sent at all when the key is missing or empty.
        self.headers = {"X-API-Key": self.api_key} if self.api_key else {}
        self._client: Optional[AsyncClient] = None

    async def _get_client(self) -> AsyncClient:
        """Return the underlying AsyncClient, building a new one if none is open."""
        needs_new_client = self._client is None or self._client.is_closed
        if needs_new_client:
            self._client = AsyncClient(
                base_url=self.base_url,
                headers=self.headers,
                timeout=Timeout(120.0),
            )
        return self._client

    async def request(self, method: str, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Send a request and return the response as a dict.

        JSON object responses are returned as-is; any other JSON value or a
        non-JSON body is wrapped as {"data": ...}.

        Raises:
            RuntimeError: if the response status code is 400 or higher.
        """
        http = await self._get_client()
        response = await http.request(method, path, **kwargs)
        if response.status_code >= 400:
            raise RuntimeError(f"{method} {path} failed: {response.status_code} {response.text}")

        content_type = response.headers.get("content-type", "")
        if not content_type.startswith("application/json"):
            return {"data": response.text}

        payload = response.json()
        if isinstance(payload, dict):
            return payload
        return {"data": payload}

    async def get(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a GET request via request()."""
        return await self.request("GET", path, **kwargs)

    async def post(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a POST request via request()."""
        return await self.request("POST", path, **kwargs)

    async def delete(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a DELETE request via request()."""
        return await self.request("DELETE", path, **kwargs)

    async def health(self) -> Dict[str, Any]:
        """Fetch /health; any failure is reported as a dict instead of raised."""
        try:
            return await self.get("/health")
        except Exception as exc:
            return {"status": "error", "message": str(exc)}

    async def upload_file(self, library_id: str, filename: str, content: bytes) -> Dict[str, Any]:
        """POST the file content as a multipart "file" field to the library's upload endpoint."""
        multipart = {"file": (filename, content)}
        return await self.post(f"/api/v1/upload/{library_id}", files=multipart)

    async def close(self) -> None:
        """Close the underlying AsyncClient if one exists and is still open."""
        if self._client is None or self._client.is_closed:
            return
        await self._client.aclose()
 _client_instance: Optional[DocsAPIClient] = None
 async def get_client() -> DocsAPIClient:
    """Return the module-wide DocsAPIClient, constructing it on the first call."""
    global _client_instance
    if _client_instance is not None:
        return _client_instance
    _client_instance = DocsAPIClient()
    return _client_instance
 async def close_client() -> None:
    """Close the module-wide client, if one was ever created."""
    shared = _client_instance
    if shared is None:
        return
    await shared.close()
@@ -0,0 +1,17 @@
 """WebUI configuration."""
 from typing import Optional
 class Settings:
    """WebUI settings from environment variables.

    The class attributes hold the built-in defaults. Creating an instance
    reads DOCS_API_URL and WEBUI_API_KEY from the environment and overrides
    the corresponding defaults when they are set to a non-empty value.
    Previously nothing was read from the environment despite the class
    description.
    """
    # Core API connection
    DOCS_API_URL: str = "http://docs-api:8787"
    WEBUI_API_KEY: Optional[str] = None
    # Default parameters for common operations
    DEFAULT_SEARCH_LIMIT: int = 10
    DEFAULT_RESULT_TOKENS: int = 8000

    def __init__(self) -> None:
        """Apply environment overrides on top of the class-level defaults."""
        import os  # local import: this module's header only imports typing

        defaults = type(self)
        # Unset or empty variables fall back to the class-level default.
        self.DOCS_API_URL = os.environ.get("DOCS_API_URL") or defaults.DOCS_API_URL
        self.WEBUI_API_KEY = os.environ.get("WEBUI_API_KEY") or defaults.WEBUI_API_KEY
 settings = Settings()
@@ -0,0 +1,259 @@
 """WebUI FastAPI application."""
 import html
 import os
 from pathlib import Path
 from typing import List, Optional
 from fastapi import FastAPI, File, Form, Request, UploadFile
 from fastapi.responses import HTMLResponse, RedirectResponse
 from fastapi.staticfiles import StaticFiles
 from fastapi.templating import Jinja2Templates
 from .api_client import DocsAPIClient
 # FastAPI application object; the route handlers below are registered on it.
 app = FastAPI(
    title="Context7 Docs WebUI",
    description="Web dashboard for managing documentation system",
    version="1.0.0",
 )
 # Templates are loaded from the "templates" directory next to this module.
 templates = Jinja2Templates(directory=os.path.join(os.path.dirname(__file__), "templates"))
 # Template global that HTML-escapes a value; falsy values (None, "", 0) become an empty string.
 templates.env.globals["escapeHtml"] = lambda value: html.escape(str(value or ""))
 # Static assets are served under /static from the "static" directory next to this module.
 app.mount("/static", StaticFiles(directory=os.path.join(os.path.dirname(__file__), "static")), name="static")
 # Shared DocsAPIClient, created on first use by get_client().
 _client: Optional[DocsAPIClient] = None
 def get_client() -> DocsAPIClient:
    """Return the shared DocsAPIClient, building it from the environment on first use."""
    global _client
    if _client is not None:
        return _client
    api_url = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
    api_key = os.environ.get("WEBUI_API_KEY")
    _client = DocsAPIClient(api_url, api_key)
    return _client
@app.on_event("shutdown")
async def shutdown() -> None:
    """Close the shared docs-api client on application shutdown, if one was created."""
    active_client = _client
    if active_client is None:
        return
    await active_client.close()
 def page(title: str, body: str) -> HTMLResponse:
    """Wrap an HTML fragment in a minimal standalone HTML page.

    Args:
        title: Page title; it is HTML-escaped before being placed in <title>.
        body: Markup placed inside <body> verbatim. It is NOT escaped here,
            so callers must escape any untrusted text themselves.

    Returns:
        An HTMLResponse containing the complete document.
    """
    return HTMLResponse(
        f"""<!DOCTYPE html>
 <html><head><meta charset="UTF-8"><title>{html.escape(title)}</title></head>
 <body style="font-family:sans-serif;padding:20px;">{body}</body></html>"""
    )
@app.get("/")
async def dashboard(request: Request):
    """Render the dashboard with backend health, total vector count and the library list.

    Failures while fetching collections or libraries are swallowed and shown
    as zero vectors or an empty library list respectively.
    """
    api = get_client()
    health_status = await api.health()

    vector_count = 0
    try:
        collections_payload = await api.get("/collections")
        # Only dict entries contribute; anything else under "collections" is ignored.
        for entry in collections_payload.get("collections", {}).values():
            if isinstance(entry, dict):
                vector_count += entry.get("vectors", 0)
    except Exception:
        vector_count = 0

    try:
        libraries_payload = await api.get("/libraries")
        library_rows = libraries_payload.get("libraries", [])
    except Exception:
        library_rows = []

    context = {
        "request": request,
        "health": health_status,
        "vectors": vector_count,
        "libraries": library_rows,
    }
    return templates.TemplateResponse("dashboard.html", context)
@app.post("/actions/ingest-all")
async def ingest_all():
    """Trigger ingestion of everything on the backend and show the outcome as a page."""
    api = get_client()
    try:
        outcome = await api.post("/ingest/all")
        markup = f"<h1>Ingestion Complete</h1><pre>{html.escape(str(outcome))}</pre><a href='/'>Back</a>"
    except Exception as exc:
        markup = f"<h1>Ingestion Failed</h1><pre>{html.escape(str(exc))}</pre><a href='/'>Back</a>"
    return page("Ingestion", markup)
@app.post("/actions/sync-sources")
async def sync_sources_action():
    """Ask the backend to sync its sources (override disabled) and show the outcome as a page."""
    api = get_client()
    sync_request = {"override": False}
    try:
        outcome = await api.post("/sources/sync", json=sync_request)
        markup = f"<h1>Git Sync Complete</h1><pre>{html.escape(str(outcome))}</pre><a href='/'>Back</a>"
    except Exception as exc:
        markup = f"<h1>Git Sync Failed</h1><pre>{html.escape(str(exc))}</pre><a href='/'>Back</a>"
    return page("Git Sync", markup)
@app.get("/libraries")
async def libraries(request: Request):
    """Render the library list; a failed backend call yields an empty list."""
    api = get_client()
    try:
        payload = await api.get("/libraries")
        library_rows = payload.get("libraries", [])
    except Exception:
        library_rows = []
    context = {"request": request, "data": library_rows}
    return templates.TemplateResponse("libraries.html", context)
@app.post("/libraries/create")
async def create_library(
    library_id: str = Form(...),
    name: str = Form(...),
    description: Optional[str] = Form(None),
):
    """Create a library on the backend from the submitted form and show the outcome.

    Args:
        library_id: Library identifier; surrounding whitespace is stripped
            before it is placed in the backend URL.
        name: Display name sent to the backend.
        description: Optional description; sent as an empty string when missing.

    Returns:
        An HTML page whose title and heading both reflect success or failure.
        Previously the title read "Library Created" even when creation failed.
    """
    client = get_client()
    try:
        result = await client.post(
            f"/api/v1/libraries/{library_id.strip()}",
            data={"name": name, "description": description or ""},
        )
        title = "Library Created"
        body = f"<h1>Library Created</h1><pre>{html.escape(str(result))}</pre><a href='/libraries'>Back</a>"
    except Exception as e:
        title = "Create Failed"
        body = f"<h1>Create Failed</h1><pre>{html.escape(str(e))}</pre><a href='/libraries'>Back</a>"
    return page(title, body)
@app.post("/libraries/{library_id}/ingest")
async def ingest_library(library_id: str):
    """Trigger ingestion of one library on the backend and show the outcome as a page."""
    api = get_client()
    try:
        outcome = await api.post(f"/ingest/{library_id}")
        markup = f"<h1>Ingestion Complete</h1><pre>{html.escape(str(outcome))}</pre><a href='/libraries'>Back</a>"
    except Exception as exc:
        markup = f"<h1>Ingestion Failed</h1><pre>{html.escape(str(exc))}</pre><a href='/libraries'>Back</a>"
    return page("Ingest Library", markup)
@app.post("/libraries/{library_id}/delete")
async def delete_library(library_id: str):
    """Delete one library on the backend and show the outcome as a page."""
    api = get_client()
    try:
        outcome = await api.delete(f"/api/v1/libraries/{library_id}")
        markup = f"<h1>Library Deleted</h1><pre>{html.escape(str(outcome))}</pre><a href='/libraries'>Back</a>"
    except Exception as exc:
        markup = f"<h1>Delete Failed</h1><pre>{html.escape(str(exc))}</pre><a href='/libraries'>Back</a>"
    return page("Delete Library", markup)
@app.get("/libraries/{library_id}/docs")
async def view_library_docs(library_id: str):
    """Show the documentation text the backend returns for one library.

    Args:
        library_id: Library identifier taken from the URL path.

    Returns:
        An HTML page with the escaped library ID as heading and the escaped
        content (or the error text when the backend call fails) in a <pre>.
    """
    client = get_client()
    try:
        result = await client.get(f"/docs/{library_id}")
        content = result.get("content", "")
    except Exception as e:
        content = str(e)
    # html.escape() only accepts str; a null or structured "content" value from
    # the backend would otherwise raise here, outside the try block.
    if content is None:
        content = ""
    elif not isinstance(content, str):
        content = str(content)
    return page(
        f"Docs: {library_id}",
        f"<h1>{html.escape(library_id)}</h1><pre>{html.escape(content)}</pre><a href='/libraries'>Back</a>",
    )
@app.get("/upload")
async def upload_form(request: Request):
    """Render the upload form with the library list; a failed backend call yields an empty list."""
    api = get_client()
    try:
        payload = await api.get("/libraries")
        library_rows = payload.get("libraries", [])
    except Exception:
        library_rows = []
    context = {"request": request, "libraries": library_rows}
    return templates.TemplateResponse("upload.html", context)
@app.post("/upload")
async def upload_file(
    request: Request,
    library_id: str = Form(""),
    ingest_after_upload: Optional[str] = Form(None),
    files: List[UploadFile] = File(...),
):
    """Upload each submitted file and optionally trigger ingestion afterwards.

    Args:
        request: Incoming request, passed through to the template context.
        library_id: Target library. When blank, a library id is derived from each
            file name (lower-cased stem, spaces replaced by ``-``).
        ingest_after_upload: The form sends ``"on"`` when the checkbox is ticked.
        files: The uploaded files.

    Returns:
        ``upload.html`` rendered with one result entry per file, one
        ``__INGEST__`` entry per ingested library, and the total bytes read.
    """
    client = get_client()
    results = []
    total_size = 0
    # Libraries with at least one successful upload. Dict keys keep first-seen
    # order and de-duplicate, so each library is ingested once.
    libraries_to_ingest = {}
    for upload in files:
        filename = upload.filename or "upload.txt"
        target_library = library_id.strip()
        if not target_library:
            target_library = Path(filename).stem.lower().replace(" ", "-") or "uploaded"
        try:
            contents = await upload.read()
            total_size += len(contents)
            result = await client.upload_file(target_library, filename, contents)
        except Exception as e:
            results.append({"filename": filename, "status": "error", "message": str(e)})
            continue
        results.append({"filename": filename, "status": "success", "message": result})
        # Prefer the id reported by the upload response. Fall back to the id we
        # sent when the response is not a dict or lacks "library_id".
        reported = result.get("library_id") if isinstance(result, dict) else None
        libraries_to_ingest.setdefault(reported or target_library, None)
    if ingest_after_upload == "on":
        for target_library in libraries_to_ingest:
            try:
                ingest_result = await client.post(f"/ingest/{target_library}")
                results.append({"filename": "__INGEST__", "status": "success", "message": ingest_result})
            except Exception as e:
                results.append({"filename": "__INGEST__", "status": "error", "message": str(e)})
    # Repopulate the library selector the same way GET /upload does, so the
    # re-rendered form still lists existing libraries.
    try:
        libs_data = await client.get("/libraries")
        libraries = libs_data.get("libraries", [])
    except Exception:
        libraries = []
    return templates.TemplateResponse(
        "upload.html",
        {"request": request, "libraries": libraries, "results": results, "total_size_bytes": total_size},
    )
@app.get("/search")
async def search_form(request: Request):
    """Render ``search.html`` with an empty query and no results."""
    empty_state = {"request": request, "query": "", "results": []}
    return templates.TemplateResponse("search.html", empty_state)
@app.get("/search/results")
async def search_results(request: Request, q: str = "", limit: int = 10):
    """Run a search for ``q`` and render ``search.html`` with the hits.

    Args:
        request: Incoming request, passed through to the template context.
        q: Query text. An empty query skips the search call.
        limit: Maximum number of results requested.

    Returns:
        ``search.html`` rendered with the query, the results list (empty when
        the search fails) and the limit.
    """
    api = get_client()
    hits = []
    if q:
        search_body = {"query": q, "library_id": None, "limit": limit}
        try:
            response = await api.post("/search", json=search_body)
            hits = response.get("results", [])
        except Exception:
            # Best effort: a failed search renders as "no results".
            hits = []
    context = {"request": request, "query": q, "results": hits, "limit": limit}
    return templates.TemplateResponse("search.html", context)
@app.get("/sources")
async def sources_page(request: Request):
    """Render ``sources.html`` with the list returned by ``GET /api/v1/sources``.

    If the lookup fails for any reason the page is rendered with an empty list.
    """
    api = get_client()
    try:
        payload = await api.get("/api/v1/sources")
        configured = payload.get("sources", [])
    except Exception:
        # Best effort: show the page without sources.
        configured = []
    context = {"request": request, "sources": configured}
    return templates.TemplateResponse("sources.html", context)
@app.post("/sources/sync")
async def sync_sources(override: bool = Form(False)):
    """POST ``{"override": ...}`` to ``/sources/sync`` and report the outcome.

    Args:
        override: Form flag forwarded unchanged in the JSON body.

    Returns:
        Whatever ``page()`` builds for the "Git Sync" title. The body shows the
        escaped response on success, or the escaped exception text on failure.
    """
    api = get_client()
    back_link = "<a href='/sources'>Back</a>"
    try:
        outcome = await api.post("/sources/sync", json={"override": override})
        heading, detail = "Git Sync Complete", html.escape(str(outcome))
    except Exception as exc:
        # Any failure is rendered on the page instead of surfacing as a 500.
        heading, detail = "Git Sync Failed", html.escape(str(exc))
    return page("Git Sync", f"<h1>{heading}</h1><pre>{detail}</pre>{back_link}")
@@ -0,0 +1,159 @@
 // WebUI Static JavaScript Utilities
 // Simple helper functions shared across templates
 /**
  * Escape text for safe insertion into HTML (element content or quoted attributes).
  *
  * null/undefined become "". Any other value is converted with String() first,
  * so numbers and booleans are shown instead of being dropped. The characters
  * & < > " ' are replaced by entities. No DOM access is needed.
  *
  * @param {*} text Value to escape.
  * @returns {string} Escaped text.
  */
 function escapeHtml(text) {
    if (text === null || text === undefined) return "";
    var entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#x27;'
    };
    return String(text).replace(/[&<>"']/g, function(ch) {
        return entities[ch];
    });
 }
 /**
  * Render a number with the default locale's grouping separators.
  * The value is rounded down to a whole number first.
  * Returns "N/A" for null or undefined.
  */
 function formatNumber(num) {
    // Loose comparison with null matches exactly null and undefined.
    if (num == null) {
        return "N/A";
    }
    var whole = Math.floor(num);
    var formatter = new Intl.NumberFormat();
    return formatter.format(whole);
 }
 /**
  * Replace the content of the element with the given id by a loading placeholder.
  * Does nothing when no such element exists.
  */
 function showLoading(elementId) {
    var target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = '<div class="loading-spinner">Loading...</div>';
 }
 /**
  * Empty the element with the given id (removes the loading placeholder).
  * Does nothing when no such element exists.
  */
 function hideLoading(elementId) {
    var target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = "";
 }
 /**
  * Show a temporary notification in the bottom-right corner of the page.
  *
  * The element gets the classes "toast" and the given type ("info" when omitted).
  * After 3 seconds its opacity is set to 0, and 200 ms later it is removed.
  */
 function showToast(message, type) {
    var note = document.createElement('div');
    var kind = type || 'info';
    note.className = 'toast ' + kind;
    note.textContent = message;
    note.style.cssText = [
        'position:fixed;bottom:20px;right:20px;',
        'padding:12px 20px;border-radius:4px;margin-bottom:10px;',
        'background:#333;color:white;font-size:0.9rem;z-index:1000'
    ].join('');
    document.body.appendChild(note);

    function discard() {
        note.remove();
    }
    function fadeOut() {
        note.style.opacity = '0';
        setTimeout(discard, 200);
    }
    setTimeout(fadeOut, 3000);
 }
 /**
  * Show an "error" toast whose text is prefixed with "Error: ".
  */
 function showError(message) {
    var text = "Error: " + message;
    showToast(text, "error");
 }
 /**
  * Show a "success" toast whose text is prefixed with "Success: ".
  */
 function showSuccess(message) {
    var text = "Success: " + message;
    showToast(text, "success");
 }
 /**
  * Make an API request relative to window.webuiConfig.apiUrl.
  *
  * @param {string} endpoint Path below the API base URL. Leading slashes are
  *     stripped so the joined URL never contains "//".
  * @param {string} [method='GET'] HTTP method.
  * @param {*} [data=null] Payload. When given for a method other than GET/HEAD
  *     it is sent as JSON with a matching Content-Type header.
  * @returns {Promise<*>} Parsed JSON when the response is JSON, otherwise text.
  * @throws {Error} When the config is missing or the response status is not ok.
  */
 async function apiRequest(endpoint, method = 'GET', data = null) {
    const config = window.webuiConfig;
    if (!config || !config.apiUrl) {
        throw new Error('window.webuiConfig.apiUrl is not configured');
    }
    const base = config.apiUrl.endsWith('/') ? config.apiUrl : config.apiUrl + '/';
    const url = base + String(endpoint).replace(/^\/+/, '');
    const headers = {};
    if (config.apiKey) {
        headers['X-API-Key'] = config.apiKey;
    }
    const options = { method: method, headers: headers };
    const verb = String(method).toUpperCase();
    // fetch() rejects a body on GET/HEAD. For every other method send the
    // payload as JSON and say so, otherwise the server sees text/plain.
    if (data !== null && data !== undefined && verb !== 'GET' && verb !== 'HEAD') {
        headers['Content-Type'] = 'application/json';
        options.body = JSON.stringify(data);
    }
    try {
        const response = await fetch(url, options);
        if (!response.ok) {
            // statusText can be empty (e.g. over HTTP/2); fall back to the code.
            throw new Error(response.statusText || ('HTTP ' + response.status));
        }
        const contentType = response.headers.get('content-type');
        if (contentType && contentType.includes('application/json')) {
            return await response.json();
        }
        return await response.text();
    } catch (err) {
        console.error('API request failed:', err);
        throw err;
    }
 }
 /**
  * Placeholder for tooltip setup. Intentionally does nothing at the moment.
  */
 function initTooltips() {
    // No tooltip behaviour is implemented yet.
    return;
 }
 /**
  * Debounce a function for input handling.
  *
  * The returned wrapper postpones calling `func` until `wait` milliseconds have
  * passed without another call. Only the last call's arguments and `this` are
  * used.
  *
  * @param {Function} func Function to call once calls stop arriving.
  * @param {number} wait Quiet period in milliseconds.
  * @returns {Function} Debounced wrapper.
  */
 function debounce(func, wait) {
    var timeout;
    return function executedFunction(...args) {
        var context = this;
        // Cancel the pending run first; without this every call would fire.
        clearTimeout(timeout);
        timeout = setTimeout(function() {
            timeout = undefined;
            func.apply(context, args);
        }, wait);
    };
 }
 // Publish the helpers on window so inline template scripts can call them.
 Object.assign(window, {
    escapeHtml: escapeHtml,
    formatNumber: formatNumber,
    showToast: showToast,
    showError: showError,
    showSuccess: showSuccess
 });
@@ -0,0 +1,395 @@
 /* Page shell: a centred column at most 1000px wide */
 .container {
    margin: 0 auto;
    max-width: 1000px;
    padding: 20px;
 }
 /* Site header with a rule underneath */
 header {
    margin-bottom: 20px;
    padding-bottom: 15px;
    border-bottom: 1px solid #cccccc;
 }
 header h1 {
    font-size: 1.5rem;
    margin: 0 0 10px;
 }
 /* Top navigation: links laid out in a row */
 nav {
    gap: 15px;
    display: flex;
 }
 nav a {
    color: #0066cc;
    font-size: 0.9rem;
    text-decoration: none;
 }
 /* Link marked with the "active" class */
 nav a.active {
    text-decoration: underline;
    font-weight: bold;
 }
 main h2 {
    margin-bottom: 15px;
 }
 /* Muted footer separated by a top rule */
 footer {
    color: #666666;
    font-size: 0.8rem;
    margin-top: 40px;
    padding-top: 15px;
    border-top: 1px solid #cccccc;
 }
 /* Card with a green accent stripe on its left edge */
 .status-card {
    margin-bottom: 15px;
    padding: 20px;
    background: #f5f5f5;
    border-left: 4px solid #00c467;
    border-radius: 8px;
 }
 /* Light-blue informational strip */
 .status-message {
    margin: 5px 0;
    padding: 10px;
    background: #e8f4fd;
    border-radius: 4px;
 }
 /* Code block: keeps line breaks, wraps long lines, scrolls sideways if needed */
 pre.code-block {
    padding: 15px;
    background: #f5f5f5;
    border-radius: 4px;
    overflow-x: auto;
    white-space: pre-wrap;
    word-break: break-word;
 }
 /* Tables */
 .library-table {
    margin-top: 10px;
    width: 100%;
    border-collapse: collapse;
 }
 .library-table th,
 .library-table td {
    text-align: left;
    padding: 10px;
    border-bottom: 1px solid #dddddd;
 }
 /* Header cells */
 .library-table th {
    font-weight: bold;
    background: #f5f5f5;
 }
 /* Forms */
 form input[type="text"],
 form textarea,
 form select {
    margin-right: 10px;
    margin-bottom: 10px;
    padding: 8px;
    border: 1px solid #cccccc;
    border-radius: 4px;
 }
 /* Default button look */
 button {
    cursor: pointer;
    padding: 10px 20px;
    color: white;
    background: #0066cc;
    border: none;
    border-radius: 4px;
 }
 button:hover {
    background: #0055aa;
 }
 /* Upload, search and sync forms share one width cap */
 .upload-form,
 .search-form,
 .sync-form {
    max-width: 600px;
 }
 /* Search results */
 .results-count {
    margin-bottom: 15px;
    padding: 10px;
    background: #e8f4fd;
    border-radius: 4px;
 }
 .result-card {
    margin: 10px 0;
    padding: 15px;
    background: #ffffff;
    border: 1px solid #dddddd;
    border-radius: 4px;
 }
 .result-card h3 {
    margin: 0 0 8px;
 }
 /* Results box: scrolls vertically once taller than 600px */
 .results-box {
    overflow-y: auto;
    max-height: 600px;
 }
 .results-box .new-search-link {
    margin-top: 15px;
    display: block;
    text-align: center;
 }
 /* Source cards */
 .source-cards {
    gap: 10px;
    display: grid;
 }
 .source-card {
    padding: 15px;
    background: #f5f5f5;
    border-left: 4px solid #666666;
    border-radius: 4px;
 }
 /* Inline code inside a status message: light text on a dark chip */
 .status-message code {
    padding: 2px 6px;
    color: #ffffff;
    background: #333333;
    border-radius: 3px;
 }
 /* Small grey helper text */
 .hint {
    margin-top: 15px;
    font-size: 0.85rem;
    color: #666666;
 }
 .results-box .error {
    font-weight: bold;
    color: #cc0000;
 }
 /* Containers whose text should wrap normally */
 .source-list, .source-cards {
    white-space: normal;
 }
 /* Preformatted output keeps its line breaks and indentation but wraps long
    lines. "white-space: normal" here would collapse every <pre> into one
    paragraph. */
 pre {
    white-space: pre-wrap;
    word-break: break-word;
 }
 /* Status cards grid: as many columns of at least 200px as fit */
 .status-cards {
    margin-bottom: 20px;
    display: grid;
    gap: 15px;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
 }
 /* Card caption */
 .status-card h3 {
    color: #555555;
    font-size: 0.9rem;
    margin: 0 0 8px;
 }
 /* Card value */
 .status-card p {
    font-weight: bold;
    font-size: 1.2rem;
    margin: 0;
 }
 /* Message box: light-blue panel with a blue stripe on the left */
 .message-box {
    margin-bottom: 20px;
    padding: 12px;
    background: #e8f4fd;
    border-left: 4px solid #3b82f6;
    border-radius: 6px;
 }
 /* Action buttons */
 .action-buttons {
    display: flex;
    gap: 15px;
    margin-bottom: 20px;
 }
 /* Base look shared by every .btn element (works on <a> and <button>) */
 .btn {
    padding: 10px 20px;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    text-decoration: none;
    display: inline-block;
    font-size: 0.9rem;
 }
 /* .btn-primary is defined once, under "Additional action button colors".
    The green variant that used to sit here had the same specificity and came
    earlier in the file, so it never applied. */
 .btn-secondary {
    background: #2563eb;
    color: white;
 }
 .btn-secondary:hover {
    background: #1d4ed8;
 }
 /* Links section */
 .links-section h2 {
    margin-bottom: 10px;
    font-size: 1rem;
 }
 .links-section a {
    padding: 5px 10px;
    color: #0066cc;
    text-decoration: none;
 }
 /* Underline only while hovered */
 .links-section a:hover {
    text-decoration: underline;
 }
 /* Create library form: panel with a green stripe on the left */
 .create-form {
    margin-bottom: 20px;
    padding: 15px;
    background: #f9f9f9;
    border-left: 4px solid #00c467;
    border-radius: 6px;
 }
 /* Each label sits on its own line above its input */
 .create-form label {
    display: block;
    margin-bottom: 8px;
    color: #333333;
    font-weight: bold;
 }
 /* Full-width inputs; border-box keeps padding inside the 100% width */
 .create-form input[type="text"] {
    box-sizing: border-box;
    width: 100%;
    margin-bottom: 12px;
    padding: 8px;
    border: 1px solid #cccccc;
    border-radius: 4px;
 }
 /* Table actions column: keep the controls on one line */
 .actions {
    white-space: nowrap;
 }
 /* Button sizes: compact variant */
 .btn-sm {
    font-size: 0.8rem;
    padding: 5px 12px;
 }
 /* Additional action button colors (each with a darker hover shade) */
 .btn-info {
    color: white;
    background: #17a2b8;
 }
 .btn-info:hover {
    background: #138496;
 }
 /* Yellow button uses dark text */
 .btn-warning {
    color: black;
    background: #ffc107;
 }
 .btn-warning:hover {
    background: #ffa000;
 }
 .btn-danger {
    color: white;
    background: #dc3545;
 }
 .btn-danger:hover {
    background: #c82333;
 }
 .btn-primary {
    color: white;
    background: #007bff;
 }
 .btn-primary:hover {
    background: #0056b3;
 }
 /* Highlighted table row: pale green background */
 tr.highlight {
    background: #f0fdf4;
 }
 /* Upload form specific styles: full-width library selector and file input */
 #library_id,
 #files {
    box-sizing: border-box;
    width: 100%;
    margin-bottom: 12px;
    padding: 8px;
    border: 1px solid #cccccc;
    border-radius: 4px;
 }
 #files {
    font-family: sans-serif;
 }
 /* Results box for upload */
 .result-box {
    min-height: 100px;
    margin-top: 20px;
    padding: 10px;
    background: #ffffff;
    border: 1px solid #dddddd;
    border-radius: 4px;
 }
 /* Error variant: red border on a pale red background */
 .result-box.error {
    background: #fff5f5;
    border-color: #dc3545;
 }
 /* Result items: small monospace rows */
 .result-item {
    margin: 4px 0;
    padding: 6px;
    font-family: monospace;
    font-size: 0.85rem;
    word-break: break-word;
    border-radius: 3px;
 }
 /* Green: success */
 .result-item.success {
    color: #155724;
    background: #d4edda;
    border-left: 3px solid #28a745;
 }
 /* Red: error */
 .result-item.error {
    color: #721c24;
    background: #f8d7da;
    border-left: 3px solid #dc3545;
 }
 /* Blue: info */
 .result-item.info {
    color: #0c5460;
    background: #d1ecf1;
    border-left: 3px solid #17a2b8;
 }
@@ -0,0 +1,32 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{% block title %}Context7 Docs{% endblock %}</title>
    <link rel="stylesheet" href="{{ url_for('static', path='style.css') }}">
 </head>
 <body>
    {# Navigation entries as (href, label). "/" must match the path exactly; every other entry matches by prefix. #}
    {% set nav_links = [('/', 'Dashboard'), ('/libraries', 'Libraries'), ('/upload', 'Upload'), ('/search', 'Search'), ('/sources', 'Sources')] %}
    {% set current_path = request.url.path %}
    <div class="container">
        <header>
            <h1>Context7 Docs UI</h1>
            <nav>
                {% for href, label in nav_links %}
                <a href="{{ href }}" {% if (href == '/' and current_path == '/') or (href != '/' and current_path.startswith(href)) %}class="active"{% endif %}>{{ label }}</a>
                {% endfor %}
            </nav>
        </header>
        <main>
            {% block content %}{% endblock %}
        </main>
        <footer>Context7 Docs WebUI</footer>
    </div>
    {# Shared helpers load before any page-specific scripts block. #}
    <script src="{{ url_for('static', path='app.js') }}"></script>
    {% block scripts %}{% endblock %}
 </body>
 </html>
@@ -0,0 +1,83 @@
 {% extends "base.html" %}
 {% block title %}Dashboard - Context7 Docs{% endblock %}
 {% block content %}
 <h1>Dashboard</h1>
 <!-- Status Cards -->
 <div class="status-cards">
    <div class="status-card" style="{% if health.status == 'ok' %}border-left-color: #00c467{% else %}border-left-color: #f53800{% endif %}">
        <h3>Docs API Service</h3>
        {% if health.status and health.status == 'ok' %}
        <p style="color: #00c467;"><strong>Status:</strong> Online ✓</p>
        {% else %}
        <p style="color: #f53800;"><strong>Status:</strong> {% if health.status == 'error' %}Error{% else %}Offline{% endif %}</p>
        {% endif %}
    </div>
    <div class="status-card">
        <h3>Vectors Stored</h3>
        <p>{{ vectors|default(0) }}</p>
    </div>
    <div class="status-card">
        <h3>Libraries Registered</h3>
        <p>{{ libraries|length }}</p>
    </div>
 </div>
 <!-- Registered libraries -->
 {% if libraries and libraries|length > 0 %}
 <div class="message-box" style="background: #e8f4fd;">
    {# escapeHtml is a browser-side helper from app.js, not a template function. Use Jinja's own escaping. Each entry may be a plain id or an object with an "id" field. #}
    <strong>Libraries:</strong>
    {% for lib in libraries %}{{ (lib.id | default(lib, true)) | e }}{% if not loop.last %}, {% endif %}{% endfor %}
 </div>
 {% endif %}
 <!-- Action Buttons -->
 <div class="action-buttons">
    <form method="post" action="/actions/ingest-all" style="display: inline;">
        <button type="submit" name="ingest-all" class="btn btn-primary">
            🔄 Ingest All Libraries
        </button>
    </form>
    <form method="post" action="/actions/sync-sources" style="display: inline;">
        <input type="hidden" name="override" value="false">
        <button type="submit" name="sync-sources" class="btn btn-secondary">
            📦 Sync Git Sources
        </button>
    </form>
 </div>
 <!-- Links -->
 <div class="links-section">
    <h2>Navigate to Other Pages</h2>
    <a href="/libraries" style="display: inline-block; margin-right: 15px;">View Libraries →</a>
    <a href="/upload" style="display: inline-block; margin-right: 15px;">Upload Files →</a>
    <a href="/search" style="display: inline-block; margin-right: 15px;">Search Docs →</a>
    <a href="/sources" style="display: inline-block;">Git Sources →</a>
 </div>
 <!-- Optional client-side health refresh -->
 <script>
 // After the page loads, re-check API health and tag the status line of the first card.
 document.addEventListener("DOMContentLoaded", async function() {
    const api = window.docsApiClient;
    const statusLine = document.querySelector('.status-cards .status-card:first-of-type p');
    // Skip quietly when no API client object is available on the page,
    // instead of throwing on api.get.
    if (!api || typeof api.get !== 'function' || !statusLine) {
        return;
    }
    try {
        const newHealth = await api.get("/health");
        statusLine.classList.remove('online', 'error');
        statusLine.classList.add(newHealth && newHealth.status === 'ok' ? 'online' : 'error');
    } catch (err) {
        console.log('Health refresh skipped:', err);
    }
 });
 </script>
 {% endblock %}
@@ -0,0 +1,74 @@
 {% extends "base.html" %}
 {% block title %}Libraries - Context7 Docs{% endblock %}
 {% block content %}
 <h1>Libraries</h1>
 <!-- Create Library Form -->
 <div class="create-form">
    <form method="post" action="/libraries/create">
        <label for="new_library_id">Library ID:</label>
        <input type="text" id="new_library_id" name="library_id" placeholder="e.g., foundryvtt" required>
        <label for="new_name">Name:</label>
        <input type="text" id="new_name" name="name" placeholder="Display name for this library" required>
        <label for="new_description">Description (optional):</label>
        <input type="text" id="new_description" name="description" placeholder="Brief description...">
        <button type="submit" class="btn btn-primary">Create Library</button>
    </form>
 </div>
 <hr>
 <!-- Libraries Table -->
 {# Values are escaped with Jinja's "e" filter. escapeHtml is a browser-side helper from app.js, not a template function. #}
 <table class="library-table">
    <thead>
        <tr>
            <th>ID</th>
            <th>Name</th>
            <th>Description</th>
            <th>Source Path</th>
            <th>Updated At</th>
            <th>Actions</th>
        </tr>
    </thead>
    <tbody id="libraries-body">
        {% if data|length > 0 %}
            {% for lib in data %}
            <tr class="{% if lib.source_path and 'foundry' in (lib.source_path or '').lower() %}highlight{% endif %}">
                <td><code>{{ lib.id | e }}</code></td>
                <td><strong>{{ lib.name | e }}</strong></td>
                <td>{{ (lib.description or '-') | e }}</td>
                <td><small>{{ (lib.source_path or '-') | e }}</small></td>
                <td><small>{{ lib.updated_at|default('N/A') }}</small></td>
                <td class="actions">
                    {# The id is URL-encoded so spaces, "?" or "#" cannot break the link target. #}
                    <a href="/libraries/{{ lib.id | urlencode }}/docs" class="btn btn-sm btn-info">View Docs</a> |
                    <form method="post" action="/libraries/{{ lib.id | urlencode }}/ingest" style="display:inline;"
                          onsubmit="return confirm('Trigger ingestion for this library?');">
                        <button type="submit" class="btn btn-sm btn-warning">Ingest</button>
                    </form> |
                    <form method="post" action="/libraries/{{ lib.id | urlencode }}/delete" style="display:inline;"
                          onsubmit="return confirm('Delete this library and all its contents? This cannot be undone.');">
                        <button type="submit" class="btn btn-sm btn-danger">Delete</button>
                    </form>
                </td>
            </tr>
            {% endfor %}
        {% else %}
            <tr>
                <td colspan="6" style="text-align:center;">No libraries found. Create one above.</td>
            </tr>
        {% endif %}
    </tbody>
 </table>
 {% if data and data[0] and data[0].get('content') %}
 <!-- Docs view mode -->
 {# Iterate the same object the guard above checked. "data" is a list and has no .get(). #}
 <pre class="code-block">{% for chunk in data[0].get('content', []) %}{% if chunk|length > 0 %}{{ chunk.text | default(chunk.content) | default(chunk) }}{% endif %}{% endfor %}</pre>
 <a href="/libraries" style="display:block;margin-top:20px;">← Back to Libraries</a>
 {% endif %}
 {% endblock %}
@@ -0,0 +1,71 @@
 {% extends "base.html" %}
 {% block title %}Search - Context7 Docs{% endblock %}
 {% block content %}
 <h2>Search Documentation</h2>
 <form method="get" action="/search/results" class="search-form">
    <label for="query">Query:</label>
    <input type="text" id="query" name="q" required placeholder="Enter your search query..." value="{{ (query or '') | e }}">
    <label for="limit">Limit results:</label>
    {# Keep the limit used for the current search selected; default to 10. #}
    {% set current_limit = limit | default(10) %}
    <select id="limit" name="limit">
        {% for choice in [5, 10, 20, 50] %}
        <option value="{{ choice }}"{% if choice == current_limit %} selected{% endif %}>{{ choice }}</option>
        {% endfor %}
    </select>
    <button type="submit">Search</button>
 </form>
 {# Results are rendered from the "results" list supplied by the route, so no client-side API call is needed. #}
 <div id="search-results" class="results-box">
 {% if results %}
    <div class="results-count">{{ results|length }} results found</div>
    {% for r in results %}
    {% set body_text = (r.content or '') | string %}
    <div class="result-card">
        <h3>{{ (r.title or body_text[:100]) | e }}</h3>
        <p>{{ body_text[:500] | e }}...</p>
        {% if r.library_id %}
        <a href="/libraries/{{ r.library_id | urlencode }}/docs">View Full</a>
        {% endif %}
    </div>
    {% endfor %}
    <a href="/search" class="new-search-link">← New Search</a>
 {% elif query %}
    <p>No results found.</p>
 {% endif %}
 </div>
 {% endblock %}
@@ -0,0 +1,34 @@
 {% extends "base.html" %}
 {% block title %}Sources - Context7 Docs{% endblock %}
 {% block content %}
 <h2>Git Repository Sync</h2>
 <div class="status-message">Syncs all git repositories configured in <code>docs_sources.yaml</code>.</div>
 <form method="post" action="/sources/sync" class="sync-form">
    <label for="override">Override existing repos:</label>
    <input type="checkbox" id="override" name="override">
    <button type="submit">Sync All Repositories</button>
 </form>
 <div id="source-list"></div>
 {% if sources %}
 <h3>Configured Sources</h3>
 <div class="source-cards">
    {% for src in sources %}
    <div class="source-card">
        <strong>{{ src.library_id | default('unknown') }}</strong><br>
        {# Parentheses are required: a subscript cannot follow a filter directly in Jinja. The URL is shortened to 60 characters. #}
        URL: {{ (src.repo_url | default('N/A', true) | string)[:60] }}<br>
        Branch: {{ src.branch | default('main') }}<br>
        Include: {{ (src.include_paths | default(['*']) | join(', ')) }}
    </div>
    {% endfor %}
 </div>
 {% else %}
 <p>No git sources configured. Add repositories to <code>docs_sources.yaml</code>.</p>
 {% endif %}
 {% endblock %}
@@ -0,0 +1,48 @@
 {% extends "base.html" %}
 {% block title %}Upload - Context7 Docs{% endblock %}
 {% block content %}
 <h2>Upload Documentation Files</h2>
 <form method="post" enctype="multipart/form-data" class="upload-form">
    <!-- Library Selector -->
    {# Not "required": the empty first option is a valid choice (new library named after the file), and a required select rejects an empty value. #}
    <label for="library_id">Select Library:</label>
    <select id="library_id" name="library_id">
        <option value="">(New library - will be created from filename)</option>
        {% for lib in libraries %}
            <option value="{{ lib.id }}" data-name="{{ lib.name or lib.id }}">{{ lib.name or lib.id }}</option>
        {% endfor %}
    </select>
    <!-- File Input (multiple files allowed) -->
    <label for="files">Select Files:</label>
    <input type="file" name="files" id="files" multiple accept=".md,.txt,.py,.js,.ts,.json,.yaml,.yml,.html,.css,.pdf" required>
    <!-- Ingest Checkbox -->
    <div style="margin-top: 10px;">
        <label>
            <input type="checkbox" name="ingest_after_upload" value="on">
            Trigger ingestion after upload
        </label>
    </div>
    <button type="submit" class="btn btn-primary">Upload Files</button>
 </form>
 <!-- Allowed extensions hint -->
 <p class="hint">Allowed: .md, .txt, .py, .js, .ts, .json, .yaml, .yml, .html, .css, .pdf (max 5MB each)</p>
 <!-- Results Display -->
 <div id="upload-result" class="result-box"></div>
 {% if results %}
 <h3>Upload Results</h3>
 <ul>
    {% for result in results %}
    {# Escaped with Jinja's "e" filter. escapeHtml is a browser-side helper from app.js, not a template function. #}
    <li><strong>{{ result.filename | e }}</strong>: {{ result.status | e }} - {{ result.message | e }}</li>
    {% endfor %}
 </ul>
 {% endif %}
 {% endblock %}
@@ -0,0 +1,7 @@
 # WebUI Dependencies
 fastapi==0.109.0
 uvicorn[standard]==0.27.0
 pydantic==2.5.3
 python-multipart==0.0.6
 httpx==0.26.0
 PyYAML==6.0.1
 # Needed by the template rendering (templates.TemplateResponse); FastAPI does not install it.
 jinja2==3.1.3
		`@@ -0,0 +1,2 @@`
							`# Backend API Package - Contains all FastAPI application modules`
							`# This file makes the directory an importable Python package`
		`@@ -0,0 +1,2 @@`
							`# This directory is intentionally left empty to preserve the folder structure for Docker volumes.`
							`# Data from Qdrant will be mounted here via docker-compose.yml.`
		`@@ -0,0 +1 @@`
							`"""Compatibility package for importing the mcp-server source tree in tests."""`
		`@@ -0,0 +1,2 @@`
							`# Tests package for local-context7`
							`# Contains unit tests for chunking, database operations, search, and MCP server modules`