Initial DocsMCP stack
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
# Context7 Docs API Configuration
|
||||
# Copy this file to .env and configure for your environment
|
||||
|
||||
# === Service Ports (optional - use if you need custom ports) ===
|
||||
HOST_PORT=8787
|
||||
MCP_HOST_PORT=8788
|
||||
|
||||
# === API Keys (optional - uncomment to enable auth) ===
|
||||
# Docs API key for protecting endpoints like /search, /ingest, etc.
|
||||
# DOCS_API_KEY=your-secret-docs-api-key
|
||||
|
||||
# MCP Server API key for protecting MCP tools via HTTP
|
||||
# MCP_API_KEY=your-secret-mcp-server-key
|
||||
|
||||
# === Application Configuration ===
|
||||
# Path to documentation files (absolute path inside the service container)
|
||||
DOCS_PATH=/docs
|
||||
|
||||
# SQLite database path
|
||||
DB_PATH=/data/db.sqlite
|
||||
|
||||
# Logging level: DEBUG, INFO, WARNING, ERROR
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# === Vector Store ===
|
||||
# Qdrant host and port (internal Docker network)
|
||||
VECTOR_STORE_HOST=qdrant
|
||||
VECTOR_STORE_PORT=6333
|
||||
|
||||
# === Git Sources (if using) ===
|
||||
# See docs_sources.yaml for git source configuration
|
||||
+10
@@ -0,0 +1,10 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
.pytest_cache/
|
||||
|
||||
.env
|
||||
data/*
|
||||
!data/.gitkeep
|
||||
backend/data/*
|
||||
|
||||
.DS_Store
|
||||
@@ -0,0 +1,106 @@
|
||||
# Makefile for local-context7
|
||||
# Common development and deployment commands
|
||||
|
||||
# Every target in this Makefile is a command, not a file, so all of them are
# declared phony; otherwise a file or directory with the same name (e.g. a
# "logs" or "reset" file) would make the target look up to date.
.PHONY: help install install-dev deps test test-unit test-coverage test-file lint \
        docker-up docker-down clean backup-db reset logs log-backend health
|
||||
|
||||
.DEFAULT_GOAL := help
|
||||
|
||||
## Help - Show available commands
# Lists every target defined in this Makefile (this is the default goal).
help:
	@echo "Available commands:"
	@echo "  make install        - Install all Python dependencies (backend + tests)"
	@echo "  make install-dev    - Install dependencies plus linting/typing tools"
	@echo "  make deps           - Upgrade all dependencies to latest versions"
	@echo "  make test           - Run all tests with pytest"
	@echo "  make test-unit      - Run only unit tests (no external dependencies)"
	@echo "  make test-coverage  - Run tests with a coverage report"
	@echo "  make test-file      - Run one test file (file=path/to/test.py)"
	@echo "  make lint           - Run linters (if configured)"
	@echo "  make docker-up      - Start Docker containers for development"
	@echo "  make docker-down    - Stop Docker containers"
	@echo "  make logs           - Follow logs of all services"
	@echo "  make log-backend    - Follow logs of the docs-api service"
	@echo "  make health         - Show container status"
	@echo "  make backup-db      - Back up the SQLite database"
	@echo "  make reset          - Delete all Qdrant and SQLite data, then restart"
	@echo "  make clean          - Remove generated files, databases, and caches"
|
||||
|
||||
## Install the backend requirements and the pytest tool-chain
install:
	pip install --requirement backend/requirements.txt
	pip install pytest pytest-mock pytest-asyncio
|
||||
|
||||
## Bring packaging tools, backend requirements and test tools to their newest releases
deps:
	pip install --upgrade pip setuptools wheel
	pip install --upgrade --requirement backend/requirements.txt
	pip install --upgrade pytest pytest-mock pytest-asyncio
|
||||
|
||||
## Execute the complete pytest suite (verbose, short tracebacks)
test:
	@echo "Running all tests..."
	pytest --verbose --tb=short
|
||||
|
||||
## Execute only tests marked "unit"; tests/test_search.py is skipped entirely
# Intended for runs without Qdrant/FastEmbed, i.e. without the Docker containers.
test-unit:
	@echo "Running unit tests only..."
	pytest --verbose --tb=short -m unit --ignore=tests/test_search.py
|
||||
|
||||
## Lint the backend and test sources with flake8 (flake8 must be installed)
# The two directories are checked one after the other; a failure in backend/
# stops make before tests/ is linted.
lint:
	flake8 backend/
	flake8 tests/
|
||||
|
||||
## Start Docker containers for full development environment
# Uses the Compose v2 plugin ("docker compose"), like the reset, logs,
# log-backend and health targets, so every target drives the same CLI.
docker-up:
	docker compose up -d
|
||||
|
||||
## Stop Docker containers
# Uses the Compose v2 plugin ("docker compose"), like the reset, logs,
# log-backend and health targets, so every target drives the same CLI.
docker-down:
	docker compose down
|
||||
|
||||
## Delete SQLite files under backend/data, the embedding cache and Python bytecode
# The find commands are best-effort: their errors are discarded and never fail the target.
clean:
	@echo "Cleaning up..."
	$(RM) -r backend/data/*.sqlite
	$(RM) -r .embed_cache
	$(RM) -r __pycache__
	find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
	find . -type f \( -name "*.pyc" -o -name "*.pyo" \) -delete 2>/dev/null || true
|
||||
|
||||
## Install development dependencies (linting, typing, coverage)
# Runs `install` first, then adds the optional linting/typing tools.
# pytest-cov is included because the test-coverage target passes --cov options,
# which pytest only understands when that plugin is installed.
install-dev: install
	pip install flake8 mypy black pytest-cov
|
||||
|
||||
## Run the test suite with coverage of backend/app (HTML report + missing lines in the terminal)
# The --cov options are provided by the pytest-cov plugin.
test-coverage:
	pytest --verbose --cov=backend/app --cov-report=html --cov-report=term-missing
|
||||
|
||||
## Run specific test file
# Usage: make test-file file=tests/test_example.py
# Fails fast when `file` is empty; without the guard an empty value would
# silently run the entire suite instead of one file.
test-file:
	$(if $(strip $(file)),,$(error file is required, e.g. make test-file file=tests/test_example.py))
	pytest -v $(file)
|
||||
|
||||
## Backup SQLite database
# Writes a gzip-compressed SQL dump of /data/db.sqlite (inside the docs-api
# container) to BACKUP_PATH on the host.
# Usage: make backup-db [BACKUP_PATH=backups/my-dump.sql.gz]
# Default: backups/db-<YYYYmmdd-HHMMSS>.sql.gz
#
# Notes:
# - Shell expansions are written with $$ so that Make passes them to the shell
#   instead of expanding them (to nothing) itself.
# - The timestamp is computed once, so the reported path is the written path.
# - The dump uses Python's sqlite3 module rather than the sqlite3 CLI; the CLI
#   is not among the packages the backend Dockerfile installs.
# - `exec -T` disables TTY allocation so stdout can be redirected cleanly.
# - The dump goes to a temporary file first so a failed dump never leaves a
#   truncated archive that looks like a valid backup.
backup-db:
	@echo "Backing up SQLite database..."
	@set -e; \
	backup_path="$(strip $(BACKUP_PATH))"; \
	if [ -z "$$backup_path" ]; then \
		backup_path="backups/db-$$(date +%Y%m%d-%H%M%S).sql.gz"; \
	fi; \
	mkdir -p "$$(dirname "$$backup_path")"; \
	trap 'rm -f "$$backup_path.tmp"' EXIT; \
	docker compose exec -T docs-api python -c "import sqlite3, sys; sys.stdout.writelines(line + '\n' for line in sqlite3.connect('/data/db.sqlite').iterdump())" > "$$backup_path.tmp"; \
	gzip -c "$$backup_path.tmp" > "$$backup_path"; \
	echo "Backup complete: $$backup_path"
|
||||
|
||||
## Reset all data (Qdrant and SQLite)
# Asks for confirmation, then removes containers/volumes and the on-disk data
# and rebuilds the stack.
# - Uses printf + plain `read` because `read -p` is a bash extension and the
#   default recipe shell is /bin/sh.
# - Declining the prompt exits successfully with "Reset cancelled.".
# - A failing step now fails the target instead of being reported as
#   "Reset cancelled." with exit status 0.
# - `rm -f` keeps the reset going when db.sqlite does not exist yet.
reset:
	@echo "WARNING: This will delete all data in Qdrant and the SQLite database!"
	@printf "Type 'yes' to confirm: "; \
	read confirm; \
	if [ "$$confirm" != "yes" ]; then \
		echo "Reset cancelled."; \
		exit 0; \
	fi; \
	docker compose down -v && \
	rm -f ./data/db.sqlite && \
	rm -rf ./data/qdrant && \
	docker compose up -d --build && \
	echo "Reset complete. Services restarted."
|
||||
|
||||
## Follow the combined log output of every Compose service
logs:
	docker compose logs --follow
|
||||
|
||||
## Follow the log output of the docs-api service only
log-backend:
	docker compose logs --follow docs-api
|
||||
|
||||
## List the Compose services together with their current state
health:
	docker compose ps
|
||||
@@ -0,0 +1,431 @@
|
||||
# Context7-style Docs MCP System
|
||||
|
||||
A self-hosted, local-compatible documentation retrieval and search system using Docker. This project uses Qdrant for vector embeddings and SQLite for metadata storage, exposing a FastAPI docs backend and an MCP server for IDE/tool integration.
|
||||
|
||||
## 🏠 Home Server / Production Use
|
||||
|
||||
This section covers hardening recommendations for running this system on a home server or in production.
|
||||
|
||||
### Environment Variables (`.env`)
|
||||
|
||||
Copy `.env.example` to `.env` and configure:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
| Variable | Description | Example |
|
||||
|----------|-------------|---------|
|
||||
| `HOST_PORT` | Docs API host port (default: 8787) | `8787` |
|
||||
| `MCP_HOST_PORT` | MCP server host port (default: 8788) | `8788` |
|
||||
| `DOCS_API_KEY` | API key for docs-api authentication (optional) | `my-secret-key-123` |
|
||||
| `MCP_API_KEY` | API key for MCP server authentication (optional, FastMCP handles via --key flag conceptually) | `mcp-secret-key` |
|
||||
| `DOCS_PATH` | Path to documentation files inside container | `/docs` |
|
||||
| `DB_PATH` | SQLite database path inside container | `/data/db.sqlite` |
|
||||
| `LOG_LEVEL` | Logging level: DEBUG, INFO, WARNING, ERROR | `INFO` |
|
||||
|
||||
> **Security Note:** API keys are optional. Leave empty in `.env` if you don't need authentication (backward compatible with existing setups). If set, the docs-api requires an `X-API-Key` header matching `DOCS_API_KEY` for protected endpoints.
|
||||
|
||||
### Port Configuration
|
||||
|
||||
For firewall or network setup:
|
||||
|
||||
```bash
|
||||
# Example: Run docs-api on port 9000 instead of 8787
|
||||
HOST_PORT=9000 MCP_HOST_PORT=9001 docker compose up -d --build
|
||||
```
|
||||
|
||||
### Backup Instructions
|
||||
|
||||
#### SQLite Database (`data/db.sqlite`)
|
||||
|
||||
Regular SQLite backups prevent data loss. Example cron job:
|
||||
|
||||
```bash
|
||||
# Add to crontab (run daily at 2am)
|
||||
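# Note: "%" must be escaped as "\%" inside a crontab entry, cron has no TTY (hence -T),
# and the command must run from the project directory so Compose finds its configuration.
0 2 * * * cd /path/to/local-context7 && docker compose exec -T docs-api sqlite3 /data/db.sqlite ".backup '/backups/db_$(date +\%Y\%m\%d).sqlite'"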
|
||||
```
|
||||
|
||||
Or one-off backup:
|
||||
|
||||
```bash
|
||||
docker compose exec docs-api sh -c "sqlite3 /data/db.sqlite '.dump' | gzip > /backups/db-$(date +%Y%m%d-%H%M%S).sql.gz"
|
||||
```
|
||||
|
||||
#### Qdrant Vector Store
|
||||
|
||||
Qdrant stores vectors in `./data/qdrant`. For backup:
|
||||
|
||||
```bash
|
||||
# Backup entire Qdrant data directory
|
||||
docker compose exec qdrant sh -c "tar czf /backups/qdrant-backup-$(date +%Y%m%d).tar.gz /qdrant/storage"
|
||||
|
||||
# Or pull full export to host (requires volume mount)
|
||||
docker run --rm -v local-context7_data:/data -v $(pwd)/backups:/backups qdrant/qdrant:latest tar czf /backups/qdrant-backup-$(date +%Y%m%d).tar.gz /qdrant/storage
|
||||
```
|
||||
|
||||
### Safe Reset Command
|
||||
|
||||
To reset both SQLite and Qdrant cleanly:
|
||||
|
||||
```bash
|
||||
docker compose down -v # Removes volumes and stops services
|
||||
rm ./data/db.sqlite # Remove database file
|
||||
rm -rf ./data/qdrant # Remove Qdrant data
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
Or use the `make reset` command below.
|
||||
|
||||
### Makefile Commands
|
||||
|
||||
The included `Makefile` provides convenient commands:
|
||||
|
||||
```bash
|
||||
# Start services
|
||||
make docker-up
|
||||
|
||||
# Stop services
|
||||
make docker-down
|
||||
|
||||
# Rebuild and restart
|
||||
make docker-down && docker compose up -d --build
|
||||
|
||||
# Backup database
|
||||
make backup-db BACKUP_PATH=/backups/db-$(date +%Y%m%d).sql.gz
|
||||
|
||||
# Reset everything (delete volumes)
|
||||
make reset
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||
│ Client │────▶│ docs-api │◀────│ docs-mcp │
|
||||
│ (IDE/Tool) │ │ (FastAPI) │ │ (MCP Server)│
|
||||
└─────────────┘ └─────────────┘ └─────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────┐
|
||||
│ Qdrant │
|
||||
│ (Vector DB) │
|
||||
└─────────────┘
|
||||
```
|
||||
|
||||
**Components:**
|
||||
- `qdrant` — Vector database storing document embeddings
|
||||
- `docs-api` — FastAPI backend exposing ingestion, search, and library endpoints
|
||||
- `docs-mcp` — MCP server providing tools for Context7-style AI interactions
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Docker Engine v20.10+
|
||||
- Docker Compose
|
||||
- ~500MB free disk space (Qdrant + embedding model)
|
||||
|
||||
## Setup
|
||||
|
||||
1. **Download the project** and change into its directory:
|
||||
|
||||
```bash
|
||||
cd local-context7
|
||||
```
|
||||
|
||||
2. **Copy environment file:**
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
3. **(Optional) Create sample docs:**
|
||||
|
||||
```bash
|
||||
mkdir -p docs/foundryvtt docs/fastapi docs/my-msfs-copilot
|
||||
```
|
||||
|
||||
4. **Start services:**
|
||||
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
5. **Verify they're running:**
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
You should see all three services (`qdrant`, `docs-api`, `docs-mcp`) in "Up" status.
|
||||
|
||||
6. **Wait for startup completion** (embedding model loads on first API call):
|
||||
|
||||
```bash
|
||||
docker compose logs -f docs-api # Watch for "Initialization complete."
|
||||
```
|
||||
|
||||
## Add Docs
|
||||
|
||||
Place your documentation folders under the `docs/` directory in the project root, one folder per library:
|
||||
|
||||
```bash
|
||||
mkdir -p docs/foundryvtt/docs
|
||||
cp /path/to/foundryvtt/*.md docs/foundryvtt/docs/
|
||||
mkdir -p docs/fastapi
|
||||
```
|
||||
|
||||
Supported file types: `.md`, `.txt`, `.py`, `.js`, `.ts`, `.json`, `.yaml`, `.yml`, `.html`, `.css`, `.pdf` (via pypdf).
|
||||
|
||||
After adding new documents, index them into the vector store by running:
|
||||
|
||||
```bash
|
||||
docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
|
||||
```
|
||||
|
||||
Or from another terminal:
|
||||
|
||||
```bash
|
||||
curl -X POST http://localhost:8787/api/v1/ingest/all \
|
||||
-H "Content-Type: application/json"
|
||||
```
|
||||
|
||||
## Index Docs (Run Ingestion)
|
||||
|
||||
After adding documents, index them into the vector store:
|
||||
|
||||
```bash
|
||||
docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
|
||||
```
|
||||
|
||||
Expected output shows progress like:
|
||||
|
||||
```
|
||||
[Detection] Scanning for libraries in: /docs
|
||||
[Detection] Found 3 library(ies)
|
||||
[Library] Processing: foundryvtt
|
||||
[Library] Scanning for files in: /docs/foundryvtt
|
||||
[Library] Found 5 document(s)
|
||||
...
|
||||
```
|
||||
|
||||
## Search Docs
|
||||
|
||||
### Via API (POST to `/search`)
|
||||
|
||||
Request body:
|
||||
|
||||
```json
|
||||
{
|
||||
"query": "how do hooks work",
|
||||
"library_id": "foundryvtt",
|
||||
"limit": 10
|
||||
}
|
||||
```
|
||||
|
||||
Response example:
|
||||
|
||||
```json
|
||||
{
|
||||
  "query": "how do hooks work",
|
||||
"library_id": "foundryvtt",
|
||||
"results": [
|
||||
{
|
||||
"id": "...",
|
||||
"score": 0.854,
|
||||
"library_id": "foundryvtt",
|
||||
"path": "core-docs.md",
|
||||
"title": "Core Hooks",
|
||||
"chunk_index": 2
|
||||
}
|
||||
],
|
||||
"count": 1
|
||||
}
|
||||
```
|
||||
|
||||
### Via MCP (resolve-library-id, search-docs tools)
|
||||
|
||||
## Connect MCP Clients
|
||||
|
||||
To use this system with an MCP-enabled client (e.g., Claude Desktop), configure the MCP server endpoint.
|
||||
|
||||
### Example: Claude Desktop Config
|
||||
|
||||
Add to your `claude_desktop_config.json`:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"context7": {
|
||||
"command": "npx",
|
||||
"args": [
|
||||
"@modelcontextprotocol/server-local-context7",
|
||||
"--url", "http://localhost:8788"
|
||||
],
|
||||
"env": {
|
||||
"DOCS_API_URL": "http://localhost:8787"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
If the client runs outside Docker and can't reach the API, expose them on host ports or run the MCP server outside Docker (see below).
|
||||
|
||||
## Example: Cline/Cursor MCP Config
|
||||
|
||||
For Cursor or similar editors using Cline, add the following to `~/.cursor/mcp.json`:

```json
|
||||
{
|
||||
"context7": {
|
||||
"type": "stdio",
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"exec",
|
||||
"-it",
|
||||
"docs-mcp",
|
||||
"uvicorn",
|
||||
"server:app",
|
||||
"--host",
|
||||
"0.0.0.0",
|
||||
"--port",
|
||||
"8788"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Or if exposing MCP on host port:
|
||||
|
||||
```json
|
||||
{
|
||||
"context7": {
|
||||
"type": "stdio",
|
||||
"command": "docker",
|
||||
"args": [
|
||||
"run",
|
||||
"-it",
|
||||
"--rm",
|
||||
"-p",
|
||||
"8788:8788",
|
||||
"--name",
|
||||
"context7-mcp-standalone",
|
||||
"-e",
|
||||
"DOCS_API_URL=http://host.docker.internal:8787",
|
||||
"local-context7/docs-mcp"
|
||||
]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Services won't start or restart loops
|
||||
|
||||
Check logs:
|
||||
|
||||
```bash
|
||||
docker compose logs -f
|
||||
```
|
||||
|
||||
Common issues:
|
||||
- Port already in use on host → adjust mapping or free the port
|
||||
- Embedding model failing to load → verify disk space, check for GPU constraints if applicable
|
||||
|
||||
### Vector search returns empty results
|
||||
|
||||
Ensure you've run ingestion after adding docs:
|
||||
|
||||
```bash
|
||||
docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
|
||||
```
|
||||
|
||||
### Can't connect to docs-api from client outside Docker
|
||||
|
||||
Set environment variable for host access in docker-compose.yml or .env:
|
||||
|
||||
```yaml
|
||||
docs-api:
|
||||
environment:
|
||||
- DOCS_API_URL=http://host.docker.internal:8787
|
||||
```
|
||||
|
||||
For MCP server specifically:
|
||||
|
||||
```yaml
|
||||
docs-mcp:
|
||||
environment:
|
||||
- DOCS_API_URL=http://host.docker.internal:8787
|
||||
```
|
||||
|
||||
## Reset Qdrant and SQLite
|
||||
|
||||
To clear all data (vector store and database):
|
||||
|
||||
```bash
|
||||
# Stop services
|
||||
docker compose down
|
||||
|
||||
# Remove the persisted data (Qdrant storage directory and db.sqlite)
|
||||
rm -rf ./data/qdrant ./data/db.sqlite
|
||||
|
||||
# Restart fresh
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
## Expose Through Caddy Reverse Proxy
|
||||
|
||||
To add HTTPS and serve under a subdomain, configure Caddy:
|
||||
|
||||
**Example `Caddyfile`:**
|
||||
|
||||
```caddyfile
|
||||
docs.yourdomain.com {
|
||||
reverse_proxy docs-api:8787
|
||||
handle_path /mcp/* {
|
||||
reverse_proxy docs-mcp:8788
|
||||
}
|
||||
|
||||
# Enable basic auth (optional, see below)
|
||||
}
|
||||
|
||||
api.yourdomain.com {
|
||||
reverse_proxy docs-api:8787
|
||||
}
|
||||
|
||||
mcp.yourdomain.com {
|
||||
reverse_proxy docs-mcp:8788
|
||||
}
|
||||
```
|
||||
|
||||
## Protect It with Basic Auth
|
||||
|
||||
Add HTTP Basic authentication using Caddy's built-in `basic_auth` directive (named `basicauth` in Caddy releases before v2.8):

**Caddy example with basic auth:**

```caddyfile
docs.yourdomain.com {
    basic_auth {
        # <username> <hashed-password>; create the hash with `caddy hash-password`
        admin <hashed-password>
    }
    reverse_proxy docs-api:8787
}
```

Clients must then send the credentials with every request, for example `curl -u admin:<password> https://docs.yourdomain.com/health`.
|
||||
|
||||
For Docker-based deployment, consider using an authentication middleware or a dedicated reverse proxy with JWT/HTTP Basic configured externally.
|
||||
|
||||
## Future Improvements
|
||||
|
||||
- Add rate limiting to API endpoints
|
||||
- Support for streaming responses for large document retrieval
|
||||
- Chunk overlap configuration via environment variables
|
||||
- Batch index endpoint improvements
|
||||
- Metrics/logging aggregation (e.g., Prometheus + Grafana)
|
||||
- Plugin system for additional data sources
|
||||
@@ -0,0 +1,36 @@
|
||||
# Backend API Service (docs-api image)
# The COPY paths below are relative to the build context, which must contain
# requirements.txt and the app/ package.
FROM python:3.11-slim

WORKDIR /app

# System packages: curl is used by the HEALTHCHECK below; libgl1 and
# libglib2.0-0 are, per the original note, needed for PDF parsing and embeddings.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        libgl1 \
        libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Pre-create the embedding cache directory so a volume can be mounted on it.
RUN mkdir -p /app/.embed_cache

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --requirement requirements.txt

# Application code
COPY app/ ./app/

# Volumes are mounted by docker-compose at these paths:
#   ./docs -> /docs
#   ./data -> /data   (holds db.sqlite)

# API port
EXPOSE 8787

# Mark the container unhealthy when GET /health stops answering successfully.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8787/health || exit 1

# Serve the FastAPI application on all interfaces.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8787"]
|
||||
@@ -0,0 +1,30 @@
|
||||
# WebUI-specific Dockerfile (uses same base as docs-api)
# The COPY paths below are prefixed with backend/, so the build context is
# expected to be the repository root.
#
# The base image is pinned to the same tag as the docs-api image: both install
# the same requirements.txt, so they should run the same interpreter version
# (and can share base layers).
FROM python:3.11-slim

# Set environment variables
# NOTE(review): WEBUI_PORT is only exported to the process; the CMD and EXPOSE
# below hard-code 8790 and do not read it.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    DOCS_API_URL=http://docs-api:8787 \
    WEBUI_PORT=8790

# Install system dependencies (curl is used by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements first for layer caching
COPY backend/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy backend code
COPY backend/app /app/backend/app

# Create uploads directory
RUN mkdir -p /app/backend/app/webui/uploads

# Expose port
EXPOSE 8790

# Health check mirroring the docs-api image.
# NOTE(review): assumes this app serves /health like the docs-api image, which
# runs the same main module — confirm.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8790/health || exit 1

CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8790"]
|
||||
@@ -0,0 +1,2 @@
|
||||
# Backend API Package - Contains all FastAPI application modules
|
||||
# The presence of this file marks the directory as a Python package
|
||||
@@ -0,0 +1,304 @@
|
||||
# Text Chunking Utilities with heading-aware splitting
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
|
||||
def estimate_tokens(text: str) -> int:
    """
    Approximate how many tokens a piece of text contains.

    The heuristic assumes four characters per token and rounds down.

    Args:
        text: The text to measure

    Returns:
        The character count integer-divided by four
    """
    chars_per_token = 4
    char_count = len(text)
    return char_count // chars_per_token
|
||||
|
||||
|
||||
def _split_at_headings(text: str) -> List[tuple]:
    """
    Split text into sections at markdown (ATX) headings.

    A heading is a line that starts with one to six '#' characters followed by
    whitespace and a title. Each heading starts a new section that runs until
    the next heading or the end of the text.

    Args:
        text: The full text

    Returns:
        - [] for empty text
        - [(text,)] (a single 1-tuple) when the text contains no headings
        - otherwise a list of (heading, body) 2-tuples in document order:
          * heading is the stripped heading line, e.g. "## Background"
          * body is the stripped text below that heading, or None when the
            heading has no body
          * text that precedes the first heading is returned first as
            ("", preamble)
    """
    if not text:
        return []

    # ^...$ with MULTILINE anchors the pattern to whole lines, so a '#' in the
    # middle of a sentence is not mistaken for a heading.
    heading_re = re.compile(r'^#{1,6}[ \t]+\S.*$', re.MULTILINE)
    headings = list(heading_re.finditer(text))

    if not headings:
        return [(text,)]

    parts = []

    preamble = text[:headings[0].start()].strip()
    if preamble:
        parts.append(("", preamble))

    for index, match in enumerate(headings):
        # The section body ends where the next heading begins.
        if index + 1 < len(headings):
            body_end = headings[index + 1].start()
        else:
            body_end = len(text)
        body = text[match.end():body_end].strip()
        parts.append((match.group(0).strip(), body or None))

    return parts
|
||||
|
||||
|
||||
def _split_at_paragraphs(text: str, max_tokens: int) -> List[str]:
    """
    Split text at paragraph boundaries (blank lines).

    Consecutive paragraphs are packed into one chunk, joined by a blank line,
    for as long as the chunk stays within the budget. A single paragraph that
    exceeds the budget is split further with _split_at_sentences.

    The budget is max_tokens * 4 characters, which matches the
    "1 token = 4 characters" approximation used throughout this module.

    Args:
        text: The text to split
        max_tokens: Maximum tokens per chunk

    Returns:
        List of non-empty chunks, each at most max_tokens * 4 characters long
    """
    if not text or not text.strip():
        return []

    max_chars = max(1, max_tokens * 4)
    chunks = []
    current_chunk = ""

    for para in re.split(r'\n\s*\n', text.strip()):
        para = para.strip()
        if not para:
            continue

        candidate = f"{current_chunk}\n\n{para}" if current_chunk else para
        if len(candidate) <= max_chars:
            current_chunk = candidate
            continue

        # The paragraph does not fit: flush what has been collected so far and
        # start over, so no chunk is ever emitted twice.
        if current_chunk:
            chunks.append(current_chunk)
            current_chunk = ""

        if len(para) > max_chars:
            # Oversize paragraph: split by sentences. All pieces but the last
            # are final; the last one stays open so following paragraphs can
            # still be packed onto it.
            pieces = _split_at_sentences(para, max_tokens)
            chunks.extend(pieces[:-1])
            current_chunk = pieces[-1] if pieces else ""
        else:
            current_chunk = para

    if current_chunk:
        chunks.append(current_chunk)

    return chunks


def _split_at_sentences(text: str, max_tokens: int) -> List[str]:
    """
    Split text at sentence boundaries.

    A sentence boundary is whitespace that follows '.', '!' or '?'. The
    punctuation stays attached to its sentence. Sentences are packed into one
    chunk, joined by a single space, for as long as the chunk stays within the
    budget of max_tokens * 4 characters. A single sentence that exceeds the
    budget is cut into fixed-size slices.

    Args:
        text: The text to split
        max_tokens: Maximum tokens per chunk

    Returns:
        List of non-empty chunks, each at most max_tokens * 4 characters long
    """
    if not text:
        return []

    max_chars = max(1, max_tokens * 4)
    chunks = []
    current_chunk = ""

    for sentence in re.split(r'(?<=[.!?])\s+', text.strip()):
        if not sentence:
            continue

        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if len(candidate) <= max_chars:
            current_chunk = candidate
            continue

        if current_chunk:
            chunks.append(current_chunk)
            current_chunk = ""

        # Hard-split a sentence that is longer than a whole chunk; the budget
        # is in characters here, so slices really are max_chars long.
        while len(sentence) > max_chars:
            chunks.append(sentence[:max_chars])
            sentence = sentence[max_chars:]

        current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks
|
||||
|
||||
|
||||
def chunk_text(text: str, max_tokens: int = 500, overlap_tokens: int = 80) -> List[str]:
    """
    Cut text into overlapping chunks, preferring natural boundaries.

    The text is stripped and scanned with a window of max_tokens * 4
    characters. Inside each window the chunk ends at the last boundary found
    in the second half of the window, trying in order: a line starting a
    markdown heading, a blank line, whitespace after '.', '!' or '?', any
    whitespace. If none is found the window is cut at its end. The next window
    starts overlap_tokens * 4 characters before the cut (capped at half the
    window size).

    Special case: when max_tokens is 20 or less and the text consists of
    several paragraphs that each fit into max_tokens, the stripped paragraphs
    are returned as they are.

    Args:
        text: The full text to chunk
        max_tokens: Maximum tokens per chunk (default 500)
        overlap_tokens: Number of overlapping tokens between chunks (default 80)

    Returns:
        List of non-empty, stripped chunk strings ([] for empty text)

    Raises:
        TypeError: If text is None
        ValueError: If max_tokens is not greater than 0
    """
    if text is None:
        raise TypeError("text must be a string")
    if not text:
        return []
    if max_tokens <= 0:
        raise ValueError("max_tokens must be greater than 0")

    char_budget = max(1, max_tokens * 4)
    overlap = min(max(overlap_tokens, 0) * 4, char_budget // 2)
    body = text.strip()

    # Tiny budgets: hand back the paragraphs unchanged when each one fits.
    paras = [piece.strip() for piece in re.split(r"\n\s*\n", body) if piece.strip()]
    if len(paras) > 1 and max_tokens <= 20 and all(estimate_tokens(p) <= max_tokens for p in paras):
        return paras

    # Boundary patterns from most to least preferred.
    boundary_patterns = (r"\n#{1,6}\s+", r"\n\s*\n", r"(?<=[.!?])\s+", r"\s+")

    pieces = []
    total = len(body)
    cursor = 0

    while cursor < total:
        limit = min(cursor + char_budget, total)

        # The rest fits into one window: emit it and stop.
        if limit == total:
            tail = body[cursor:].strip()
            if tail:
                pieces.append(tail)
            break

        window = body[cursor:limit]
        # Only boundaries in the second half of the window are eligible.
        floor = max(1, len(window) // 2)

        cut = None
        for pat in boundary_patterns:
            eligible = [m.start() for m in re.finditer(pat, window) if m.start() >= floor]
            if eligible:
                cut = max(eligible)
                break
        if cut is None:
            cut = len(window)

        stop = cursor + cut
        piece = body[cursor:stop].strip()
        if piece:
            pieces.append(piece)

        # Step back by the overlap, but always move forward.
        resume = stop - overlap if overlap else stop
        if resume <= cursor:
            resume = stop
        cursor = resume

    return [p for p in pieces if p.strip()]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Self-test executed only when this module is run as a script.

    # --- estimate_tokens: 400 characters are estimated as 100 tokens ---
    sample_400 = "a" * 400
    assert estimate_tokens(sample_400) == 100, f"Expected 100 tokens for 400 chars, got {estimate_tokens(sample_400)}"
    print(f"estimate_tokens test passed: 400 chars -> {estimate_tokens(sample_400)} tokens")

    # --- chunk_text: empty input yields no chunks ---
    assert chunk_text("") == [], "Empty text should return empty list"
    print("chunk_text empty test passed")

    # --- chunk_text: short input comes back unchanged as a single chunk ---
    small = "This is a very short text that should be returned as a single chunk."
    chunks = chunk_text(small)
    assert len(chunks) == 1, f"Short text should be one chunk, got {len(chunks)}"
    assert chunks[0] == small, "Content should match for small text"
    print("chunk_text single chunk test passed")

    # --- chunk_text: markdown document with several heading levels ---
    markdown_with_headings = "\n".join([
        "# Introduction",
        "",
        "This is the introduction section.",
        "",
        "## Background",
        "",
        "Background information goes here to make this longer and test chunking.",
        "",
        "This paragraph has more content about the background topic.",
        "",
        "### Details",
        "",
        "Specific details about the background are provided in this subsection.",
        "",
        "More details follow here to ensure we have enough text to properly test heading preservation.",
        "",
        "## Conclusion",
        "",
        "The conclusion wraps up everything nicely.",
    ])

    chunks = chunk_text(markdown_with_headings, max_tokens=50)

    # Report the chunks that begin with a heading marker
    heading_chunks = [c for c in chunks if c.strip().startswith('#')]
    print(f"\nFound {len(heading_chunks)} heading chunks:")
    for hc in heading_chunks:
        print(f" - {hc.strip()}")

    assert len(chunks) > 1, f"Should have multiple chunks, got {len(chunks)}"

    # Every chunk must stay within the limit plus a tolerance of 20 tokens
    all_under = all(estimate_tokens(c) <= 50 + 20 for c in chunks)
    assert all_under, "Some chunks exceed token limit significantly"
    print("All chunks respect token limits")

    print("\nAll tests passed!")
|
||||
@@ -0,0 +1,25 @@
|
||||
# Configuration Settings
|
||||
import os
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class Settings:
    """
    Application settings loaded from environment variables.

    Every field is read from the environment when a Settings instance is
    created (not when the class is defined), so Settings() always reflects the
    current environment. Values can still be passed explicitly as keyword
    arguments. An environment variable that is unset or empty falls back to
    the default for vector_store_port and to "" for the API key.
    """

    # Qdrant connection
    vector_store_host: str = field(
        default_factory=lambda: os.getenv("VECTOR_STORE_HOST", "qdrant")
    )
    # `or` also covers VECTOR_STORE_PORT being set to an empty string, which
    # int() would otherwise reject.
    vector_store_port: int = field(
        default_factory=lambda: int(os.getenv("VECTOR_STORE_PORT") or "6333")
    )
    collection_name: str = field(
        default_factory=lambda: os.getenv("COLLECTION_NAME", "local_context7_docs")
    )
    embedding_model_name: str = field(
        default_factory=lambda: os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")
    )

    # Storage locations and logging
    docs_path: str = field(default_factory=lambda: os.getenv("DOCS_PATH", "./docs"))
    db_path: str = field(default_factory=lambda: os.getenv("DB_PATH", "./data/db.sqlite"))
    log_level: str = field(default_factory=lambda: os.getenv("LOG_LEVEL", "INFO"))

    # API key for the docs API. API_KEY_DOCS_API is the name this module has
    # always read and takes precedence; DOCS_API_KEY is the name documented in
    # .env.example and the README, accepted so that following the docs
    # actually enables authentication.
    api_key_docs_api: str = field(
        default_factory=lambda: os.getenv("API_KEY_DOCS_API")
        or os.getenv("DOCS_API_KEY", "")
    )

    @property
    def is_auth_enabled(self) -> bool:
        """Return True if API key authentication is enabled (a non-empty key is set)."""
        return bool(self.api_key_docs_api)
|
||||
|
||||
|
||||
settings = Settings()
|
||||
@@ -0,0 +1,384 @@
|
||||
# SQLite Database Layer for local-context7
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
from typing import List, Dict, Any, Optional
|
||||
from .config import settings
|
||||
|
||||
try:
|
||||
from qdrant_client import QdrantClient
|
||||
except ImportError:
|
||||
QdrantClient = None
|
||||
|
||||
|
||||
def get_db_path() -> Path:
    """Return the configured SQLite file location (settings.db_path) as a Path."""
    configured_location = settings.db_path
    return Path(configured_location)
|
||||
|
||||
|
||||
def ensure_db_dir():
    """
    Create the directory that holds the SQLite file, including missing parents.

    Calling this repeatedly is harmless: an existing directory is left as is.
    """
    data_dir = get_db_path().parent
    data_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
# Initialize DB directory at module load time (safe to run multiple times)
|
||||
ensure_db_dir()
|
||||
|
||||
|
||||
def get_connection():
    """
    Open a new connection to the SQLite database at get_db_path().

    Rows are returned as sqlite3.Row objects, which allow access by column
    name as well as by index (they are not plain dicts).

    NOTE(review): the connection does not run "PRAGMA foreign_keys = ON", so
    the ON DELETE CASCADE declared for documents.library_id is presumably not
    enforced by SQLite — confirm whether that is intended.

    Returns:
        sqlite3.Connection with row_factory set to sqlite3.Row
    """
    database_file = str(get_db_path())
    connection = sqlite3.connect(database_file)
    connection.row_factory = sqlite3.Row
    return connection
|
||||
|
||||
|
||||
def init_db():
    """
    Create the SQLite schema (tables and indexes) if it does not exist yet.

    Creates:
        - libraries table (id, name, description, source_path, created_at, updated_at)
        - documents table (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
        - indexes on documents(library_id) and libraries(updated_at)

    Returns:
        {"success": True} when every statement ran and was committed, or
        {"success": False, "error": <message>} after rolling back on failure.
    """
    # Executed in order inside a single transaction.
    schema_statements = (
        # Enable legacy mode for easier schema handling
        "PRAGMA legacy_alter_table = ON",
        # Libraries table
        """
            CREATE TABLE IF NOT EXISTS libraries (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                description TEXT,
                source_path TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            )
        """,
        # Documents table (one row per chunk)
        """
            CREATE TABLE IF NOT EXISTS documents (
                id TEXT PRIMARY KEY,
                library_id TEXT NOT NULL,
                path TEXT NOT NULL,
                title TEXT,
                content TEXT,
                chunk_index INTEGER,
                token_estimate INTEGER,
                created_at TEXT NOT NULL,
                FOREIGN KEY (library_id) REFERENCES libraries(id) ON DELETE CASCADE
            )
        """,
        # Indexes for better query performance
        """
            CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id)
        """,
        """
            CREATE INDEX IF NOT EXISTS idx_libraries_updated_at ON libraries(updated_at)
        """,
    )

    conn = get_connection()

    try:
        for statement in schema_statements:
            conn.execute(statement)

        conn.commit()
        return {"success": True}

    except Exception as exc:
        conn.rollback()
        return {"success": False, "error": str(exc)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def upsert_library(
    library_id: str,
    name: str,
    description: Optional[str] = None,
    source_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Insert or update a library record.

    Args:
        library_id: Unique identifier for the library
        name: Library name
        description: Optional description
        source_path: Path to library source files; falls back to
            ``library_id`` when omitted or empty

    Returns:
        On success ``{"success": True, "id": library_id, "exists": <bool>}``
        where ``exists`` tells whether the row was already present (update)
        or not (insert). On failure ``{"success": False, "error": <message>}``
        after the transaction has been rolled back.
    """
    conn = get_connection()

    try:
        # datetime.utcnow() is deprecated since Python 3.12. This yields the
        # same naive-UTC ISO string, so stored timestamps keep their format.
        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        source_path = source_path or library_id

        # Check if library exists so the caller can be told insert vs update
        cursor = conn.execute("SELECT id FROM libraries WHERE id = ?", (library_id,))
        exists = cursor.fetchone() is not None

        if exists:
            # Update existing library (created_at is left untouched)
            conn.execute("""
                UPDATE libraries SET
                    name = ?, description = ?, source_path = ?, updated_at = ?
                WHERE id = ?
            """, (name, description, source_path, now, library_id))
        else:
            # Insert new library
            conn.execute("""
                INSERT INTO libraries (id, name, description, source_path, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (library_id, name, description, source_path, now, now))

        conn.commit()
        return {"success": True, "id": library_id, "exists": exists}

    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def insert_document_chunk(
    doc_id: str,
    library_id: str,
    path: str,
    title: Optional[str] = None,
    content: Optional[str] = None,
    chunk_index: Optional[int] = None,
    token_estimate: int = 0,
) -> Dict[str, Any]:
    """
    Insert or update a document chunk record.

    Args:
        doc_id: Unique identifier for this chunk
        library_id: Foreign key to libraries table
        path: Relative file path within the library
        title: Optional document title
        content: Full text content of the chunk
        chunk_index: Index within the full document (NULL if not chunked)
        token_estimate: Estimated token count (a falsy value is stored as 0)

    Returns:
        On success ``{"success": True, "id": doc_id, "exists": <bool>}`` where
        ``exists`` tells whether a row with this id was already present.
        On failure ``{"success": False, "error": <message>}`` after rollback.
    """
    conn = get_connection()

    try:
        # datetime.utcnow() is deprecated since Python 3.12. This yields the
        # same naive-UTC ISO string, so stored timestamps keep their format.
        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        tokens = token_estimate or 0

        # Check if document chunk exists
        cursor = conn.execute(
            "SELECT id FROM documents WHERE id = ?", (doc_id,)
        )
        exists = cursor.fetchone() is not None

        if exists:
            # NOTE(review): created_at is overwritten on update as well; the
            # table has no updated_at column. Confirm this is intended.
            conn.execute(
                """
                UPDATE documents
                SET library_id = ?, path = ?, title = ?, content = ?,
                    chunk_index = ?, token_estimate = ?, created_at = ?
                WHERE id = ?
                """,
                (library_id, path, title, content, chunk_index, tokens, now, doc_id),
            )
        else:
            conn.execute(
                """
                INSERT INTO documents
                    (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (doc_id, library_id, path, title, content, chunk_index, tokens, now),
            )

        conn.commit()

        return {"success": True, "id": doc_id, "exists": exists}

    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def clear_library_documents(library_id: str) -> Dict[str, Any]:
    """
    Remove every document chunk that belongs to one library.

    Args:
        library_id: The library whose chunks are deleted

    Returns:
        {"success": True, "deleted": <row count>, "library_id": library_id}
        on success, or {"success": False, "error": <message>} after rollback.
    """
    conn = get_connection()

    try:
        removed = conn.execute(
            "DELETE FROM documents WHERE library_id = ?", (library_id,)
        ).rowcount
        conn.commit()
    except Exception as exc:
        conn.rollback()
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True, "deleted": removed, "library_id": library_id}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def delete_library(library_id: str) -> Dict[str, Any]:
    """Delete a library row together with its document chunks.

    The chunks are removed first with an explicit DELETE, then the library row.

    Returns:
        {"success": True, "deleted": <library rows removed>, "library_id": ...}
        on success, or {"success": False, "error": <message>} after rollback.
    """
    conn = get_connection()

    try:
        conn.execute("DELETE FROM documents WHERE library_id = ?", (library_id,))
        library_rows = conn.execute(
            "DELETE FROM libraries WHERE id = ?", (library_id,)
        ).rowcount
        conn.commit()
    except Exception as exc:
        conn.rollback()
        return {"success": False, "error": str(exc)}
    else:
        return {"success": True, "deleted": library_rows, "library_id": library_id}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def list_libraries() -> List[Dict[str, Any]]:
    """
    Fetch every library record, most recently updated first.

    Returns:
        List of dictionaries, one per library row (column name -> value).
        If the query fails, a single dict {"success": False, "error": <message>}
        is returned instead of a list.
    """
    conn = get_connection()

    try:
        cursor = conn.execute("SELECT * FROM libraries ORDER BY updated_at DESC")

        # Pair each row's values with the column names reported by the cursor
        field_names = [meta[0] for meta in cursor.description]
        return [dict(zip(field_names, record)) for record in cursor]

    except Exception as exc:
        return {"success": False, "error": str(exc)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def search_libraries(query: str) -> List[Dict[str, Any]]:
    """
    Search libraries whose id, name or description contains ``query``.

    This is a case-insensitive substring match (SQL ``LIKE``), not a
    full-text search. The query is matched literally: ``%`` and ``_`` are
    escaped so they are not treated as LIKE wildcards.

    Args:
        query: Search query string

    Returns:
        List of matching library dictionaries, most recently updated first
        (empty if none found). If the query fails, a single dict
        {"success": False, "error": <message>} is returned instead.
    """
    conn = get_connection()

    try:
        # Escape the escape character first, then the two LIKE wildcards.
        literal = (
            str(query)
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like_query = f"%{literal}%"

        cursor = conn.execute("""
            SELECT * FROM libraries
            WHERE lower(id) LIKE lower(?) ESCAPE '\\'
               OR lower(name) LIKE lower(?) ESCAPE '\\'
               OR lower(coalesce(description, '')) LIKE lower(?) ESCAPE '\\'
            ORDER BY updated_at DESC
        """, (like_query, like_query, like_query))

        # Convert to list of dicts
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor]

    except Exception as e:
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_document_by_id(doc_id: str) -> Optional[Dict[str, Any]]:
    """
    Look up one document chunk by its primary key.

    Args:
        doc_id: The document ID to fetch

    Returns:
        Dictionary (column name -> value) for the row, or None when no row has
        this id. If the query fails, {"success": False, "error": <message>}
        is returned.
    """
    conn = get_connection()

    try:
        cursor = conn.execute("SELECT * FROM documents WHERE id = ?", (doc_id,))
        record = cursor.fetchone()

        if record is not None:
            # Built by hand so the result is a plain dict
            field_names = [meta[0] for meta in cursor.description]
            return dict(zip(field_names, record))

        return None

    except Exception as exc:
        return {"success": False, "error": str(exc)}
    finally:
        conn.close()
|
||||
|
||||
|
||||
def get_chunks_for_library(library_id: str) -> List[Dict[str, Any]]:
    """
    Fetch every document chunk stored for a library.

    Rows are ordered by ``chunk_index`` descending.

    Args:
        library_id: The library ID to fetch chunks for

    Returns:
        List of dictionaries, one per chunk row (column name -> value).
        If the query fails, a single dict {"success": False, "error": <message>}
        is returned instead of a list.
    """
    conn = get_connection()

    try:
        sql = "SELECT * FROM documents WHERE library_id = ? ORDER BY chunk_index DESC"
        cursor = conn.execute(sql, (library_id,))

        field_names = [meta[0] for meta in cursor.description]
        return [dict(zip(field_names, record)) for record in cursor]

    except Exception as exc:
        return {"success": False, "error": str(exc)}
    finally:
        conn.close()
|
||||
@@ -0,0 +1,181 @@
|
||||
# Local Embedding Generation using FastEmbed
|
||||
import asyncio
|
||||
from typing import List
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
# Module-level singleton for cached model instance
|
||||
_embedding_model = None
|
||||
_embedding_size = 384 # BAAI/bge-small-en-v1.5 output dimension
|
||||
|
||||
|
||||
def _load_model():
    """Create the FastEmbed model on first use and return the cached instance.

    Raises:
        ImportError: If ``fastembed`` cannot be imported (re-raised with an
            installation hint).
        RuntimeError: If loading fails; disk-space failures are re-raised with
            a more detailed message, any other RuntimeError is re-raised as is.
    """
    global _embedding_model, _embedding_size

    disk_full_markers = ("No space left", "disk quota exceeded")

    try:
        from fastembed import TextEmbedding

        if _embedding_model is None:
            print("Loading embedding model (this may take a few minutes on first run)...")

            # Model files are cached in the relative directory ".embed_cache"
            _embedding_model = TextEmbedding(
                model_name="BAAI/bge-small-en-v1.5",
                cache_dir=".embed_cache",
            )
            print("Embedding model loaded successfully.")

        return _embedding_model

    except ImportError as e:
        install_hint = (
            "FastEmbed is not installed. Please install with:\n"
            " pip install fastembed\n\n"
            f"Import error details: {e}"
        )
        raise ImportError(install_hint) from e

    except RuntimeError as e:
        # Model download/installation failed
        details = str(e)
        if not any(marker in details for marker in disk_full_markers):
            raise

        disk_hint = (
            "Failed to load embedding model due to disk space constraints.\n\n"
            "Please free up space on your system (at least 500MB required).\n"
            "Or specify a custom cache directory with available space:\n"
            " from fastembed import TextEmbedding\n"
            " model = TextEmbedding(model_name='...', cache_dir='/path/to/large/storage')\n\n"
            f"Error: {e}"
        )
        raise RuntimeError(disk_hint) from e
|
||||
|
||||
|
||||
def get_embedding_model():
    """
    Return the shared embedding model, creating it on the first call.

    Returns:
        FastEmbed TextEmbedding instance held in the module-level cache

    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model download/load failed
    """
    global _embedding_model

    if _embedding_model is not None:
        return _embedding_model

    _embedding_model = _load_model()
    return _embedding_model
|
||||
|
||||
|
||||
def embed_text(text: str) -> List[float]:
    """
    Generate embedding for a single text.

    Args:
        text: The text string to embed

    Returns:
        List of floats representing the embedding vector. For an empty or
        non-string input a zero vector of ``get_embedding_size()`` elements
        is returned without loading the model.

    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model loading failed
    """
    if not text or not isinstance(text, str):
        return [0.0] * get_embedding_size()

    model = get_embedding_model()

    # TextEmbedding.embed() produces its vectors lazily (it returns a
    # generator), so the result cannot be indexed with [0]; pull the first
    # item instead. next(iter(...)) also works if a list is returned.
    vector = next(iter(model.embed([text])))

    # Same conversion rule as embed_texts(): numpy arrays become plain lists.
    return vector.tolist() if hasattr(vector, "tolist") else list(vector)
|
||||
|
||||
|
||||
def embed_texts(texts: List[str]) -> List[List[float]]:
    """
    Embed a batch of texts in one model call.

    Args:
        texts: List of text strings to embed

    Returns:
        One embedding vector per input text, in input order. An empty (or
        otherwise falsy) input yields an empty list without loading the model.

    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model loading failed
    """
    if not texts:
        return []

    vectors = get_embedding_model().embed(texts)

    # Array-like results are converted to plain lists; anything else is kept as is
    return [
        vec.tolist() if hasattr(vec, 'tolist') else vec
        for vec in vectors
    ]
|
||||
|
||||
|
||||
def get_embedding_size() -> int:
    """
    Report the embedding vector dimension.

    Returns:
        The module-level ``_embedding_size`` value (384 for bge-small-en-v1.5)

    Note:
        This is the configured default; it is not read back from the loaded model.
    """
    dimension = _embedding_size
    return dimension
|
||||
|
||||
|
||||
# Async wrapper for compatibility with existing code
|
||||
async def generate_embeddings(chunks: List[str]) -> List[List[float]]:
    """
    Coroutine front-end for embed_texts, kept for compatibility.

    The embedding work itself runs synchronously inside this coroutine.

    Args:
        chunks: List of text strings to embed

    Returns:
        List of embedding vectors
    """
    vectors = embed_texts(chunks)
    return vectors
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test for this module (runs only when executed as a script)
    print("Testing embeddings module...\n")

    # Reported vector dimension
    dimension = get_embedding_size()
    print(f"Embedding dimension: {dimension}")

    # Single text embedding
    sample = "Hello, world! This is a test of the embedding generation."
    try:
        vector = embed_text(sample)
        print(f"\nSingle text embedding shape: ({len(vector)},)")
        print(f"First 5 values: {vector[:5]}")
        print("✓ Single embedding works")
    except Exception as exc:
        print(f"✗ Single embedding failed: {exc}")

    # Batch embedding
    samples = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is a subset of artificial intelligence.",
        "Natural language processing enables computers to understand human language."
    ]
    try:
        batch = embed_texts(samples)
        print(f"\nBatch embedding shape: ({len(batch)}, {len(batch[0])})")
        print("✓ Batch embeddings work")
    except Exception as exc:
        print(f"✗ Batch embeddings failed: {exc}")

    # Empty inputs never touch the model
    assert embed_text("") == [0.0] * dimension, "Empty text should return zero vector"
    assert embed_texts([]) == [], "Empty list should return empty list"
    print("✓ Empty input handling works")

    print("\n✅ All tests passed!")
|
||||
@@ -0,0 +1,389 @@
|
||||
# Git Source Operations for Repository Cloning and File Discovery
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
|
||||
def get_repos_dir() -> Path:
    """Return the directory under which cloned repositories are stored.

    The location is ``data/repos`` three directory levels above this file.
    """
    project_root = Path(__file__).parent.parent.parent
    return project_root.joinpath("data", "repos")
|
||||
|
||||
|
||||
def ensure_repos_dir():
    """Create the repository storage directory if needed and return its path.

    Safe to call repeatedly: an already existing directory is left untouched.
    """
    target = get_repos_dir()
    target.mkdir(parents=True, exist_ok=True)
    return target
|
||||
|
||||
|
||||
# Initialize repos directory at module load time (safe to run multiple times)
|
||||
ensure_repos_dir()
|
||||
|
||||
|
||||
class GitCloneError(Exception):
    """Raised when cloning, fetching or checking out a git repository fails."""
|
||||
|
||||
|
||||
def clone_or_update_repo(
    repo_id: str,
    repo_url: str,
    branch: str,
    repos_base: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Clone a git repository or update an existing clone.

    If ``<repos_base>/<repo_id>`` already holds a clone (it contains ``.git``)
    the clone is fetched and hard-reset to ``origin/<branch>``. Otherwise the
    repository is cloned into that directory; a leftover directory without
    git metadata is removed first.

    Args:
        repo_id: Unique identifier for this repository (used in paths)
        repo_url: Git URL to clone from
        branch: Branch name to checkout
        repos_base: Base directory for repos (defaults to get_repos_dir())

    Returns:
        Dict with keys ``success`` (True), ``repo_path``, ``url`` and ``branch``

    Raises:
        GitCloneError: If any git command exits non-zero, if ``repo_url`` is
            missing when a clone is needed, or on any other failure
    """
    # Imported here because this module does not import subprocess at file level.
    import subprocess

    def _git(failure_prefix: str, *args: str) -> None:
        """Run one git command and raise GitCloneError if it exits non-zero."""
        completed = subprocess.run(
            ["git", *args],
            capture_output=True,
            text=True
        )
        if completed.returncode != 0:
            raise GitCloneError(f"{failure_prefix}: {completed.stderr}")

    repos_base = Path(repos_base) if repos_base is not None else get_repos_dir()
    repo_path = repos_base / repo_id

    try:
        if (repo_path / ".git").exists():
            # Update existing clone
            print(f" [Git] Updating existing clone at {repo_path}")

            _git("Failed to fetch", "-C", str(repo_path), "fetch", "origin")
            _git(
                "Failed to reset",
                "-C", str(repo_path), "reset", "--hard", "origin/" + branch,
            )
        else:
            if not repo_url:
                raise GitCloneError(f"repo_url is required to clone '{repo_id}'")

            if repo_path.exists():
                # A directory without .git cannot be updated, and running
                # "git -C" inside it would act on whatever enclosing repository
                # git discovers. Start from a clean clone instead.
                print(f" [Git] Removing stale directory at {repo_path}")
                if repo_path.is_dir() and not repo_path.is_symlink():
                    shutil.rmtree(repo_path)
                else:
                    repo_path.unlink()

            # Clone new repository
            print(f" [Git] Cloning {repo_url} to {repo_path}")
            repos_base.mkdir(parents=True, exist_ok=True)

            _git(
                "Failed to clone",
                "clone",
                "--branch", branch,
                "--single-branch",
                "--", repo_url, str(repo_path),
            )

        print(f" [Git] Checked out branch: {branch}")

        return {
            "success": True,
            "repo_path": str(repo_path),
            "url": repo_url,
            "branch": branch
        }

    except GitCloneError:
        raise
    except Exception as e:
        raise GitCloneError(f"Failed to clone/update repo: {e}") from e
|
||||
|
||||
|
||||
def discover_files(
    repo_path: Path,
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None
) -> List[Dict[str, Any]]:
    """
    Discover files in a git repository respecting include/exclude paths.

    A path matches an include/exclude entry when it equals the entry or lies
    below it. Excludes win over includes. Entries are visited in name order.

    Args:
        repo_path: Path to the cloned repository
        include_paths: List of paths relative to repo root to include
            (if None or empty, everything is considered)
        exclude_paths: List of paths relative to repo root to exclude

    Returns:
        List of dicts with format:
            {
                "path": "docs/hooks.md",  # Relative to repo root, "/"-separated
                "full_path": "/full/path/to/repo/docs/hooks.md",
                "is_binary": False  # True if a NUL byte is found in the first 8KB
            }
    """
    def _normalize(raw) -> str:
        """Turn a user-supplied path into a clean '/'-separated relative path."""
        # Path() drops "./" prefixes and trailing slashes.
        cleaned = Path(str(raw).replace("\\", "/")).as_posix().lstrip("/")
        return cleaned or "."

    include_patterns = [_normalize(p) for p in include_paths] if include_paths else []
    exclude_patterns = {_normalize(p) for p in exclude_paths} if exclude_paths else set()

    # No include list, or an entry naming the repo root, means "everything".
    include_all = not include_patterns or "." in include_patterns

    def _is_under(rel: str, base: str) -> bool:
        """True if rel is base itself or located below base."""
        return rel == base or rel.startswith(base + "/")

    def should_exclude(rel: str) -> bool:
        """Check if a relative path matches any exclude pattern."""
        return any(_is_under(rel, exc) for exc in exclude_patterns)

    def should_include(rel: str) -> bool:
        """Check if a relative path matches any include pattern."""
        return include_all or any(_is_under(rel, inc) for inc in include_patterns)

    def should_descend(rel: str) -> bool:
        """True for included directories and for ancestors of an include path."""
        return should_include(rel) or any(
            inc.startswith(rel + "/") for inc in include_patterns
        )

    def is_probably_binary(filepath: str) -> bool:
        """Simple binary detection based on file extension and first bytes."""
        ext = Path(filepath).suffix.lower()
        text_extensions = {'.md', '.txt', '.py', '.js', '.ts', '.json',
                           '.yaml', '.yml', '.html', '.css', '.sh', '.sql'}

        if ext in text_extensions:
            return False

        # Check for null bytes in first 8KB
        try:
            with open(filepath, 'rb') as f:
                return b'\x00' in f.read(8192)
        except OSError:
            # Unreadable file: treat as text rather than fail the whole scan
            return False

    discovered: List[Dict[str, Any]] = []

    def walk_and_collect(current: Path, rel_prefix: str) -> None:
        """Recursive walk function."""
        try:
            # DirEntry objects are not orderable, so sort explicitly by name.
            with os.scandir(current) as it:
                entries = sorted(it, key=lambda entry: entry.name)
        except PermissionError:
            # Skip directories we can't read
            return

        for entry in entries:
            rel_path = f"{rel_prefix}/{entry.name}" if rel_prefix else entry.name

            # Filter by exclude paths first
            if should_exclude(rel_path):
                continue

            entry_path = current / entry.name

            if entry.is_file():
                if should_include(rel_path):
                    discovered.append({
                        "path": rel_path,
                        "full_path": str(entry_path),
                        "is_binary": is_probably_binary(str(entry_path))
                    })
            elif entry.is_dir():
                # Only descend where an included file can still be found
                if should_descend(rel_path):
                    walk_and_collect(entry_path, rel_path)

    # Walk the repository starting from repo root
    walk_and_collect(Path(repo_path), "")

    return discovered
|
||||
|
||||
|
||||
async def ingest_git_source(
    library_id: str,
    name: str,
    description: Optional[str] = None,
    repo_url: str = None,
    branch: str = "main",
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None,
    repos_base: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Ingest a git repository as a new library.

    Clones the repo (or updates it if a clone exists), counts the files that
    match the include/exclude paths, and hands the clone directory to the
    existing library ingestion pipeline.

    Args:
        library_id: Unique identifier for this library
        name: Library display name
        description: Optional description
        repo_url: Git repository URL to clone from
        branch: Branch to checkout (default: main)
        include_paths: Paths relative to repo root to include (if None, all dirs considered)
        exclude_paths: Paths relative to repo root to exclude
        repos_base: Base directory for clones (defaults to get_repos_dir())

    Returns:
        Dict with ``success``, ``library_id`` and ``files_discovered``. When
        files were found it also carries ``name``, ``chunks_created`` and
        ``vectors_added`` taken from the ingestion result; otherwise a
        ``message`` explaining that nothing matched.

    Raises:
        GitCloneError: If git operations fail
    """
    # Imported lazily: .ingest imports this module at load time.
    from .ingest import ingest_library, DOCS_PATH

    print(f"\n[Git Ingestion] Processing library: {library_id}")
    print(f" Source: {repo_url or '(local)'}")

    # Ensure repos directory exists
    repos_base = repos_base or get_repos_dir()
    repos_base.mkdir(parents=True, exist_ok=True)

    repo_id = f"{library_id}-git"

    # Clone or update the repo
    clone_result = clone_or_update_repo(
        repo_id=repo_id,
        repo_url=repo_url,
        branch=branch,
        repos_base=repos_base
    )

    repo_path = Path(clone_result["repo_path"])

    print(f" [Git] Found files in {repo_path}")

    # Git's own metadata is never documentation: leave it out of discovery.
    # The .git directory itself is kept on disk, because deleting it would
    # make the next sync unable to fetch/reset this clone.
    # NOTE(review): ingest_library scans the directory by file extension;
    # confirm nothing under .git matches its supported extensions.
    effective_excludes = list(exclude_paths or [])
    if ".git" not in effective_excludes:
        effective_excludes.append(".git")

    # Discover files respecting include/exclude paths
    files = discover_files(
        repo_path=repo_path,
        include_paths=include_paths,
        exclude_paths=effective_excludes
    )

    print(f" [Git] Discovered {len(files)} file(s)")

    if not files:
        return {
            "success": True,
            "library_id": library_id,
            "message": "No files found matching include/exclude criteria",
            "files_discovered": 0
        }

    # ingest_library resolves source_path against DOCS_PATH, so express the
    # clone location relative to DOCS_PATH. When the clones live directly in
    # DOCS_PATH this is exactly repo_id.
    clone_dir = repo_path.resolve()
    try:
        source_path = os.path.relpath(clone_dir, Path(DOCS_PATH).resolve())
    except ValueError:
        # No relative path exists (e.g. different drives on Windows); an
        # absolute path overrides DOCS_PATH when the two are joined.
        source_path = str(clone_dir)

    # Ingest using existing library ingestion pipeline
    result = await ingest_library(
        library_id=library_id,
        name=name,
        description=description,
        source_path=source_path
    )

    return {
        "success": result.get("success", False),
        "library_id": library_id,
        "name": name,
        "files_discovered": len(files),
        "chunks_created": result.get("chunks_created", 0),
        "vectors_added": result.get("vectors_added", 0)
    }
|
||||
|
||||
|
||||
async def sync_sources(
    sources_config: Optional[List[Dict[str, Any]]] = None,
    repos_base: Optional[Path] = None
) -> List[Dict[str, Any]]:
    """
    Sync all git sources defined in config.

    Sources are processed one after another; a failure in one source is
    recorded in its result entry and does not stop the remaining sources.

    Args:
        sources_config: List of source configs (same format as docs_sources.yaml).
            When None, the ``sources`` list is read from ``docs_sources.yaml``
            three directory levels above this file.
        repos_base: Base directory for repos

    Returns:
        List of results for each source. If the default config file is needed
        but missing, a single-element list with an error entry is returned.
    """
    if sources_config is None:
        # Load from default config file
        import yaml

        config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"

        if not config_path.exists():
            return [{"success": False, "error": f"Config not found: {config_path}"}]

        with open(config_path, encoding="utf-8") as f:
            data = yaml.safe_load(f)

        # An empty file loads as None and a non-mapping document has no
        # "sources" key; both mean "no sources" rather than a crash.
        if not isinstance(data, dict):
            data = {}
        sources_config = data.get("sources") or []

    results = []

    for source in sources_config:
        try:
            result = await ingest_git_source(
                library_id=source.get("library_id"),
                name=source.get("name"),
                description=source.get("description"),
                repo_url=source.get("repo_url"),
                branch=source.get("branch", "main"),
                include_paths=source.get("include_paths"),
                exclude_paths=source.get("exclude_paths"),
                repos_base=repos_base
            )
        except GitCloneError as e:
            result = {
                "success": False,
                "library_id": source.get("library_id", "unknown"),
                "error": str(e)
            }
        except Exception as e:
            result = {
                "success": False,
                "library_id": source.get("library_id", "unknown"),
                "error": f"Unexpected error: {e}"
            }

        results.append(result)

    return results
|
||||
@@ -0,0 +1,387 @@
|
||||
# Document Ingestion Logic
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional, BinaryIO
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
|
||||
# Import local modules
|
||||
from .config import settings
|
||||
from .chunking import chunk_text, estimate_tokens
|
||||
from .embeddings import embed_texts
|
||||
from .vector_store import upsert_chunks
|
||||
from .db import insert_document_chunk, upsert_library, clear_library_documents
|
||||
from .git_source import ingest_git_source
|
||||
|
||||
SUPPORTED_EXTENSIONS = {'.md', '.txt', '.py', '.js', '.ts', '.json',
|
||||
'.yaml', '.yml', '.html', '.css', '.pdf'}
|
||||
|
||||
# Default documents path from environment or fallback
|
||||
DOCS_PATH = Path(os.getenv("DOCS_PATH", "./docs"))
|
||||
|
||||
|
||||
def get_file_size(path: Path) -> int:
    """Return the size of ``path`` in bytes, or -1 if it cannot be stat'ed."""
    try:
        size = path.stat().st_size
    except OSError:
        size = -1
    return size
|
||||
|
||||
|
||||
async def read_document_file(path: Path) -> str:
    """
    Read document content from a file.

    PDF files are read page by page with ``pypdf``; every other supported
    extension is read as UTF-8 text.

    Args:
        path: Path to the file

    Returns:
        Content as string. An empty string is returned when the file does not
        exist, has an unsupported extension, contains only whitespace, or
        cannot be read.

    Raises:
        ImportError: If a PDF is requested but ``pypdf`` is not installed
    """
    if not path.exists():
        return ""

    # Check extension
    suffix = path.suffix.lower()
    if suffix == '.pdf':
        try:
            # Imported inside the try so a missing package reaches the
            # ImportError handler below and produces the install hint.
            from pypdf import PdfReader

            reader = PdfReader(str(path))
            pages = []
            for page in reader.pages:
                text = page.extract_text()
                if text:
                    pages.append(text)
            return "\n\n".join(pages)
        except ImportError as e:
            raise ImportError("pypdf is required for PDF files. Install with: pip install pypdf") from e
        except Exception as e:
            print(f" Warning: Could not read PDF {path}: {e}")
            return ""
    elif suffix not in SUPPORTED_EXTENSIONS:
        print(f" Unsupported file type: {suffix}")
        return ""

    # Read text-based files
    try:
        content = path.read_text(encoding='utf-8')
        return content if content.strip() else ""
    except Exception as e:
        print(f" Warning: Could not read {path}: {e}")
        return ""
|
||||
|
||||
|
||||
async def ingest_library(library_id: str, name: str, description: Optional[str] = None, source_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Ingest all documents for a library.

    The library folder is scanned recursively, every supported file is read,
    chunked and embedded, previously stored chunks for the library are
    cleared, and the new chunks are written to SQLite and the vector store.

    Args:
        library_id: Unique identifier for the library
        name: Library name
        description: Optional description
        source_path: Path to library folder (relative to DOCS_PATH).
            Defaults to ``library_id`` when omitted.

    Returns:
        Summary dict. On success it contains ``success``, ``library_id``,
        ``files_processed``, ``chunks_created``, ``chunks_saved`` and
        ``vectors_added``; when the folder is missing it contains
        ``success=False`` and ``error``.
    """
    print(f"\n[Library] Processing: {library_id}")
    if source_path:
        print(f" Source: {source_path}")

    # Without this fallback a missing source_path crashed on `DOCS_PATH / None`.
    relative_dir = source_path or library_id

    # Ensure library record exists
    result = upsert_library(library_id, name, description, relative_dir)
    print(f" [{result.get('success', False)}] Library record: {'created' if not result.get('exists') else 'updated'}")

    library_dir = DOCS_PATH / relative_dir
    if not library_dir.exists():
        print(f" Error: Directory does not exist: {library_dir}")
        return {"success": False, "error": f"Directory not found: {library_dir}"}

    # Find all supported files (recursive); sorted so runs are reproducible.
    print(f" [Library] Scanning for files in: {library_dir}")
    doc_files = sorted(
        candidate
        for candidate in library_dir.rglob('*')
        if candidate.is_file() and candidate.suffix.lower() in SUPPORTED_EXTENSIONS
    )
    print(f" [Library] Found {len(doc_files)} document(s)")

    # Clear old chunks for this library
    print(f" [Library] Clearing existing chunks...")
    clear_result = clear_library_documents(library_id)
    if not clear_result.get('success'):
        print(f" Warning: Could not clear library docs: {clear_result}")
    else:
        print(f" [Library] Cleared {clear_result.get('deleted', 0)} existing chunks")

    # Process documents
    all_chunks = []
    processed_files = 0

    for file_path in doc_files:
        print(f" [File] Reading: {file_path.relative_to(library_dir)}")
        content = await read_document_file(file_path)
        if not content:
            continue

        # Estimate tokens and chunk
        num_tokens = estimate_tokens(content)
        chunks = chunk_text(content, max_tokens=500, overlap_tokens=80)
        if not chunks:
            print(f" [File] No valid chunks from {file_path.name}")
            continue

        print(f" Chunked into {len(chunks)} pieces (approx. {num_tokens} tokens)")
        embeddings = embed_texts(chunks)
        if len(embeddings) != len(chunks):
            # Guard against pairing chunks with the wrong vectors.
            print(f" Warning: Got {len(embeddings)} embeddings for {len(chunks)} chunks in {file_path.name}; skipping file")
            continue

        base_path = file_path.relative_to(library_dir).as_posix()
        title = Path(base_path).stem

        for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
            all_chunks.append({
                # Library id and relative path are part of the id so that
                # same-named files (e.g. README.md in two folders or two
                # libraries) no longer produce colliding ids.
                "id": f"{library_id}:{base_path}:{i}",
                "library_id": library_id,
                "path": base_path,
                "title": title,
                "content": chunk,
                "chunk_index": i,
                "embedding": embedding,
            })

        processed_files += 1

    print(f" [Library] Processed {processed_files} file(s), {len(all_chunks)} total chunks")

    # Save chunks to SQLite, reporting (rather than hiding) failed inserts.
    chunks_saved = 0
    if all_chunks:
        for chunk in all_chunks:
            insert_result = insert_document_chunk(
                doc_id=chunk["id"],
                library_id=chunk["library_id"],
                path=chunk["path"],
                title=chunk.get("title"),
                content=chunk["content"],
                chunk_index=chunk["chunk_index"],
                token_estimate=estimate_tokens(chunk["content"])
            )
            if insert_result.get('success'):
                chunks_saved += 1
            else:
                print(f" Warning: Could not save chunk {chunk['id']}: {insert_result.get('error')}")
        print(f" [Library] Saved {chunks_saved} of {len(all_chunks)} chunks to SQLite")
    else:
        print(f" [Library] No chunks to save to SQLite")

    # Save vectors to Qdrant
    vectors_added = 0
    if all_chunks:
        upsert_result = await upsert_chunks(all_chunks)
        vectors_added = upsert_result.get('points_added', 0)
        print(f" [Library] Vector store: {upsert_result.get('success', False)} ({vectors_added} added)")
    else:
        print(f" [Library] No vectors to add to Qdrant")

    return {
        "success": True,
        "library_id": library_id,
        "files_processed": processed_files,
        "chunks_created": len(all_chunks),
        "chunks_saved": chunks_saved,
        "vectors_added": vectors_added
    }
|
||||
|
||||
|
||||
async def ingest_git_source_from_config(
    repo_url: str,
    branch: str = "main",
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None,
    repos_base: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Ingest a git repository defined in sources configuration.

    The library id is derived from the last path segment of ``repo_url``
    (with a trailing ``.git`` removed); the work itself is delegated to
    ``ingest_git_source``.

    Args:
        repo_url: Git repository URL to clone from
        branch: Branch to checkout (default: main)
        include_paths: Paths relative to repo root to include (if None, all dirs considered)
        exclude_paths: Paths relative to repo root to exclude
        repos_base: Base directory for cloned repos (defaults to ./data/repos)

    Returns:
        Dict with operation result, as returned by ``ingest_git_source``

    Raises:
        GitCloneError: If git operations fail
    """
    import urllib.parse

    parsed = urllib.parse.urlparse(repo_url)

    # str.rstrip('.git') removes any trailing '.', 'g', 'i', 't' characters
    # (turning "widget.git" into "widge"), so strip the literal suffix instead.
    path_part = parsed.path.rstrip('/')
    if path_part.endswith('.git'):
        path_part = path_part[:-len('.git')]

    library_id = Path(path_part).name or "unknown"

    # NOTE(review): for URLs with a hostname this yields the host stem
    # (e.g. "github" for github.com), not the repository name — confirm intent.
    name = Path(parsed.hostname or path_part).stem
    description = f"Documentation from {path_part}"

    return await ingest_git_source(
        library_id=library_id,
        name=name,
        description=description,
        repo_url=repo_url,
        branch=branch,
        include_paths=include_paths,
        exclude_paths=exclude_paths,
        repos_base=repos_base
    )
|
||||
|
||||
|
||||
async def detect_libraries() -> List[Dict[str, Any]]:
    """
    Register every top-level folder under DOCS_PATH as a library.

    Each folder is upserted with its lower-cased name as the library id and
    its original name as both display name and source path.

    Returns:
        List of dicts with ``id``, ``name`` and ``source_path`` per library;
        empty when DOCS_PATH does not exist.
    """
    print(f"\n[Detection] Scanning for libraries in: {DOCS_PATH}")

    if not DOCS_PATH.exists():
        print(f" [Detection] Directory does not exist: {DOCS_PATH}")
        return []

    # Only directories count as libraries; loose files are ignored.
    folders = [entry for entry in list(DOCS_PATH.iterdir()) if entry.is_dir()]
    folder_count = len(folders)

    detected = []
    for position, folder in enumerate(folders, start=1):
        folder_name = folder.name
        derived_id = folder_name.lower()

        # Create library record with defaults
        upsert_library(
            library_id=derived_id,
            name=folder_name,
            description=None,
            source_path=folder_name
        )

        detected.append({"id": derived_id, "name": folder_name, "source_path": folder_name})
        print(f" [{position}/{folder_count}] Library detected: {folder_name} (id: {derived_id})")

    print(f"\n[Detection] Found {len(detected)} library(ies)")
    return detected
|
||||
|
||||
|
||||
async def ingest_all(verbose: bool = True) -> Dict[str, Any]:
    """
    Ingest all discovered libraries.

    Args:
        verbose: Whether to print the banner and summary messages

    Returns:
        Summary dict with the integer keys ``total_libraries``,
        ``successful``, ``failed`` and ``total_chunks``. The same shape is
        returned when no libraries are found (all zeros).
    """
    if verbose:
        print("\n" + "=" * 60)
        print("DOCUMENT INGESTION STARTED")
        print("=" * 60)

    libraries = await detect_libraries()

    if not libraries:
        if verbose:
            print("\n[Summary] No libraries to ingest")
        # Same keys and types as the normal summary so callers need no special case.
        return {"total_libraries": 0, "successful": 0, "failed": 0, "total_chunks": 0}

    results = []
    for lib in libraries:
        lib_id = lib["id"]
        try:
            result = await ingest_library(
                library_id=lib_id,
                name=lib["name"],
                description=None,
                source_path=lib.get("source_path")
            )
        except Exception as exc:
            # One broken library must not abort the rest of the batch;
            # record it as a failure and keep going.
            result = {"success": False, "library_id": lib_id, "error": str(exc)}

        if verbose:
            if result.get('success'):
                print(f" [Library] Done: {result.get('library_id')} - {result.get('chunks_created', 0)} chunks")
            else:
                print(f" [Library] Failed: {lib_id} - {result.get('error')}")

        results.append(result)

    total_chunks = sum(r.get('chunks_created', 0) for r in results)
    successful = sum(1 for r in results if r.get('success'))

    result = {
        "total_libraries": len(libraries),
        "successful": successful,
        "failed": len(results) - successful,
        "total_chunks": total_chunks
    }

    if verbose:
        print("\n" + "=" * 60)
        print("INGESTION COMPLETE")
        print("=" * 60)
        print(f" Libraries processed: {result['total_libraries']}")
        print(f" Successful: {result['successful']}")
        print(f" Failed: {result['failed']}")
        print(f" Total chunks created: {result['total_chunks']}")

    return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: detect libraries, then ingest the first one found.
    import asyncio

    async def test_run():
        print("Testing ingestion module...\n")

        # Test detect_libraries
        libs = await detect_libraries()
        print(f"\nDetected libraries: {len(libs)}")

        if libs:
            # Ingesting may fail if no docs exist, which is fine for this check.
            first_library = libs[0]
            print("\nAttempting sample ingestion...")
            result = await ingest_library(
                library_id=first_library["id"],
                name=first_library["name"],
                source_path=first_library.get("source_path")
            )
            print(f"Result: {result}")

        print("\n✅ Tests completed!")

    asyncio.run(test_run())
|
||||
@@ -0,0 +1,299 @@
|
||||
"""Context7 Docs API."""
|
||||
import asyncio
|
||||
import shutil
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, File, Form, HTTPException, Query, Request, UploadFile
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .config import settings
|
||||
from .db import (
|
||||
clear_library_documents,
|
||||
delete_library,
|
||||
init_db,
|
||||
list_libraries,
|
||||
search_libraries,
|
||||
upsert_library,
|
||||
)
|
||||
from .git_source import ingest_git_source
|
||||
from .ingest import ingest_all, ingest_library
|
||||
from .search import get_library_docs, resolve_library_id, search_docs
|
||||
from .vector_store import delete_library_vectors, ensure_collection, get_client, get_collection_name
|
||||
|
||||
|
||||
# FastAPI application object; the middleware, startup hook and routes in this
# module register themselves on it through its decorators.
app = FastAPI(
    title="Context7 Docs API",
    description="Document ingestion and semantic search API for local-context7",
    version="1.0.0",
)
|
||||
|
||||
|
||||
# Request body accepted by POST /search.
class SearchRequest(BaseModel):
    # Search text; must contain at least one character.
    query: str = Field(..., min_length=1)
    # When set, passed to search_docs to restrict the search to one library.
    library_id: Optional[str] = None
    # Maximum number of results, between 1 and 50 (default 10).
    limit: int = Field(10, ge=1, le=50)
|
||||
|
||||
|
||||
# Optional request body for POST /sources/sync.
class SyncSourcesRequest(BaseModel):
    # Read by sync_sources_api, which currently does not act on the value.
    override: bool = False
|
||||
|
||||
|
||||
# Lower-case file suffixes that safe_upload_filename accepts for uploads.
ALLOWED_EXTENSIONS = {
    ".md", ".txt",
    ".py", ".js", ".ts",
    ".json", ".yaml", ".yml",
    ".html", ".css",
    ".pdf",
}
|
||||
|
||||
|
||||
@app.middleware("http")
async def auth_middleware(request: Request, call_next):
    """Require a valid X-API-Key header when API-key auth is enabled.

    With auth disabled every request passes through. With auth enabled, GET
    requests whose path starts with one of the public prefixes pass without
    a key; every other request (including other GET routes) must send an
    ``X-API-Key`` header equal to ``settings.api_key_docs_api`` or receives
    a 401 JSON response.
    """
    import secrets  # local import: only needed for the constant-time comparison

    if not settings.is_auth_enabled:
        return await call_next(request)

    public_prefixes = ("/health", "/libraries", "/docs/")
    if request.method == "GET" and request.url.path.startswith(public_prefixes):
        return await call_next(request)

    # A missing header is treated as an empty key. Both sides are encoded to
    # bytes because compare_digest only accepts ASCII-only str arguments.
    supplied_key = (request.headers.get("X-API-Key") or "").encode("utf-8")
    expected_key = (settings.api_key_docs_api or "").encode("utf-8")

    # Constant-time comparison avoids leaking key contents through timing;
    # an unset expected key never authenticates anyone.
    if not expected_key or not secrets.compare_digest(supplied_key, expected_key):
        return JSONResponse(status_code=401, content={"detail": "Invalid or missing API key"})

    return await call_next(request)
|
||||
|
||||
|
||||
@app.on_event("startup")
async def startup() -> None:
    # Initialise SQLite, then wait for the Qdrant collection to become
    # available: up to 20 attempts with a one-second pause after each failure.
    db_status = init_db()
    if not db_status.get("success"):
        raise RuntimeError(f"Failed to initialize SQLite database: {db_status.get('error')}")

    last_error = None
    attempts_left = 20
    while attempts_left > 0:
        outcome = await ensure_collection()
        if outcome.get("success"):
            return
        last_error = outcome.get("error")
        await asyncio.sleep(1)
        attempts_left -= 1

    raise RuntimeError(f"Failed to initialize Qdrant collection: {last_error}")
|
||||
|
||||
|
||||
def safe_library_id(library_id: str) -> str:
    """Validate a user-supplied library ID and return it as one path segment.

    Returns the final path component with surrounding whitespace removed.
    Raises HTTPException(400) when the ID is blank, is "." or "..", or
    contains "..", "/" or a backslash anywhere.
    """
    segment = Path(library_id).name.strip()

    is_blank_or_dots = segment in {"", ".", ".."}
    has_traversal = ".." in library_id
    has_separator = "/" in library_id or "\\" in library_id

    if is_blank_or_dots or has_traversal or has_separator:
        raise HTTPException(status_code=400, detail="Invalid library ID")

    return segment
|
||||
|
||||
|
||||
def safe_upload_filename(filename: str) -> str:
    """Return a sanitised ``<stem><ext>`` name for an uploaded file.

    The extension is lower-cased and must be in ALLOWED_EXTENSIONS. The stem
    keeps only alphanumeric characters, "-", "_" and spaces, and is then
    stripped. Raises HTTPException(400) for a disallowed extension or a stem
    that ends up empty.
    """
    original = Path(filename)
    ext = original.suffix.lower()

    if ext not in ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"Unsafe extension: {ext}. Allowed extensions: {', '.join(sorted(ALLOWED_EXTENSIONS))}",
        )

    kept_chars = [ch for ch in original.stem if ch.isalnum() or ch in "-_ "]
    stem = "".join(kept_chars).strip()
    if stem:
        return f"{stem}{ext}"

    raise HTTPException(status_code=400, detail="Filename contains only unsafe characters")
|
||||
|
||||
|
||||
def docs_root() -> Path:
    """Return ``settings.docs_path`` as a Path."""
    configured_root = settings.docs_path
    return Path(configured_root)
|
||||
|
||||
|
||||
def sources_config_path() -> Path:
    """Return the path of docs_sources.yaml, three directory levels above this file's name."""
    this_file = Path(__file__).resolve()
    # parents[2] is the directory two levels above the one containing this module.
    base_dir = this_file.parents[2]
    return base_dir / "docs_sources.yaml"
|
||||
|
||||
|
||||
# Liveness probe: always answers with a fixed payload.
@app.get("/health")
async def health_check():
    payload = {"status": "ok", "service": "docs-api"}
    return payload
|
||||
|
||||
|
||||
# Reports the vector count of the configured collection. Any failure is
# returned as a warning with an empty collection map instead of an HTTP error.
@app.get("/collections")
async def collections():
    try:
        client = get_client()
        info = client.get_collection(get_collection_name())

        # Prefer vectors_count, then points_count, and finally 0.
        vectors = getattr(info, "vectors_count", None)
        if not vectors:
            vectors = getattr(info, "points_count", 0)
        if not vectors:
            vectors = 0

        return {"collections": {get_collection_name(): {"vectors": vectors}}}
    except Exception as e:
        return {"collections": {}, "warning": str(e)}
|
||||
|
||||
|
||||
# Lists all libraries. A dict result flagged as unsuccessful is turned into
# an HTTP 500 carrying its error message.
@app.get("/libraries")
async def list_libraries_api():
    libs = list_libraries()

    reported_failure = isinstance(libs, dict) and not libs.get("success", True)
    if reported_failure:
        raise HTTPException(status_code=500, detail=libs.get("error", "Failed to list libraries"))

    return {"libraries": libs, "count": len(libs)}
|
||||
|
||||
|
||||
# Resolves a non-empty query string to candidate libraries.
@app.get("/libraries/search")
async def search_libraries_api(q: str = Query(..., min_length=1)):
    found = resolve_library_id(q)
    return {"matches": found, "count": len(found)}
|
||||
|
||||
|
||||
# Runs a search and echoes the query parameters next to the hits.
@app.post("/search")
async def search_docs_api(payload: SearchRequest):
    hits = search_docs(
        payload.query,
        library_id=payload.library_id,
        limit=payload.limit,
    )
    response = {"query": payload.query, "library_id": payload.library_id}
    response["results"] = hits
    response["count"] = len(hits)
    return response
|
||||
|
||||
|
||||
# Returns combined documentation text for one library; served under two
# equivalent routes. `tokens` must be at least 1 and defaults to 8000.
@app.get("/docs/{library_id}")
@app.get("/libraries/{library_id}/docs")
async def get_library_docs_api(
    library_id: str,
    topic: Optional[str] = Query(None),
    tokens: int = Query(8000, ge=1),
):
    combined_text = get_library_docs(
        library_id=library_id,
        topic=topic,
        token_limit=tokens,
    )
    return {"library_id": library_id, "content": combined_text}
|
||||
|
||||
|
||||
# Triggers ingestion of every detected library and returns the summary.
@app.post("/ingest/all")
async def ingest_all_api():
    summary = await ingest_all()
    return summary
|
||||
|
||||
|
||||
# Ingests a single library; the validated ID is used as the library id,
# display name and source folder.
@app.post("/ingest/{library_id}")
async def ingest_library_api(library_id: str):
    validated_id = safe_library_id(library_id)
    return await ingest_library(
        library_id=validated_id,
        name=validated_id,
        source_path=validated_id,
    )
|
||||
|
||||
|
||||
# Creates (or updates) a library: makes its folder under the docs root and
# upserts its record. The display name defaults to the library ID.
@app.post("/api/v1/libraries/{library_id}")
async def api_create_library(
    library_id: str,
    name: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
):
    library_id = safe_library_id(library_id)
    display_name = name or library_id

    lib_dir = docs_root() / library_id
    lib_dir.mkdir(parents=True, exist_ok=True)

    result = upsert_library(library_id, display_name, description, library_id)
    if result.get("success"):
        return {
            "success": True,
            "created": not result.get("exists", False),
            "library_id": library_id,
            "name": display_name,
            "description": description,
            "path": str(lib_dir),
        }

    raise HTTPException(status_code=500, detail=result.get("error", "Failed to create library"))
|
||||
|
||||
|
||||
# Deletes a library: removes its folder from disk, then its stored chunks,
# its vectors and its library record. Responds with HTTP 500 listing the
# error of every step that reported failure.
@app.delete("/api/v1/libraries/{library_id}")
async def api_delete_library(library_id: str):
    library_id = safe_library_id(library_id)
    lib_dir = docs_root() / library_id
    deleted_files = 0

    if lib_dir.exists():
        # Count files first so the response can report how many were removed.
        deleted_files = sum(1 for path in lib_dir.rglob("*") if path.is_file())
        shutil.rmtree(lib_dir)

    docs_result = clear_library_documents(library_id)
    vectors_result = await delete_library_vectors(library_id)
    library_result = delete_library(library_id)

    failures = [
        # A failed step without an "error" entry used to put None into the
        # list and crash str.join with a TypeError.
        str(r.get("error") or "unknown error")
        for r in (docs_result, vectors_result, library_result)
        if isinstance(r, dict) and not r.get("success", True)
    ]
    if failures:
        raise HTTPException(status_code=500, detail="; ".join(failures))

    return {"success": True, "library_id": library_id, "deleted_files": deleted_files}
|
||||
|
||||
|
||||
# Stores one uploaded file (max 5MB, allowed extensions only) in the
# library's folder and upserts the library record.
@app.post("/api/v1/upload/{library_id}")
async def api_upload(library_id: str, file: UploadFile = File(...)):
    max_bytes = 5 * 1024 * 1024

    library_id = safe_library_id(library_id)
    safe_name = safe_upload_filename(file.filename or "upload.txt")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory.
    contents = await file.read(max_bytes + 1)
    if len(contents) > max_bytes:
        raise HTTPException(status_code=400, detail="File too large (max 5MB)")

    # Create the folder only after validation so rejected uploads leave no
    # empty directory behind.
    lib_dir = docs_root() / library_id
    lib_dir.mkdir(parents=True, exist_ok=True)

    target = lib_dir / safe_name
    target.write_bytes(contents)

    # NOTE(review): this passes library_id as the name and None as the
    # description on every upload — confirm upsert_library does not overwrite
    # metadata of an existing library.
    upsert_library(library_id, library_id, None, library_id)

    return {
        "success": True,
        "library_id": library_id,
        "filename": safe_name,
        "path": str(target.relative_to(docs_root())),
        "size_bytes": len(contents),
    }
|
||||
|
||||
|
||||
# Lists the git sources configured in docs_sources.yaml. The file may hold
# either a mapping with a "sources" list or a bare top-level list; a missing
# file or any other shape yields an empty list.
@app.get("/api/v1/sources")
@app.get("/sources/config")
async def api_list_sources():
    path = sources_config_path()
    if not path.exists():
        return {"success": True, "sources": [], "count": 0}

    with path.open(encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    # The previous `data.get(...)` raised AttributeError for a top-level
    # list, so the list form could never actually be used.
    if isinstance(data, dict):
        sources = data.get("sources", [])
    elif isinstance(data, list):
        sources = data
    else:
        sources = []

    if not isinstance(sources, list):
        sources = []

    return {"success": True, "sources": sources, "count": len(sources)}
|
||||
|
||||
|
||||
# Ingests every configured git source and reports per-source results.
# A malformed entry or a source whose ingestion raises is recorded as a
# failed result instead of aborting the whole sync.
# NOTE(review): payload.override is accepted but not used here — confirm
# whether it should be forwarded to ingest_git_source.
@app.post("/sources/sync")
async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
    source_data = await api_list_sources()
    sources = source_data["sources"]
    results = []

    for source in sources:
        # Both keys are required below; reject incomplete entries up front
        # rather than failing with a KeyError.
        if not isinstance(source, dict) or not source.get("library_id") or not source.get("repo_url"):
            results.append({
                "success": False,
                "error": "Source entry must define 'library_id' and 'repo_url'",
                "source": source,
            })
            continue

        try:
            result = await ingest_git_source(
                library_id=source["library_id"],
                name=source.get("name") or source["library_id"],
                description=source.get("description"),
                repo_url=source["repo_url"],
                branch=source.get("branch", "main"),
                include_paths=source.get("include_paths"),
                exclude_paths=source.get("exclude_paths"),
            )
        except Exception as exc:
            result = {"success": False, "library_id": source["library_id"], "error": str(exc)}

        results.append(result)

    successful = sum(1 for r in results if r.get("success"))
    return {
        "success": successful == len(results),
        "total_sources": len(results),
        "successful": successful,
        "failed": len(results) - successful,
        "results": results,
    }
|
||||
@@ -0,0 +1,47 @@
|
||||
# Data Models for document processing and API responses
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class DocumentChunk:
    """Represents a chunk of text to be embedded."""

    def __init__(
        self,
        text: str,
        metadata: Optional[Dict[str, Any]] = None
    ):
        self.text = text
        # A falsy value (None or an empty dict) is replaced by a fresh dict.
        self.metadata = metadata or {}

    @property
    def doc_id(self) -> str:
        """Return a stable document ID derived from the chunk text.

        The ID is ``doc-`` followed by the first 16 hex digits of the SHA-256
        of the UTF-8 encoded text. The built-in ``hash()`` used previously is
        salted per interpreter process, so the same text got a different ID
        on every run.
        """
        import hashlib  # local import keeps this module's import block untouched

        digest = hashlib.sha256(self.text.encode("utf-8")).hexdigest()
        return f"doc-{digest[:16]}"
|
||||
|
||||
|
||||
class IngestResponse:
    """Outcome of a document ingestion.

    Attributes:
        success: Whether ingestion succeeded
        chunks_count: Number of chunks (defaults to 0)
        error: Optional error message (defaults to None)
    """

    def __init__(self, success: bool, chunks_count: int = 0, error: Optional[str] = None):
        self.success, self.chunks_count, self.error = success, chunks_count, error
|
||||
|
||||
|
||||
class SearchResponse:
    """Container for the results of one search.

    Attributes:
        results: List of result dicts
        query: The query the results belong to
        total_results: Result count supplied by the caller
    """

    def __init__(self, results: List[Dict[str, Any]], query: str, total_results: int):
        self.query = query
        self.total_results = total_results
        self.results = results
|
||||
@@ -0,0 +1,235 @@
|
||||
# Search Operations for Semantic Query and Library Navigation
|
||||
from typing import List, Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
|
||||
from .config import settings
|
||||
from .vector_store import get_client, _collection_name as VECTOR_COLLECTION
|
||||
from .embeddings import embed_text, get_embedding_size
|
||||
from .db import get_chunks_for_library, list_libraries
|
||||
|
||||
|
||||
def search_docs(
    query: str,
    library_id: Optional[str] = None,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """
    Search documents by semantic similarity in Qdrant.

    Args:
        query: The search query string
        library_id: Optional filter to search only within a library
        limit: Maximum number of results to return

    Returns:
        List of dicts (chunk text is not included) with format:
        {
            "id": "...",
            "score": 0.123,
            "library_id": "...",
            "path": "...",
            "title": "...",
            "chunk_index": 0
        }
        An empty list is returned for a blank query or when any error occurs
        (the error is printed).
    """
    # Nothing meaningful to embed for an empty or whitespace-only query.
    if not query or not query.strip():
        return []

    try:
        query_embedding = embed_text(query)
        client = get_client()

        # Build filter if library_id is specified. A failing import is not
        # swallowed any more: searching without the filter would silently
        # return results from other libraries.
        search_filter = None
        if library_id:
            from qdrant_client.models import FieldCondition, Filter, MatchValue
            search_filter = Filter(
                must=[
                    FieldCondition(
                        key="library_id",
                        match=MatchValue(value=library_id),
                    )
                ]
            )

        # The keyword for the filter is `query_filter`; the previous
        # `search_filter=` keyword is not accepted by QdrantClient.search,
        # so the call failed and the handler below returned [] every time.
        results = client.search(
            collection_name=VECTOR_COLLECTION,
            query_vector=query_embedding,
            query_filter=search_filter,
            limit=limit,
        )

        formatted_results = []
        for result in results:
            payload = result.payload
            # Hits without payload or with a non-positive score are skipped.
            if result.score > 0 and payload:
                formatted_results.append({
                    # Fall back to the point id when the payload has no "id".
                    "id": payload.get("id", result.id),
                    "score": float(result.score),
                    "library_id": payload.get("library_id", ""),
                    "path": payload.get("path", ""),
                    "title": payload.get("title", ""),
                    "chunk_index": payload.get("chunk_index", 0)
                })

        return formatted_results

    except Exception as e:
        print(f"Search error: {e}")
        return []
|
||||
|
||||
|
||||
def get_library_docs(
    library_id: str,
    topic: Optional[str] = None,
    token_limit: int = 8000
) -> str:
    """
    Retrieve documentation content from a library.

    With a topic, the library is searched and the stored chunks matching the
    hits are returned in relevance order. Without a topic, the library's
    chunks are returned in reading order (by path, then chunk index). In both
    cases chunks are added until the token budget is used up; the chunk that
    crosses the budget is truncated and ends the selection.

    Args:
        library_id: The library ID to fetch docs from
        topic: Optional topic filter - if provided, searches for topic first
        token_limit: Maximum tokens to include in output (estimated as
            characters divided by four)

    Returns:
        Combined markdown content as string, or a human-readable message when
        nothing is found or an error occurs
    """
    try:
        if topic:
            print(f" [Search] Searching library '{library_id}' for topic: {topic}")
            search_results = search_docs(query=topic, library_id=library_id, limit=20)

            if not search_results:
                return f"No documents found in library '{library_id}' matching topic: {topic}"

            print(f" [Search] Found {len(search_results)} relevant chunks")

            # Search hits carry no chunk text, so look the text up in the
            # stored chunks. Previously this branch never built the chunk
            # list and always ended in a NameError.
            # NOTE(review): assumes stored rows expose "path"/"chunk_index"
            # and/or "id" keys like the search hits — confirm against db.
            by_location = {}
            by_id = {}
            for row in get_chunks_for_library(library_id) or []:
                by_location[(row.get("path"), row.get("chunk_index"))] = row
                if row.get("id") is not None:
                    by_id[row.get("id")] = row

            ordered_chunks = []
            for hit in search_results:
                row = by_location.get((hit.get("path"), hit.get("chunk_index")))
                if row is None:
                    row = by_id.get(hit.get("id"))
                if row is not None:
                    ordered_chunks.append(row)

            if not ordered_chunks:
                return f"No documents found in library '{library_id}' matching topic: {topic}"
        else:
            print(f" [Fetch] Retrieving chunks from library '{library_id}'")
            chunks_data = get_chunks_for_library(library_id)

            if not chunks_data:
                return f"No documents found in library '{library_id}'"

            # Reading order: file by file, chunks ascending within a file.
            ordered_chunks = sorted(
                chunks_data,
                key=lambda c: (str(c.get("path") or ""), c.get("chunk_index") or 0),
            )

        # Fill the token budget.
        selected_chunks = []
        total_tokens = 0
        for chunk in ordered_chunks:
            content = chunk.get("content", "") or ""
            tokens = len(content) // 4  # Simple token estimate

            if total_tokens + tokens <= token_limit:
                selected_chunks.append(chunk)
                total_tokens += tokens
                continue

            # Budget exceeded: keep only the part that still fits, then stop
            # so later chunks cannot add further truncated fragments.
            remaining = token_limit - total_tokens
            if remaining > 0:
                selected_chunks.append({"content": content[:remaining * 4], "title": chunk.get("title", "")})
                total_tokens += remaining
            break

        print(f" [Fetch] Selected {len(selected_chunks)} chunks ({total_tokens} tokens)")

        # Combine chunks into markdown, starting a new heading whenever the
        # title changes from one chunk to the next.
        md_parts = []
        previous_title = None
        for chunk in selected_chunks:
            content = chunk.get("content", "") or ""
            if not content.strip():
                continue

            title = chunk.get("title")
            if title and title != previous_title:
                md_parts.append(f"# {title}")
                previous_title = title

            md_parts.append(content)

        result = "\n\n".join(md_parts)

        # If no heading appears in the first lines, prepend the library title.
        if not any(line.startswith("#") for line in result.split("\n")[:3]):
            result = f"# {library_id.upper().replace('_', ' ')}\n\n" + result

        return result.rstrip()

    except Exception as e:
        print(f"Error getting library docs: {e}")
        return f"Error retrieving documents from library '{library_id}': {str(e)}"
|
||||
|
||||
|
||||
def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Resolve a library name to potential matches (Context7-style).

    A library matches when ``library_name`` is a case-insensitive substring
    of its name or id. At most five candidates are returned.

    Args:
        library_name: Partial or full library name to search for

    Returns:
        List of Context7-style candidate dicts:
        {
            "id": "/local/foundryvtt",
            "name": "foundryvtt",
            "description": "...",
            "source": "local"
        }
        An empty list is returned when nothing matches, when the library
        listing is empty or an error dict, or when an error occurs.
    """
    try:
        libraries = list_libraries()

        # A dict is how a failed listing is reported (see list_libraries_api);
        # there is nothing to match against in that case.
        if not libraries or isinstance(libraries, dict):
            return []

        needle = library_name.lower()

        candidates = []
        for lib in libraries:
            # `or ""` guards against None values, which previously raised in
            # .lower() and turned the whole lookup into "no matches".
            lib_name = (lib.get("name") or "").lower()
            lib_id = str(lib.get("id") or "").lower()

            if needle in lib_name or needle in lib_id:
                candidates.append({
                    "id": f"/local/{lib['id']}",
                    "name": lib["name"],
                    "description": lib.get("description") or "",
                    "source": "local"
                })

        # Keep at most the first five matches.
        candidates = candidates[:5]

        print(f" [Resolve] Found {len(candidates)} candidate(s) for: {library_name}")

        return candidates

    except Exception as e:
        print(f"Error resolving library ID: {e}")
        return []
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test. resolve_library_id and search_docs are ordinary
    # (synchronous) functions, so they are called directly; awaiting their
    # list results, as this block did before, raises TypeError.
    def test_search():
        """Test search functionality."""
        print("Testing search module...\n")

        # Test 1: library name resolution
        print("1. Testing resolve_library_id()...")
        results = resolve_library_id("foundryvtt")
        print(f" Results: {len(results)} candidates\n")

        # Test 2: Empty query should return empty list
        print("2. Testing search_docs() with empty query...")
        results = search_docs("")
        print(f" Results: {len(results)} chunks\n")

        print("✅ All tests completed!")

    test_search()
|
||||
@@ -0,0 +1,361 @@
|
||||
# Vector Store Operations for Qdrant
|
||||
import asyncio
|
||||
import uuid
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
try:
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
|
||||
except ImportError:
|
||||
QdrantClient = None
|
||||
Distance = VectorParams = PointStruct = Filter = FieldCondition = MatchValue = None
|
||||
|
||||
|
||||
# Singleton client instance; stays None until get_client() creates it.
_client: Optional[Any] = None
# The collection name is taken from settings when the config module can be
# imported and read; any exception falls back to the default name below.
try:
    from .config import settings
    _collection_name = settings.collection_name
except Exception:
    _collection_name = "local_context7_docs"
|
||||
|
||||
|
||||
def get_client() -> Any:
    """Return the shared Qdrant client, creating it on first use.

    The client connects to the QDRANT_URL environment variable when set,
    otherwise to the host and port from settings. Raises RuntimeError when
    qdrant-client is not installed.
    """
    global _client

    # Fast path: already created.
    if _client is not None:
        return _client

    if QdrantClient is None:
        raise RuntimeError("qdrant-client is not installed")

    # Loading a .env file is optional; skip it when python-dotenv is absent.
    try:
        from dotenv import load_dotenv
        load_dotenv()
    except ImportError:
        pass

    import os
    url_from_env = os.getenv("QDRANT_URL")

    if not url_from_env:
        from .config import settings
        _client = QdrantClient(
            host=settings.vector_store_host,
            port=settings.vector_store_port,
        )
    else:
        _client = QdrantClient(url=url_from_env)

    return _client
|
||||
|
||||
|
||||
def get_collection_name() -> str:
    """Return the collection name held in the module-level ``_collection_name``."""
    return _collection_name
|
||||
|
||||
|
||||
def get_embedding_size() -> int:
    """Return the embedding dimension reported by the embeddings module.

    Falls back to 384 when that module cannot be imported or its lookup
    raises RuntimeError.
    """
    fallback_dimension = 384
    try:
        from .embeddings import get_embedding_size as embeddings_dimension
        return embeddings_dimension()
    except (ImportError, RuntimeError):
        # Embeddings module unavailable or not ready: use the default size.
        return fallback_dimension
|
||||
|
||||
|
||||
async def ensure_collection(vector_size: Optional[int] = None) -> Dict[str, Any]:
    """Ensure the Qdrant collection exists with the expected vector size.

    A missing collection is created. An existing collection whose vector size
    differs from the expected one is deleted and recreated (its points are
    lost). If the existing size cannot be read, the collection is left as is.

    Args:
        vector_size: Embedding dimension to use; defaults to
            ``get_embedding_size()`` when omitted or falsy.

    Returns:
        ``{"success": True, "collection", "vector_size", "created"}`` (plus
        ``"resized": True`` after a recreate), or
        ``{"success": False, "error": <message>}`` on any failure.
    """
    try:
        if QdrantClient is None:
            return {"success": False, "error": "qdrant-client is not installed"}

        client = get_client()
        size = vector_size or get_embedding_size()
        distance = Distance.COSINE

        def create() -> None:
            # qdrant-client names this parameter ``vectors_config``; it has no
            # ``vectors`` or ``wait`` parameter and rejects unknown keywords.
            client.create_collection(
                collection_name=_collection_name,
                vectors_config=VectorParams(size=size, distance=distance),
            )

        # A failed listing is treated as "collection missing"; the create call
        # below then surfaces the real connection error through the outer handler.
        try:
            existing = client.get_collections().collections
            collection_exists = any(c.name == _collection_name for c in existing)
        except Exception:
            collection_exists = False

        if not collection_exists:
            create()
            return {
                "success": True,
                "collection": _collection_name,
                "vector_size": size,
                "created": True
            }

        # Best effort: read the current vector size. Collections whose config
        # does not expose a single ``size`` are accepted unchanged.
        try:
            info = client.get_collection(_collection_name)
            current_size = info.config.params.vectors.size
        except Exception:
            current_size = None

        if current_size is not None and current_size != size:
            # Wrong dimension: drop and recreate. Errors here are reported to
            # the caller instead of being masked as success.
            client.delete_collection(_collection_name)
            create()
            return {
                "success": True,
                "collection": _collection_name,
                "vector_size": size,
                "created": False,
                "resized": True
            }

        return {
            "success": True,
            "collection": _collection_name,
            "vector_size": size,
            "created": False
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
async def upsert_chunks(chunks: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Insert or update chunk embeddings in the vector store.

    Args:
        chunks: Chunk dicts. ``id``, ``library_id`` and ``embedding`` are
            required; ``path``, ``title``, ``chunk_index`` and ``content``
            are optional and default to ``""`` / ``0``.

    Returns:
        ``{"success": True, "points_added": <count>}`` on success, or
        ``{"success": False, "error": <message>}`` on any failure.
    """
    def to_point(item: Dict[str, Any]) -> Any:
        # The point id is a UUIDv5 of "<library_id>:<chunk id>", so upserting
        # the same chunk again overwrites the same point.
        key = f"{item['library_id']}:{item['id']}"
        return PointStruct(
            id=str(uuid.uuid5(uuid.NAMESPACE_URL, key)),
            vector=item["embedding"],
            payload={
                "id": item["id"],
                "library_id": item["library_id"],
                "path": item.get("path", ""),
                "title": item.get("title", ""),
                "chunk_index": item.get("chunk_index", 0),
                "content": item.get("content", "")
            }
        )

    try:
        if QdrantClient is None:
            return {"success": False, "error": "qdrant-client is not installed"}

        if not chunks:
            return {"success": True, "points_added": 0}

        client = get_client()
        points = [to_point(item) for item in chunks]
        client.upsert(_collection_name, points=points)

        return {"success": True, "points_added": len(points)}

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
async def search_vectors(
    query_vector: List[float],
    library_id: Optional[str] = None,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """Search the collection for vectors similar to ``query_vector``.

    Args:
        query_vector: The embedding vector to search against.
        library_id: When given, only points whose ``library_id`` payload
            field equals this value are considered.
        limit: Maximum number of hits requested from Qdrant.

    Returns:
        A list of dicts with keys ``id``, ``score``, ``library_id``,
        ``path``, ``title`` and ``chunk_index``. Hits with a non-positive
        score or without payload are dropped. Returns ``[]`` when
        qdrant-client is missing or on any error.
    """
    try:
        if QdrantClient is None:
            return []

        client = get_client()

        # Restrict to a single library when requested.
        search_filter = None
        if library_id:
            search_filter = Filter(
                must=[
                    FieldCondition(
                        key="library_id",
                        match=MatchValue(value=library_id),
                    )
                ]
            )

        # The keyword is ``query_filter``; ``search_filter`` is not a
        # parameter of QdrantClient.search and made every call fail.
        results = client.search(
            collection_name=_collection_name,
            query_vector=query_vector,
            limit=limit,
            query_filter=search_filter
        )

        formatted_results = []
        for result in results:
            if result.score > 0 and result.payload:
                formatted_results.append({
                    "id": result.payload["id"],
                    "score": float(result.score),
                    "library_id": result.payload["library_id"],
                    "path": result.payload.get("path", ""),
                    "title": result.payload.get("title", ""),
                    "chunk_index": result.payload.get("chunk_index", 0)
                })

        return formatted_results

    except Exception:
        # Callers rely on always getting a list back, so errors yield no hits.
        return []
|
||||
|
||||
|
||||
async def delete_library_vectors(library_id: str) -> Dict[str, Any]:
    """Delete every vector whose payload ``library_id`` matches.

    Args:
        library_id: The library ID to delete vectors for.

    Returns:
        ``{"success": True, "library_id": ...}`` on success (with a
        ``"skipped"`` note when qdrant-client is not installed), or
        ``{"success": False, "error": <message>}`` when the delete fails.
    """
    try:
        if QdrantClient is None:
            return {"success": True, "library_id": library_id, "skipped": "qdrant-client is not installed"}

        client = get_client()

        selector = Filter(
            must=[
                FieldCondition(
                    key="library_id",
                    match=MatchValue(value=library_id),
                )
            ]
        )

        # Delete by filter in one request. The earlier scroll-and-delete loop
        # ignored the scroll's next-page offset and swallowed errors while
        # still reporting success; failures now reach the handler below.
        client.delete(
            collection_name=_collection_name,
            points_selector=selector
        )

        return {
            "success": True,
            "library_id": library_id
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: runs each public coroutine once and prints the result.
    import os

    def _run(coro):
        # Each call gets its own event loop, as separate asyncio.run() calls do.
        return asyncio.run(coro)

    print("Testing vector store module...\n")

    print("1. Testing ensure_collection()...")
    result = _run(ensure_collection())
    print(f"   Result: {result}\n")

    # A dummy vector is enough here; an empty collection returns no hits.
    print("2. Testing search_vectors() with dummy vector...")
    dummy_vector = [0.1] * 384
    results = _run(search_vectors(dummy_vector, limit=5))
    print(f"   Results count: {len(results)}\n")

    # Deleting an unknown library is expected to succeed.
    print("3. Testing delete_library_vectors()...")
    result = _run(delete_library_vectors("test-library"))
    print(f"   Result: {result}\n")

    print("✅ All tests completed!")
|
||||
@@ -0,0 +1 @@
|
||||
"""WebUI module for Context7 Docs."""
|
||||
@@ -0,0 +1,166 @@
|
||||
/* Page layout */
.container {
    max-width: 1000px;
    margin: 0 auto;
    padding: 20px;
}

header {
    border-bottom: 1px solid #ccc;
    padding-bottom: 15px;
    margin-bottom: 20px;
}

header h1 {
    margin: 0 0 10px 0;
    font-size: 1.5rem;
}

/* Navigation bar */
nav {
    display: flex;
    gap: 15px;
}

nav a {
    text-decoration: none;
    color: #0066cc;
    font-size: 0.9rem;
}

nav a.active {
    font-weight: bold;
    text-decoration: underline;
}

main h2 {
    margin-bottom: 15px;
}

footer {
    margin-top: 40px;
    padding-top: 15px;
    border-top: 1px solid #ccc;
    font-size: 0.8rem;
    color: #666;
}

/* Status cards */
.status-card {
    background: #f5f5f5;
    padding: 20px;
    border-radius: 8px;
    border-left: 4px solid #00c467;
}

.status-message {
    background: #e8f4fd;
    padding: 10px;
    border-radius: 4px;
    margin: 5px 0;
}

/* Tables */
.library-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 10px;
}

.library-table th, .library-table td {
    padding: 10px;
    text-align: left;
    border-bottom: 1px solid #ddd;
}

.library-table th {
    background: #f5f5f5;
    font-weight: bold;
}

/* Forms */
form input[type="text"], form textarea, form select {
    padding: 8px;
    border: 1px solid #ccc;
    border-radius: 4px;
    margin-right: 10px;
    margin-bottom: 10px;
}

button {
    background: #0066cc;
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 4px;
    cursor: pointer;
}

button:hover {
    background: #0055aa;
}

/* Pre formatting */
pre {
    background: #f5f5f5;
    padding: 15px;
    border-radius: 4px;
    overflow-x: auto;
    white-space: pre-wrap;
    word-break: break-word;
}

/* Search results */
.result-card {
    background: #fff;
    border: 1px solid #ddd;
    padding: 15px;
    margin: 10px 0;
    border-radius: 4px;
}

.result-card h3 {
    margin: 0 0 8px 0;
}

.hint {
    color: #666;
    font-size: 0.85rem;
    margin-top: 15px;
}

/* Status colors */
.status-ok {
    color: #00c467;
    font-weight: bold;
}

/* Scrollable preview of uploaded content */
.content-preview {
    max-height: 300px;
    overflow-y: auto;
}

.results-count {
    background: #e8f4fd;
    padding: 10px;
    border-radius: 4px;
    margin-bottom: 15px;
}

.source-card {
    background: #f5f5f5;
    padding: 15px;
    margin: 10px 0;
    border-radius: 4px;
}

.actions-bar {
    margin-top: 15px;
}

.actions-bar form {
    display: inline-flex;
}

/* Scrollable docs pane. The docs view renders class "docs-content", so both
   spellings are matched. */
.doc-content,
.docs-content {
    max-height: 600px;
    overflow-y: auto;
}
|
||||
@@ -0,0 +1,568 @@
|
||||
"""WebUI Views for Context7 Docs using Jinja2 templates."""
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from fastapi import Request
|
||||
from fastapi.responses import HTML, JSONResponse
|
||||
import requests
|
||||
|
||||
# Internal API base URL
|
||||
DOCS_API_URL = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
|
||||
|
||||
|
||||
def api_request(
    method: str,
    endpoint: str,
    data: Optional[dict] = None,
    params: Optional[dict] = None,
    timeout: Optional[float] = None,
) -> dict:
    """Send a request to the docs API and return the decoded JSON body.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"POST"``.
        endpoint: Path appended to ``DOCS_API_URL`` (should start with ``/``).
        data: Optional JSON body.
        params: Optional query-string parameters; ``None`` values are omitted
            by ``requests``.
        timeout: Optional timeout in seconds. ``None`` (the default) keeps
            the previous behaviour of waiting indefinitely.

    Returns:
        The response body parsed as JSON. The HTTP status is not checked.

    Raises:
        requests.RequestException: on connection problems or timeouts.
        ValueError: if the response body is not valid JSON.
    """
    url = f"{DOCS_API_URL}{endpoint}"

    # The API key header is sent only when WEBUI_API_KEY is set and non-empty.
    headers = {}
    api_key = os.environ.get("WEBUI_API_KEY")
    if api_key:
        headers["X-API-Key"] = api_key

    resp = requests.request(
        method,
        url,
        headers=headers,
        json=data,
        params=params,
        timeout=timeout,
    )
    return resp.json()
|
||||
|
||||
|
||||
def navbar_html(current: str) -> str:
    """Build the ``<nav>`` element shared by every page.

    Args:
        current: Path of the page being rendered; the link whose path equals
            it gets ``class="active"``, every other link an empty class.

    Returns:
        The navigation bar as an HTML string.
    """
    entries = (
        ("/health", "Health"),
        ("/libraries", "Libraries"),
        ("/upload", "Upload"),
        ("/ingest/all", "Ingest All"),
        ("/sources/git", "Git Sources"),
        ("/search", "Search"),
    )
    anchors = [
        '<a href="{0}" class="{1}">{2}</a>'.format(
            href, "active" if href == current else "", text
        )
        for href, text in entries
    ]
    return "\n".join(["<nav>", " ".join(anchors), "</nav>"]).strip()
|
||||
|
||||
|
||||
def footer_html() -> str:
    """Return the static ``<footer>`` element shared by every page."""
    label = "Context7 Docs WebUI"
    return "<footer>" + label + "</footer>"
|
||||
|
||||
|
||||
def health(request: Request) -> HTML:
    """Render the system health dashboard.

    Queries ``GET /health`` on the docs API. When the call fails, the status
    is shown as ``error`` and the exception text takes the service name's
    place. All dynamic values are HTML-escaped before interpolation.
    """
    from html import escape

    try:
        data = api_request("GET", "/health")
        status = data.get("status", "unknown")
        service = data.get("service", "Service")
    except Exception as e:
        status = "error"
        service = str(e)

    # Both values come from a remote response or an exception message.
    safe_status = escape(str(status))
    safe_service = escape(str(service))
    nav = navbar_html("/health")
    footer = footer_html()

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Health</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main><h2>System Health</h2>
<div class="status-card" data-status="{safe_status}"><h3>{safe_service}</h3>
<p>Status: <span class="status-ok">{safe_status}</span></p></div>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
def libraries(request: Request) -> HTML:
    """Render the table of libraries returned by ``GET /libraries``.

    When the API call fails, the error is shown above an empty table instead
    of being hidden behind a placeholder row. Library fields are HTML-escaped
    and the id is URL-quoted inside the "View Docs" link.
    """
    from html import escape
    from urllib.parse import quote

    error = ""
    try:
        data = api_request("GET", "/libraries")
        libs = data.get("libraries", [])
    except Exception as e:
        libs = []
        error = str(e)

    table_rows = []
    for lib in libs:
        lib_id = str(lib.get("id"))
        name = escape(str(lib.get("name", "")))
        description = escape(str(lib.get("description", "") or "(no description)"))
        table_rows.append(
            f"""<tr><td>{escape(lib_id)}</td>
<td>{name}</td>
<td>{description}</td>
<td><a href="/docs/{quote(lib_id, safe='')}">View Docs</a></td></tr>"""
        )

    error_block = f'<p class="status-message">{escape(error)}</p>' if error else ""
    nav = navbar_html("/libraries")
    footer = footer_html()
    rows_html = "".join(table_rows)

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Libraries</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Libraries ({len(libs)})</h2>
{error_block}
<div class="actions-bar">
<form action="/folders/create" method="post" style="display:inline;">
<input type="text" name="name" placeholder="New library folder name" required>
<button type="submit">Create Folder</button>
</form>
</div>
<table class="library-table">
<thead><tr><th>ID</th><th>Name</th><th>Description</th><th>Actions</th></tr></thead>
<tbody>{rows_html}</tbody>
</table>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
def upload(request: Request) -> HTML:
    """Render the upload form, or a preview of an uploaded file.

    When the request carries a ``file`` part, its first 5000 characters are
    decoded as UTF-8, previewed (first 1000 characters) and embedded in a
    hidden field of a follow-up form that posts to ``/ingest/uploaded``.
    Any failure while reading or decoding shows the "File too large!" page.

    NOTE(review): ``request.files`` is not an attribute of Starlette's
    ``Request`` as far as I can tell (uploads come from ``await
    request.form()``) - confirm how this handler is wired before relying on it.
    """
    from html import escape

    nav = navbar_html("/upload")
    footer = footer_html()

    if "file" in request.files:
        uploaded_file = request.files["file"]
        try:
            content = uploaded_file.read().decode("utf-8")[:5000]
            # Truncate first, then escape, so an entity is never cut in half.
            preview = content[:1000] + "..." if len(content) > 1000 else content
            safe_preview = escape(preview, quote=False)
            # The hidden field is an attribute value, so quotes are escaped too.
            safe_value = escape(content, quote=True)

            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Upload</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Upload Complete!</h2>
<pre class="content-preview">{safe_preview}</pre>
<form method="post" action="/ingest/uploaded">
<input type="hidden" name="content" value="{safe_value}">
<label for="library_id">Library (optional):</label>
<input type="text" id="library_id" name="library_id" placeholder="e.g., my-docs">
<button type="submit">Ingest</button>
</form>
</main>{footer}</div>
</body></html>""", media_type="text/html")
        except Exception:
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Upload</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>File too large!</h2>
<p>Please upload smaller text files (limit: ~5MB).</p>
</main>{footer}</div>
</body></html>""", media_type="text/html")
    else:
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Upload</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Upload Documentation Files</h2>
<form method="post" enctype="multipart/form-data">
<label for="file">Select file:</label>
<input type="file" name="file" id="file" accept=".txt,.md,.json,.py,.js,.html,.css,.yaml,.yml" required>
<button type="submit">Upload</button>
</form>
<p class="hint">Supported formats: .txt, .md, .json, .py, .js, .html, .css, .yaml</p>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
def ingest_all(request: Request) -> JSONResponse:
    """Trigger ``POST /ingest`` on the docs API and report the chunk count.

    Returns a JSON body with ``status``/``message`` on success, or a 500
    response with an ``error`` field when the API call raises.
    """
    try:
        outcome = api_request("POST", "/ingest")
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})

    chunk_count = outcome.get('chunks', 0)
    body = {"status": "ok", "message": f"Processed {chunk_count} chunks"}
    try:
        return JSONResponse(content=body)
    except Exception as e:
        # Mirrors the single try block this was split from: a failure while
        # building the success response is also reported as a 500.
        return JSONResponse(status_code=500, content={"error": str(e)})
|
||||
|
||||
|
||||
def ingest_library(request: Request, library_id: str) -> HTML:
    """Show the ingest form for one library, or trigger its ingestion.

    When the submitted form contains ``content`` the text-entry form is
    rendered; otherwise ``POST /ingest/<library_id>`` is called and its
    result (or error) is shown. ``library_id`` and all API output are
    HTML-escaped; the id is URL-quoted inside paths.

    NOTE(review): the branch condition looks inverted (posted content leads
    to the form, and the content is never sent) - left as is, confirm intent.
    NOTE(review): ``request.form`` is an async method on Starlette's
    ``Request``; this sync check may not behave as intended - confirm.
    """
    from html import escape
    from urllib.parse import quote

    safe_id = escape(library_id)
    url_id = quote(library_id, safe="")
    # The literal placeholder path matches no nav link, so none is highlighted.
    nav = navbar_html("/ingest/{library_id}")
    footer = footer_html()

    if "content" in request.form:
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Ingest</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Ingest for Library: {safe_id}</h2>
<form method="post" action="/ingest/{url_id}">
<label for="content">Content (text):</label>
<textarea id="content" name="content" rows="10" maxlength="10000"></textarea>
<button type="submit">Ingest</button>
</form>
</main>{footer}</div>
</body></html>""", media_type="text/html")
    else:
        try:
            result = api_request("POST", f"/ingest/{url_id}")
            safe_msg = escape(str(result.get('message', '') or ''))
            safe_json = escape(json.dumps(result, indent=2), quote=False)
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Ingest Result</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Ingestion Complete!</h2>
<p>{safe_msg}</p>
<pre>{safe_json}</pre>
<a href="/libraries">← Back to Libraries</a>
</main>{footer}</div>
</body></html>""", media_type="text/html")
        except Exception as e:
            safe_error = escape(str(e), quote=False)
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Error</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Error</h2>
<pre>{safe_error}</pre>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
async def folders_create(request: Request) -> JSONResponse:
    """Create a new library folder from the posted ``name`` form field.

    Returns a 400 response when the name is missing or is not a plain folder
    name, a 500 response when reading the form or writing the record fails,
    and ``{"status": "ok", ...}`` otherwise.
    """
    try:
        # Starlette exposes form data through an awaitable method.
        form = await request.form()
        name = str(form.get("name") or "").strip()

        # The name becomes both the library id and a path component under
        # /docs, so empty values and path-like names are rejected.
        if not name or name in (".", "..") or "/" in name or "\\" in name:
            return JSONResponse(status_code=400, content={"error": "Invalid folder name"})

        from backend.app.db import upsert_library
        await upsert_library(library_id=name, name=name, description=None, source_path=f"/docs/{name}")
        return JSONResponse(content={"status": "ok", "message": f"Created folder '{name}'"})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
|
||||
|
||||
|
||||
async def folders_delete(request: Request) -> JSONResponse:
    """Delete the library named by the ``id`` query parameter.

    Returns a 400 response when ``id`` is missing or blank, a 500 response
    when the delete raises, and ``{"status": "ok", ...}`` otherwise.
    """
    library_id = request.query_params.get("id", "").strip()

    # Never forward an empty id to the database layer.
    if not library_id:
        return JSONResponse(status_code=400, content={"error": "Missing library id"})

    try:
        from backend.app.db import delete_library
        await delete_library(library_id)
        return JSONResponse(content={"status": "ok", "message": f"Deleted library '{library_id}'"})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
|
||||
|
||||
|
||||
async def ingest_uploaded(request: Request) -> HTML:
    """Ingest text posted from the upload preview form.

    Reads ``content`` (truncated to 10000 characters) and ``library_id``
    from the form. A missing or blank library falls back to ``"uploaded"``.
    The API result, or any error, is rendered as an HTML page with all
    dynamic text escaped.
    """
    from html import escape
    from urllib.parse import quote

    try:
        # Starlette exposes form data through an awaitable method.
        form = await request.form()
        content = str(form.get("content") or "")[:10000]
        # The optional text input submits "" when left blank.
        library_id = str(form.get("library_id") or "").strip() or "uploaded"

        result = api_request("POST", f"/ingest/{quote(library_id, safe='')}", data={"content": content})
        safe_msg = escape(str(result.get('message', '') or ''))
        safe_json = escape(json.dumps(result, indent=2), quote=False)
        nav = navbar_html("/upload")
        footer = footer_html()
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Upload Result</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Ingestion Complete!</h2>
<p>{safe_msg}</p>
<pre>{safe_json}</pre>
<a href="/upload">← Upload Another</a>
</main>{footer}</div>
</body></html>""", media_type="text/html")
    except Exception as e:
        safe_error = escape(str(e), quote=False)
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Upload Ingest Error</h1><pre>{safe_error}</pre><a href="/upload">← Try Again</a></body>
</html>""", media_type="text/html")
|
||||
|
||||
|
||||
def docs(request: Request, library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> HTML:
    """Render documentation text for one library.

    Calls ``GET /libraries/<library_id>/docs`` with ``tokens`` and, when
    given, ``topic`` as query parameters. On failure the exception text is
    shown in place of the content. At most 10000 characters are displayed,
    and all dynamic text is HTML-escaped.
    """
    from html import escape
    from urllib.parse import quote, urlencode

    # The query string is built here because api_request() as originally
    # written accepts no ``params`` keyword.
    query = {"tokens": tokens}
    if topic is not None:
        query["topic"] = topic
    endpoint = f"/libraries/{quote(library_id, safe='')}/docs?{urlencode(query)}"

    try:
        data = api_request("GET", endpoint)
        content = data.get("content", "")
    except Exception as e:
        content = str(e)

    # Truncate before escaping so an entity is never cut in half.
    safe_content = escape(str(content or "")[:10000], quote=False)
    safe_id = escape(library_id)
    safe_topic = escape(str(topic or '(all)'))
    safe_tokens = escape(str(tokens))
    nav = navbar_html("/docs/{}".format(library_id))
    footer = footer_html()

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Library: {safe_id}</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Library: {safe_id}</h2>
<p><strong>Topic:</strong> {safe_topic} | <strong>Tokens:</strong> {safe_tokens}</p>
<pre class="docs-content">{safe_content}</pre>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
def search_redirect(request: Request) -> JSONResponse:
    """Return a JSON body pointing the client at the search form.

    This sends a ``{"redirect": ...}`` payload, not an HTTP redirect.
    """
    target = "/search/form"
    return JSONResponse(content={"redirect": target})
|
||||
|
||||
|
||||
def search_form(request: Request) -> HTML:
    """Render the static search form (query, optional library, result limit)."""
    nav = navbar_html("/search")
    footer = footer_html()
    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Search</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Search Docs</h2>
<form method="post" action="/search">
<label for="query">Query:</label>
<input type="text" id="query" name="query" required placeholder="Enter your search query...">
<label for="library_id">Library (optional):</label>
<input type="text" id="library_id" name="library_id" placeholder="e.g., foundryvtt">
<label for="limit">Limit results:</label>
<select id="limit" name="limit">
<option value="5">5</option>
<option value="10" selected>10</option>
<option value="20">20</option>
<option value="50">50</option>
</select>
<button type="submit">Search</button>
</form>
</main>{footer}</div>
</body></html>"""
    return HTML(page, media_type="text/html")
|
||||
|
||||
|
||||
def search_results(request: Request) -> HTML:
    """Run a search and render the results.

    Reads ``q`` and ``limit`` from the query string, posts them to
    ``/search`` on the docs API and renders one card per result. Any failure
    (including a non-numeric ``limit``) renders a small error page. The
    query, result fields and error text are HTML-escaped because they come
    from the request or a remote response.
    """
    from html import escape
    from urllib.parse import quote

    try:
        query = request.query_params.get("q", "")
        limit = int(request.query_params.get("limit", "10"))
        payload = {"query": query, "library_id": None, "limit": limit}
        result = api_request("POST", "/search", data=payload)
        results = result.get("results", [])
    except Exception as e:
        safe_error = escape(str(e), quote=False)
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Error</h1><pre>{safe_error}</pre><a href="/search/form">← Try Again</a></body>
</html>""", media_type="text/html")

    cards = []
    for r in results:
        # ``or ""`` guards against fields that are present but null.
        body_text = r.get("content") or r.get("chunk") or ""
        title = r.get("title", "Untitled") or ((r.get("content") or "")[:100] + "...")[:200]
        safe_title = escape(str(title))
        safe_body = escape(str(body_text)[:500])
        safe_result_id = escape(str(r.get('id')))
        library_path = quote(str(r.get('library_id')), safe="")
        cards.append(f"""<div class="result-card" data-id="{safe_result_id}"><h3>{safe_title}</h3>
<p>{safe_body}...</p><a href="/docs/{library_path}">View Full</a></div>""")

    safe_query = escape(query)
    nav = navbar_html("/search")
    footer = footer_html()
    cards_html = ''.join(cards)

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Search Results</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Search Results for "{safe_query}"</h2>
<div class="results-count">{len(results)} results found</div>
{cards_html}
<a href="/search/form">← New Search</a>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
def sync_sources(request: Request) -> HTML:
    """Show the git-sync page, or run a sync when the request is a POST.

    POST: calls ``POST /sources/sync`` on the docs API and renders the JSON
    result, or an error page when the call fails.
    Other methods: renders the sync form with the ids of the known
    libraries (``(none)`` when the list is empty or cannot be fetched).
    All dynamic text is HTML-escaped.
    """
    from html import escape

    footer = footer_html()

    if request.method == "POST":
        try:
            data = api_request("POST", "/sources/sync")
            safe_json = escape(json.dumps(data, indent=2), quote=False)
            nav = navbar_html("/sync/sources")
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Sync Result</title></head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main><h2>Git Sync Complete!</h2><pre>{safe_json}</pre>
<form method="post"><button type="submit">Sync Again</button></form>
</main>{footer}</div>
</body></html>""", media_type="text/html")
        except Exception as e:
            safe_error = escape(str(e), quote=False)
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Sync Error</h1><pre>{safe_error}</pre><a href="/sources/git">← Try Again</a></body>
</html>""", media_type="text/html")
    else:
        try:
            data = api_request("GET", "/libraries")
            # Ids are stringified so a missing or non-string id cannot break
            # the join below.
            libs = [str(l.get("id")) for l in data.get("libraries", []) if l.get("id") != "error"]
        except Exception:
            libs = []

        lib_list = escape(", ".join(libs)) if libs else "(none)"
        nav = navbar_html("/sources/git")
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Git Sync</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Sync Git Repositories</h2>
<p>Syncs all git repositories configured in <code>docs_sources.yaml</code>.</p>
<form method="post" action="/sync/sources">
<label for="override">Override existing repos:</label>
<input type="checkbox" id="override" name="override">
<button type="submit">Sync All Repositories</button>
</form>
<h3>Libraries Found: {lib_list}</h3>
</main>{footer}</div>
</body></html>""", media_type="text/html")
|
||||
|
||||
|
||||
def git_sources(request: Request) -> HTML:
    """Render the git sources configured in ``docs_sources.yaml``.

    The file is looked up three directories above this module. Each source
    is shown with its library id, repository URL (shortened to 50 characters
    plus ``...``), branch and include/exclude paths. Any failure while
    reading or rendering produces an error page. Values from the config file
    are HTML-escaped.
    """
    from html import escape
    import yaml

    config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"

    try:
        with open(config_path) as f:
            # An empty file parses to None; treat it as "no sources".
            data = yaml.safe_load(f) or {}
        sources = data.get("sources", []) or []

        source_blocks = []
        for src in sources:
            repo_url = src.get("repo_url", "") or ""
            url = repo_url[:50] + "..." if len(repo_url) > 50 else repo_url
            branch = src.get("branch", "main")
            include = src.get("include_paths", ["*"])
            exclude = src.get("exclude_paths", [])

            paths = ', '.join(include)
            if exclude:
                paths += ' | Exclude: ' + ', '.join(exclude)

            safe_library = escape(str(src.get('library_id', 'unknown')))
            source_blocks.append(f"""<div class="source-card">
<strong>{safe_library}</strong><br>
URL: {escape(str(url))}<br>
Branch: {escape(str(branch))}<br>
Include: {escape(paths)}
</div>""")

        nav = navbar_html("/sources/git")
        footer = footer_html()
        blocks_html = ''.join(source_blocks)

        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Git Sources</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Configured Git Sources ({len(sources)})</h2>
{blocks_html}
</main>{footer}</div>
</body></html>""", media_type="text/html")
    except Exception as e:
        safe_error = escape(str(e), quote=False)
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Git Sources Error</h1><pre>{safe_error}</pre></body>
</html>""", media_type="text/html")
|
||||
|
||||
|
||||
def logs(request: Request) -> HTML:
    """Render the logs/status page.

    The page shows the configured ``DOCS_API_URL`` and a hint about where
    container logs can be found. The "Qdrant Health" / "MCP OK" line is fixed
    text in the template; this handler performs no health probe.

    Args:
        request: The incoming request; not read by this handler.

    Returns:
        The rendered page as an HTML response.
    """
    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Logs</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/logs")}</header>
<main>
<h2>Status Messages</h2>
<div class="status-message">Docs API: {DOCS_API_URL}</div>
<div class="status-message">Qdrant Health: healthy | MCP OK: yes</div>
<p class="hint">Logs are printed to container stdout/stderr. For full logs, inspect Docker containers directly.</p>
</main>{footer_html()}</div>
</body></html>"""
    return HTML(page, media_type="text/html")
|
||||
|
||||
|
||||
# Public names exported by this module (the page handlers defined above)
|
||||
__all__ = [
|
||||
"health", "libraries", "upload", "ingest_all", "ingest_library",
|
||||
"folders_create", "folders_delete", "docs", "search_redirect",
|
||||
"search_form", "search_results", "sync_sources", "git_sources", "logs"
|
||||
]
|
||||
@@ -0,0 +1,37 @@
|
||||
# Backend API Dependencies
|
||||
fastapi==0.109.0
|
||||
uvicorn[standard]==0.27.0
|
||||
pydantic==2.5.3
|
||||
python-dotenv==1.0.0
|
||||
python-multipart==0.0.6
|
||||
|
||||
# Qdrant Vector Store Client
|
||||
qdrant-client==1.7.0
|
||||
|
||||
# Text Processing for token estimation
|
||||
tiktoken==0.7.0
|
||||
|
||||
# Local Embeddings using FastEmbed
|
||||
fastembed==0.3.0
|
||||
|
||||
# PDF support for document ingestion
|
||||
pypdf==5.0.0
|
||||
|
||||
# HTTP client for MCP server communication
|
||||
httpx==0.26.0
|
||||
|
||||
# HTTP client for WebUI (used to call docs-api from WebUI)
|
||||
requests==2.31.0
|
||||
|
||||
# FastMCP for MCP server integration (also used by backend)
|
||||
fastmcp==0.6.0
|
||||
|
||||
# YAML parser for sources configuration
|
||||
PyYAML==6.0.1
|
||||
|
||||
# =============================================================================
|
||||
# TEST DEPENDENCIES
|
||||
# =============================================================================
|
||||
pytest==8.3.2
|
||||
pytest-mock==3.14.0
|
||||
pytest-asyncio==0.23.7
|
||||
@@ -0,0 +1,2 @@
|
||||
# This directory is intentionally left empty to preserve the folder structure for Docker volumes.
|
||||
# Data from Qdrant will be mounted here via docker-compose.yml.
|
||||
@@ -0,0 +1,99 @@
|
||||
# Context7-style MCP System - Docker Compose (Production/Home Server Hardened)
|
||||
services:
|
||||
# Qdrant Vector Database Service
|
||||
qdrant:
|
||||
image: qdrant/qdrant:latest
|
||||
container_name: qdrant
|
||||
ports:
|
||||
- "${QDRANT_PORT:-6333}:6333"
|
||||
volumes:
|
||||
- ./data/qdrant:/qdrant/storage
|
||||
environment:
|
||||
- QDRANT__MEMORY_MAPPED_INDEXES=1
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
networks:
|
||||
- context7-network
|
||||
|
||||
# Docs API Backend Service (FastAPI)
docs-api:
  build:
    context: ./backend
    dockerfile: Dockerfile
  container_name: docs-api
  ports:
    # HOST_PORT only changes the host side of the mapping; the container
    # side is fixed at 8787 (docs-mcp and webui reach it at docs-api:8787).
    - "${HOST_PORT:-8787}:8787"
  environment:
    - VECTOR_STORE_HOST=qdrant
    - VECTOR_STORE_PORT=6333
    - DOCS_PATH=/docs
    - DB_PATH=/data/db.sqlite
    - LOG_LEVEL=INFO
    - API_KEY_DOCS_API=${DOCS_API_KEY:-}
  volumes:
    - ./docs:/docs
    - ./data:/data
  depends_on:
    - qdrant
  # Same restart policy and log rotation as the qdrant and docs-mcp services.
  restart: unless-stopped
  logging:
    driver: json-file
    options:
      max-size: "10m"
      max-file: "3"
  networks:
    - context7-network
  healthcheck:
    # The check runs inside the container, so it must target the container
    # port (8787), not the host-side HOST_PORT mapping. docs-mcp and webui
    # wait on this check via "condition: service_healthy".
    # NOTE(review): requires curl in the backend image - confirm in backend/Dockerfile.
    test: ["CMD", "curl", "-f", "http://localhost:8787/health"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 15s
|
||||
|
||||
# MCP Server Service (FastMCP via streamable HTTP)
|
||||
docs-mcp:
|
||||
build:
|
||||
context: ./mcp-server
|
||||
dockerfile: Dockerfile
|
||||
container_name: docs-mcp
|
||||
ports:
|
||||
- "${MCP_HOST_PORT:-8788}:8788"
|
||||
environment:
|
||||
- DOCS_API_URL=http://docs-api:8787
|
||||
- MCP_API_KEY=${MCP_API_KEY:-}
|
||||
volumes:
|
||||
- ./docs:/docs:ro
|
||||
- ./data:/data
|
||||
restart: unless-stopped
|
||||
logging:
|
||||
driver: json-file
|
||||
options:
|
||||
max-size: "10m"
|
||||
max-file: "3"
|
||||
depends_on:
|
||||
docs-api:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- context7-network
|
||||
|
||||
# WebUI Service (HTML interface)
|
||||
webui:
|
||||
build:
|
||||
context: ./webui
|
||||
dockerfile: Dockerfile
|
||||
container_name: webui
|
||||
ports:
|
||||
- "${WEBUI_PORT:-8790}:8790"
|
||||
environment:
|
||||
- DOCS_API_URL=http://docs-api:8787
|
||||
- WEBUI_API_KEY=${DOCS_WEBUI_API_KEY:-}
|
||||
volumes:
|
||||
- ./docs:/docs
|
||||
- ./data:/data
|
||||
depends_on:
|
||||
docs-api:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
- context7-network
|
||||
|
||||
networks:
|
||||
context7-network:
|
||||
driver: bridge
|
||||
@@ -0,0 +1,143 @@
|
||||
# Getting Started
|
||||
|
||||
Welcome to the Context7-style MCP System documentation!
|
||||
|
||||
## Overview
|
||||
|
||||
This system provides a self-hosted, local context7-compatible MCP (Model Context Protocol) solution using Docker containers. It enables you to:
|
||||
|
||||
- Ingest and index your own documents
|
||||
- Perform semantic search on vector embeddings
|
||||
- Integrate with MCP-enabled IDEs for intelligent tool interactions
|
||||
|
||||
## Architecture
|
||||
|
||||
```
┌─────────────┐     ┌─────────────┐     ┌─────────────┐
│   Client    │────▶│  docs-api   │◀────│  docs-mcp   │
│ (IDE/Tool)  │     │  (FastAPI)  │     │ (MCP Server)│
└─────────────┘     └─────────────┘     └─────────────┘
                           │
                           ▼
                    ┌─────────────┐
                    │   Qdrant    │
                    │ (Vector DB) │
                    └─────────────┘
```
|
||||
|
||||
## Quick Start
|
||||
|
||||
### 1. Start All Services
|
||||
|
||||
```bash
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
### 2. Verify Services Are Running
|
||||
|
||||
```bash
|
||||
docker compose ps
|
||||
```
|
||||
|
||||
You should see all four services in "Up" status:
- `qdrant` (port 6333)
- `docs-api` (port 8787)
- `docs-mcp` (port 8788)
- `webui` (port 8790)
|
||||
|
||||
### 3. Access the API
|
||||
|
||||
Open your browser and navigate to:
|
||||
```
|
||||
http://localhost:8787/docs
|
||||
```
|
||||
|
||||
You should see the FastAPI documentation page.
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### Health Check
|
||||
|
||||
```bash
|
||||
curl http://localhost:8787/health
|
||||
```
|
||||
|
||||
Expected response:
|
||||
```json
|
||||
{"status":"ok"}
|
||||
```
|
||||
|
||||
### Ingest Document
|
||||
|
||||
Upload a text document to be processed and indexed:
|
||||
|
||||
```bash
|
||||
curl -X POST "http://localhost:8787/api/v1/ingest" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"content": "This is sample document content for semantic search testing.",
|
||||
"metadata": {"source": "example", "type": "text"}
|
||||
}'
|
||||
```
|
||||
|
||||
### Search Documents
|
||||
|
||||
Perform a similarity search on ingested documents:
|
||||
|
||||
```bash
|
||||
curl "http://localhost:8787/api/v1/search" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"query": "semantic search",
|
||||
"top_k": 5,
|
||||
"threshold": 0.7
|
||||
}'
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
||||
Copy the example environment file and customize:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Key variables:
|
||||
- `VECTOR_STORE_HOST`: Internal hostname of Qdrant (default: qdrant)
|
||||
- `VECTOR_STORE_PORT`: Qdrant port (default: 6333)
|
||||
|
||||
### Docker Compose
|
||||
|
||||
All services are defined in `docker-compose.yml`. Key networking details:
|
||||
- Services communicate internally via `context7-network`
|
||||
- Qdrant uses service name `qdrant` for internal connections
|
||||
- Vector store is exposed externally on port 6333 for debugging
|
||||
|
||||
## Next Steps
|
||||
|
||||
1. Review the project structure to understand component roles
|
||||
2. Customize the backend API endpoints in `backend/app/main.py`
|
||||
3. Implement MCP tools in `mcp-server/server.py`
|
||||
4. Add more example documents in the `docs/` directory
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check Logs
|
||||
|
||||
```bash
|
||||
docker compose logs -f docs-api
|
||||
docker compose logs -f qdrant
|
||||
docker compose logs -f docs-mcp
|
||||
```
|
||||
|
||||
### Reset All Services
|
||||
|
||||
```bash
|
||||
docker compose down -v
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
## Support
|
||||
|
||||
For issues, refer to the `README.md` or check the Qdrant documentation.
|
||||
@@ -0,0 +1,27 @@
|
||||
# Git Repository Sources Configuration
|
||||
# Each source defines a library to ingest from a git repository
|
||||
# Paths are relative to the cloned repo root
|
||||
|
||||
sources:
|
||||
- library_id: foundryvtt
|
||||
name: Foundry VTT
|
||||
description: Foundry Virtual Tabletop system documentation
|
||||
repo_url: https://github.com/foundryvtt/foundryvtt.git
|
||||
branch: main
|
||||
include_paths:
|
||||
- docs
|
||||
- src
|
||||
exclude_paths:
|
||||
- node_modules
|
||||
- .git
|
||||
|
||||
# Add more sources here following the same structure:
|
||||
# - library_id: my-repo
|
||||
# name: My Repository
|
||||
# description: My documentation
|
||||
# repo_url: https://github.com/user/my-repo.git
|
||||
# branch: main
|
||||
# include_paths:
|
||||
# - docs
|
||||
# exclude_paths:
|
||||
# - node_modules
|
||||
@@ -0,0 +1,30 @@
|
||||
# MCP Server Service
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Python dependencies cleanly
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy server code
|
||||
COPY server.py .
|
||||
|
||||
# Mount volumes at these paths (configured in docker-compose)
|
||||
# ./docs -> /docs
|
||||
# ./data -> /data
|
||||
# /data holds the SQLite database file (db.sqlite)
|
||||
|
||||
# Expose MCP port
|
||||
EXPOSE 8788
|
||||
|
||||
# Healthcheck
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD python -c "import socket; s=socket.create_connection(('127.0.0.1', 8788), 5); s.close()"
|
||||
|
||||
# Run the MCP server using streamable HTTP transport
|
||||
CMD ["python", "server.py"]
|
||||
@@ -0,0 +1,21 @@
|
||||
# MCP Server Dependencies
|
||||
fastmcp==0.6.0
|
||||
httpx==0.26.0
|
||||
|
||||
# For Qdrant vector store operations
|
||||
qdrant-client==1.7.0
|
||||
|
||||
# Text processing for token estimation
|
||||
tiktoken==0.7.0
|
||||
|
||||
# Local embeddings using FastEmbed
|
||||
fastembed==0.3.0
|
||||
|
||||
# PDF support for document ingestion
|
||||
pypdf==5.0.0
|
||||
|
||||
# Environment variables loader
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# YAML parser for sources configuration
|
||||
PyYAML==6.0.1
|
||||
@@ -0,0 +1,337 @@
|
||||
# MCP Server for local-context7 Docs API with Git Sources Support
|
||||
"""
|
||||
MCP server providing Context7-style tools for interacting with the local docs API.
|
||||
|
||||
This server exposes 6 tools:
|
||||
- resolve-library-id: Find libraries matching a name (with /local/ prefix)
|
||||
- get-library-docs: Retrieve documentation from a library
|
||||
- list-libraries: List all discovered libraries
|
||||
- search-docs: Semantic search across documents
|
||||
- refresh-library: Re-ingest documents for a library or all libraries
|
||||
- sync-sources: Sync git repositories from configuration file
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
httpx = None
|
||||
|
||||
try:
    from fastmcp import FastMCP
except ImportError:
    class _Tool:
        """Record of a registered tool; stores only the function name."""

        def __init__(self, name: str):
            self.name = name

    class FastMCP:
        """Import-time fallback used by tests when fastmcp is not installed."""

        def __init__(self, *args, **kwargs):
            # Constructor arguments are accepted and ignored.
            self.tools = []

        def tool(self):
            """Return a decorator that records the function and returns it unchanged."""
            def register(fn):
                self.tools.append(_Tool(fn.__name__))
                return fn

            return register

        def run(self, *args, **kwargs):
            """Always fails: the stand-in cannot serve requests."""
            raise RuntimeError("fastmcp is not installed")
|
||||
|
||||
|
||||
# Environment configuration
# Base URL of the docs API. Python does not expand shell-style
# "${VAR:-default}" placeholders inside string literals, so the default port
# is spelled out literally (8787, the port docker-compose uses for docs-api).
DOCS_API_URL = os.getenv("DOCS_API_URL", "http://docs-api:8787")
# API key read from the environment; empty string when unset.
MCP_API_KEY = os.getenv("MCP_API_KEY", "")
|
||||
|
||||
|
||||
def strip_local_prefix(lib_id: str) -> str:
    """Return ``lib_id`` without a leading "/local/", for use in API calls.

    Only a prefix at the very start of the string is removed; IDs that do not
    start with "/local/" are returned unchanged.
    """
    prefix = "/local/"
    return lib_id[len(prefix):] if lib_id.startswith(prefix) else lib_id
|
||||
|
||||
|
||||
# Create FastMCP instance with tools
|
||||
mcp = FastMCP("context7-docs", root_path="/app")
|
||||
|
||||
|
||||
async def _docs_api_json(method: str, path: str, **request_kwargs: Any) -> Any:
    """Send one request to the docs API and return the decoded JSON body.

    Shared by all tools below so the client setup, status check and JSON
    decoding live in one place.

    Args:
        method: HTTP method, e.g. "GET" or "POST".
        path: Path relative to ``DOCS_API_URL``.
        **request_kwargs: Passed through to ``AsyncClient.request``
            (``params=...``, ``json=...``).

    Raises:
        RuntimeError: If httpx could not be imported.
        Exception: If the API answers with any status other than 200; the
            message carries the status code and response text.
    """
    if httpx is None:
        raise RuntimeError("httpx is not installed")
    async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
        response = await client.request(method, path, **request_kwargs)
        if response.status_code != 200:
            raise Exception(f"API error: {response.status_code} - {response.text}")
        return response.json()


@mcp.tool()
async def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Resolve a library name to Context7-style candidates.

    Queries GET /libraries/search on the docs API and returns its "matches"
    list unchanged.

    Args:
        library_name: The library name to search for (e.g., "foundryvtt")

    Returns:
        List of candidate libraries as reported by the docs API, e.g.:
        [
            {
                "id": "/local/foundryvtt",
                "name": "Foundry VTT",
                "description": "Fantasy tabletop virtual table...",
                "source": "local"
            },
            ...
        ]
        An empty list is returned when the request fails; the error is
        printed to stdout.
    """
    try:
        data = await _docs_api_json("GET", "/libraries/search", params={"q": library_name})
        return data.get("matches", [])
    except Exception as e:
        print(f"Error resolving library '{library_name}': {e}")
        return []


@mcp.tool()
async def get_library_docs(context7_compatible_library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> str:
    """
    Retrieve documentation content from a library.

    Args:
        context7_compatible_library_id: The Context7-style library ID; a leading /local/ prefix is stripped before the API call
        topic: Optional topic to search within the library (omitted from the request when empty)
        tokens: Maximum tokens to include in response (default: 8000)

    Returns:
        The "content" field of the API response, or a string starting with
        "Error retrieving documentation:" when the request fails.

    Example:
        get_library_docs("/local/foundryvtt", topic="hooks", tokens=8000)
    """
    try:
        library_id = strip_local_prefix(context7_compatible_library_id)
        params = {"tokens": tokens}
        if topic:
            params["topic"] = topic
        data = await _docs_api_json("GET", f"/libraries/{library_id}/docs", params=params)
        return data.get("content", "")
    except Exception as e:
        print(f"Error getting library docs for '{context7_compatible_library_id}': {e}")
        return f"Error retrieving documentation: {str(e)}"


@mcp.tool()
async def list_libraries() -> List[Dict[str, Any]]:
    """
    List all discovered libraries in the system.

    Returns:
        The "libraries" list from GET /libraries, e.g.:
        [
            {
                "id": "/local/foundryvtt",
                "name": "Foundry VTT",
                "description": "...",
                "source": "local"
            },
            ...
        ]
        An empty list is returned when the request fails; the error is
        printed to stdout.
    """
    try:
        data = await _docs_api_json("GET", "/libraries")
        return data.get("libraries", [])
    except Exception as e:
        print(f"Error listing libraries: {e}")
        return []


@mcp.tool()
async def search_docs(query: str, library_id: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Perform semantic search across documents.

    Args:
        query: The search query string
        library_id: Optional library ID filter (a leading /local/ prefix is stripped). If None, no filter is sent.
        limit: Maximum number of results to return (default: 10)

    Returns:
        The "results" list from POST /search, e.g.:
        [
            {
                "id": "...",
                "score": 0.123,
                "library_id": "...",
                "path": "...",
                "title": "...",
                "chunk_index": 0
            },
            ...
        ]
        An empty list is returned when the request fails; the error is
        printed to stdout.
    """
    try:
        payload = {"query": query, "limit": limit}
        if library_id:
            payload["library_id"] = strip_local_prefix(library_id)
        data = await _docs_api_json("POST", "/search", json=payload)
        return data.get("results", [])
    except Exception as e:
        print(f"Error searching for query '{query}': {e}")
        return []


@mcp.tool()
async def refresh_library(library_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Re-ingest documents.

    Args:
        library_id: Used only in the error log message. It is not sent to
            the docs API: every call triggers POST /ingest/all, which
            re-ingests all libraries.

    Returns:
        Ingestion result summary on success:
        {
            "success": true,
            "total_libraries": 2,
            "successful": 2,
            "failed": 0,
            "total_chunks": 150
        }
        On failure: {"success": false, "error": "<message>"}
    """
    # NOTE(review): per-library re-ingest is not implemented here; the
    # matching docs-API endpoint is not visible from this file - confirm
    # its path before wiring library_id through.
    try:
        data = await _docs_api_json("POST", "/ingest/all")
        return {
            "success": True,
            "total_libraries": data.get("total_libraries", 0),
            "successful": data.get("successful", 0),
            "failed": data.get("failed", 0),
            "total_chunks": data.get("total_chunks", 0)
        }
    except Exception as e:
        print(f"Error refreshing library '{library_id or 'all'}': {e}")
        return {"success": False, "error": str(e)}


@mcp.tool()
async def sync_sources(override: bool = False) -> Dict[str, Any]:
    """
    Sync all git repositories defined in the sources configuration file.

    Sends POST /sources/sync to the docs API. Per the API contract described
    for this tool, each configured repository is cloned/updated and matching
    files are ingested into the vector store; existing repos are updated to
    latest state unless override is true (clears existing repo before cloning).

    Args:
        override: If true, clears existing repo before cloning. Default: false.
            The flag is only included in the request body when it is true.

    Returns:
        Sync result summary on success:
        {
            "success": true,
            "total_sources": 2,
            "successful": 1,
            "failed": 1,
            "results": [
                {
                    "library_id": "foundryvtt",
                    "success": true,
                    "message": "...",
                    "files_discovered": 450,
                    "chunks_created": 2340,
                    "vectors_added": 2340
                },
                ...
            ]
        }
        On failure: {"success": false, "error": "<message>"}
    """
    try:
        payload = {"override": override} if override else {}
        data = await _docs_api_json("POST", "/sources/sync", json=payload)
        return {
            "success": True,
            "total_sources": data.get("total_sources", 0),
            "successful": data.get("successful", 0),
            "failed": data.get("failed", 0),
            "results": data.get("results", [])
        }
    except Exception as e:
        print(f"Error syncing git sources: {e}")
        return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Bind address and port come from MCP_HOST / MCP_PORT, defaulting to
    # all interfaces on port 8788.
    host = os.getenv("MCP_HOST", "0.0.0.0")
    port = int(os.getenv("MCP_PORT", 8788))

    # Startup banner: address followed by the tool list.
    banner = (
        f"Starting MCP server on http://{host}:{port}",
        "Tools available:",
        " - resolve-library-id(libraryName)",
        " - get-library-docs(context7_compatible_library_id, topic=None, tokens=8000)",
        " - list-libraries()",
        " - search_docs(query, library_id=None, limit=10)",
        " - refresh_library(library_id=None)",
        " - sync_sources(override=false)",
    )
    for line in banner:
        print(line)

    if not hasattr(mcp, "run"):
        # Taken only when the mcp object has no run() method: serve it
        # through uvicorn instead.
        import uvicorn

        uvicorn.run(mcp, host=host, port=port)
    else:
        # Serve over the streamable HTTP transport.
        mcp.run(transport="streamable-http", host=host, port=port)
|
||||
@@ -0,0 +1 @@
|
||||
"""Compatibility package for importing the mcp-server source tree in tests."""
|
||||
@@ -0,0 +1,13 @@
|
||||
"""Import wrapper for ../mcp-server/server.py."""
|
||||
import importlib.util
from pathlib import Path

# "mcp-server" is not a valid Python package name (hyphen), so the module is
# loaded directly from its file path.
_source = Path(__file__).resolve().parents[1] / "mcp-server" / "server.py"
_spec = importlib.util.spec_from_file_location("_local_context7_mcp_server", _source)

# Validate the spec BEFORE it is used: module_from_spec() would otherwise fail
# with an unrelated AttributeError, and a bare assert is removed under -O.
if _spec is None or _spec.loader is None:
    raise ImportError(f"Cannot load MCP server module from {_source}")

_module = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_module)

# Re-export everything except dunder names so tests can import the tools
# from this package as if they were defined here.
for _name, _value in vars(_module).items():
    if not _name.startswith("__"):
        globals()[_name] = _value
|
||||
+35
@@ -0,0 +1,35 @@
|
||||
# Pytest configuration for local-context7 tests

[pytest]
# Directory searched for tests
testpaths = tests

# Pattern of test files to discover
python_files = test_*.py

# Pattern of test functions to run
python_functions = test_*

# Markers for test categorization
markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
    integration: marks tests as integration tests requiring external services
    unit: marks tests as pure unit tests

# Classic console output style (no progress indicator)
console_output_style = classic

# Test execution options
# pytest-asyncio: treat async test functions as asyncio tests automatically
asyncio_mode = auto

# Logging configuration
log_cli = true
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)s] %(name)s: %(message)s
log_cli_date_format = %Y-%m-%d %H:%M:%S

# Ignore specific warnings during tests
filterwarnings =
    ignore::DeprecationWarning
    ignore::PendingDeprecationWarning
|
||||
@@ -0,0 +1,2 @@
|
||||
# Tests package for local-context7
|
||||
# Contains unit tests for chunking, database operations, search, and MCP server modules
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
Pytest configuration and fixtures for local-context7 tests.
|
||||
|
||||
This module provides:
|
||||
- Mocks for external dependencies (Qdrant, FastEmbed)
|
||||
- Database fixtures for SQLite operations
|
||||
- Common test utilities
|
||||
"""
|
||||
from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from backend.app.db import init_db, upsert_library, insert_document_chunk, get_chunks_for_library, list_libraries, clear_library_documents, get_connection
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture(scope="function")
def test_database():
    """
    Provide a freshly initialised database for one test.

    Removes backend/data/test_db.sqlite (resolved relative to this file's
    parent directory) if it is left over from an earlier run, calls
    init_db(), and removes the file again after the test.

    NOTE(review): init_db() is called without arguments, so whether it
    creates this particular file depends on how backend.app.db resolves its
    database path - confirm.

    Yields:
        None; the fixture is used for its setup/teardown side effects.
    """
    scratch_db = Path(__file__).parent.parent / "backend" / "data" / "test_db.sqlite"

    def discard_scratch_db():
        # Delete the scratch file only when it is present.
        if scratch_db.exists():
            scratch_db.unlink()

    # Make sure the data directory is there, then start from a clean slate.
    scratch_db.parent.mkdir(parents=True, exist_ok=True)
    discard_scratch_db()

    # Create the tables; abort the test early if initialisation failed.
    result = init_db()
    assert result["success"], f"Failed to initialize test DB: {result.get('error')}"

    yield

    # Teardown: leave no database file behind.
    discard_scratch_db()
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def sample_text():
|
||||
"""Sample text for chunking tests."""
|
||||
return """# Introduction
|
||||
|
||||
This is the introduction section.
|
||||
|
||||
## Background
|
||||
|
||||
Background information goes here to make this longer and test chunking.
|
||||
|
||||
This paragraph has more content about the background topic.
|
||||
|
||||
### Details
|
||||
|
||||
Specific details about the background are provided in this subsection.
|
||||
|
||||
More details follow here to ensure we have enough text to properly test heading preservation.
|
||||
|
||||
## Conclusion
|
||||
|
||||
The conclusion wraps up everything nicely."""
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MOCKS
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def mock_embedding_model():
    """
    Stand-in for the embedding model that needs no model download.

    The returned MagicMock has an ``embed(texts)`` callable producing one
    384-element vector of zeros per input text.
    """
    stub = MagicMock()
    # One zero vector per text, independent of the text's content.
    stub.embed = lambda texts: [[0.0] * 384 for _ in texts]
    return stub
|
||||
|
||||
|
||||
@pytest.fixture
def mock_qdrant_client():
    """
    Stand-in for the Qdrant client so search logic can run without a server.

    ``search(...)`` always returns an empty list (no hits) and
    ``delete_collection(...)`` is a MagicMock returning True.
    """
    def _no_hits(collection_name, query_vector, limit=10, search_filter=None):
        # Every search behaves as if nothing matched.
        return []

    client = MagicMock()
    client.delete_collection = MagicMock(return_value=True)
    client.search = _no_hits
    return client
|
||||
|
||||
|
||||
@pytest.fixture
def mock_embedding_model_batch():
    """
    Batch embedding model mock that returns fake vectors derived from the text.

    ``embed(texts)`` returns one 384-element vector per text. Each vector is
    seeded from ``hash(text)``, so equal texts get equal vectors and different
    texts usually get different ones.

    Note: Python randomises ``hash()`` of strings per interpreter process
    (unless PYTHONHASHSEED is fixed), so the vectors are stable within one
    test run but not across runs.
    """
    def hash_text(text):
        # Seed in [0, 1) derived from the text; each component adds a small
        # per-index offset so the vector is not constant.
        text_hash = hash(text) % 1000000
        return [(text_hash / 1000000 + (i * 0.001)) for i in range(384)]

    mock_model = MagicMock()
    mock_model.embed = lambda texts: [hash_text(t) for t in texts]

    return mock_model
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SETUP TEARDOWN FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture(autouse=True)
def clear_test_database(test_database):
    """
    Apply the ``test_database`` fixture to every test automatically.

    This fixture does no work of its own: requesting ``test_database`` is
    what triggers the database setup before the test and the file cleanup
    after it.
    """
|
||||
|
||||
|
||||
@pytest.fixture
def empty_vector():
    """A 384-element embedding vector made up entirely of 0.0."""
    return [0.0 for _ in range(384)]
|
||||
|
||||
|
||||
@pytest.fixture
def fake_embeddings(sample_text):
    """
    One fake 384-element vector per non-blank paragraph of ``sample_text``.

    Paragraphs are the pieces between blank lines. All components of a
    vector share a single value derived from the paragraph's hash and length.
    """
    def hash_text(text):
        component = (hash(text) + len(text)) % 1000 / 10000
        return [component] * 384

    vectors = []
    for paragraph in sample_text.split("\n\n"):
        if paragraph.strip():
            vectors.append(hash_text(paragraph))
    return vectors
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# UTILITY FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def temp_file(tmp_path):
    """Return the path ``tmp_path / "test.txt"``; the file itself is not created here."""
    return tmp_path / "test.txt"
|
||||
|
||||
|
||||
# Register custom marker for slow tests (if needed)
|
||||
def pytest_configure(config):
    """Add the ``slow`` marker description to the ``markers`` ini setting of ``config``."""
    slow_marker = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    config.addinivalue_line("markers", slow_marker)
|
||||
|
||||
|
||||
def pytest_runtest_setup(item):
    """Per-test setup hook; currently a no-op placeholder that ignores ``item``."""
    return None
|
||||
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
Tests for backend/app/chunking.py
|
||||
|
||||
These are pure unit tests that don't require any external dependencies.
|
||||
They test text chunking logic, token estimation, and heading-aware splitting.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
|
||||
class TestEstimateTokens:
    """Tests for the estimate_tokens() function (4 characters per token, rounded down)."""

    def test_empty_text(self):
        """Empty text should return 0 tokens."""
        from backend.app.chunking import estimate_tokens
        assert estimate_tokens("") == 0

    def test_single_char(self):
        """A single character rounds down to 0 tokens."""
        from backend.app.chunking import estimate_tokens
        assert estimate_tokens("a") == 0  # 1 char // 4 = 0 tokens

    def test_4_chars(self):
        """4 characters = 1 token."""
        from backend.app.chunking import estimate_tokens
        assert estimate_tokens("abcd") == 1

    def test_400_chars(self):
        """400 characters = 100 tokens."""
        from backend.app.chunking import estimate_tokens
        text = "a" * 400
        assert estimate_tokens(text) == 100

    def test_whitespace_only(self):
        """Whitespace is counted like any other character."""
        from backend.app.chunking import estimate_tokens
        # Built with repetition so the 3-character length is explicit.
        assert estimate_tokens(" " * 3) == 0  # 3 chars // 4 = 0
|
||||
|
||||
|
||||
class TestChunkText:
    """Tests for the chunk_text() function."""

    def test_empty_input(self):
        """Empty input should return empty list."""
        from backend.app.chunking import chunk_text
        assert chunk_text("") == []

    def test_small_text_single_chunk(self):
        """Small text under limit should be single chunk."""
        from backend.app.chunking import chunk_text
        small = "This is a very short text that should be returned as a single chunk."
        chunks = chunk_text(small, max_tokens=500)
        assert len(chunks) == 1
        assert chunks[0] == small

    def test_exact_token_limit(self):
        """Text exactly at limit should be one chunk."""
        from backend.app.chunking import chunk_text, estimate_tokens
        # Create text that is exactly 500 tokens (2000 chars)
        text = "a" * 2000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) == 1
        assert estimate_tokens(chunks[0]) == 500

    def test_over_limit_splits(self):
        """Text over limit should be split into multiple chunks."""
        from backend.app.chunking import chunk_text
        # Create text that is 2500 tokens (10000 chars)
        text = "b" * 10000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) >= 2  # Should be split

    def test_preserves_content(self):
        """All content should be preserved in chunks (combined)."""
        from backend.app.chunking import chunk_text
        original = "Hello world! This is a test of chunking functionality."
        chunks = chunk_text(original, max_tokens=100)
        combined = "".join(chunks)
        assert len(chunks) == 1
        assert combined == original

    def test_headings_split(self):
        """Heading-aware splitting should preserve heading boundaries."""
        from backend.app.chunking import chunk_text
        markdown_with_headings = (
            "# Introduction\n"
            "\n"
            "This is the introduction section.\n"
            "\n"
            "## Background\n"
            "\n"
            "Background information goes here."
        )

        # With very small token limit, headings should cause splits
        chunks = chunk_text(markdown_with_headings, max_tokens=20)
        heading_chunks = [c for c in chunks if c.strip().startswith('#')]
        assert len(heading_chunks) >= 1  # At least some heading preserved

    def test_paragraph_split(self):
        """Paragraph splitting should respect paragraph boundaries."""
        from backend.app.chunking import chunk_text
        # Each paragraph is 50 chars (12 estimated tokens at 4 chars/token), so
        # one paragraph fits under the limit but any two together do not; the
        # limit therefore really does force a split at every paragraph break.
        paragraphs = [
            ("First paragraph. " * 3).strip(),
            ("Second paragraph " * 3).strip(),
            ("Third paragraph. " * 3).strip(),
        ]
        text = "\n\n".join(paragraphs)
        chunks = chunk_text(text, max_tokens=15)  # Small limit forces splits
        assert len(chunks) >= 3  # At least as many paragraphs

    def test_no_empty_chunks(self):
        """Should not return empty chunks."""
        from backend.app.chunking import chunk_text
        text = "Hello world"
        chunks = chunk_text(text, max_tokens=10)
        for chunk in chunks:
            assert chunk.strip() != ""
|
||||
|
||||
|
||||
class TestTokenEstimationBoundaries:
    """Boundary checks for the character-count based token estimate."""

    def test_boundary_precision(self):
        """Lengths 4 through 7 estimate to one token; length 8 rolls over to two."""
        from backend.app.chunking import estimate_tokens

        # (sample, expected tokens) pairs walking across the 4-char boundary.
        expectations = [
            ("abcd", 1),
            ("abcde", 1),
            ("abcdef", 1),
            ("abcdefg", 1),
            ("abcdefgh", 2),
        ]
        for sample, expected in expectations:
            assert estimate_tokens(sample) == expected

    def test_various_languages_chars(self):
        """The estimate is driven by character count for non-ASCII text as well."""
        from backend.app.chunking import estimate_tokens

        # Four Chinese characters, each counted as one character.
        assert estimate_tokens("你好世界") == 1

        # Text containing an emoji: only the result type is checked, because the
        # count may vary by implementation.
        emoji_estimate = estimate_tokens("Hello 🎉 world")
        assert isinstance(emoji_estimate, int)
|
||||
|
||||
|
||||
class TestChunkOverlapBehavior:
    """Checks on how much text neighbouring chunks share."""

    def test_overlap_not_exceeded(self):
        """When the text is split, the first chunk is at most half of the combined chunk length."""
        from backend.app.chunking import chunk_text

        sentence = "The quick brown fox jumps over the lazy dog. "
        pieces = chunk_text(sentence * 10, max_tokens=30, overlap_tokens=5)

        # Nothing to compare unless the text was actually split.
        if len(pieces) <= 1:
            return

        combined_length = sum(len(piece) for piece in pieces)
        assert len(pieces[0]) <= combined_length // 2  # Rough check
|
||||
|
||||
|
||||
class TestChunkEdgeCases:
    """Edge cases and error conditions for chunk_text()."""

    def test_whitespace_only_text(self):
        """Whitespace-only input must not crash and must yield a list."""
        from backend.app.chunking import chunk_text

        result = chunk_text(" \n\n ", max_tokens=100)
        # The list may be empty or hold a whitespace chunk; only the type is pinned.
        assert isinstance(result, list)

    def test_very_long_paragraph(self):
        """A long paragraph with no blank lines is still split."""
        from backend.app.chunking import chunk_text

        sentence = "The quick brown fox jumps over the lazy dog. "
        pieces = chunk_text(sentence * 100, max_tokens=50)
        assert len(pieces) > 1  # Should be split

    def test_none_input_raises(self):
        """Passing None must raise TypeError or AssertionError."""
        from backend.app.chunking import chunk_text

        accepted_errors = (TypeError, AssertionError)
        with pytest.raises(accepted_errors):
            chunk_text(None, max_tokens=100)

    def test_unicode_text(self):
        """Short mixed-script text comes back as a single chunk."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text("Hello 世界 مرحبا 🎉", max_tokens=50)
        assert len(pieces) == 1  # Small enough to be single chunk
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SAMPLE TEXT FIXTURE
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def heading_markdown():
    """Return sample markdown with H1/H2/H3 headings, each followed by one paragraph."""
    # Headings and paragraphs alternate; a single blank line separates each pair.
    sections = [
        "# Introduction",
        "This is the introduction section. It contains some introductory text here.",
        "## Background",
        "Background information goes here to make this longer and test chunking. "
        "This paragraph has more content about the background topic. It provides context.",
        "### Details",
        "Specific details about the background are provided in this subsection. "
        "More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    ]
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
class TestHeadingPreservation:
    """Heading-aware chunking checks driven by the heading_markdown fixture."""

    def test_headings_in_separate_chunks(self, heading_markdown):
        """With a small limit, at least one chunk starts with a heading marker."""
        from backend.app.chunking import chunk_text

        # Very small token limit forces splits at headings
        pieces = chunk_text(heading_markdown, max_tokens=30)

        starts_with_heading = []
        for piece in pieces:
            if piece.strip().startswith('#'):
                starts_with_heading.append(piece)
        assert len(starts_with_heading) >= 1

    def test_all_content_present(self, heading_markdown):
        """The section titles survive chunking and re-joining."""
        from backend.app.chunking import chunk_text

        rejoined = "".join(chunk_text(heading_markdown, max_tokens=500))

        # Content shouldn't be truncated or corrupted
        assert "Introduction" in rejoined
        assert "Background" in rejoined
        assert "Conclusion" in rejoined
|
||||
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Tests for backend/app/db.py
|
||||
|
||||
These tests verify SQLite database operations including:
|
||||
- Table creation (init_db)
|
||||
- Library CRUD operations
|
||||
- Document chunk storage and retrieval
|
||||
- Full-text search functionality
|
||||
|
||||
All tests use a temporary test database file.
|
||||
"""
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class TestInitDatabase:
    """Tests for init_db() - table creation."""

    def test_init_db_creates_tables(self, test_database):
        """A table whose name contains "libraries" should exist after init."""
        from contextlib import closing
        from backend.app.db import get_connection

        # closing() guarantees the connection is released even if the query raises.
        with closing(get_connection()) as conn:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
            tables = [row[0] for row in cursor.fetchall()]

        # The case-insensitive substring match also covers an exact "libraries" entry.
        assert any("libraries" in t.lower() for t in tables)

    def test_init_db_returns_success(self, test_database):
        """init_db should return success indicator."""
        from backend.app.db import init_db

        result = init_db()
        assert result["success"] is True
|
||||
|
||||
|
||||
class TestLibraryOperations:
    """Tests for library CRUD operations."""

    def test_upsert_library_new(self, test_database):
        """Upsert should create new library."""
        from backend.app.db import upsert_library

        result = upsert_library(
            library_id="/local/testlib",
            name="Test Library",
            description="A test library for unit tests"
        )

        assert result["success"] is True
        assert result["id"] == "/local/testlib"

    def test_upsert_library_update(self, test_database):
        """Upsert should update existing library."""
        from backend.app.db import upsert_library

        # Insert first library
        upsert_library(
            library_id="/local/upsertlib",
            name="Original Name",
            description="Original description"
        )

        # Update it
        result = upsert_library(
            library_id="/local/upsertlib",
            name="Updated Name",
            description="Updated description"
        )

        assert result["success"] is True

    def test_upsert_library_id_normalization(self, test_database):
        """Upsert should succeed for IDs with and without the /local/ prefix."""
        from backend.app.db import upsert_library

        # Test various ID formats
        test_ids = [
            "/local/foundryvtt",
            "foundryvtt",
            "/local/mydocs",
        ]

        for lib_id in test_ids:
            result = upsert_library(library_id=lib_id, name="Test", description="Desc")
            # Only the success flag is checked; nothing is read back here.
            assert result["success"] is True

    def test_list_libraries(self, test_database):
        """list_libraries should return list of libraries."""
        from backend.app.db import upsert_library, list_libraries

        # Create some libraries
        for i in range(3):
            upsert_library(
                library_id=f"/local/lib{i}",
                name=f"Library {i}",
                description=f"Description {i}"
            )

        libs = list_libraries()
        assert isinstance(libs, list)
        assert len(libs) >= 3

    def test_search_libraries(self, test_database):
        """search_libraries should return a list for a name/description query."""
        from backend.app.db import upsert_library, search_libraries

        # Create libraries with searchable names
        upsert_library(library_id="/local/foo1", name="Foo Library", description="Bar baz")
        upsert_library(library_id="/local/foo2", name="Other Library", description="Different content")

        results = search_libraries("foo")
        assert isinstance(results, list)
|
||||
|
||||
|
||||
class TestDocumentChunkOperations:
    """Storage and retrieval checks for document chunks."""

    def test_insert_document_chunk_new(self, test_database):
        """Inserting a chunk under a fresh doc_id reports success."""
        from backend.app.db import insert_document_chunk

        chunk_fields = {
            "doc_id": "doc-1",
            "library_id": "/local/testlib",
            "path": "docs/example.md",
            "title": "Example Document",
            "content": "# Example\n\nThis is the content.",
            "chunk_index": 0,
            "token_estimate": 100,
        }
        outcome = insert_document_chunk(**chunk_fields)

        assert outcome["success"] is True

    def test_insert_document_chunk_update(self, test_database):
        """Inserting twice under the same doc_id reports success the second time."""
        from backend.app.db import insert_document_chunk

        shared = {"doc_id": "doc-update-test", "library_id": "/local/uplib"}

        # First version of the record.
        insert_document_chunk(
            path="old-path.md",
            title="Old Title",
            content="# Old\nContent here.",
            chunk_index=0,
            token_estimate=50,
            **shared
        )

        # Second write with the same doc_id and new field values.
        outcome = insert_document_chunk(
            path="new-path.md",
            title="New Title",
            content="# New\nUpdated content.",
            chunk_index=1,
            token_estimate=75,
            **shared
        )

        assert outcome["success"] is True

    def test_get_document_by_id(self, test_database):
        """A stored document can be fetched back by its ID."""
        from backend.app.db import insert_document_chunk, get_document_by_id

        stored_id = "unique-doc-id-12345"
        insert_document_chunk(
            doc_id=stored_id,
            library_id="/local/testlib",
            path="docs/test.md",
            title="Test Document",
            content="# Test\n\nTest content here.",
            chunk_index=None,
            token_estimate=200
        )

        fetched = get_document_by_id(stored_id)
        assert fetched is not None
        assert fetched["id"] == stored_id

    def test_get_chunks_for_library(self, test_database):
        """All chunks inserted for a library are returned for that library."""
        from backend.app.db import upsert_library, insert_document_chunk, get_chunks_for_library

        target_library = "/local/chunktest"
        upsert_library(library_id=target_library, name="Chunk Test", description="Test")

        # Three chunks, indexed 0..2.
        for index in range(3):
            insert_document_chunk(
                doc_id=f"chunk-{index}",
                library_id=target_library,
                path=f"path{index}.md",
                title=f"Section {index}",
                content=f"Content section {index}.",
                chunk_index=index,
                token_estimate=50
            )

        found = get_chunks_for_library(target_library)
        assert isinstance(found, list)
        assert len(found) >= 3

    def test_clear_library_documents(self, test_database):
        """Clearing a library leaves no chunks behind for it."""
        from backend.app.db import upsert_library, insert_document_chunk, clear_library_documents, get_chunks_for_library

        target_library = "/local/cleartest"
        upsert_library(library_id=target_library, name="Clear Test", description="Test")
        for index in range(5):
            insert_document_chunk(
                doc_id=f"clear-{index}",
                library_id=target_library,
                path=f"path{index}.md",
                content=f"Content {index}.",
                token_estimate=20
            )

        outcome = clear_library_documents(target_library)
        assert outcome["success"] is True

        # Nothing should remain for the cleared library.
        leftover = get_chunks_for_library(target_library)
        assert len(leftover) == 0
|
||||
|
||||
|
||||
class TestDatabaseEdgeCases:
    """Edge cases and error handling for the database helpers."""

    def test_empty_library_id(self, test_database):
        """Upserting with an empty ID must not raise; the result is not inspected."""
        from backend.app.db import upsert_library

        # Should not crash, though may not be a valid operation
        upsert_library(library_id="", name="Test", description="Desc")

    def test_special_characters_in_content(self, test_database):
        """Content containing quotes, angle brackets and ampersands is accepted."""
        from backend.app.db import insert_document_chunk

        tricky_content = "Hello \"world\" <tag /> & amp; 'apostrophe'"
        outcome = insert_document_chunk(
            doc_id="special-test",
            library_id="/local/speciallib",
            path="special.md",
            content=tricky_content,
            token_estimate=100
        )

        assert outcome["success"] is True

    def test_very_long_content(self, test_database):
        """A 5000-character body is accepted."""
        from backend.app.db import insert_document_chunk

        outcome = insert_document_chunk(
            doc_id="long-test",
            library_id="/local/longlib",
            path="long.md",
            content="a" * 5000,
            token_estimate=1000
        )

        assert outcome["success"] is True

    def test_none_description(self, test_database):
        """A library whose description is None is accepted."""
        from backend.app.db import upsert_library

        outcome = upsert_library(
            library_id="/local/nonedesc",
            name="No Description Lib",
            description=None
        )

        assert outcome["success"] is True
|
||||
|
||||
|
||||
class TestDatabaseInitialization:
    """Checks on the database state right after initialization."""

    def test_database_is_empty_after_init(self, test_database):
        """list_libraries returns a list right after init (its contents are not checked)."""
        from backend.app.db import list_libraries

        assert isinstance(list_libraries(), list)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def sample_doc():
    """Return keyword arguments describing one sample document chunk."""
    # Markdown body: an H1 section followed by an H2 "Installation" section.
    body = "\n\n".join([
        "# Getting Started",
        "Welcome to the guide. This is a sample document for testing.",
        "## Installation",
        "Install with pip.",
    ])
    return dict(
        doc_id="sample-doc-1",
        library_id="/local/samplelib",
        path="docs/guide.md",
        title="Getting Started Guide",
        content=body,
        chunk_index=0,
        token_estimate=500,
    )
|
||||
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
Tests for mcp-server/server.py
|
||||
|
||||
These are pure unit tests that don't require any external dependencies.
|
||||
They test:
|
||||
- The strip_local_prefix() function directly (no network)
|
||||
- MCP server tool definitions and structure
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestStripLocalPrefix:
    """Behaviour of strip_local_prefix() on a range of library IDs."""

    def test_strips_prefix_from_full_id(self):
        """A leading /local/ is removed from a full library ID."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/foundryvtt") == "foundryvtt"

    def test_preserves_id_without_prefix(self):
        """An ID without the prefix comes back unchanged."""
        from mcp_server.server import strip_local_prefix

        bare_id = "foundryvtt"
        assert strip_local_prefix(bare_id) == bare_id  # Should be unchanged

    def test_strips_from_multiple_local_prefixes(self):
        """Only the first of two stacked prefixes is removed."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local//local/foundryvtt")
        # Should only strip first occurrence
        assert stripped == "/local/foundryvtt"

    def test_empty_string(self):
        """The empty string stays empty."""
        from mcp_server.server import strip_local_prefix

        blank = ""
        assert strip_local_prefix(blank) == blank  # Should be unchanged

    def test_whitespace_only(self):
        """Whitespace-only input has no prefix to strip and is returned as-is."""
        from mcp_server.server import strip_local_prefix

        whitespace = " \t\n"
        assert strip_local_prefix(whitespace) == whitespace

    def test_case_sensitive_prefix(self):
        """Only the lowercase prefix matches."""
        from mcp_server.server import strip_local_prefix

        # Lowercase - should strip
        lowercase_result = strip_local_prefix("/local/test")
        assert lowercase_result == "test"

        # Uppercase - should NOT strip (not a match)
        uppercase_result = strip_local_prefix("/LOCAL/test")
        assert uppercase_result == "/LOCAL/test"  # Unchanged

    def test_partial_match_does_not_strip(self):
        """Look-alike prefixes are left untouched."""
        from mcp_server.server import strip_local_prefix

        # Neither "/local-docs/" nor "/localdocs/" is the exact "/local/" prefix.
        for near_miss in ("/local-docs/test", "/localdocs/test"):
            assert strip_local_prefix(near_miss) == near_miss

    def test_prefix_with_trailing_slash(self):
        """A trailing slash on the ID is kept after stripping."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/foundryvtt/") == "foundryvtt/"
|
||||
|
||||
|
||||
class TestMcpServerStructure:
    """Structural checks that do not start the MCP server."""

    def test_import_fastmcp(self):
        """FastMCP is importable; the test is skipped when the import fails."""
        try:
            from fastmcp import FastMCP  # noqa: F401 - the import itself is the check
        except ImportError as exc:
            reason = f"fastmcp not installed: {exc}"
            pytest.skip(reason)
|
||||
|
||||
|
||||
class TestMcpServerToolsExistence:
    """Tests to verify MCP server has expected tools defined."""

    @staticmethod
    def _registered_tool_names():
        """Return the names found on ``mcp.tools``; skip the test if that attribute is absent."""
        from mcp_server.server import mcp

        # Skip visibly instead of passing without having checked anything.
        if not hasattr(mcp, 'tools'):
            pytest.skip("mcp instance exposes no 'tools' attribute to inspect")
        return [t.name for t in mcp.tools]

    def test_mcp_instance_created(self):
        """The module-level MCP instance should exist."""
        from mcp_server.server import mcp

        assert mcp is not None

    def test_resolve_library_id_tool_exists(self):
        """resolve_library_id tool should be registered."""
        assert "resolve_library_id" in self._registered_tool_names()

    def test_get_library_docs_tool_exists(self):
        """get_library_docs tool should be registered."""
        assert "get_library_docs" in self._registered_tool_names()

    def test_list_libraries_tool_exists(self):
        """list_libraries tool should be registered."""
        assert "list_libraries" in self._registered_tool_names()

    def test_search_docs_tool_exists(self):
        """search_docs tool should be registered."""
        assert "search_docs" in self._registered_tool_names()

    def test_refresh_library_tool_exists(self):
        """refresh_library tool should be registered."""
        assert "refresh_library" in self._registered_tool_names()

    def test_sync_sources_tool_exists(self):
        """sync_sources tool should be registered."""
        assert "sync_sources" in self._registered_tool_names()
|
||||
|
||||
|
||||
class TestMcpServerStripPrefixIntegration:
    """Checks on strip_local_prefix as exposed by the MCP server module."""

    def test_resolve_library_id_calls_strip_prefix(self):
        """strip_local_prefix is callable and removes the prefix from sample IDs."""
        from mcp_server.server import strip_local_prefix

        # Verify the function exists and works
        assert callable(strip_local_prefix)

        prefixed_ids = (
            "/local/foundryvtt",
            "/local/pytest",
            "/local/mydocs/reference",
        )
        for prefixed in prefixed_ids:
            assert not strip_local_prefix(prefixed).startswith("/local/")
|
||||
|
||||
|
||||
class TestMcpServerPrefixHandlingVariations:
    """Further strip_local_prefix cases: long, punctuated, unicode and mixed-case names."""

    def test_long_library_id(self):
        """A long name after the prefix is returned intact."""
        from mcp_server.server import strip_local_prefix

        long_name = "very-long-library-id-with-many-chars-in-name"
        assert strip_local_prefix("/local/" + long_name) == long_name

    def test_special_characters_in_id(self):
        """Underscores, dashes and digits in the name are preserved."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/my-doc_v2-3_test")
        assert stripped == "my-doc_v2-3_test"

    def test_unicode_in_stripped_name(self):
        """Unicode characters in the name are preserved."""
        from mcp_server.server import strip_local_prefix

        # The name part is made of Chinese characters.
        stripped = strip_local_prefix("/local/世界文档")
        assert stripped == "世界文档"

    def test_mixed_case_stripped_name(self):
        """Letter case in the name is preserved."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/FoundryVTT")
        assert stripped == "FoundryVTT"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def sample_library_ids():
    """Return sample library IDs that all start with the /local/ prefix."""
    names = (
        "foundryvtt",
        "pytest",
        "mydocs/reference/guide.md",
        "my-app",
        "documentation/tutorial/getting-started",
    )
    return ["/local/" + name for name in names]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def expected_stripped_ids(sample_library_ids):
    """Return the fixed list of prefix-free IDs; the ``sample_library_ids`` argument is not read."""
    stripped = (
        "foundryvtt",
        "pytest",
        "mydocs/reference/guide.md",
        "my-app",
        "documentation/tutorial/getting-started",
    )
    return list(stripped)
|
||||
@@ -0,0 +1,368 @@
|
||||
"""
|
||||
Tests for backend/app/search.py
|
||||
|
||||
These tests verify search functionality without requiring:
|
||||
- A running Qdrant vector database (mocked)
|
||||
- Loaded embedding models (mocked)
|
||||
|
||||
The tests focus on:
|
||||
- Response shape validation
|
||||
- Library filtering
|
||||
- Error handling
|
||||
- Async function behavior
|
||||
"""
|
||||
import pytest
|
||||
|
||||
|
||||
class TestResolveLibraryId:
    """Checks on resolve_library_id(), the Context7-style library lookup."""

    def test_returns_candidates_list(self, test_database):
        """The lookup returns a list when several libraries are stored."""
        from backend.app.search import resolve_library_id

        # Create some libraries first
        from backend.app.db import upsert_library
        for index in range(3):
            upsert_library(
                library_id=f"/local/searchtest{index}",
                name=f"Search Test Library {index}",
                description=f"Description for search test {index}"
            )

        assert isinstance(resolve_library_id("search"), list)

    def test_captures_matching_names(self, test_database):
        """The lookup returns a list when the query appears in a library name."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        # Create a library that should match "search"
        upsert_library(
            library_id="/local/searchlib",
            name="Search Library",
            description="Main search documentation"
        )

        assert isinstance(resolve_library_id("search"), list)

    def test_context7_style_prefix(self, test_database):
        """Every returned candidate reports its source as "local"."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        upsert_library(
            library_id="foundryvtt",  # Without /local/
            name="Foundry VTT",
            description="Fantasy tabletop virtual table"
        )

        for entry in resolve_library_id("foundry"):
            assert entry.get("source") == "local"

    def test_partial_name_match(self, test_database):
        """The lookup returns a list for a query that is part of a library ID."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        upsert_library(
            library_id="/local/gamefoundry",
            name="Foundry Game Module",
            description="Module for foundry games"
        )

        assert isinstance(resolve_library_id("game"), list)

    def test_empty_result_on_no_matches(self, test_database):
        """The lookup still returns a list for a query no library was created for."""
        from backend.app.search import resolve_library_id

        # No libraries matching "xyznonexistent123"
        assert isinstance(resolve_library_id("xyznonexistent123"), list)
|
||||
|
||||
|
||||
class TestSearchDocs:
    """Tests for search_docs() - semantic search with mocked vector store."""

    def test_returns_results_list(self, mock_qdrant_client, test_database):
        """search_docs should return a list of results."""
        from backend.app.search import search_docs

        # Create some chunks first
        from backend.app.db import upsert_library, insert_document_chunk
        upsert_library(library_id="/local/searchdocslib", name="Search Docs Lib", description="Test")

        for i in range(5):
            insert_document_chunk(
                doc_id=f"searchdoc-{i}",
                library_id="/local/searchdocslib",
                path=f"path{i}.md",
                title=f"Section {i}",
                content=f"# Section {i}\n\nContent about section {i} that matches search queries.",
                chunk_index=i,
                token_estimate=100
            )

        results = search_docs("section")

        assert isinstance(results, list)

    def test_empty_query_returns_empty_list(self):
        """Empty query should return a list (its contents are not checked)."""
        from backend.app.search import search_docs

        results = search_docs("")
        assert isinstance(results, list)

    def test_limit_parameter(self, mock_qdrant_client):
        """search_docs accepts a limit argument and still returns a list."""
        from backend.app.search import search_docs

        results_10 = search_docs("test", limit=10)
        results_5 = search_docs("test", limit=5)

        assert isinstance(results_10, list)
        assert isinstance(results_5, list)

    def test_response_shape_matches_spec(self):
        """Verify response shape when the mocked client returns data."""
        from unittest.mock import patch
        from backend.app.search import search_docs

        with patch('backend.app.vector_store.get_client') as mock_get_client:
            # Stand-in for a scored search hit: exposes only the `score` and
            # `payload` attributes set below.
            mock_client = mock_get_client.return_value
            mock_point = type('ScoredPoint', (), {
                'score': 0.95,
                'payload': {
                    "id": "test-id-1",
                    "library_id": "/local/testlib",
                    "path": "docs/example.md",
                    "title": "Example Document",
                    "chunk_index": 0
                }
            })()
            mock_client.search.return_value = [mock_point]

            results = search_docs("test query")

            assert isinstance(results, list)
            # NOTE: the field checks only run when results come back; an
            # empty list passes this test without checking any field.
            if results:
                result = results[0]
                for field in ("id", "score", "library_id", "path", "title", "chunk_index"):
                    assert field in result
|
||||
|
||||
|
||||
class TestGetLibraryDocs:
    """Exercise get_library_docs(), which returns a library's documents as text."""

    def test_returns_empty_string_when_no_documents(self, mock_qdrant_client):
        """An unknown library still produces a string result."""
        from backend.app.search import get_library_docs

        docs = get_library_docs("/local/nonexistent")

        # The string may be empty or an error message; only the type is checked.
        assert isinstance(docs, str)

    def test_returns_content_when_documents_exist(self, mock_qdrant_client):
        """A library with one stored chunk produces a string result."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        # One library holding a single chunk.
        lib = "/local/docretrievetest"
        upsert_library(library_id=lib, name="Doc Retrieve", description="Test")
        chunk = dict(
            doc_id="doc-retrieve-1",
            library_id=lib,
            path="docs/getting-started.md",
            title="Getting Started",
            content="# Getting Started\n\nWelcome to the documentation. This is a test document.",
            chunk_index=0,
            token_estimate=200,
        )
        insert_document_chunk(**chunk)

        docs = get_library_docs(lib)

        # Only the type is verified; the content itself is not inspected.
        assert isinstance(docs, str)

    def test_topic_filter_searches(self, mock_qdrant_client):
        """Passing a topic still produces a string result."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        lib = "/local/topicsearchlib"
        upsert_library(library_id=lib, name="Topic Search", description="Test")

        # Two chunks on different topics: (doc_id, path, title, content).
        topic_chunks = (
            (
                "topic-install",
                "docs/install.md",
                "Installation Guide",
                "# Installation\n\nInstall with pip install mypackage.",
            ),
            (
                "topic-usage",
                "docs/usage.md",
                "Usage Guide",
                "# Usage\n\nUse mycommand --help for help.",
            ),
        )
        for doc_id, path, title, content in topic_chunks:
            insert_document_chunk(
                doc_id=doc_id,
                library_id=lib,
                path=path,
                title=title,
                content=content,
                chunk_index=0,
                token_estimate=150,
            )

        # Ask only for the "install" topic.
        docs = get_library_docs(lib, topic="install")

        assert isinstance(docs, str)

    def test_token_limit_respected(self):
        """Passing a small token_limit still produces a string result."""
        from backend.app.search import get_library_docs

        # Build a library with one large chunk.
        from backend.app.db import upsert_library, insert_document_chunk

        lib = "/local/tokenlimittest"
        upsert_library(library_id=lib, name="Token Limit", description="Test")

        filler = " ".join(["word"] * 500)
        long_content = "# Long Content\n\n" + filler
        chunk = dict(
            doc_id="long-doc",
            library_id=lib,
            path="docs/long.md",
            title="Long Document",
            content=long_content,
            chunk_index=0,
            token_estimate=2000,
        )
        insert_document_chunk(**chunk)

        # The limit is far below the chunk's token estimate.
        docs = get_library_docs(lib, token_limit=100)

        assert isinstance(docs, str)
|
||||
|
||||
|
||||
class TestGetLibraryDocsWithMock:
    """Check get_library_docs() against a library that has stored chunks."""

    def test_retrieves_chunks_by_library_id(self, mock_qdrant_client):
        """Without a topic, a library with three chunks yields a string."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        lib = "/local/mockretrievetest"
        upsert_library(library_id=lib, name="Mock Retrieve", description="Test")

        # Three small chunks, indexed 0..2.
        for position in range(3):
            chunk = dict(
                doc_id=f"mock-retrieve-{position}",
                library_id=lib,
                path=f"path{position}.md",
                title=f"Path {position}",
                content=f"Content for path {position}.",
                chunk_index=position,
                token_estimate=50,
            )
            insert_document_chunk(**chunk)

        docs = get_library_docs(lib)

        assert isinstance(docs, str)
|
||||
|
||||
|
||||
class TestSearchErrorHandling:
    """Tests for error handling in search functions."""

    def test_search_handles_missing_library(self):
        """Searching within an unknown library should still return a list."""
        from backend.app.search import search_docs

        results = search_docs("test", library_id="/local/missing_lib_xyz123")
        assert isinstance(results, list)

    def test_resolve_handles_no_libraries_in_db(self, test_database):
        """Resolving against a database with no seeded libraries returns a list.

        The test_database fixture supplies the database; this test adds no
        libraries of its own before resolving.
        """
        from backend.app.search import resolve_library_id

        candidates = resolve_library_id("anything")

        assert isinstance(candidates, list)

    def test_get_library_docs_handles_empty_library(self):
        """A library id with no stored chunks should still yield a string."""
        from backend.app.search import get_library_docs

        result = get_library_docs("/local/emptylib")
        assert isinstance(result, str)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES FOR SEARCH TESTS
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def search_sample_text():
    """Markdown sample with headings, lists and fenced code for chunking tests."""
    # Assembled piecewise; the resulting text has four top-level sections and
    # no trailing newline.
    installation = (
        "# Installation Guide\n"
        "\n"
        "To install the package:\n"
        "```bash\n"
        "pip install mypackage\n"
        "```\n"
    )
    configuration = (
        "## Configuration\n"
        "\n"
        "Configure your environment by setting these variables:\n"
        "- MY_VAR=123\n"
        "- DEBUG=true\n"
    )
    usage = (
        "## Usage Examples\n"
        "\n"
        "Example 1: Basic usage\n"
        "```python\n"
        "import mymodule\n"
        "module = mymodule.Module()\n"
        "result = module.run()\n"
        "print(result)\n"
        "```\n"
        "\n"
        "Example 2: Advanced usage with options\n"
        "```python\n"
        'options = {"verbose": True, "output": "stdout"}\n'
        "result = module.run(options=options)\n"
        "```\n"
    )
    troubleshooting = (
        "## Troubleshooting\n"
        "\n"
        "Common issues and their solutions:\n"
        "- ImportError: Ensure package is installed\n"
        "- AttributeError: Check that attributes exist on object"
    )
    # Sections are separated by exactly one blank line.
    return installation + "\n" + configuration + "\n" + usage + "\n" + troubleshooting
|
||||
@@ -0,0 +1,29 @@
|
||||
# Context7 Docs WebUI Configuration
|
||||
# Copy this file to .env and configure for your environment
|
||||
|
||||
# === Ports (optional - use if you need custom ports) ===
|
||||
HOST_PORT=8787 # docs-api port (default: 8787)
|
||||
MCP_HOST_PORT=8788 # docs-mcp port (default: 8788)
|
||||
WEBUI_PORT=8790 # WebUI port (default: 8790)
|
||||
|
||||
# === API Keys (optional - uncomment to enable auth) ===
|
||||
# Docs API key for protecting endpoints like /search, /ingest, etc.
|
||||
# DOCS_API_KEY=your-secret-docs-api-key
|
||||
|
||||
# WebUI API key (optional - separate from docs-api for UI authentication)
|
||||
# DOCS_WEBUI_API_KEY=your-webui-api-key
|
||||
|
||||
# === Application Configuration ===
|
||||
# Path to documentation files (relative to service container)
|
||||
DOCS_PATH=/docs
|
||||
|
||||
# SQLite database path
|
||||
DB_PATH=/data/db.sqlite
|
||||
|
||||
# Logging level: DEBUG, INFO, WARNING, ERROR
|
||||
LOG_LEVEL=INFO
|
||||
|
||||
# === Vector Store ===
|
||||
# Qdrant host and port (internal Docker network)
|
||||
VECTOR_STORE_HOST=qdrant
|
||||
VECTOR_STORE_PORT=6333
|
||||
@@ -0,0 +1,19 @@
|
||||
# WebUI Dockerfile
FROM python:3.12-slim

# Python runtime behaviour: no .pyc files, unbuffered stdout/stderr.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Default docs-api address used by the WebUI.
ENV DOCS_API_URL=http://docs-api:8787

WORKDIR /app

# Install dependencies before copying the application code.
COPY requirements.txt ./requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# The application package is importable as `webui` (see CMD below).
COPY app /app/webui

RUN mkdir -p /app/webui/templates/uploads

# Same port uvicorn is started on below.
EXPOSE 8790

CMD ["uvicorn", "webui.main:app", "--host", "0.0.0.0", "--port", "8790"]
|
||||
@@ -0,0 +1,72 @@
|
||||
"""Async docs-api client for the WebUI."""
|
||||
import os
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from httpx import AsyncClient, Timeout
|
||||
|
||||
|
||||
class DocsAPIClient:
    """Thin asynchronous wrapper around the docs-api HTTP endpoints.

    The underlying httpx ``AsyncClient`` is created on first use and is
    re-created if it has been closed in the meantime.
    """

    def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None):
        """Store connection settings.

        Args:
            base_url: docs-api root URL. When falsy, ``DOCS_API_URL`` from the
                environment is used, then ``http://docs-api:8787``.
            api_key: Value for the ``X-API-Key`` header. When ``None``,
                ``WEBUI_API_KEY`` from the environment is used. A falsy key
                means no header is sent.
        """
        resolved_url = base_url or os.environ.get("DOCS_API_URL", "http://docs-api:8787")
        self.base_url = resolved_url.rstrip("/")

        if api_key is None:
            api_key = os.environ.get("WEBUI_API_KEY")
        self.api_key = api_key

        self.headers = {}
        if self.api_key:
            self.headers = {"X-API-Key": self.api_key}

        self._client: Optional[AsyncClient] = None

    async def _get_client(self) -> AsyncClient:
        """Return the live AsyncClient, building a new one if needed."""
        existing = self._client
        if existing is not None and not existing.is_closed:
            return existing

        self._client = AsyncClient(
            base_url=self.base_url,
            headers=self.headers,
            timeout=Timeout(120.0),
        )
        return self._client

    async def request(self, method: str, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Send a request and normalise the response into a dict.

        JSON object responses are returned as-is; any other JSON value or a
        non-JSON body is wrapped as ``{"data": ...}``.

        Raises:
            RuntimeError: if the response status code is 400 or above.
        """
        http = await self._get_client()
        response = await http.request(method, path, **kwargs)

        if response.status_code >= 400:
            raise RuntimeError(f"{method} {path} failed: {response.status_code} {response.text}")

        content_type = response.headers.get("content-type", "")
        if not content_type.startswith("application/json"):
            return {"data": response.text}

        payload = response.json()
        if isinstance(payload, dict):
            return payload
        return {"data": payload}

    async def get(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a GET request via :meth:`request`."""
        return await self.request("GET", path, **kwargs)

    async def post(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a POST request via :meth:`request`."""
        return await self.request("POST", path, **kwargs)

    async def delete(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a DELETE request via :meth:`request`."""
        return await self.request("DELETE", path, **kwargs)

    async def health(self) -> Dict[str, Any]:
        """Query ``/health``; any failure is reported as an error dict, never raised."""
        try:
            status = await self.get("/health")
        except Exception as exc:
            return {"status": "error", "message": str(exc)}
        return status

    async def upload_file(self, library_id: str, filename: str, content: bytes) -> Dict[str, Any]:
        """POST one file as multipart form data to the library's upload endpoint."""
        multipart = {"file": (filename, content)}
        endpoint = f"/api/v1/upload/{library_id}"
        return await self.post(endpoint, files=multipart)

    async def close(self) -> None:
        """Close the underlying AsyncClient if one exists and is still open."""
        http = self._client
        if http is None or http.is_closed:
            return
        await http.aclose()
|
||||
|
||||
|
||||
_client_instance: Optional[DocsAPIClient] = None
|
||||
|
||||
|
||||
async def get_client() -> DocsAPIClient:
    """Return the module-wide DocsAPIClient, creating it on the first call."""
    global _client_instance
    if _client_instance is not None:
        return _client_instance
    _client_instance = DocsAPIClient()
    return _client_instance
|
||||
|
||||
|
||||
async def close_client() -> None:
    """Close the module-wide DocsAPIClient if one has been created."""
    instance = _client_instance
    if instance is None:
        return
    await instance.close()
|
||||
@@ -0,0 +1,17 @@
|
||||
"""WebUI configuration."""
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _env(name: str) -> Optional[str]:
    """Return environment variable *name*, or None when it is unset or empty."""
    import os  # local import: this module imports only `typing` at file level

    return os.environ.get(name) or None


class Settings:
    """WebUI settings from environment variables.

    Values are read once, when this class is defined (i.e. at import time).

    Attributes:
        DOCS_API_URL: docs-api root URL, from ``DOCS_API_URL``; defaults to
            ``http://docs-api:8787`` when unset or empty.
        WEBUI_API_KEY: API key from ``WEBUI_API_KEY``; ``None`` when unset
            or empty.
        DEFAULT_SEARCH_LIMIT: Default result count for searches (constant).
        DEFAULT_RESULT_TOKENS: Default token budget for results (constant).
    """

    # Core API connection
    DOCS_API_URL: str = _env("DOCS_API_URL") or "http://docs-api:8787"
    WEBUI_API_KEY: Optional[str] = _env("WEBUI_API_KEY")

    # Default parameters for common operations
    DEFAULT_SEARCH_LIMIT: int = 10
    DEFAULT_RESULT_TOKENS: int = 8000


settings = Settings()
|
||||
@@ -0,0 +1,259 @@
|
||||
"""WebUI FastAPI application."""
|
||||
import html
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from fastapi import FastAPI, File, Form, Request, UploadFile
|
||||
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
from .api_client import DocsAPIClient
|
||||
|
||||
|
||||
# FastAPI application object for the WebUI.
_APP_METADATA = {
    "title": "Context7 Docs WebUI",
    "description": "Web dashboard for managing documentation system",
    "version": "1.0.0",
}
app = FastAPI(**_APP_METADATA)

# Templates and static assets are looked up next to this module.
_PACKAGE_DIR = os.path.dirname(__file__)


def _escape_for_template(value):
    """HTML-escape *value* for templates; falsy values become an empty string."""
    return html.escape(str(value or ""))


templates = Jinja2Templates(directory=os.path.join(_PACKAGE_DIR, "templates"))
templates.env.globals["escapeHtml"] = _escape_for_template
app.mount(
    "/static",
    StaticFiles(directory=os.path.join(_PACKAGE_DIR, "static")),
    name="static",
)

# Shared docs-api client; created lazily by get_client().
_client: Optional[DocsAPIClient] = None
|
||||
|
||||
|
||||
def get_client() -> DocsAPIClient:
    """Return the shared DocsAPIClient, building it from the environment on first use."""
    global _client
    if _client is not None:
        return _client

    api_url = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
    api_key = os.environ.get("WEBUI_API_KEY")
    _client = DocsAPIClient(api_url, api_key)
    return _client
|
||||
|
||||
|
||||
@app.on_event("shutdown")
async def shutdown() -> None:
    """Close the shared docs-api client, if one was ever created."""
    active = _client
    if active is None:
        return
    await active.close()
|
||||
|
||||
|
||||
def page(title: str, body: str) -> HTMLResponse:
    """Wrap *body* in a minimal HTML document.

    Args:
        title: Page title; HTML-escaped here.
        body: Markup inserted verbatim into ``<body>``. Callers must escape
            any untrusted text themselves.
    """
    safe_title = html.escape(title)
    document = (
        "<!DOCTYPE html>\n"
        f'<html><head><meta charset="UTF-8"><title>{safe_title}</title></head>\n'
        f'<body style="font-family:sans-serif;padding:20px;">{body}</body></html>'
    )
    return HTMLResponse(document)
|
||||
|
||||
|
||||
@app.get("/")
async def dashboard(request: Request):
    """Render the dashboard: health, total vector count and library list.

    A failure while fetching collections or libraries is swallowed and shown
    as a zero count / empty list rather than an error page.
    """
    client = get_client()
    health = await client.health()

    # Sum the "vectors" field over every dict-valued collection entry.
    total_vectors = 0
    try:
        collections_data = await client.get("/collections")
        for item in collections_data.get("collections", {}).values():
            if isinstance(item, dict):
                total_vectors = total_vectors + item.get("vectors", 0)
    except Exception:
        total_vectors = 0

    libraries = []
    try:
        libs_data = await client.get("/libraries")
        libraries = libs_data.get("libraries", [])
    except Exception:
        libraries = []

    context = {
        "request": request,
        "health": health,
        "vectors": total_vectors,
        "libraries": libraries,
    }
    return templates.TemplateResponse("dashboard.html", context)
|
||||
|
||||
|
||||
@app.post("/actions/ingest-all")
async def ingest_all():
    """Trigger ingestion of everything and show the outcome on a result page."""
    client = get_client()
    try:
        outcome = await client.post("/ingest/all")
        heading, detail = "Ingestion Complete", str(outcome)
    except Exception as exc:
        heading, detail = "Ingestion Failed", str(exc)

    # The detail text is escaped before it is embedded in the markup.
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/'>Back</a>"
    return page("Ingestion", body)
|
||||
|
||||
|
||||
@app.post("/actions/sync-sources")
async def sync_sources_action():
    """Trigger a git source sync (override disabled) and show the outcome."""
    client = get_client()
    sync_request = {"override": False}
    try:
        outcome = await client.post("/sources/sync", json=sync_request)
        heading, detail = "Git Sync Complete", str(outcome)
    except Exception as exc:
        heading, detail = "Git Sync Failed", str(exc)

    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/'>Back</a>"
    return page("Git Sync", body)
|
||||
|
||||
|
||||
@app.get("/libraries")
async def libraries(request: Request):
    """Render the library list; a fetch failure is shown as an empty list."""
    client = get_client()
    libraries_data = []
    try:
        payload = await client.get("/libraries")
        libraries_data = payload.get("libraries", [])
    except Exception:
        libraries_data = []

    context = {"request": request, "data": libraries_data}
    return templates.TemplateResponse("libraries.html", context)
|
||||
|
||||
|
||||
@app.post("/libraries/create")
async def create_library(
    library_id: str = Form(...),
    name: str = Form(...),
    description: Optional[str] = Form(None),
):
    """Create a library through docs-api and show the outcome.

    Args:
        library_id: Identifier for the new library; surrounding whitespace
            is stripped before it is placed in the request path.
        name: Display name of the library.
        description: Optional description; sent as an empty string when
            omitted.

    Returns:
        A result page titled "Library Created" on success or
        "Create Failed" on error, so the title matches the outcome.
    """
    client = get_client()
    try:
        result = await client.post(
            f"/api/v1/libraries/{library_id.strip()}",
            data={"name": name, "description": description or ""},
        )
        title = "Library Created"
        body = f"<h1>Library Created</h1><pre>{html.escape(str(result))}</pre><a href='/libraries'>Back</a>"
    except Exception as e:
        title = "Create Failed"
        body = f"<h1>Create Failed</h1><pre>{html.escape(str(e))}</pre><a href='/libraries'>Back</a>"
    return page(title, body)
|
||||
|
||||
|
||||
@app.post("/libraries/{library_id}/ingest")
async def ingest_library(library_id: str):
    """Trigger ingestion for one library and show the outcome."""
    client = get_client()
    try:
        outcome = await client.post(f"/ingest/{library_id}")
        heading, detail = "Ingestion Complete", str(outcome)
    except Exception as exc:
        heading, detail = "Ingestion Failed", str(exc)

    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/libraries'>Back</a>"
    return page("Ingest Library", body)
|
||||
|
||||
|
||||
@app.post("/libraries/{library_id}/delete")
async def delete_library(library_id: str):
    """Delete one library through docs-api and show the outcome."""
    client = get_client()
    try:
        outcome = await client.delete(f"/api/v1/libraries/{library_id}")
        heading, detail = "Library Deleted", str(outcome)
    except Exception as exc:
        heading, detail = "Delete Failed", str(exc)

    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/libraries'>Back</a>"
    return page("Delete Library", body)
|
||||
|
||||
|
||||
@app.get("/libraries/{library_id}/docs")
async def view_library_docs(library_id: str):
    """Show a library's documentation text on a plain page.

    The "content" field of the docs-api response is displayed. If the request
    fails, the error message is displayed in its place.
    """
    client = get_client()
    try:
        result = await client.get(f"/docs/{library_id}")
        content = result.get("content", "")
    except Exception as e:
        content = str(e)

    # html.escape() only accepts str; the API may return null or a non-string
    # value for "content", which would otherwise raise here.
    if content is None:
        content = ""
    elif not isinstance(content, str):
        content = str(content)

    return page(
        f"Docs: {library_id}",
        f"<h1>{html.escape(library_id)}</h1><pre>{html.escape(content)}</pre><a href='/libraries'>Back</a>",
    )
|
||||
|
||||
|
||||
@app.get("/upload")
async def upload_form(request: Request):
    """Render the upload form; a library fetch failure yields an empty list."""
    client = get_client()
    libraries = []
    try:
        payload = await client.get("/libraries")
        libraries = payload.get("libraries", [])
    except Exception:
        libraries = []

    context = {"request": request, "libraries": libraries}
    return templates.TemplateResponse("upload.html", context)
|
||||
|
||||
|
||||
@app.post("/upload")
async def upload_file(
    request: Request,
    library_id: str = Form(""),
    ingest_after_upload: Optional[str] = Form(None),
    files: List[UploadFile] = File(...),
):
    """Forward uploaded files to docs-api and optionally ingest them.

    Args:
        request: Incoming request, passed through to the template.
        library_id: Target library. When blank, each file goes to a library
            named after its own filename stem (lower-cased, spaces replaced
            by dashes), or "uploaded" if that stem is empty.
        ingest_after_upload: Ingestion runs only when this equals "on".
        files: The uploaded files.

    Returns:
        The upload template with one result entry per file, followed by one
        "__INGEST__" entry per distinct library that was ingested.
    """
    client = get_client()
    results = []
    total_size = 0
    # Libraries that received at least one file, in first-seen order, so each
    # is ingested once no matter how many files were uploaded to it.
    uploaded_libraries: List[str] = []

    for upload in files:
        filename = upload.filename or "upload.txt"
        target_library = library_id.strip()
        if not target_library:
            target_library = Path(filename).stem.lower().replace(" ", "-") or "uploaded"

        try:
            contents = await upload.read()
            total_size += len(contents)
            result = await client.upload_file(target_library, filename, contents)
            results.append({"filename": filename, "status": "success", "message": result})
        except Exception as e:
            results.append({"filename": filename, "status": "error", "message": str(e)})
            continue

        # Prefer the id reported by docs-api; fall back to the id the file was
        # uploaded to when the response has no "library_id" key.
        reported = result.get("library_id") if isinstance(result, dict) else None
        ingest_target = reported or target_library
        if ingest_target not in uploaded_libraries:
            uploaded_libraries.append(ingest_target)

    if ingest_after_upload == "on":
        for ingest_target in uploaded_libraries:
            try:
                ingest_result = await client.post(f"/ingest/{ingest_target}")
                results.append({"filename": "__INGEST__", "status": "success", "message": ingest_result})
            except Exception as e:
                results.append({"filename": "__INGEST__", "status": "error", "message": str(e)})

    return templates.TemplateResponse(
        "upload.html",
        {"request": request, "libraries": [], "results": results, "total_size_bytes": total_size},
    )
|
||||
|
||||
|
||||
@app.get("/search")
async def search_form(request: Request):
    """Render the search page with an empty query and no results."""
    blank_context = {"request": request, "query": "", "results": []}
    return templates.TemplateResponse("search.html", blank_context)
|
||||
|
||||
|
||||
@app.get("/search/results")
async def search_results(request: Request, q: str = "", limit: int = 10):
    """Run a search across all libraries and render the hits.

    An empty query skips the backend call. A backend failure is shown as an
    empty result list rather than an error.
    """
    client = get_client()
    hits = []
    if q:
        search_request = {"query": q, "library_id": None, "limit": limit}
        try:
            response = await client.post("/search", json=search_request)
            hits = response.get("results", [])
        except Exception:
            hits = []

    context = {"request": request, "query": q, "results": hits, "limit": limit}
    return templates.TemplateResponse("search.html", context)
|
||||
|
||||
|
||||
@app.get("/sources")
async def sources_page(request: Request):
    """Render the configured sources; a fetch failure yields an empty list."""
    client = get_client()
    sources = []
    try:
        payload = await client.get("/api/v1/sources")
        sources = payload.get("sources", [])
    except Exception:
        sources = []

    context = {"request": request, "sources": sources}
    return templates.TemplateResponse("sources.html", context)
|
||||
|
||||
|
||||
@app.post("/sources/sync")
async def sync_sources(override: bool = Form(False)):
    """Trigger a git source sync with the submitted override flag and show the outcome."""
    client = get_client()
    sync_request = {"override": override}
    try:
        outcome = await client.post("/sources/sync", json=sync_request)
        heading, detail = "Git Sync Complete", str(outcome)
    except Exception as exc:
        heading, detail = "Git Sync Failed", str(exc)

    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/sources'>Back</a>"
    return page("Git Sync", body)
|
||||
@@ -0,0 +1,159 @@
|
||||
// WebUI Static JavaScript Utilities
|
||||
// Simple helper functions shared across templates
|
||||
|
||||
/**
 * Escape HTML to prevent XSS attacks when displaying user content.
 *
 * Replaces &, <, >, " and ' with their HTML entities, so the result is safe
 * in both element text and quoted attribute values. Does not touch the DOM.
 *
 * @param {*} text - Value to escape.
 * @returns {string} The escaped text, or "" when `text` is not a string.
 */
function escapeHtml(text) {
    if (typeof text !== 'string') return "";
    var entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };
    return text.replace(/[&<>"']/g, function(m) {
        return entities[m];
    });
}
|
||||
|
||||
/**
 * Render a number with locale-specific grouping separators.
 * The value is rounded down to a whole number first; null and undefined
 * are shown as "N/A".
 */
function formatNumber(num) {
    var missing = (num === null || num === undefined);
    if (missing) {
        return "N/A";
    }
    var whole = Math.floor(num);
    var formatter = new Intl.NumberFormat();
    return formatter.format(whole);
}
|
||||
|
||||
/**
 * Replace the content of the element with the given id by a loading
 * placeholder. Does nothing when no such element exists.
 */
function showLoading(elementId) {
    var target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = '<div class="loading-spinner">Loading...</div>';
}
|
||||
|
||||
/**
 * Clear the content of the element with the given id (removing any loading
 * placeholder). Does nothing when no such element exists.
 */
function hideLoading(elementId) {
    var target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = "";
}
|
||||
|
||||
/**
 * Display a short-lived notification in the bottom-right corner.
 * The toast gets the CSS classes "toast" and `type` ("info" when omitted),
 * has its opacity set to 0 after 3 seconds and is removed 200 ms later.
 */
function showToast(message, type) {
    var kind = type || 'info';
    var node = document.createElement('div');
    node.className = 'toast ' + kind;
    node.textContent = message;
    node.style.cssText = 'position:fixed;bottom:20px;right:20px;padding:12px 20px;border-radius:4px;margin-bottom:10px;background:#333;color:white;font-size:0.9rem;z-index:1000';
    document.body.appendChild(node);

    function removeNode() {
        node.remove();
    }

    function fadeOut() {
        node.style.opacity = '0';
        setTimeout(removeNode, 200);
    }

    setTimeout(fadeOut, 3000);
}
|
||||
|
||||
/**
 * Display an "error" toast whose text is the message prefixed with "Error: ".
 */
function showError(message) {
    var text = "Error: " + message;
    showToast(text, "error");
}
|
||||
|
||||
/**
 * Display a "success" toast whose text is the message prefixed with "Success: ".
 */
function showSuccess(message) {
    var text = "Success: " + message;
    showToast(text, "success");
}
|
||||
|
||||
/**
 * Make an API request with error handling.
 *
 * The URL is `window.webuiConfig.apiUrl` joined to `endpoint` with exactly
 * one slash. When `window.webuiConfig.apiKey` is set it is sent as the
 * X-API-Key header. When `data` is provided for a method other than
 * GET/HEAD it is sent as a JSON body with a matching Content-Type header.
 *
 * @param {string} endpoint - Path relative to the configured API URL.
 * @param {string} [method='GET'] - HTTP method.
 * @param {*} [data=null] - Payload to send as JSON; null/undefined sends no body.
 * @returns {Promise<*>} Parsed JSON for JSON responses, otherwise the body text.
 * @throws {Error} If the API URL is not configured, the request fails, or the
 *     response status is not OK.
 */
async function apiRequest(endpoint, method = 'GET', data = null) {
    const config = window.webuiConfig || {};
    if (!config.apiUrl) {
        throw new Error('apiRequest: window.webuiConfig.apiUrl is not configured');
    }

    // Join base and endpoint with a single slash, whichever side supplied it.
    const base = config.apiUrl.endsWith('/') ? config.apiUrl : config.apiUrl + '/';
    const url = base + String(endpoint).replace(/^\/+/, '');

    const headers = {};
    if (config.apiKey) {
        headers['X-API-Key'] = config.apiKey;
    }

    const options = { method: method, headers: headers };
    const hasPayload = data !== null && data !== undefined;
    if (hasPayload && method !== 'GET' && method !== 'HEAD') {
        // Declare the body format so the server parses it as JSON.
        headers['Content-Type'] = 'application/json';
        options.body = JSON.stringify(data);
    }

    try {
        const response = await fetch(url, options);

        if (!response.ok) {
            // statusText can be empty, so always include the numeric status.
            const reason = response.statusText ? ' ' + response.statusText : '';
            throw new Error('HTTP ' + response.status + reason);
        }

        const contentType = response.headers.get('content-type');
        if (contentType && contentType.includes('application/json')) {
            return await response.json();
        }
        return await response.text();
    } catch (err) {
        console.error('API request failed:', err);
        throw err;
    }
}
|
||||
|
||||
/**
 * Placeholder for tooltip initialisation; currently does nothing.
 */
function initTooltips() {
    // Intentionally empty: add tooltip functionality here if needed.
    return undefined;
}
|
||||
|
||||
/**
 * Debounce function for input handling.
 *
 * Returns a wrapper that delays calling `func` until `wait` milliseconds
 * have passed since the wrapper was last called. Each new call cancels the
 * pending one, so a burst of calls runs `func` once, with the arguments and
 * `this` of the last call.
 *
 * @param {Function} func - Function to debounce.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} The debounced wrapper.
 */
function debounce(func, wait) {
    var timeout;
    return function executedFunction(...args) {
        // Capture the caller's `this`; the timer callback has its own.
        var context = this;
        // Cancel the previously scheduled call so only the last one fires.
        clearTimeout(timeout);
        timeout = setTimeout(function() {
            timeout = undefined;
            func.apply(context, args);
        }, wait);
    };
}
|
||||
|
||||
// Expose the shared helpers on window so templates can call them.
Object.assign(window, {
    escapeHtml: escapeHtml,
    formatNumber: formatNumber,
    showToast: showToast,
    showError: showError,
    showSuccess: showSuccess
});
|
||||
@@ -0,0 +1,395 @@
|
||||
/* Page shell: a centred, width-limited column */
.container {
    margin: 0 auto;
    max-width: 1000px;
    padding: 20px;
}

/* Header, separated from the content by a rule */
header {
    margin-bottom: 20px;
    padding-bottom: 15px;
    border-bottom: 1px solid #cccccc;
}

header h1 {
    font-size: 1.5rem;
    margin: 0 0 10px;
}

/* Navigation links laid out in a row */
nav {
    gap: 15px;
    display: flex;
}

nav a {
    color: #06c;
    font-size: 0.9rem;
    text-decoration: none;
}

/* Link for the current page */
nav a.active {
    text-decoration: underline;
    font-weight: 700;
}

main h2 {
    margin-bottom: 15px;
}

/* Footer, separated from the content by a rule */
footer {
    color: #666666;
    font-size: 0.8rem;
    margin-top: 40px;
    padding-top: 15px;
    border-top: 1px solid #cccccc;
}
|
||||
|
||||
/* Status tile with a green accent bar on its left edge */
.status-card {
    margin-bottom: 15px;
    padding: 20px;
    background: #f5f5f5;
    border-left: 4px solid #00c467;
    border-radius: 8px;
}

/* Light-blue informational banner */
.status-message {
    margin: 5px 0;
    padding: 10px;
    background: #e8f4fd;
    border-radius: 4px;
}

/* Preformatted block that wraps long lines */
pre.code-block {
    padding: 15px;
    background: #f5f5f5;
    border-radius: 4px;
    overflow-x: auto;
    word-break: break-word;
    white-space: pre-wrap;
}
|
||||
|
||||
/* Tables */
.library-table {
    margin-top: 10px;
    width: 100%;
    border-collapse: collapse;
}

/* Cells: left-aligned, with a rule under each row */
.library-table td, .library-table th {
    border-bottom: 1px solid #dddddd;
    padding: 10px;
    text-align: left;
}

/* Header cells */
.library-table th {
    font-weight: 700;
    background: #f5f5f5;
}
|
||||
|
||||
/* Forms */
form select, form textarea, form input[type="text"] {
    border: 1px solid #cccccc;
    border-radius: 4px;
    padding: 8px;
    margin-bottom: 10px;
    margin-right: 10px;
}

/* Default look for plain <button> elements */
button {
    cursor: pointer;
    color: #fff;
    background: #06c;
    border: none;
    border-radius: 4px;
    padding: 10px 20px;
}

button:hover {
    background: #05a;
}

/* Upload form */
.sync-form, .search-form, .upload-form {
    max-width: 600px;
}
|
||||
|
||||
/* Search results */
.results-count {
    margin-bottom: 15px;
    padding: 10px;
    background: #e8f4fd;
    border-radius: 4px;
}

/* One search hit */
.result-card {
    margin: 10px 0;
    padding: 15px;
    background: #ffffff;
    border: 1px solid #dddddd;
    border-radius: 4px;
}

.result-card h3 {
    margin: 0 0 8px;
}

/* Results box */
.results-box {
    overflow-y: auto;
    max-height: 600px;
}

.results-box .new-search-link {
    margin-top: 15px;
    display: block;
    text-align: center;
}
|
||||
|
||||
/* Source cards */
.source-cards {
    gap: 10px;
    display: grid;
}

.source-card {
    padding: 15px;
    background: #f5f5f5;
    border-left: 4px solid #666666;
    border-radius: 4px;
}

/* Inline code inside a status banner: light text on a dark chip */
.status-message code {
    color: #ffffff;
    background: #333333;
    border-radius: 3px;
    padding: 2px 6px;
}

/* Small grey helper text */
.hint {
    margin-top: 15px;
    font-size: 0.85rem;
    color: #666666;
}

.results-box .error {
    font-weight: 700;
    color: #c00;
}

/* Reset white-space handling for these containers and for bare <pre> */
pre, .source-cards, .source-list {
    white-space: normal;
}
|
||||
|
||||
/* Status cards grid */
.status-cards {
    margin-bottom: 20px;
    display: grid;
    gap: 15px;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
}

/* Card caption */
.status-card h3 {
    color: #555555;
    font-size: 0.9rem;
    margin: 0 0 8px;
}

/* Card value */
.status-card p {
    font-weight: 700;
    font-size: 1.2rem;
    margin: 0;
}

/* Message box */
.message-box {
    margin-bottom: 20px;
    padding: 12px;
    background: #e8f4fd;
    border-left: 4px solid #3b82f6;
    border-radius: 6px;
}
|
||||
|
||||
/* Action buttons */
.action-buttons {
    display: flex;
    gap: 15px;
    margin-bottom: 20px;
}

/* Base look shared by every .btn element */
.btn {
    padding: 10px 20px;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    text-decoration: none;
    display: inline-block;
    font-size: 0.9rem;
}

/*
 * NOTE(review): a green .btn-primary / .btn-primary:hover pair
 * (#00c467 / #00a855) was declared here, but the same selectors are declared
 * again later in this stylesheet with blue values, so the green pair never
 * took effect. It has been dropped to leave a single definition; rendering is
 * unchanged. Confirm that blue is the intended primary colour.
 */

.btn-secondary {
    background: #2563eb;
    color: white;
}

.btn-secondary:hover {
    background: #1d4ed8;
}
|
||||
|
||||
/* Links section */
.links-section h2 {
    margin-bottom: 10px;
    font-size: 1rem;
}

.links-section a {
    padding: 5px 10px;
    text-decoration: none;
    color: #06c;
}

/* Underline appears only on hover */
.links-section a:hover {
    text-decoration: underline;
}
|
||||
|
||||
/* Create library form */
.create-form {
    margin-bottom: 20px;
    padding: 15px;
    background: #f9f9f9;
    border-left: 4px solid #00c467;
    border-radius: 6px;
}

/* Labels sit on their own line above each field */
.create-form label {
    color: #333333;
    font-weight: 700;
    margin-bottom: 8px;
    display: block;
}

/* Full-width text fields; border-box keeps padding inside the width */
.create-form input[type="text"] {
    box-sizing: border-box;
    width: 100%;
    margin-bottom: 12px;
    padding: 8px;
    border: 1px solid #cccccc;
    border-radius: 4px;
}

/* Table actions column */
.actions {
    white-space: nowrap;
}
|
||||
|
||||
/* Button sizes */
.btn-sm {
    font-size: 0.8rem;
    padding: 5px 12px;
}

/* Additional action button colors */
.btn-info {
    color: #fff;
    background: #17a2b8;
}

.btn-info:hover {
    background: #138496;
}

.btn-warning {
    color: #000;
    background: #ffc107;
}

.btn-warning:hover {
    background: #ffa000;
}

.btn-danger {
    color: #fff;
    background: #dc3545;
}

.btn-danger:hover {
    background: #c82333;
}

.btn-primary {
    color: #fff;
    background: #007bff;
}

.btn-primary:hover {
    background: #0056b3;
}
|
||||
|
||||
/* Highlight row for popular libraries */
tr.highlight {
    background: #f0fdf4;
}

/* Upload form specific styles */
#files, #library_id {
    box-sizing: border-box;
    width: 100%;
    margin-bottom: 12px;
    padding: 8px;
    border: 1px solid #cccccc;
    border-radius: 4px;
}

#files {
    font-family: sans-serif;
}
|
||||
|
||||
/* Results box for upload */
.result-box {
    min-height: 100px;
    margin-top: 20px;
    padding: 10px;
    background: #ffffff;
    border: 1px solid #dddddd;
    border-radius: 4px;
}

/* Error variant of the box */
.result-box.error {
    background: #fff5f5;
    border-color: #dc3545;
}

/* Result items */
.result-item {
    margin: 4px 0;
    padding: 6px;
    border-radius: 3px;
    font-size: 0.85rem;
    font-family: monospace;
    word-break: break-word;
}

.result-item.success {
    color: #155724;
    background: #d4edda;
    border-left: 3px solid #28a745;
}

.result-item.error {
    color: #721c24;
    background: #f8d7da;
    border-left: 3px solid #dc3545;
}

.result-item.info {
    color: #0c5460;
    background: #d1ecf1;
    border-left: 3px solid #17a2b8;
}
|
||||
@@ -0,0 +1,32 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{% block title %}Context7 Docs{% endblock %}</title>
    <link rel="stylesheet" href="{{ url_for('static', path='style.css') }}">
</head>
<body>
    {# Shared page shell: header with navigation, a content slot and a footer. #}
    <div class="container">
        <header>
            <h1>Context7 Docs UI</h1>
            <nav>
                {# The dashboard link matches "/" exactly; section links match by path prefix. #}
                <a href="/" {% if request.url.path == '/' %}class="active"{% endif %}>Dashboard</a>
                {% for href, label in [('/libraries', 'Libraries'), ('/upload', 'Upload'), ('/search', 'Search'), ('/sources', 'Sources')] %}
                <a href="{{ href }}" {% if request.url.path.startswith(href) %}class="active"{% endif %}>{{ label }}</a>
                {% endfor %}
            </nav>
        </header>

        <main>
            {% block content %}{% endblock %}
        </main>

        <footer>Context7 Docs WebUI</footer>
    </div>

    {# Shared helpers load first; page-specific scripts go in the "scripts" block. #}
    <script src="{{ url_for('static', path='app.js') }}"></script>
    {% block scripts %}{% endblock %}
</body>
</html>
|
||||
@@ -0,0 +1,83 @@
|
||||
{% extends "base.html" %}

{% block title %}Dashboard - Context7 Docs{% endblock %}

{% block content %}
{# Page heading is an h2: base.html already provides the page-level h1. #}
<h2>Dashboard</h2>

<!-- Status Cards -->
<div class="status-cards">
    <div id="health-card" class="status-card" style="{% if health.status == 'ok' %}border-left-color: #00c467{% else %}border-left-color: #f53800{% endif %}">
        <h3>Docs API Service</h3>
        {% if health.status and health.status == 'ok' %}
        <p style="color: #00c467;"><strong>Status:</strong> <span class="status-text">Online ✓</span></p>
        {% else %}
        <p style="color: #f53800;"><strong>Status:</strong> <span class="status-text">{% if health.status == 'error' %}Error{% else %}Offline{% endif %}</span></p>
        {% endif %}
    </div>

    <div class="status-card">
        <h3>Vectors Stored</h3>
        <p>{{ vectors|default(0) }}</p>
    </div>

    <div class="status-card">
        <h3>Libraries Registered</h3>
        <p>{{ libraries|length }}</p>
    </div>
</div>

<!-- Recent Messages -->
{# NOTE(review): escapeHtml is called as a template function here, so it must be registered on the Jinja environment by the server; verify, otherwise rendering fails whenever libraries is non-empty. #}
{% if libraries and libraries|length > 0 %}
<div class="message-box">
    <strong>Libraries:</strong> {{ escapeHtml(libraries) }}
</div>
{% endif %}

<!-- Action Buttons -->
<div class="action-buttons">
    <form method="post" action="/actions/ingest-all" style="display: inline;">
        <button type="submit" name="ingest-all" class="btn btn-primary">
            🔄 Ingest All Libraries
        </button>
    </form>

    <form method="post" action="/actions/sync-sources" style="display: inline;">
        <input type="hidden" name="override" value="false">
        <button type="submit" name="sync-sources" class="btn btn-secondary">
            📦 Sync Git Sources
        </button>
    </form>
</div>

<!-- Links -->
<div class="links-section">
    <h2>Navigate to Other Pages</h2>
    <a href="/libraries" style="display: inline-block; margin-right: 15px;">View Libraries →</a>
    <a href="/upload" style="display: inline-block; margin-right: 15px;">Upload Files →</a>
    <a href="/search" style="display: inline-block; margin-right: 15px;">Search Docs →</a>
    <a href="/sources" style="display: inline-block;">Git Sources →</a>
</div>

<!-- Script for health refresh on reload -->
<script>
    // After the page (and app.js) has loaded, re-fetch the health status and
    // update the first status card so it reflects the live state.
    document.addEventListener("DOMContentLoaded", async function() {
        const card = document.getElementById('health-card');
        if (!card) return;

        const line = card.querySelector('p');
        const label = card.querySelector('.status-text');

        try {
            const api = window.docsApiClient;
            if (!api) throw new Error('docsApiClient is not available');

            const newHealth = await api.get("/health");
            const online = Boolean(newHealth) && newHealth.status === 'ok';
            const colour = online ? '#00c467' : '#f53800';

            // The server-rendered state uses inline styles, so the refresh
            // must update those styles for the change to be visible.
            card.style.borderLeftColor = colour;
            if (line) {
                line.style.color = colour;
                line.classList.toggle('online', online);
                line.classList.toggle('error', !online);
            }
            if (label) {
                if (online) {
                    label.textContent = 'Online ✓';
                } else {
                    label.textContent = (newHealth && newHealth.status === 'error') ? 'Error' : 'Offline';
                }
            }
        } catch (err) {
            // Best effort: keep the server-rendered status when the refresh fails.
            console.log('Health refresh skipped:', err);
        }
    });
</script>
{% endblock %}
|
||||
@@ -0,0 +1,74 @@
|
||||
{% extends "base.html" %}

{% block title %}Libraries - Context7 Docs{% endblock %}

{% block content %}
{# Page heading is an h2: base.html already provides the page-level h1. #}
<h2>Libraries</h2>

<!-- Create Library Form -->
<div class="create-form">
    <form method="post" action="/libraries/create">
        <label for="new_library_id">Library ID:</label>
        <input type="text" id="new_library_id" name="library_id" placeholder="e.g., foundryvtt" required>

        <label for="new_name">Name:</label>
        <input type="text" id="new_name" name="name" placeholder="Display name for this library" required>

        <label for="new_description">Description (optional):</label>
        <input type="text" id="new_description" name="description" placeholder="Brief description...">

        <button type="submit" class="btn btn-primary">Create Library</button>
    </form>
</div>

<hr>

<!-- Libraries Table -->
{# NOTE(review): escapeHtml is called as a template function below, so it must be registered on the Jinja environment by the server; verify. #}
<table class="library-table">
    <thead>
        <tr>
            <th>ID</th>
            <th>Name</th>
            <th>Description</th>
            <th>Source Path</th>
            <th>Updated At</th>
            <th>Actions</th>
        </tr>
    </thead>
    <tbody id="libraries-body">
        {% if data|length > 0 %}
        {% for lib in data %}
        <tr class="{% if lib.source_path and 'foundry' in (lib.source_path or '').lower() %}highlight{% endif %}">
            <td><code>{{ escapeHtml(lib.id) }}</code></td>
            <td><strong>{{ escapeHtml(lib.name) }}</strong></td>
            <td>{{ escapeHtml(lib.description) or '-' }}</td>
            <td><small>{{ escapeHtml(lib.source_path) or '-' }}</small></td>
            <td><small>{{ lib.updated_at|default('N/A') }}</small></td>
            <td class="actions">
                {# IDs are URL-encoded so characters such as spaces or "?" cannot break the path. #}
                <a href="/libraries/{{ lib.id|urlencode }}/docs" class="btn btn-sm btn-info">View Docs</a> |
                <form method="post" action="/libraries/{{ lib.id|urlencode }}/ingest" style="display:inline;"
                      onsubmit="return confirm('Trigger ingestion for this library?');">
                    <button type="submit" class="btn btn-sm btn-warning">Ingest</button>
                </form> |
                {# Inline like the ingest form, so all three actions stay on one row. #}
                <form method="post" action="/libraries/{{ lib.id|urlencode }}/delete" style="display:inline;"
                      onsubmit="return confirm('Delete this library and all its contents? This cannot be undone.');">
                    <button type="submit" class="btn btn-sm btn-danger">Delete</button>
                </form>
            </td>
        </tr>
        {% endfor %}
        {% else %}
        <tr>
            <td colspan="6" style="text-align:center;">No libraries found. Create one above.</td>
        </tr>
        {% endif %}
    </tbody>
</table>

{% if data and data[0] and data[0].get('content') %}
<!-- Docs view mode -->
{# data is a list, so the content chunks are read from its first entry, matching the guard above. #}
<pre class="code-block">{% for chunk in data[0].get('content', []) %}{% if chunk|length > 0 %}{{ chunk.text | default(chunk.content) | default(chunk) }}{% endif %}{% endfor %}</pre>
<a href="/libraries" style="display:block;margin-top:20px;">← Back to Libraries</a>
{% endif %}

{% endblock %}
|
||||
@@ -0,0 +1,71 @@
|
||||
{% extends "base.html" %}

{% block title %}Search - Context7 Docs{% endblock %}

{% block content %}
<h2>Search Documentation</h2>

<form method="get" action="/search/results" class="search-form">
    <label for="query">Query:</label>
    <input type="text" id="query" name="q" required placeholder="Enter your search query..." value="{{ query or '' }}">

    <label for="limit">Limit results:</label>
    <select id="limit" name="limit">
        <option value="5">5</option>
        <option value="10" selected>10</option>
        <option value="20">20</option>
        <option value="50">50</option>
    </select>

    <button type="submit">Search</button>
</form>

<div id="search-results" class="results-box"></div>

{% if results %}
<div class="results-count">{{ results|length }} results found</div>
{% endif %}

<script>
    /**
     * Run a search through window.docsApiClient and render the hits into
     * #search-results.
     *
     * @param {?string} query - Search text; falls back to the server-provided query.
     * @param {?string} limit - Maximum number of hits; defaults to 10 when
     *   missing or not a number.
     */
    async function loadResults(query, limit) {
        const searchBox = document.getElementById("search-results");

        try {
            // tojson emits a correctly escaped JavaScript string literal.
            const fallbackQuery = {{ (initial_query or query or '')|tojson }};
            const payload = {
                query: query || fallbackQuery,
                library_id: null,
                limit: parseInt(limit, 10) || 10
            };
            const api = window.docsApiClient;
            if (!api) throw new Error('docsApiClient is not available');

            const result = await api.post("/search", payload);

            if (result.results && Array.isArray(result.results)) {
                searchBox.className = "results-box";
                let html = '<div class="results-count">' + result.results.length + ' results found</div>';

                for (const r of result.results) {
                    const fullText = r.content || '';
                    const title = r.title || fullText.substring(0, 100);
                    const content = fullText.substring(0, 500);
                    // Show an ellipsis only when the snippet was actually cut.
                    const ellipsis = fullText.length > 500 ? '...' : '';
                    // Encode each path segment so the id cannot break out of the href.
                    const docPath = String(r.library_id || '')
                        .split('/')
                        .map(encodeURIComponent)
                        .join('/');

                    html += '<div class="result-card">' +
                        '<h3>' + escapeHtml(title) + '</h3>' +
                        '<p>' + escapeHtml(content) + ellipsis + '</p>' +
                        '<a href="/docs/' + escapeHtml(docPath) + '">View Full</a></div>';
                }

                html += '<a href="/search/form" class="new-search-link">← New Search</a>';
                searchBox.innerHTML = html;
            }
        } catch (err) {
            searchBox.innerHTML = '<p class="error">Error loading results: ' + escapeHtml(err.message) + '</p>';
        }
    }

    // Load initial results if query parameter exists in URL
    var urlParams = new URLSearchParams(window.location.search);
    {% if query %}
    // Wait for DOMContentLoaded: app.js is included after this block in
    // base.html, so shared helpers are not yet loaded while this script runs.
    document.addEventListener("DOMContentLoaded", function () {
        loadResults(urlParams.get('q'), urlParams.get('limit'));
    });
    {% endif %}

    /**
     * Escape text for insertion into HTML markup.
     *
     * @param {*} str - Value to escape; null and undefined become "".
     * @returns {string} The HTML-escaped text.
     */
    function escapeHtml(str) {
        if (str === null || str === undefined) return "";
        var e = document.createElement('div');
        e.textContent = String(str);
        return e.innerHTML;
    }
</script>
{% endblock %}
|
||||
@@ -0,0 +1,34 @@
|
||||
{% extends "base.html" %}

{% block title %}Sources - Context7 Docs{% endblock %}

{% block content %}
<h2>Git Repository Sync</h2>

<div class="status-message">Syncs all git repositories configured in <code>docs_sources.yaml</code>.</div>

<form method="post" action="/sources/sync" class="sync-form">
    <label for="override">Override existing repos:</label>
    <input type="checkbox" id="override" name="override">
    <button type="submit">Sync All Repositories</button>
</form>

<div id="source-list"></div>

{% if sources %}
<h3>Configured Sources</h3>
<div class="source-cards">
    {% for src in sources %}
    <div class="source-card">
        <strong>{{ src.library_id | default('unknown') }}</strong><br>
        {# Parentheses are required: Jinja cannot subscript a filter result directly. #}
        URL: {{ (src.repo_url | default('N/A'))[:60] }}<br>
        Branch: {{ src.branch | default('main') }}<br>
        Include: {{ (src.include_paths | default(['*']) | join(', ')) }}
    </div>
    {% endfor %}
</div>
{% else %}
<p>No git sources configured. Add repositories to <code>docs_sources.yaml</code>.</p>
{% endif %}

{% endblock %}
|
||||
@@ -0,0 +1,48 @@
|
||||
{% extends "base.html" %}

{% block title %}Upload - Context7 Docs{% endblock %}

{% block content %}
<h2>Upload Documentation Files</h2>

<form method="post" enctype="multipart/form-data" class="upload-form">
    <!-- Library Selector -->
    {# Not "required": the empty first option is a valid choice, and a required select would block submitting it. #}
    <label for="library_id">Select Library:</label>
    <select id="library_id" name="library_id">
        <option value="">(New library - will be created from filename)</option>
        {% for lib in libraries %}
        <option value="{{ lib.id }}" data-name="{{ lib.name or lib.id }}">{{ lib.name or lib.id }}</option>
        {% endfor %}
    </select>

    <!-- File Input (multiple files allowed) -->
    <label for="files">Select Files:</label>
    <input type="file" name="files" id="files" multiple accept=".md,.txt,.py,.js,.ts,.json,.yaml,.yml,.html,.css,.pdf" required>

    <!-- Ingest Checkbox -->
    <div style="margin-top: 10px;">
        <label>
            <input type="checkbox" name="ingest_after_upload" value="on">
            Trigger ingestion after upload
        </label>
    </div>

    <button type="submit" class="btn btn-primary">Upload Files</button>
</form>

<!-- Allowed extensions hint -->
<p class="hint">Allowed: .md, .txt, .py, .js, .ts, .json, .yaml, .yml, .html, .css, .pdf (max 5MB each)</p>

<!-- Results Display -->
<div id="upload-result" class="result-box"></div>

{# NOTE(review): escapeHtml is called as a template function below, so it must be registered on the Jinja environment by the server; verify. #}
{% if results %}
<h3>Upload Results</h3>
<ul>
    {% for result in results %}
    <li><strong>{{ result.filename }}</strong>: {{ result.status }} - {{ escapeHtml(result.message) }}</li>
    {% endfor %}
</ul>
{% endif %}

{% endblock %}
|
||||
@@ -0,0 +1,7 @@
|
||||
# WebUI Dependencies
fastapi==0.109.0
uvicorn[standard]==0.27.0
pydantic==2.5.3
python-multipart==0.0.6
httpx==0.26.0
PyYAML==6.0.1
# The WebUI templates are Jinja2; fastapi does not install it by default.
jinja2==3.1.3
|
||||
Reference in New Issue
Block a user