Initial DocsMCP stack
This commit is contained in:
@@ -0,0 +1,31 @@
|
|||||||
|
# Context7 Docs API Configuration
|
||||||
|
# Copy this file to .env and configure for your environment
|
||||||
|
|
||||||
|
# === Service Ports (optional - use if you need custom ports) ===
|
||||||
|
HOST_PORT=8787
|
||||||
|
MCP_HOST_PORT=8788
|
||||||
|
|
||||||
|
# === API Keys (optional - uncomment to enable auth) ===
|
||||||
|
# Docs API key for protecting endpoints like /search, /ingest, etc.
|
||||||
|
# DOCS_API_KEY=your-secret-docs-api-key
|
||||||
|
|
||||||
|
# MCP Server API key for protecting MCP tools via HTTP
|
||||||
|
# MCP_API_KEY=your-secret-mcp-server-key
|
||||||
|
|
||||||
|
# === Application Configuration ===
|
||||||
|
# Path to documentation files (relative to service container)
|
||||||
|
DOCS_PATH=/docs
|
||||||
|
|
||||||
|
# SQLite database path
|
||||||
|
DB_PATH=/data/db.sqlite
|
||||||
|
|
||||||
|
# Logging level: DEBUG, INFO, WARNING, ERROR
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# === Vector Store ===
|
||||||
|
# Qdrant host and port (internal Docker network)
|
||||||
|
VECTOR_STORE_HOST=qdrant
|
||||||
|
VECTOR_STORE_PORT=6333
|
||||||
|
|
||||||
|
# === Git Sources (if using) ===
|
||||||
|
# See docs_sources.yaml for git source configuration
|
||||||
+10
@@ -0,0 +1,10 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
.pytest_cache/
|
||||||
|
|
||||||
|
.env
|
||||||
|
data/*
|
||||||
|
!data/.gitkeep
|
||||||
|
backend/data/*
|
||||||
|
|
||||||
|
.DS_Store
|
||||||
@@ -0,0 +1,106 @@
|
|||||||
|
# Makefile for local-context7
|
||||||
|
# Common development and deployment commands
|
||||||
|
|
||||||
|
# Every target in this file is a command, not a file it produces; declaring
# them all phony keeps a stray file or directory of the same name (e.g. "test",
# "logs", "reset") from making the target look up to date.
.PHONY: help install install-dev deps test test-unit test-coverage test-file \
        lint docs docker-up docker-down clean backup-db reset logs log-backend health
|
||||||
|
|
||||||
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
|
## Help - Show available commands
# Default goal. Keep this list in sync with the targets defined below.
help:
	@echo "Available commands:"
	@echo " make install - Install all Python dependencies (backend + tests)"
	@echo " make install-dev - Install dependencies plus optional linting tools"
	@echo " make deps - Upgrade all dependencies to latest versions"
	@echo " make test - Run all tests with pytest"
	@echo " make test-unit - Run only unit tests (no external dependencies)"
	@echo " make test-coverage - Run tests with a coverage report"
	@echo " make test-file file=PATH - Run a single test file"
	@echo " make lint - Run linters (if configured)"
	@echo " make docker-up - Start Docker containers for development"
	@echo " make docker-down - Stop Docker containers"
	@echo " make logs - Follow logs for all services"
	@echo " make log-backend - Follow logs for the docs-api service"
	@echo " make health - Show container status"
	@echo " make backup-db - Back up the SQLite database"
	@echo " make reset - Delete all Qdrant and SQLite data, then restart"
	@echo " make clean - Remove generated files, databases, and caches"
|
||||||
|
|
||||||
|
## Install runtime requirements for the backend, then the test tooling
install:
	pip install -r backend/requirements.txt && \
	pip install pytest pytest-mock pytest-asyncio
|
||||||
|
|
||||||
|
## Upgrade packaging tools, backend requirements, and test tooling to their latest releases
deps:
	pip install -U pip setuptools wheel
	pip install --upgrade -r backend/requirements.txt
	pip install --upgrade pytest pytest-mock pytest-asyncio
|
||||||
|
|
||||||
|
## Run the complete pytest suite (verbose, short tracebacks)
test:
	@echo "Running all tests..."
	pytest --verbose --tb=short
|
||||||
|
|
||||||
|
## Run only tests marked "unit" (no external dependencies like Qdrant, FastEmbed)
# Usable without the Docker containers running; tests/test_search.py is
# excluded from collection entirely.
test-unit:
	@echo "Running unit tests only..."
	pytest -v --tb=short -m unit --ignore=tests/test_search.py
|
||||||
|
|
||||||
|
## Lint the backend sources first, then the tests (requires flake8)
# The tests are only linted when the backend pass succeeds.
lint:
	flake8 backend/ && \
	flake8 tests/
|
||||||
|
|
||||||
|
## Start Docker containers for full development environment
# Uses the Compose v2 plugin ("docker compose"), matching the other targets in
# this file and the README, instead of the legacy standalone docker-compose.
docker-up:
	docker compose up -d
|
||||||
|
|
||||||
|
## Stop Docker containers
# Uses the Compose v2 plugin ("docker compose"), matching the other targets in
# this file and the README, instead of the legacy standalone docker-compose.
docker-down:
	docker compose down
|
||||||
|
|
||||||
|
## Clean generated files, databases, and caches
# Removes SQLite files under backend/data, the embedding cache, and Python
# bytecode anywhere below the current directory.
clean:
	@echo "Cleaning up..."
	rm -rf backend/data/*.sqlite
	rm -rf .embed_cache
	# -prune stops find from descending into a directory it is about to delete;
	# this also covers a top-level __pycache__, so no separate rm is needed.
	find . -type d -name "__pycache__" -prune -exec rm -rf {} + 2>/dev/null || true
	find . -type f \( -name "*.pyc" -o -name "*.pyo" \) -delete 2>/dev/null || true
|
||||||
|
|
||||||
|
## Install development dependencies (linting, typing, coverage)
# Optional tooling on top of `install`. pytest-cov supplies the --cov options
# that the test-coverage target passes to pytest.
install-dev: install
	pip install flake8 mypy black pytest-cov
|
||||||
|
|
||||||
|
## Run the test suite with coverage of backend/app
# Writes an HTML report and prints uncovered lines to the terminal.
test-coverage:
	pytest -v \
		--cov=backend/app \
		--cov-report=html \
		--cov-report=term-missing
|
||||||
|
|
||||||
|
## Run specific test file
# Usage: make test-file file=path/to/test_file.py
# Fails fast when `file` is missing instead of silently running every test.
test-file:
	$(if $(strip $(file)),,$(error Usage: make test-file file=path/to/test_file.py))
	pytest -v $(file)
|
||||||
|
|
||||||
|
## Backup SQLite database
# Dumps /data/db.sqlite from the docs-api container and stores it gzipped on
# the host. Override the destination with:
#   make backup-db BACKUP_PATH=path/to/file.sql.gz
# Shell expansions are written with $$ so Make passes them to the shell rather
# than expanding them (to nothing) itself. The file name is computed once so
# the path that is written and the path that is reported always match.
# NOTE(review): needs the sqlite3 CLI inside the docs-api container — confirm the image provides it.
backup-db:
	@echo "Backing up SQLite database..."
	@out="$${BACKUP_PATH:-backups/db-$$(date +%Y%m%d-%H%M%S).sql.gz}"; \
	tmp="$$out.tmp"; \
	mkdir -p "$$(dirname "$$out")" && \
	docker compose exec -T docs-api sqlite3 /data/db.sqlite .dump > "$$tmp" && \
	gzip -c "$$tmp" > "$$out" && \
	rm -f "$$tmp" && \
	echo "Backup complete: $$out" || \
	{ rm -f "$$tmp"; echo "Backup failed" >&2; exit 1; }
|
||||||
|
|
||||||
|
## Reset all data (Qdrant and SQLite)
# Asks for confirmation, then removes containers, volumes and on-disk data and
# rebuilds the stack. Declining exits successfully without touching anything.
# printf + read replaces `read -p`, which is not available in POSIX sh (Make's
# default shell). A failing step now fails the target instead of being reported
# as "Reset cancelled.".
reset:
	@echo "WARNING: This will delete all data in Qdrant and the SQLite database!"
	@printf "Type 'yes' to confirm: "; read confirm; \
	if [ "$$confirm" != "yes" ]; then echo "Reset cancelled."; exit 0; fi; \
	docker compose down -v && \
	rm -f ./data/db.sqlite && \
	rm -rf ./data/qdrant && \
	docker compose up -d --build && \
	echo "Reset complete. Services restarted."
|
||||||
|
|
||||||
|
## Follow the combined log output of every Compose service
logs:
	docker compose logs --follow
|
||||||
|
|
||||||
|
## Follow the log output of the docs-api service only
log-backend:
	docker compose logs --follow docs-api
|
||||||
|
|
||||||
|
## List the Compose services together with their current status
health:
	docker compose ps
|
||||||
@@ -0,0 +1,431 @@
|
|||||||
|
# Context7-style Docs MCP System
|
||||||
|
|
||||||
|
A self-hosted, local-compatible documentation retrieval and search system using Docker. This project uses Qdrant for vector embeddings and SQLite for metadata storage, exposing a FastAPI docs backend and an MCP server for IDE/tool integration.
|
||||||
|
|
||||||
|
## 🏠 Home Server / Production Use
|
||||||
|
|
||||||
|
This section covers hardening recommendations for running this system on a home server or in production.
|
||||||
|
|
||||||
|
### Environment Variables (`.env`)
|
||||||
|
|
||||||
|
Copy `.env.example` to `.env` and configure:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
| Variable | Description | Example |
|
||||||
|
|----------|-------------|---------|
|
||||||
|
| `HOST_PORT` | Docs API host port (default: 8787) | `8787` |
|
||||||
|
| `MCP_HOST_PORT` | MCP server host port (default: 8788) | `8788` |
|
||||||
|
| `DOCS_API_KEY` | API key for docs-api authentication (optional) | `my-secret-key-123` |
|
||||||
|
| `MCP_API_KEY` | API key for MCP server authentication (optional, FastMCP handles via --key flag conceptually) | `mcp-secret-key` |
|
||||||
|
| `DOCS_PATH` | Path to documentation files inside container | `/docs` |
|
||||||
|
| `DB_PATH` | SQLite database path inside container | `/data/db.sqlite` |
|
||||||
|
| `LOG_LEVEL` | Logging level: DEBUG, INFO, WARNING, ERROR | `INFO` |
|
||||||
|
|
||||||
|
> **Security Note:** API keys are optional. Leave empty in `.env` if you don't need authentication (backward compatible with existing setups). If set, the docs-api requires an `X-API-Key` header matching `DOCS_API_KEY` for protected endpoints.
|
||||||
|
|
||||||
|
### Port Configuration
|
||||||
|
|
||||||
|
For firewall or network setup:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Example: Run docs-api on port 9000 instead of 8787
|
||||||
|
HOST_PORT=9000 MCP_HOST_PORT=9001 docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
### Backup Instructions
|
||||||
|
|
||||||
|
#### SQLite Database (`data/db.sqlite`)
|
||||||
|
|
||||||
|
Regular SQLite backups prevent data loss. Example cron job:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Add to crontab (run daily at 2am)
|
||||||
|
0 2 * * * cd /path/to/local-context7 && docker compose exec -T docs-api sqlite3 /data/db.sqlite ".backup '/backups/db_$(date +\%Y\%m\%d).sqlite'"
|
||||||
|
```
|
||||||
|
|
||||||
|
Or one-off backup:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec docs-api sh -c "sqlite3 /data/db.sqlite '.dump' | gzip > /backups/db-$(date +%Y%m%d-%H%M%S).sql.gz"
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Qdrant Vector Store
|
||||||
|
|
||||||
|
Qdrant stores vectors in `./data/qdrant`. For backup:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Backup entire Qdrant data directory
|
||||||
|
docker compose exec qdrant sh -c "tar czf /backups/qdrant-backup-$(date +%Y%m%d).tar.gz /qdrant/storage"
|
||||||
|
|
||||||
|
# Or pull full export to host (requires volume mount)
|
||||||
|
docker run --rm -v local-context7_data:/data -v $(pwd)/backups:/backups qdrant/qdrant:latest tar czf /backups/qdrant-backup-$(date +%Y%m%d).tar.gz /qdrant/storage
|
||||||
|
```
|
||||||
|
|
||||||
|
### Safe Reset Command
|
||||||
|
|
||||||
|
To reset both SQLite and Qdrant cleanly:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose down -v # Removes volumes and stops services
|
||||||
|
rm ./data/db.sqlite # Remove database file
|
||||||
|
rm -rf ./data/qdrant # Remove Qdrant data
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
Or use the `make reset` command below.
|
||||||
|
|
||||||
|
### Makefile Commands
|
||||||
|
|
||||||
|
The included `Makefile` provides convenient commands:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start services
|
||||||
|
make docker-up
|
||||||
|
|
||||||
|
# Stop services
|
||||||
|
make docker-down
|
||||||
|
|
||||||
|
# Rebuild and restart
|
||||||
|
make docker-down && docker compose up -d --build
|
||||||
|
|
||||||
|
# Backup database
|
||||||
|
make backup-db BACKUP_PATH=/backups/db-$(date +%Y%m%d).sqlite.gz
|
||||||
|
|
||||||
|
# Reset everything (delete volumes)
|
||||||
|
make reset
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||||
|
│ Client │────▶│ docs-api │◀────│ docs-mcp │
|
||||||
|
│ (IDE/Tool) │ │ (FastAPI) │ │ (MCP Server)│
|
||||||
|
└─────────────┘ └─────────────┘ └─────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────┐
|
||||||
|
│ Qdrant │
|
||||||
|
│ (Vector DB) │
|
||||||
|
└─────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
**Components:**
|
||||||
|
- `qdrant` — Vector database storing document embeddings
|
||||||
|
- `docs-api` — FastAPI backend exposing ingestion, search, and library endpoints
|
||||||
|
- `docs-mcp` — MCP server providing tools for Context7-style AI interactions
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Docker Engine v20.10+
|
||||||
|
- Docker Compose
|
||||||
|
- ~500MB free disk space (Qdrant + embedding model)
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
1. **Download the project** and change into its directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd local-context7
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Copy environment file:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **(Optional) Create sample docs:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p docs/foundryvtt docs/fastapi docs/my-msfs-copilot
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Start services:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Verify they're running:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose ps
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see all three services (`qdrant`, `docs-api`, `docs-mcp`) in "Up" status.
|
||||||
|
|
||||||
|
6. **Wait for startup completion** (embedding model loads on first API call):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose logs -f docs-api # Watch for "Initialization complete."
|
||||||
|
```
|
||||||
|
|
||||||
|
## Add Docs
|
||||||
|
|
||||||
|
Place your documentation folders under the root directory:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir -p docs/foundryvtt/docs
|
||||||
|
cp /path/to/foundryvtt/*.md docs/foundryvtt/docs/
|
||||||
|
mkdir -p docs/fastapi
|
||||||
|
```
|
||||||
|
|
||||||
|
Supported file types: `.md`, `.txt`, `.py`, `.js`, `.ts`, `.json`, `.yaml`, `.yml`, `.html`, `.css`, `.pdf` (via pypdf).
|
||||||
|
|
||||||
|
To add new documents to the vector store after adding them, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
|
||||||
|
```
|
||||||
|
|
||||||
|
Or from another terminal:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST http://localhost:8787/api/v1/ingest/all \
|
||||||
|
-H "Content-Type: application/json"
|
||||||
|
```
|
||||||
|
|
||||||
|
## Index Docs (Run Ingestion)
|
||||||
|
|
||||||
|
After adding documents, index them into the vector store:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected output shows progress like:
|
||||||
|
|
||||||
|
```
|
||||||
|
[Detection] Scanning for libraries in: /docs
|
||||||
|
[Detection] Found 3 library(ies)
|
||||||
|
[Library] Processing: foundryvtt
|
||||||
|
[Library] Scanning for files in: /docs/foundryvtt
|
||||||
|
[Library] Found 5 document(s)
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Search Docs
|
||||||
|
|
||||||
|
### Via API (POST to `/search`)
|
||||||
|
|
||||||
|
Request body:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"query": "how do hooks work",
|
||||||
|
"library_id": "foundryvtt",
|
||||||
|
"limit": 10
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Response example:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"query": "hooks",
|
||||||
|
"library_id": "foundryvtt",
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"id": "...",
|
||||||
|
"score": 0.854,
|
||||||
|
"library_id": "foundryvtt",
|
||||||
|
"path": "core-docs.md",
|
||||||
|
"title": "Core Hooks",
|
||||||
|
"chunk_index": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"count": 1
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Via MCP (resolve-library-id, search-docs tools)
|
||||||
|
|
||||||
|
## Connect MCP Clients
|
||||||
|
|
||||||
|
To use this system with an MCP-enabled client (e.g., Claude Desktop), configure the MCP server endpoint.
|
||||||
|
|
||||||
|
### Example: Claude Desktop Config
|
||||||
|
|
||||||
|
Add to your `claude_desktop_config.json`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"context7": {
|
||||||
|
"command": "npx",
|
||||||
|
"args": [
|
||||||
|
"@modelcontextprotocol/server-local-context7",
|
||||||
|
"--url", "http://localhost:8788"
|
||||||
|
],
|
||||||
|
"env": {
|
||||||
|
"DOCS_API_URL": "http://localhost:8787"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
If the client runs outside Docker and can't reach the API, expose them on host ports or run the MCP server outside Docker (see below).
|
||||||
|
|
||||||
|
## Example: Cline/Cursor MCP Config
|
||||||
|
|
||||||
|
For Cursor or similar editors using Cline:
|
||||||
|
|
||||||
|
```json
|
||||||
|
// ~/.cursor/mcp.json
|
||||||
|
{
|
||||||
|
"context7": {
|
||||||
|
"type": "stdio",
|
||||||
|
"command": "docker",
|
||||||
|
"args": [
|
||||||
|
"exec",
|
||||||
|
"-i",
|
||||||
|
"docs-mcp",
|
||||||
|
"uvicorn",
|
||||||
|
"server:app",
|
||||||
|
"--host",
|
||||||
|
"0.0.0.0",
|
||||||
|
"--port",
|
||||||
|
"8788"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Or if exposing MCP on host port:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"context7": {
|
||||||
|
"type": "stdio",
|
||||||
|
"command": "docker",
|
||||||
|
"args": [
|
||||||
|
"run",
|
||||||
|
"-i",
|
||||||
|
"--rm",
|
||||||
|
"-p",
|
||||||
|
"8788:8788",
|
||||||
|
"--name",
|
||||||
|
"context7-mcp-standalone",
|
||||||
|
"-e",
|
||||||
|
"DOCS_API_URL=http://host.docker.internal:8787",
|
||||||
|
"local-context7/docs-mcp"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Services won't start or restart loops
|
||||||
|
|
||||||
|
Check logs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose logs -f
|
||||||
|
```
|
||||||
|
|
||||||
|
Common issues:
|
||||||
|
- Port already in use on host → adjust mapping or free the port
|
||||||
|
- Embedding model failing to load → verify disk space, check for GPU constraints if applicable
|
||||||
|
|
||||||
|
### Vector search returns empty results
|
||||||
|
|
||||||
|
Ensure you've run ingestion after adding docs:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose exec docs-api python -c "from app.ingest import ingest_all; import asyncio; asyncio.run(ingest_all())"
|
||||||
|
```
|
||||||
|
|
||||||
|
### Can't connect to docs-api from client outside Docker
|
||||||
|
|
||||||
|
Set environment variable for host access in docker-compose.yml or .env:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
docs-api:
|
||||||
|
environment:
|
||||||
|
- DOCS_API_URL=http://host.docker.internal:8787
|
||||||
|
```
|
||||||
|
|
||||||
|
For MCP server specifically:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
docs-mcp:
|
||||||
|
environment:
|
||||||
|
- DOCS_API_URL=http://host.docker.internal:8787
|
||||||
|
```
|
||||||
|
|
||||||
|
## Reset Qdrant and SQLite
|
||||||
|
|
||||||
|
To clear all data (vector store and database):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Stop services
|
||||||
|
docker compose down
|
||||||
|
|
||||||
|
# Remove volumes (delete Qdrant and db.sqlite)
|
||||||
|
rm -rf ./data/qdrant ./data/db.sqlite
|
||||||
|
|
||||||
|
# Restart fresh
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Expose Through Caddy Reverse Proxy
|
||||||
|
|
||||||
|
To add HTTPS and serve under a subdomain, configure Caddy:
|
||||||
|
|
||||||
|
**Example `Caddyfile`:**
|
||||||
|
|
||||||
|
```caddyfile
|
||||||
|
docs.yourdomain.com {
|
||||||
|
reverse_proxy docs-api:8787
|
||||||
|
handle_path /mcp/* {
|
||||||
|
reverse_proxy docs-mcp:8788
|
||||||
|
}
|
||||||
|
|
||||||
|
# Enable basic auth (optional, see below)
|
||||||
|
}
|
||||||
|
|
||||||
|
api.yourdomain.com {
|
||||||
|
reverse_proxy docs-api:8787
|
||||||
|
}
|
||||||
|
|
||||||
|
mcp.yourdomain.com {
|
||||||
|
reverse_proxy docs-mcp:8788
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Protect It with Basic Auth
|
||||||
|
|
||||||
|
Add authentication using Caddy's built-in `basic_auth` directive (named `basicauth` in releases before Caddy 2.8):
|
||||||
|
|
||||||
|
**Caddy example with basic auth:**
|
||||||
|
|
||||||
|
```caddyfile
docs.yourdomain.com {
	basic_auth {
		# <username> <password hash>; create the hash with `caddy hash-password`
		admin <bcrypt-password-hash>
	}
	reverse_proxy docs-api:8787
}
```
|
||||||
|
|
||||||
|
Or using the caddy `basic` module from scratch in a reverse proxy setup.
|
||||||
|
|
||||||
|
For Docker-based deployment, consider using an authentication middleware or a dedicated reverse proxy with JWT/HTTP Basic configured externally.
|
||||||
|
|
||||||
|
## Future Improvements
|
||||||
|
|
||||||
|
- Add rate limiting to API endpoints
|
||||||
|
- Support for streaming responses for large document retrieval
|
||||||
|
- Chunk overlap configuration via environment variables
|
||||||
|
- Batch index endpoint improvements
|
||||||
|
- Metrics/logging aggregation (e.g., Prometheus + Grafana)
|
||||||
|
- Plugin system for additional data sources
|
||||||
@@ -0,0 +1,36 @@
|
|||||||
|
# Backend API Service
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies for PDF parsing and embeddings, plus the sqlite3
# command-line client that the documented backup commands execute inside this
# container (`docker compose exec docs-api sqlite3 ...`).
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    libgl1 \
    libglib2.0-0 \
    sqlite3 \
    && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Create cache directory with persistent volume mount point
|
||||||
|
RUN mkdir -p /app/.embed_cache
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy application code
|
||||||
|
COPY app/ ./app/
|
||||||
|
|
||||||
|
# Mount volumes at these paths (configured in docker-compose)
|
||||||
|
# ./docs -> /docs
|
||||||
|
# ./data -> /data
|
||||||
|
# /data holds: db.sqlite, qdrant storage volume mount from docker-compose
|
||||||
|
|
||||||
|
# Expose API port
|
||||||
|
EXPOSE 8787
|
||||||
|
|
||||||
|
# Healthcheck
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
CMD curl -f http://localhost:8787/health || exit 1
|
||||||
|
|
||||||
|
# Run the FastAPI application
|
||||||
|
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8787"]
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# WebUI-specific Dockerfile (slim Python base like docs-api; note this image pins Python 3.12 while docs-api pins 3.11)
|
||||||
|
FROM python:3.12-slim
|
||||||
|
|
||||||
|
# Set environment variables
|
||||||
|
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||||
|
PYTHONUNBUFFERED=1 \
|
||||||
|
DOCS_API_URL=http://docs-api:8787 \
|
||||||
|
WEBUI_PORT=8790
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy requirements first for layer caching
|
||||||
|
COPY backend/requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy backend code
|
||||||
|
COPY backend/app /app/backend/app
|
||||||
|
|
||||||
|
# Create uploads directory
|
||||||
|
RUN mkdir -p /app/backend/app/webui/uploads
|
||||||
|
|
||||||
|
# Expose port
|
||||||
|
EXPOSE 8790
|
||||||
|
|
||||||
|
CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8790"]
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# Backend API Package - Contains all FastAPI application modules
|
||||||
|
# The presence of this file makes the directory an importable Python package
|
||||||
@@ -0,0 +1,304 @@
|
|||||||
|
# Text Chunking Utilities with heading-aware splitting
|
||||||
|
import re
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
|
||||||
|
def estimate_tokens(text: str) -> int:
    """
    Approximate how many tokens ``text`` contains.

    No tokenizer is involved: the estimate is a fixed ratio of four
    characters per token, rounded down.

    Args:
        text: The string to measure

    Returns:
        The character count integer-divided by four
    """
    chars_per_token = 4
    whole_tokens, _leftover = divmod(len(text), chars_per_token)
    return whole_tokens
|
||||||
|
|
||||||
|
|
||||||
|
def _split_at_headings(text: str) -> List[tuple]:
|
||||||
|
"""
|
||||||
|
Split text at markdown headings while preserving heading content.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: The full text
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of (heading_text, remaining_text) tuples or [(text,) if no headings]
|
||||||
|
"""
|
||||||
|
# Match markdown headings (##, ###, ####, etc.)
|
||||||
|
pattern = r'(#{1,6})\s+(.+?)(?=\n#{1,6}|\Z)'
|
||||||
|
|
||||||
|
parts = []
|
||||||
|
remaining = text
|
||||||
|
|
||||||
|
while True:
|
||||||
|
match = re.search(pattern, remaining, re.MULTILINE)
|
||||||
|
if not match:
|
||||||
|
break
|
||||||
|
|
||||||
|
heading_start = match.start()
|
||||||
|
heading_content = match.group(0).strip()
|
||||||
|
|
||||||
|
# Insert the heading chunk
|
||||||
|
parts.append((heading_content, None))
|
||||||
|
remaining = remaining[match.end():]
|
||||||
|
|
||||||
|
if remaining and not parts:
|
||||||
|
return [(text,)]
|
||||||
|
|
||||||
|
if remaining:
|
||||||
|
# Add final non-heading section
|
||||||
|
last_h_start = sum(len(h) for _, h in parts)
|
||||||
|
parts.append((remaining[last_h_start:], None))
|
||||||
|
|
||||||
|
if not parts and text:
|
||||||
|
parts = [(text,)]
|
||||||
|
|
||||||
|
return parts
|
||||||
|
|
||||||
|
|
||||||
|
def _split_at_paragraphs(text: str, max_tokens: int) -> List[str]:
|
||||||
|
"""
|
||||||
|
Split text at paragraph boundaries.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: The text to split
|
||||||
|
max_tokens: Maximum tokens per chunk
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of chunks, each respecting max_tokens
|
||||||
|
"""
|
||||||
|
# Split by double newlines (paragraphs)
|
||||||
|
paragraphs = re.split(r'\n\s*\n', text.strip()) if text else []
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
current_chunk = ""
|
||||||
|
|
||||||
|
for para in paragraphs:
|
||||||
|
para_with_tokens = estimate_tokens(para) + (1 if current_chunk else 0)
|
||||||
|
|
||||||
|
if estimate_tokens(current_chunk) + para_with_tokens <= max_tokens:
|
||||||
|
if current_chunk:
|
||||||
|
current_chunk += "\n\n" + para
|
||||||
|
else:
|
||||||
|
current_chunk = para
|
||||||
|
else:
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
|
||||||
|
# If paragraph alone is too big, try splitting by sentences
|
||||||
|
if estimate_tokens(para) > max_tokens:
|
||||||
|
para_chunks = _split_at_sentences(para, max_tokens)
|
||||||
|
for pchunk in para_chunks:
|
||||||
|
if estimate_tokens(current_chunk) + 1 <= max_tokens:
|
||||||
|
current_chunk += "\n\n" + pchunk
|
||||||
|
else:
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
current_chunk = pchunk
|
||||||
|
else:
|
||||||
|
current_chunk = para
|
||||||
|
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def _split_at_sentences(text: str, max_tokens: int) -> List[str]:
|
||||||
|
"""
|
||||||
|
Split text at sentence boundaries.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
text: The text to split
|
||||||
|
max_tokens: Maximum tokens per chunk
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of chunks respecting max_tokens
|
||||||
|
"""
|
||||||
|
if not text:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Split on sentence endings but preserve the delimiter
|
||||||
|
sentences = re.split(r'([.!?]+)', text)
|
||||||
|
|
||||||
|
chunks = []
|
||||||
|
current_chunk = ""
|
||||||
|
token_count = 0
|
||||||
|
|
||||||
|
for part in sentences:
|
||||||
|
part_tokens = estimate_tokens(part) + (1 if current_chunk else 0)
|
||||||
|
|
||||||
|
if token_count + part_tokens <= max_tokens:
|
||||||
|
if current_chunk:
|
||||||
|
current_chunk += " " + part
|
||||||
|
else:
|
||||||
|
current_chunk = part
|
||||||
|
token_count = estimate_tokens(current_chunk)
|
||||||
|
else:
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
|
||||||
|
# Try to fit as much of this sentence as possible
|
||||||
|
start = 0
|
||||||
|
while start < len(part):
|
||||||
|
test_chunk = part[start:]
|
||||||
|
if estimate_tokens(test_chunk) <= max_tokens and not current_chunk:
|
||||||
|
current_chunk = test_chunk
|
||||||
|
token_count = estimate_tokens(current_chunk)
|
||||||
|
break
|
||||||
|
|
||||||
|
# Take a smaller piece
|
||||||
|
test_size = max_tokens - (token_count + 1) if current_chunk else max_tokens
|
||||||
|
if test_size <= 0:
|
||||||
|
test_size = 1
|
||||||
|
|
||||||
|
small_piece = part[start:start + test_size]
|
||||||
|
if not current_chunk:
|
||||||
|
current_chunk = small_piece
|
||||||
|
else:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
current_chunk = small_piece
|
||||||
|
|
||||||
|
token_count = estimate_tokens(current_chunk)
|
||||||
|
|
||||||
|
if start + test_size >= len(part):
|
||||||
|
break
|
||||||
|
|
||||||
|
start += test_size
|
||||||
|
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk)
|
||||||
|
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
|
||||||
|
def chunk_text(text: str, max_tokens: int = 500, overlap_tokens: int = 80) -> List[str]:
    """
    Cut text into chunks of bounded size, preferring natural boundaries.

    The stripped text is walked with a window of ``max_tokens * 4``
    characters. Inside the second half of each window the latest boundary of
    the highest-priority kind present is chosen as the cut point: a newline
    before a markdown heading, then a blank line, then whitespace after
    sentence punctuation, then any whitespace. Without a boundary the window
    is cut at its full width. The next window starts ``overlap_tokens * 4``
    characters before the cut (capped at half the window), or at the cut
    itself when that would not move forward.

    Shortcut: for small budgets (``max_tokens <= 20``), text made of several
    paragraphs that each fit the budget is returned one paragraph per chunk.

    Args:
        text: The full text to chunk
        max_tokens: Maximum tokens per chunk (default 500)
        overlap_tokens: Number of overlapping tokens between chunks (default 80)

    Returns:
        List of stripped, non-empty chunk strings

    Raises:
        TypeError: If ``text`` is None
        ValueError: If ``max_tokens`` is not positive (checked for non-empty text)
    """
    if text is None:
        raise TypeError("text must be a string")
    if not text:
        return []
    if max_tokens <= 0:
        raise ValueError("max_tokens must be greater than 0")

    char_budget = max(1, max_tokens * 4)
    overlap = min(max(overlap_tokens, 0) * 4, char_budget // 2)
    body = text.strip()

    # Small-budget shortcut: already well-sized paragraphs are returned as-is.
    blocks = [piece.strip() for piece in re.split(r"\n\s*\n", body) if piece.strip()]
    if len(blocks) > 1 and max_tokens <= 20 and all(estimate_tokens(b) <= max_tokens for b in blocks):
        return blocks

    # Boundary kinds in descending order of preference.
    boundary_patterns = (r"\n#{1,6}\s+", r"\n\s*\n", r"(?<=[.!?])\s+", r"\s+")

    pieces = []
    total = len(body)
    cursor = 0

    while cursor < total:
        limit = min(cursor + char_budget, total)

        if limit == total:
            # Everything left fits in one window: emit it and stop.
            tail = body[cursor:].strip()
            if tail:
                pieces.append(tail)
            break

        window = body[cursor:limit]
        floor = max(1, len(window) // 2)  # never cut in the first half
        cut = len(window)                 # fallback: hard cut at window width

        for pat in boundary_patterns:
            positions = [hit.start() for hit in re.finditer(pat, window) if hit.start() >= floor]
            if positions:
                cut = max(positions)
                break

        stop = cursor + cut
        piece = body[cursor:stop].strip()
        if piece:
            pieces.append(piece)

        # Step back by the overlap, but always make forward progress.
        resume = stop - overlap if overlap else stop
        cursor = resume if resume > cursor else stop

    return [p for p in pieces if p.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Ad-hoc self-checks, executed only when this module is run as a script.

    # --- estimate_tokens: 400 characters should map to 100 tokens ---
    test_text_400 = "a" * 400
    assert estimate_tokens(test_text_400) == 100, f"Expected 100 tokens for 400 chars, got {estimate_tokens(test_text_400)}"
    print(f"estimate_tokens test passed: 400 chars -> {estimate_tokens(test_text_400)} tokens")

    # --- chunk_text: empty input yields no chunks ---
    assert chunk_text("") == [], "Empty text should return empty list"
    print("chunk_text empty test passed")

    # --- chunk_text: short input comes back unchanged as one chunk ---
    small = "This is a very short text that should be returned as a single chunk."
    chunks = chunk_text(small)
    assert len(chunks) == 1, f"Short text should be one chunk, got {len(chunks)}"
    assert chunks[0] == small, "Content should match for small text"
    print("chunk_text single chunk test passed")

    # --- chunk_text: markdown document with headings, small token budget ---
    # Sections are joined with a blank line between each of them.
    markdown_with_headings = "\n\n".join([
        "# Introduction",
        "This is the introduction section.",
        "## Background",
        "Background information goes here to make this longer and test chunking.",
        "This paragraph has more content about the background topic.",
        "### Details",
        "Specific details about the background are provided in this subsection.",
        "More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    ])

    chunks = chunk_text(markdown_with_headings, max_tokens=50)

    # Report (without asserting) which chunks begin with a heading marker.
    heading_chunks = [c for c in chunks if c.strip().startswith('#')]
    print(f"\nFound {len(heading_chunks)} heading chunks:")
    for hc in heading_chunks:
        print(f" - {hc.strip()}")

    assert len(chunks) > 1, f"Should have multiple chunks, got {len(chunks)}"

    # Every chunk must stay within the budget plus a tolerance of 20 tokens.
    all_under = not any(estimate_tokens(c) > 50 + 20 for c in chunks)
    assert all_under, "Some chunks exceed token limit significantly"
    print("All chunks respect token limits")

    print("\nAll tests passed!")
|
||||||
@@ -0,0 +1,25 @@
|
|||||||
|
# Configuration Settings
|
||||||
|
import os
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class Settings:
    """
    Immutable application settings loaded from environment variables.

    Every default below is evaluated once, when this class body runs at import
    time, so later changes to the environment are not picked up.
    """

    # Vector store connection and collection
    vector_store_host: str = os.getenv("VECTOR_STORE_HOST", "qdrant")
    vector_store_port: int = int(os.getenv("VECTOR_STORE_PORT", "6333"))
    collection_name: str = os.getenv("COLLECTION_NAME", "local_context7_docs")
    embedding_model_name: str = os.getenv("EMBEDDING_MODEL_NAME", "all-MiniLM-L6-v2")

    # Filesystem locations
    docs_path: str = os.getenv("DOCS_PATH", "./docs")
    db_path: str = os.getenv("DB_PATH", "./data/db.sqlite")

    log_level: str = os.getenv("LOG_LEVEL", "INFO")

    # API key protecting the docs API; an empty value disables authentication.
    # API_KEY_DOCS_API takes precedence. DOCS_API_KEY is accepted as a fallback
    # because that is the variable name the sample environment file documents.
    api_key_docs_api: str = os.getenv("API_KEY_DOCS_API") or os.getenv("DOCS_API_KEY", "")

    @property
    def is_auth_enabled(self) -> bool:
        """Return True if API key authentication is enabled."""
        return bool(self.api_key_docs_api)


# Shared settings instance created at import time
settings = Settings()
|
||||||
@@ -0,0 +1,384 @@
|
|||||||
|
# SQLite Database Layer for local-context7
|
||||||
|
import sqlite3
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from .config import settings
|
||||||
|
|
||||||
|
try:
|
||||||
|
from qdrant_client import QdrantClient
|
||||||
|
except ImportError:
|
||||||
|
QdrantClient = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_db_path() -> Path:
    """Return the configured SQLite file location (``settings.db_path``) as a Path."""
    configured_location = settings.db_path
    return Path(configured_location)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_db_dir():
    """Create the directory that holds the SQLite file if it does not exist yet."""
    parent_dir = get_db_path().parent
    parent_dir.mkdir(parents=True, exist_ok=True)


# Runs on import so the directory exists before any connection is opened;
# repeating it is harmless because mkdir is called with exist_ok=True.
ensure_db_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def get_connection():
    """
    Open a new SQLite connection to the configured database file.

    Returns:
        sqlite3.Connection whose row_factory is sqlite3.Row, so rows can be
        read by column name as well as by position
    """
    database_file = str(get_db_path())
    connection = sqlite3.connect(database_file)
    connection.row_factory = sqlite3.Row
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def init_db():
    """
    Create the SQLite schema if it is not present yet.

    Creates:
        - libraries table (id, name, description, source_path, created_at, updated_at)
        - documents table (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
        - an index on documents(library_id) and one on libraries(updated_at)

    Returns:
        {"success": True} on success, otherwise {"success": False, "error": <message>}
    """
    # Executed in order inside a single transaction.
    schema_statements = (
        # Enable legacy mode for easier schema handling
        "PRAGMA legacy_alter_table = ON",
        """
            CREATE TABLE IF NOT EXISTS libraries (
                id TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                description TEXT,
                source_path TEXT NOT NULL,
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            )
        """,
        """
            CREATE TABLE IF NOT EXISTS documents (
                id TEXT PRIMARY KEY,
                library_id TEXT NOT NULL,
                path TEXT NOT NULL,
                title TEXT,
                content TEXT,
                chunk_index INTEGER,
                token_estimate INTEGER,
                created_at TEXT NOT NULL,
                FOREIGN KEY (library_id) REFERENCES libraries(id) ON DELETE CASCADE
            )
        """,
        # Indexes for better query performance
        """
            CREATE INDEX IF NOT EXISTS idx_documents_library_id ON documents(library_id)
        """,
        """
            CREATE INDEX IF NOT EXISTS idx_libraries_updated_at ON libraries(updated_at)
        """,
    )

    conn = get_connection()

    try:
        for statement in schema_statements:
            conn.execute(statement)
        conn.commit()
        return {"success": True}
    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def upsert_library(
    library_id: str,
    name: str,
    description: Optional[str] = None,
    source_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Insert or update a library record.

    Args:
        library_id: Unique identifier for the library
        name: Library name
        description: Optional description
        source_path: Path to library source files (falls back to library_id when empty)

    Returns:
        {"success": True, "id": library_id, "exists": <row existed before>} on success,
        otherwise {"success": False, "error": <message>}
    """
    conn = get_connection()

    try:
        # Naive UTC timestamp. datetime.utcnow() is deprecated since Python 3.12;
        # dropping tzinfo keeps the stored text format identical to what it produced.
        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        source_path = source_path or library_id

        # Check if library exists so the caller can be told whether this was an update
        cursor = conn.execute("SELECT id FROM libraries WHERE id = ?", (library_id,))
        exists = cursor.fetchone() is not None

        if exists:
            # Update existing library; created_at is left untouched
            conn.execute("""
                UPDATE libraries SET
                    name = ?, description = ?, source_path = ?, updated_at = ?
                WHERE id = ?
            """, (name, description, source_path, now, library_id))
        else:
            # Insert new library; both timestamps start at the same instant
            conn.execute("""
                INSERT INTO libraries (id, name, description, source_path, created_at, updated_at)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (library_id, name, description, source_path, now, now))

        conn.commit()
        return {"success": True, "id": library_id, "exists": exists}

    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def insert_document_chunk(
    doc_id: str,
    library_id: str,
    path: str,
    title: Optional[str] = None,
    content: Optional[str] = None,
    chunk_index: Optional[int] = None,
    token_estimate: int = 0,
) -> Dict[str, Any]:
    """
    Insert or update a document chunk record.

    Args:
        doc_id: Unique identifier for this chunk
        library_id: Foreign key to libraries table
        path: Relative file path within the library
        title: Optional document title
        content: Full text content of the chunk
        chunk_index: Index within the full document (NULL if not chunked)
        token_estimate: Estimated token count (a falsy value is stored as 0)

    Returns:
        {"success": True, "id": doc_id, "exists": <row existed before>} on success,
        otherwise {"success": False, "error": <message>}
    """
    conn = get_connection()

    try:
        # Naive UTC timestamp. datetime.utcnow() is deprecated since Python 3.12;
        # dropping tzinfo keeps the stored text format identical to what it produced.
        now = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

        # Check if document chunk exists
        cursor = conn.execute(
            "SELECT id FROM documents WHERE id = ?", (doc_id,)
        )
        exists = cursor.fetchone() is not None

        if exists:
            # NOTE(review): an update also overwrites created_at, so the column
            # effectively records the last write time - confirm this is intended.
            conn.execute(
                """
                UPDATE documents
                SET library_id = ?, path = ?, title = ?, content = ?,
                    chunk_index = ?, token_estimate = ?, created_at = ?
                WHERE id = ?
                """,
                (library_id, path, title, content, chunk_index, token_estimate or 0, now, doc_id),
            )
        else:
            conn.execute(
                """
                INSERT INTO documents
                    (id, library_id, path, title, content, chunk_index, token_estimate, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (doc_id, library_id, path, title, content, chunk_index, token_estimate or 0, now),
            )

        conn.commit()

        return {"success": True, "id": doc_id, "exists": exists}

    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def clear_library_documents(library_id: str) -> Dict[str, Any]:
    """
    Remove every document chunk that belongs to one library.

    Args:
        library_id: The library whose chunks are deleted

    Returns:
        {"success": True, "deleted": <row count>, "library_id": library_id} on success,
        otherwise {"success": False, "error": <message>}
    """
    conn = get_connection()

    try:
        removed_rows = conn.execute(
            "DELETE FROM documents WHERE library_id = ?", (library_id,)
        ).rowcount
        conn.commit()
        outcome = {"success": True, "deleted": removed_rows, "library_id": library_id}
        return outcome
    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def delete_library(library_id: str) -> Dict[str, Any]:
    """
    Remove a library row together with all of its document chunks.

    Returns:
        {"success": True, "deleted": <library rows removed>, "library_id": library_id}
        on success, otherwise {"success": False, "error": <message>}
    """
    conn = get_connection()

    try:
        key = (library_id,)
        # Chunks are removed explicitly before the library row itself
        conn.execute("DELETE FROM documents WHERE library_id = ?", key)
        library_cursor = conn.execute("DELETE FROM libraries WHERE id = ?", key)
        conn.commit()
        removed_libraries = library_cursor.rowcount
        return {"success": True, "deleted": removed_libraries, "library_id": library_id}
    except Exception as e:
        conn.rollback()
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def list_libraries() -> List[Dict[str, Any]]:
    """
    Fetch every library, most recently updated first.

    Returns:
        List of dictionaries, one per library row. If the query fails, a single
        dict {"success": False, "error": <message>} is returned instead of a list.
    """
    conn = get_connection()

    try:
        cursor = conn.execute("SELECT * FROM libraries ORDER BY updated_at DESC")
        # Pair each row's values with the column names reported by the cursor
        column_names = [meta[0] for meta in cursor.description]
        return [dict(zip(column_names, record)) for record in cursor]
    except Exception as e:
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def search_libraries(query: str) -> List[Dict[str, Any]]:
    """
    Search libraries whose id, name or description contains the query text.

    This is a case-insensitive substring match (SQL LIKE), not full-text search.
    The query is matched literally: '%' and '_' in it are escaped so they do not
    act as wildcards.

    Args:
        query: Search query string

    Returns:
        List of matching library dictionaries (empty if none found), most recently
        updated first. If the query fails, a single dict
        {"success": False, "error": <message>} is returned instead of a list.
    """
    conn = get_connection()

    try:
        # Escape the escape character first, then the two LIKE wildcards
        literal = (
            str(query)
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        like_query = f"%{literal}%"
        cursor = conn.execute("""
            SELECT * FROM libraries
            WHERE lower(id) LIKE lower(?) ESCAPE '\\'
               OR lower(name) LIKE lower(?) ESCAPE '\\'
               OR lower(coalesce(description, '')) LIKE lower(?) ESCAPE '\\'
            ORDER BY updated_at DESC
        """, (like_query, like_query, like_query))

        # Convert to list of dicts
        columns = [col[0] for col in cursor.description]
        return [dict(zip(columns, row)) for row in cursor]

    except Exception as e:
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_document_by_id(doc_id: str) -> Optional[Dict[str, Any]]:
    """
    Look up one document chunk by its primary key.

    Args:
        doc_id: The document ID to fetch

    Returns:
        Dictionary with the row's columns, or None if no row has that ID. If the
        query fails, {"success": False, "error": <message>} is returned.
    """
    conn = get_connection()

    try:
        cursor = conn.execute("SELECT * FROM documents WHERE id = ?", (doc_id,))
        record = cursor.fetchone()

        if record is not None:
            # Build the dict from the cursor's column names
            field_names = [meta[0] for meta in cursor.description]
            return dict(zip(field_names, record))

        return None
    except Exception as e:
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def get_chunks_for_library(library_id: str) -> List[Dict[str, Any]]:
    """
    Fetch all document chunks stored for one library.

    Args:
        library_id: The library ID to fetch chunks for

    Returns:
        List of dictionaries, one per chunk row, ordered by chunk_index descending.
        If the query fails, {"success": False, "error": <message>} is returned
        instead of a list.
    """
    conn = get_connection()

    try:
        # NOTE(review): descending order returns the last chunk first - confirm
        # that callers expect this.
        sql = "SELECT * FROM documents WHERE library_id = ? ORDER BY chunk_index DESC"
        cursor = conn.execute(sql, (library_id,))
        field_names = [meta[0] for meta in cursor.description]
        return [dict(zip(field_names, record)) for record in cursor]
    except Exception as e:
        return {"success": False, "error": str(e)}
    finally:
        conn.close()
|
||||||
@@ -0,0 +1,181 @@
|
|||||||
|
# Local Embedding Generation using FastEmbed
|
||||||
|
import asyncio
|
||||||
|
from typing import List
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton for cached model instance
|
||||||
|
_embedding_model = None
|
||||||
|
_embedding_size = 384 # BAAI/bge-small-en-v1.5 output dimension
|
||||||
|
|
||||||
|
|
||||||
|
def _load_model():
    """
    Create the FastEmbed model on first use and return the cached instance.

    Raises:
        ImportError: If the fastembed package cannot be imported
        RuntimeError: If constructing the model fails; the message is expanded
            when the error text points at a lack of disk space
    """
    global _embedding_model, _embedding_size

    try:
        from fastembed import TextEmbedding

        if _embedding_model is not None:
            return _embedding_model

        print("Loading embedding model (this may take a few minutes on first run)...")

        # Use BAAI/bge-small-en-v1.5 - lightweight (~90MB), works offline
        _embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5", cache_dir=".embed_cache")
        print("Embedding model loaded successfully.")
        return _embedding_model

    except ImportError as e:
        raise ImportError(
            "FastEmbed is not installed. Please install with:\n"
            " pip install fastembed\n\n"
            f"Import error details: {e}"
        ) from e

    except RuntimeError as e:
        # Model download/installation failed
        reason = str(e)
        out_of_space = "No space left" in reason or "disk quota exceeded" in reason
        if not out_of_space:
            raise
        raise RuntimeError(
            "Failed to load embedding model due to disk space constraints.\n\n"
            "Please free up space on your system (at least 500MB required).\n"
            "Or specify a custom cache directory with available space:\n"
            " from fastembed import TextEmbedding\n"
            " model = TextEmbedding(model_name='...', cache_dir='/path/to/large/storage')\n\n"
            f"Error: {e}"
        ) from e
|
||||||
|
|
||||||
|
|
||||||
|
def get_embedding_model():
    """
    Return the shared embedding model, loading it on the first call.

    Returns:
        FastEmbed TextEmbedding instance (lazy-loaded on first call)

    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model download/load failed
    """
    global _embedding_model
    if _embedding_model is not None:
        return _embedding_model
    _embedding_model = _load_model()
    return _embedding_model
|
||||||
|
|
||||||
|
|
||||||
|
def embed_text(text: str) -> List[float]:
    """
    Generate embedding for a single text.

    Args:
        text: The text string to embed

    Returns:
        List of floats representing the embedding vector. An empty or non-string
        input yields a zero vector of get_embedding_size() elements without
        loading the model.

    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model loading failed
    """
    if not text or not isinstance(text, str):
        return [0.0] * get_embedding_size()

    model = get_embedding_model()
    # embed() hands back a lazy iterable of vectors, not a list, so indexing it
    # with [0] fails; take the first (and only) item from the iterator instead.
    vector = next(iter(model.embed([text])))
    # Same conversion rule as embed_texts: arrays expose tolist()
    return vector.tolist() if hasattr(vector, "tolist") else list(vector)
|
||||||
|
|
||||||
|
|
||||||
|
def embed_texts(texts: List[str]) -> List[List[float]]:
    """
    Embed a batch of texts in one model call.

    Args:
        texts: List of text strings to embed

    Returns:
        List of embedding vectors, one per input text, in input order. An empty
        input returns an empty list without loading the model.

    Raises:
        ImportError: If FastEmbed is not installed
        RuntimeError: If model loading failed
    """
    if not texts:
        return []

    model = get_embedding_model()
    # Vectors exposing tolist() are converted to plain lists; anything else is
    # passed through unchanged.
    return [
        vec.tolist() if hasattr(vec, 'tolist') else vec
        for vec in model.embed(texts)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def get_embedding_size() -> int:
    """
    Report the embedding vector dimension used by this module.

    Returns:
        The module-level ``_embedding_size`` value (384 for bge-small-en-v1.5)

    Note:
        This is a stored default; it is not read back from the loaded model.
    """
    dimension = _embedding_size
    return dimension
|
||||||
|
|
||||||
|
|
||||||
|
# Async wrapper for compatibility with existing code
|
||||||
|
async def generate_embeddings(chunks: List[str]) -> List[List[float]]:
    """
    Async wrapper around embed_texts for compatibility.

    The embedding call is synchronous, so it is run in the event loop's default
    executor to keep other coroutines responsive while vectors are computed.

    Args:
        chunks: List of text strings to embed

    Returns:
        List of embedding vectors (empty list for empty input)
    """
    if not chunks:
        # Nothing to embed; skip the executor hop entirely
        return []

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, embed_texts, chunks)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test for the embeddings module. Every check is attempted even
    # if an earlier one fails; the process exits non-zero when any check failed
    # so the final success line is only printed for a clean run.
    print("Testing embeddings module...\n")
    failures = 0

    # Test get_embedding_size
    size = get_embedding_size()
    print(f"Embedding dimension: {size}")

    # Test single text embedding
    test_text = "Hello, world! This is a test of the embedding generation."
    try:
        emb = embed_text(test_text)
        print(f"\nSingle text embedding shape: ({len(emb)},)")
        print(f"First 5 values: {emb[:5]}")
        print("✓ Single embedding works")
    except Exception as e:
        failures += 1
        print(f"✗ Single embedding failed: {e}")

    # Test batch embedding
    test_texts = [
        "The quick brown fox jumps over the lazy dog.",
        "Machine learning is a subset of artificial intelligence.",
        "Natural language processing enables computers to understand human language."
    ]
    try:
        embeddings = embed_texts(test_texts)
        print(f"\nBatch embedding shape: ({len(embeddings)}, {len(embeddings[0])})")
        print("✓ Batch embeddings work")
    except Exception as e:
        failures += 1
        print(f"✗ Batch embeddings failed: {e}")

    # Test empty inputs
    assert embed_text("") == [0.0] * size, "Empty text should return zero vector"
    assert embed_texts([]) == [], "Empty list should return empty list"
    print("✓ Empty input handling works")

    if failures:
        print(f"\n✗ {failures} test(s) failed")
        raise SystemExit(1)

    print("\n✅ All tests passed!")
|
||||||
@@ -0,0 +1,389 @@
|
|||||||
|
# Git Source Operations for Repository Cloning and File Discovery
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
def get_repos_dir() -> Path:
    """Return the directory where cloned repositories are stored."""
    # data/repos, located three directory levels above this source file
    three_levels_up = Path(__file__).parent.parent.parent
    return three_levels_up / "data" / "repos"
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_repos_dir():
    """Create the repositories directory if needed and return its path."""
    target = get_repos_dir()
    target.mkdir(parents=True, exist_ok=True)
    return target


# Runs on import so the directory is available before any clone is attempted;
# repeating it is harmless because mkdir is called with exist_ok=True.
ensure_repos_dir()
|
||||||
|
|
||||||
|
|
||||||
|
class GitCloneError(Exception):
    """Raised when a git clone, fetch or checkout operation fails."""
|
||||||
|
|
||||||
|
|
||||||
|
def clone_or_update_repo(
    repo_id: str,
    repo_url: str,
    branch: str,
    repos_base: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Clone a git repository or update an existing clone.

    An existing checkout (a directory that still contains ``.git``) is fetched
    and hard-reset to ``origin/<branch>``. Anything else found at the target
    path is removed and cloned afresh.

    Args:
        repo_id: Unique identifier for this repository (used in paths)
        repo_url: Git URL to clone from
        branch: Branch name to checkout
        repos_base: Base directory for repos (defaults to get_repos_dir())

    Returns:
        Dict with keys "success", "repo_path", "url" and "branch"

    Raises:
        GitCloneError: If the target path is invalid or any git command fails
    """
    # Imported at function scope so both branches and the except clauses below
    # see the same names.
    import subprocess

    repos_base = Path(repos_base) if repos_base else get_repos_dir()
    repo_path = repos_base / repo_id

    def _git(*args: str) -> None:
        """Run one git command; a non-zero exit raises CalledProcessError."""
        subprocess.run(["git", *args], capture_output=True, text=True, check=True)

    try:
        # The target may be deleted below, so it must be a real sub-path of the
        # base directory (rejects "", ".", ".." and absolute repo_ids).
        base_resolved = repos_base.resolve()
        target_resolved = repo_path.resolve()
        if target_resolved == base_resolved or base_resolved not in target_resolved.parents:
            raise GitCloneError(f"Invalid repo_id {repo_id!r}: resolves outside {repos_base}")

        if repo_path.exists() and not (repo_path / ".git").exists():
            # Without its own .git, `git -C` would act on whatever repository
            # encloses this directory, so never fetch/reset here - start over.
            if repo_path.is_dir() and not repo_path.is_symlink():
                shutil.rmtree(repo_path)
            else:
                repo_path.unlink()

        if repo_path.exists():
            # Update existing clone
            print(f" [Git] Updating existing clone at {repo_path}")
            _git("-C", str(repo_path), "fetch", "origin")
            # Discard local changes and move to the fetched branch tip
            _git("-C", str(repo_path), "reset", "--hard", "origin/" + branch)
        else:
            # Clone new repository
            if not repo_url:
                raise GitCloneError(f"No repository URL given for {repo_id!r}")
            print(f" [Git] Cloning {repo_url} to {repo_path}")
            repo_path.parent.mkdir(parents=True, exist_ok=True)
            # "--" stops a URL beginning with "-" from being parsed as an option
            _git(
                "clone",
                f"--branch={branch}",
                "--single-branch",
                "--",
                repo_url,
                str(repo_path),
            )

        print(f" [Git] Checked out branch: {branch}")

        return {
            "success": True,
            "repo_path": str(repo_path),
            "url": repo_url,
            "branch": branch
        }

    except GitCloneError:
        # Already the documented exception type; do not wrap it a second time
        raise
    except subprocess.CalledProcessError as e:
        raise GitCloneError(f"Git command failed: {e.stderr}") from e
    except Exception as e:
        raise GitCloneError(f"Failed to clone/update repo: {e}") from e
|
||||||
|
|
||||||
|
|
||||||
|
def discover_files(
    repo_path: Path,
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None
) -> List[Dict[str, Any]]:
    """
    Discover files in a git repository respecting include/exclude paths.

    A path in either list matches itself and everything beneath it. Excludes
    take priority over includes. The repository's top-level ``.git`` entry is
    never reported, and symlinked directories are not followed.

    Args:
        repo_path: Path to the cloned repository
        include_paths: List of paths relative to repo root to include
            (if None or empty, the whole repository is considered)
        exclude_paths: List of paths relative to repo root to exclude

    Returns:
        List of dicts, sorted by name within each directory, with format:
        {
            "path": "docs/hooks.md",  # Relative to repo root, "/"-separated
            "full_path": "/full/path/to/repo/docs/hooks.md",
            "is_binary": False  # True when the file looks binary
        }
    """
    def normalize(raw) -> str:
        """Reduce a user-supplied path to a clean '/'-separated relative string."""
        parts = str(raw).replace("\\", "/").split("/")
        return "/".join(part for part in parts if part not in ("", "."))

    includes = [normalize(p) for p in include_paths] if include_paths else []
    # An include that normalizes to "" (e.g. "." or "/") means the repo root
    include_all = not includes or "" in includes
    excludes = {normalize(p) for p in (exclude_paths or [])} - {""}

    def is_under(candidate: str, base: str) -> bool:
        """True when candidate equals base or lies somewhere beneath it."""
        return candidate == base or candidate.startswith(base + "/")

    def is_excluded(rel: str) -> bool:
        return any(is_under(rel, exc) for exc in excludes)

    def file_wanted(rel: str) -> bool:
        return include_all or any(is_under(rel, inc) for inc in includes)

    def dir_wanted(rel: str) -> bool:
        # Descend into directories inside an include path and also into the
        # ancestors of one ("docs" must be entered to reach "docs/api").
        return include_all or any(
            is_under(rel, inc) or is_under(inc, rel) for inc in includes
        )

    def is_probably_binary(filepath: str) -> bool:
        """Simple binary detection based on file extension and first bytes."""
        text_extensions = {'.md', '.txt', '.py', '.js', '.ts', '.json',
                           '.yaml', '.yml', '.html', '.css', '.sh', '.sql'}
        if Path(filepath).suffix.lower() in text_extensions:
            return False
        # Unknown extension: look for null bytes in the first 8KB
        try:
            with open(filepath, 'rb') as f:
                return b'\x00' in f.read(8192)
        except OSError:
            # Unreadable files are reported as non-binary rather than failing
            return False

    discovered: List[Dict[str, Any]] = []

    def walk_and_collect(current: Path, rel_prefix: str) -> None:
        """Recursively collect matching files below ``current``."""
        try:
            with os.scandir(current) as scanner:
                # DirEntry objects are not orderable, so sort on the name
                entries = sorted(scanner, key=lambda item: item.name)
        except PermissionError:
            # Skip directories we can't read
            return

        for entry in entries:
            rel = f"{rel_prefix}/{entry.name}" if rel_prefix else entry.name

            # Git metadata at the repository root is never part of the docs
            if rel == ".git" or is_excluded(rel):
                continue

            if entry.is_dir(follow_symlinks=False):
                if dir_wanted(rel):
                    walk_and_collect(current / entry.name, rel)
            elif entry.is_file() and file_wanted(rel):
                full_path = str(current / entry.name)
                discovered.append({
                    "path": rel,
                    "full_path": full_path,
                    "is_binary": is_probably_binary(full_path)
                })

    # Walk the repository starting from repo root
    walk_and_collect(Path(repo_path), "")

    return discovered
|
||||||
|
|
||||||
|
|
||||||
|
async def ingest_git_source(
    library_id: str,
    name: str,
    description: Optional[str] = None,
    repo_url: str = None,
    branch: str = "main",
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None,
    repos_base: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Ingest a git repository as a library.

    Steps: clone the repo (or update an existing clone), discover files under
    the include/exclude rules, delete the clone's .git directory, then hand the
    library to ingest_library.

    Args:
        library_id: Unique identifier for this library
        name: Library display name
        description: Optional description
        repo_url: Git repository URL to clone from
        branch: Branch to checkout (default: main)
        include_paths: Paths relative to repo root to include (if None, all dirs considered)
        exclude_paths: Paths relative to repo root to exclude
        repos_base: Base directory for clones (defaults to get_repos_dir())

    Returns:
        Dict with "success", "library_id" and "files_discovered". When files were
        found it also carries "name", "chunks_created" and "vectors_added";
        otherwise it carries a "message".

    Raises:
        GitCloneError: If git operations fail
    """
    from .db import upsert_library
    from .ingest import ingest_library

    print(f"\n[Git Ingestion] Processing library: {library_id}")
    print(f" Source: {repo_url or '(local)'}")

    # Ensure repos directory exists
    base_dir = repos_base or get_repos_dir()
    base_dir.mkdir(parents=True, exist_ok=True)

    # Clones live in a directory named after the library with a "-git" suffix
    repo_id = f"{library_id}-git"
    clone_result = clone_or_update_repo(
        repo_id=repo_id,
        repo_url=repo_url,
        branch=branch,
        repos_base=base_dir
    )
    repo_path = Path(clone_result["repo_path"])
    print(f" [Git] Found files in {repo_path}")

    # Discover files respecting include/exclude paths
    files = discover_files(
        repo_path=repo_path,
        include_paths=include_paths,
        exclude_paths=exclude_paths
    )
    file_count = len(files)
    print(f" [Git] Discovered {file_count} file(s)")

    if file_count == 0:
        return {
            "success": True,
            "library_id": library_id,
            "message": "No files found matching include/exclude criteria",
            "files_discovered": 0
        }

    # Remove .git directory if present (avoid processing it)
    # NOTE(review): after this the directory is no longer a git checkout, so a
    # later sync cannot fetch into it - confirm this is intended.
    git_dir = repo_path / ".git"
    if git_dir.exists():
        shutil.rmtree(git_dir)
        print(f" [Git] Removed .git directory")

    # Ingest using existing library ingestion pipeline
    ingest_result = await ingest_library(
        library_id=library_id,
        name=name,
        description=description,
        source_path=repo_id  # Use repo_id as the "source path" for tracking
    )

    summary = {
        "success": ingest_result.get("success", False),
        "library_id": library_id,
        "name": name,
        "files_discovered": file_count,
        "chunks_created": ingest_result.get("chunks_created", 0),
        "vectors_added": ingest_result.get("vectors_added", 0)
    }
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
async def sync_sources(
    sources_config: Optional[List[Dict[str, Any]]] = None,
    repos_base: Optional[Path] = None
) -> List[Dict[str, Any]]:
    """
    Sync all git sources defined in config.

    Args:
        sources_config: List of source configs (same format as the entries of
            docs_sources.yaml). When None, the list is loaded from
            docs_sources.yaml located three directories above this module.
        repos_base: Base directory for repos (forwarded to ingest_git_source)

    Returns:
        List of results, one per source, in configuration order. A source
        that fails is reported as a dict with "success": False, "library_id"
        and "error" instead of aborting the remaining sources. When the
        default config file does not exist, a single failure entry is returned.
    """
    if sources_config is None:
        # Load from default config file
        import yaml
        config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"

        if not config_path.exists():
            return [{"success": False, "error": f"Config not found: {config_path}"}]

        with open(config_path) as f:
            # safe_load returns None for an empty file
            data = yaml.safe_load(f) or {}

        if isinstance(data, dict):
            sources_config = data.get("sources")
        else:
            # A bare top-level list is treated as the list of sources
            sources_config = data

        # Covers "sources:" with a null value and any non-list document
        if not isinstance(sources_config, list):
            sources_config = []

    results = []

    for source in sources_config:
        if not isinstance(source, dict):
            # Entries are read with .get() below, so anything but a mapping
            # is reported as a failed source rather than raising.
            results.append({
                "success": False,
                "library_id": "unknown",
                "error": f"Invalid source entry (expected a mapping): {source!r}"
            })
            continue

        try:
            result = await ingest_git_source(
                library_id=source.get("library_id"),
                name=source.get("name"),
                description=source.get("description"),
                repo_url=source.get("repo_url"),
                branch=source.get("branch", "main"),
                include_paths=source.get("include_paths"),
                exclude_paths=source.get("exclude_paths"),
                repos_base=repos_base
            )
        except GitCloneError as e:
            result = {
                "success": False,
                "library_id": source.get("library_id", "unknown"),
                "error": str(e)
            }
        except Exception as e:
            result = {
                "success": False,
                "library_id": source.get("library_id", "unknown"),
                "error": f"Unexpected error: {e}"
            }

        results.append(result)

    return results
|
||||||
@@ -0,0 +1,387 @@
|
|||||||
|
# Document Ingestion Logic
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict, Any, Optional, BinaryIO
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
|
||||||
|
# Import local modules
|
||||||
|
from .config import settings
|
||||||
|
from .chunking import chunk_text, estimate_tokens
|
||||||
|
from .embeddings import embed_texts
|
||||||
|
from .vector_store import upsert_chunks
|
||||||
|
from .db import insert_document_chunk, upsert_library, clear_library_documents
|
||||||
|
from .git_source import ingest_git_source
|
||||||
|
|
||||||
|
# Lower-case file suffixes (leading dot included) accepted by the ingestion code.
SUPPORTED_EXTENSIONS = {
    '.md',
    '.txt',
    '.py',
    '.js',
    '.ts',
    '.json',
    '.yaml',
    '.yml',
    '.html',
    '.css',
    '.pdf',
}

# Documents root: the DOCS_PATH environment variable, or ./docs when it is unset.
DOCS_PATH = Path(os.environ.get("DOCS_PATH", "./docs"))
|
||||||
|
|
||||||
|
|
||||||
|
def get_file_size(path: Path) -> int:
    """Return the size of ``path`` in bytes, or -1 if it cannot be stat'ed."""
    try:
        size_in_bytes = path.stat().st_size
    except OSError:
        # Any OS-level failure is reported as -1
        return -1
    return size_in_bytes
|
||||||
|
|
||||||
|
|
||||||
|
async def read_document_file(path: Path) -> str:
    """
    Read document content from a file.

    PDF files are read page by page with pypdf and the page texts are joined
    with blank lines; every other supported file is read as UTF-8 text.

    Args:
        path: Path to the file

    Returns:
        Content as string. An empty string is returned when the path does not
        exist, the suffix is not supported, the content is only whitespace,
        or the file cannot be read.

    Raises:
        ImportError: If a PDF is requested but pypdf is not installed
    """
    if not path.exists():
        return ""

    # Check extension
    suffix = path.suffix.lower()
    if suffix == '.pdf':
        try:
            # Imported lazily and inside the try, so a missing package
            # produces the install hint below instead of a bare ImportError.
            from pypdf import PdfReader
        except ImportError as exc:
            raise ImportError("pypdf is required for PDF files. Install with: pip install pypdf") from exc

        try:
            reader = PdfReader(str(path))
            pages = []
            for page in reader.pages:
                text = page.extract_text()
                if text:
                    pages.append(text)
            return "\n\n".join(pages)
        except Exception as e:
            # Best effort: an unreadable PDF is skipped, not fatal
            print(f" Warning: Could not read PDF {path}: {e}")
            return ""
    elif suffix not in SUPPORTED_EXTENSIONS:
        print(f" Unsupported file type: {suffix}")
        return ""

    # Read text-based files
    try:
        content = path.read_text(encoding='utf-8')
        return content if content.strip() else ""
    except Exception as e:
        print(f" Warning: Could not read {path}: {e}")
        return ""
|
||||||
|
|
||||||
|
|
||||||
|
async def ingest_library(library_id: str, name: str, description: Optional[str] = None, source_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Ingest all documents for a library.

    Steps: upsert the library record, scan the library folder recursively for
    supported files, clear the library's existing document chunks, then read,
    chunk and embed every file and store the chunks via insert_document_chunk
    and the vectors via upsert_chunks.

    Args:
        library_id: Unique identifier for the library
        name: Library name
        description: Optional description
        source_path: Path to library folder (relative to DOCS_PATH). When it
            is None, the folder named after ``library_id`` is used.

    Returns:
        Summary dict with "success", "library_id", "files_processed",
        "chunks_created" and "vectors_added"; or a dict with "success": False
        and "error" when the library folder does not exist.
    """
    print(f"\n[Library] Processing: {library_id}")
    if source_path:
        print(f" Source: {source_path}")

    # Ensure library record exists
    result = upsert_library(library_id, name, description, source_path)
    print(f" [{result.get('success', False)}] Library record: {'created' if not result.get('exists') else 'updated'}")

    # Get the library folder path. source_path defaults to None, and
    # DOCS_PATH / None raises TypeError, so fall back to the library id.
    relative_dir = source_path if source_path is not None else library_id
    library_dir = DOCS_PATH / relative_dir

    if not library_dir.exists():
        print(f" Error: Directory does not exist: {library_dir}")
        return {"success": False, "error": f"Directory not found: {library_dir}"}

    # Find all supported files (recursive); sorted so runs are reproducible
    print(f" [Library] Scanning for files in: {library_dir}")
    doc_files = sorted(
        file_path
        for file_path in library_dir.rglob('*')
        if file_path.is_file()
        and (file_path.suffix.lower() == '.pdf' or file_path.suffix.lower() in SUPPORTED_EXTENSIONS)
    )

    print(f" [Library] Found {len(doc_files)} document(s)")

    # Clear old chunks for this library
    print(f" [Library] Clearing existing chunks...")
    clear_result = clear_library_documents(library_id)
    if not clear_result.get('success'):
        print(f" Warning: Could not clear library docs: {clear_result}")
    else:
        print(f" [Library] Cleared {clear_result.get('deleted', 0)} existing chunks")

    # Process documents
    all_chunks = []
    processed_files = 0

    for file_path in doc_files:
        # Read file content
        print(f" [File] Reading: {file_path.relative_to(library_dir)}")
        content = await read_document_file(file_path)

        if not content:
            continue

        # Estimate tokens and chunk
        num_tokens = estimate_tokens(content)
        chunks = chunk_text(content, max_tokens=500, overlap_tokens=80)

        if not chunks:
            print(f" [File] No valid chunks from {file_path.name}")
            continue

        # Embed chunks and prepare for storage
        print(f" Chunked into {len(chunks)} pieces (approx. {num_tokens} tokens)")

        embeddings = embed_texts(chunks)

        # Build chunk dicts
        base_path = file_path.relative_to(library_dir).as_posix()

        for i, chunk in enumerate(chunks):
            all_chunks.append({
                # NOTE(review): the id is built from the file stem only, so two
                # files sharing a stem (a/README.md, b/README.md) produce the
                # same ids - confirm whether ids must be unique per library.
                "id": f"{file_path.stem}-{i}",
                "library_id": library_id,
                "path": base_path,
                "title": Path(base_path).stem,
                "content": chunk,
                "chunk_index": i,
                "embedding": embeddings[i]
            })

        processed_files += 1

    print(f" [Library] Processed {processed_files} file(s), {len(all_chunks)} total chunks")

    # Save chunks to SQLite
    if all_chunks:
        failed_inserts = 0
        for chunk in all_chunks:
            insert_result = insert_document_chunk(
                doc_id=chunk["id"],
                library_id=chunk["library_id"],
                path=chunk["path"],
                title=chunk.get("title"),
                content=chunk["content"],
                chunk_index=chunk["chunk_index"],
                token_estimate=estimate_tokens(chunk["content"])
            )
            if not insert_result.get('success'):
                failed_inserts += 1
        # Report what was actually stored rather than what was attempted
        print(f" [Library] Saved {len(all_chunks) - failed_inserts} chunks to SQLite")
        if failed_inserts:
            print(f" Warning: {failed_inserts} chunk(s) could not be saved to SQLite")
    else:
        print(f" [Library] No chunks to save to SQLite")

    # Save vectors to Qdrant
    vectors_added = 0
    if all_chunks:
        upsert_result = await upsert_chunks(all_chunks)
        vectors_added = upsert_result.get('points_added', 0)
        print(f" [Library] Vector store: {upsert_result.get('success', False)} ({upsert_result.get('points_added', 0)} added)")
    else:
        print(f" [Library] No vectors to add to Qdrant")

    return {
        "success": True,
        "library_id": library_id,
        "files_processed": processed_files,
        "chunks_created": len(all_chunks),
        "vectors_added": vectors_added
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def ingest_git_source_from_config(
    repo_url: str,
    branch: str = "main",
    include_paths: Optional[List[str]] = None,
    exclude_paths: Optional[List[str]] = None,
    repos_base: Optional[Path] = None
) -> Dict[str, Any]:
    """
    Ingest a git repository, deriving the library metadata from its URL.

    The library id is the last path segment of the URL with a trailing
    ".git" removed (or "unknown" when the URL has no path), the name is the
    stem of the URL host (or of the path when there is no host), and the
    description is "Documentation from <path>".

    Args:
        repo_url: Git repository URL to clone from
        branch: Branch to checkout (default: main)
        include_paths: Paths relative to repo root to include (forwarded to ingest_git_source)
        exclude_paths: Paths relative to repo root to exclude (forwarded to ingest_git_source)
        repos_base: Base directory for cloned repos (forwarded to ingest_git_source)

    Returns:
        The result dict returned by ingest_git_source

    Raises:
        GitCloneError: If git operations fail
    """
    import urllib.parse

    parsed = urllib.parse.urlparse(repo_url)

    # Remove the literal ".git" suffix. str.rstrip('.git') would instead strip
    # any trailing run of the characters '.', 'g', 'i', 't', turning
    # "/acme/widget.git" into "/acme/widge".
    path_part = parsed.path.rstrip('/')
    if path_part.endswith('.git'):
        path_part = path_part[:-len('.git')]
    library_id = Path(path_part).name or "unknown"

    # NOTE(review): for "https://github.com/org/repo" this yields "github",
    # i.e. the host rather than the repository - confirm that is intended.
    name = Path(parsed.hostname or path_part).stem
    description = f"Documentation from {path_part}"

    result = await ingest_git_source(
        library_id=library_id,
        name=name,
        description=description,
        repo_url=repo_url,
        branch=branch,
        include_paths=include_paths,
        exclude_paths=exclude_paths,
        repos_base=repos_base
    )

    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def detect_libraries() -> List[Dict[str, Any]]:
    """
    Register every top-level folder of DOCS_PATH as a library.

    Each folder is upserted with its lower-cased name as the library id and
    its unchanged name as both display name and source path.

    Returns:
        One dict per folder with the keys "id", "name" and "source_path";
        an empty list when DOCS_PATH does not exist.
    """
    print(f"\n[Detection] Scanning for libraries in: {DOCS_PATH}")

    if not DOCS_PATH.exists():
        print(f" [Detection] Directory does not exist: {DOCS_PATH}")
        return []

    # Only directories count as libraries; plain files are ignored
    library_dirs = [entry for entry in DOCS_PATH.iterdir() if entry.is_dir()]
    total = len(library_dirs)

    detected = []
    for position, folder in enumerate(library_dirs, start=1):
        folder_name = folder.name
        lib_id = folder_name.lower()

        # Make sure a library record exists (default metadata)
        upsert_library(
            library_id=lib_id,
            name=folder_name,
            description=None,
            source_path=folder_name
        )

        detected.append({"id": lib_id, "name": folder_name, "source_path": folder_name})
        print(f" [{position}/{total}] Library detected: {folder_name} (id: {lib_id})")

    print(f"\n[Detection] Found {len(detected)} library(ies)")
    return detected
|
||||||
|
|
||||||
|
|
||||||
|
async def ingest_all(verbose: bool = True) -> Dict[str, Any]:
    """
    Ingest all discovered libraries.

    Args:
        verbose: Whether to print the banner, per-library and summary messages
            of this function (functions it calls print regardless)

    Returns:
        Summary dict with the integer keys "total_libraries", "successful",
        "failed" and "total_chunks". The same shape is returned when no
        library is found (all values 0). A library whose ingestion raises is
        counted as failed and does not stop the remaining libraries.
    """
    if verbose:
        print("\n" + "=" * 60)
        print("DOCUMENT INGESTION STARTED")
        print("=" * 60)

    # Detect libraries
    libraries = await detect_libraries()

    if not libraries:
        if verbose:
            print("\n[Summary] No libraries to ingest")
        # Same keys and value types as the regular summary below
        return {"total_libraries": 0, "successful": 0, "failed": 0, "total_chunks": 0}

    # Ingest each library
    results = []
    for lib in libraries:
        lib_id = lib["id"]

        try:
            result = await ingest_library(
                library_id=lib_id,
                name=lib["name"],
                description=None,
                source_path=lib.get("source_path")
            )
        except Exception as e:
            # Isolate failures so one broken library cannot abort the run
            result = {"success": False, "library_id": lib_id, "error": str(e)}
            if verbose:
                print(f" [Library] Failed: {lib_id} - {e}")

        if verbose and result.get('success'):
            print(f" [Library] Done: {result.get('library_id')} - {result.get('chunks_created', 0)} chunks")

        results.append(result)

    # Calculate totals
    total_chunks = sum(r.get('chunks_created', 0) for r in results)
    successful = len([r for r in results if r.get('success')])

    result = {
        "total_libraries": len(libraries),
        "successful": successful,
        "failed": len(results) - successful,
        "total_chunks": total_chunks
    }

    if verbose:
        print("\n" + "=" * 60)
        print("INGESTION COMPLETE")
        print("=" * 60)
        print(f" Libraries processed: {result['total_libraries']}")
        print(f" Successful: {result['successful']}")
        print(f" Failed: {result['failed']}")
        print(f" Total chunks created: {result['total_chunks']}")

    return result
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke test: detect the libraries, then try to ingest the first one.
    import asyncio

    async def test_run():
        print("Testing ingestion module...\n")

        # Step 1: library detection
        detected = await detect_libraries()
        print(f"\nDetected libraries: {len(detected)}")

        # Step 2: sample ingestion of the first library, if there is one
        # (may fail if no docs exist, which is ok for test)
        first = detected[0] if detected else None
        if first is not None:
            print("\nAttempting sample ingestion...")
            outcome = await ingest_library(
                library_id=first["id"],
                name=first["name"],
                source_path=first.get("source_path")
            )
            print(f"Result: {outcome}")

        print("\n✅ Tests completed!")

    asyncio.run(test_run())
|
||||||
@@ -0,0 +1,299 @@
|
|||||||
|
"""Context7 Docs API."""
|
||||||
|
import asyncio
|
||||||
|
import shutil
|
||||||
|
import yaml
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, File, Form, HTTPException, Query, Request, UploadFile
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
from .config import settings
|
||||||
|
from .db import (
|
||||||
|
clear_library_documents,
|
||||||
|
delete_library,
|
||||||
|
init_db,
|
||||||
|
list_libraries,
|
||||||
|
search_libraries,
|
||||||
|
upsert_library,
|
||||||
|
)
|
||||||
|
from .git_source import ingest_git_source
|
||||||
|
from .ingest import ingest_all, ingest_library
|
||||||
|
from .search import get_library_docs, resolve_library_id, search_docs
|
||||||
|
from .vector_store import delete_library_vectors, ensure_collection, get_client, get_collection_name
|
||||||
|
|
||||||
|
|
||||||
|
# Application object; the middleware, event and route decorators in this
# module register their handlers on it.
_APP_METADATA = {
    "title": "Context7 Docs API",
    "description": "Document ingestion and semantic search API for local-context7",
    "version": "1.0.0",
}
app = FastAPI(**_APP_METADATA)
|
||||||
|
|
||||||
|
|
||||||
|
class SearchRequest(BaseModel):
    # Body of the search endpoint.
    #   query      - required, at least one character
    #   library_id - optional; forwarded to search_docs as the library filter
    #   limit      - number of results, 1..50, default 10
    query: str = Field(..., min_length=1)
    library_id: Optional[str] = None
    limit: int = Field(default=10, ge=1, le=50)
|
||||||
|
|
||||||
|
|
||||||
|
class SyncSourcesRequest(BaseModel):
    # Optional body of the sources sync endpoint; "override" defaults to False.
    override: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
# Upload whitelist: lower-case suffixes (with leading dot) accepted by
# safe_upload_filename.
ALLOWED_EXTENSIONS = {
    ".md", ".txt",
    ".py", ".js", ".ts",
    ".json", ".yaml", ".yml",
    ".html", ".css",
    ".pdf",
}
|
||||||
|
|
||||||
|
|
||||||
|
@app.middleware("http")
async def auth_middleware(request: Request, call_next):
    """Require a valid X-API-Key header when settings.is_auth_enabled is true.

    GET requests whose path starts with "/health", "/libraries" or "/docs/"
    are always let through. Every other request must carry an X-API-Key
    header equal to settings.api_key_docs_api, otherwise a 401 JSON response
    is returned without calling the next handler.
    """
    import hmac

    if not settings.is_auth_enabled:
        return await call_next(request)

    public_prefixes = ("/health", "/libraries", "/docs/")
    if request.method == "GET" and request.url.path.startswith(public_prefixes):
        return await call_next(request)

    supplied = request.headers.get("X-API-Key")
    expected = settings.api_key_docs_api
    # Fail closed when either side is missing, and compare in constant time so
    # response timing does not reveal how much of a guessed key is correct.
    # Both values are compared as UTF-8 bytes because compare_digest rejects
    # non-ASCII str arguments.
    key_matches = (
        supplied is not None
        and expected is not None
        and hmac.compare_digest(str(supplied).encode("utf-8"), str(expected).encode("utf-8"))
    )
    if not key_matches:
        return JSONResponse(status_code=401, content={"detail": "Invalid or missing API key"})

    return await call_next(request)
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("startup")
async def startup() -> None:
    """Initialise the SQLite database, then wait for the Qdrant collection.

    ensure_collection() is attempted up to 20 times with a one-second pause
    after every failed attempt. A RuntimeError is raised when the database
    cannot be initialised or when all attempts fail.
    """
    db_status = init_db()
    if not db_status.get("success"):
        raise RuntimeError(f"Failed to initialize SQLite database: {db_status.get('error')}")

    attempts_left = 20
    last_error = None
    while attempts_left > 0:
        outcome = await ensure_collection()
        if outcome.get("success"):
            return
        last_error = outcome.get("error")
        attempts_left -= 1
        await asyncio.sleep(1)

    raise RuntimeError(f"Failed to initialize Qdrant collection: {last_error}")
|
||||||
|
|
||||||
|
|
||||||
|
def safe_library_id(library_id: str) -> str:
    """Validate a user-supplied library ID and return it with surrounding whitespace removed.

    Raises HTTPException(400) when the ID is empty or blank, is "." or "..",
    or contains "..", "/" or a backslash anywhere.
    """
    candidate = Path(library_id).name.strip()
    has_path_syntax = any(token in library_id for token in ("..", "/", "\\"))
    if has_path_syntax or candidate in {"", ".", ".."}:
        raise HTTPException(status_code=400, detail="Invalid library ID")
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def safe_upload_filename(filename: str) -> str:
    """Return a sanitised "<stem><ext>" file name for an upload.

    The lower-cased suffix must be in ALLOWED_EXTENSIONS. Only alphanumeric
    characters, "-", "_" and spaces of the stem are kept and the result is
    stripped. Raises HTTPException(400) for a disallowed suffix or when
    nothing of the stem remains.
    """
    parsed = Path(filename)
    extension = parsed.suffix.lower()
    if extension not in ALLOWED_EXTENSIONS:
        allowed_list = ', '.join(sorted(ALLOWED_EXTENSIONS))
        raise HTTPException(
            status_code=400,
            detail=f"Unsafe extension: {extension}. Allowed extensions: {allowed_list}",
        )

    kept_chars = [ch for ch in parsed.stem if ch.isalnum() or ch in "-_ "]
    cleaned_stem = "".join(kept_chars).strip()
    if not cleaned_stem:
        raise HTTPException(status_code=400, detail="Filename contains only unsafe characters")
    return cleaned_stem + extension
|
||||||
|
|
||||||
|
|
||||||
|
def docs_root() -> Path:
    """Return settings.docs_path wrapped in a Path."""
    configured_root = settings.docs_path
    return Path(configured_root)
|
||||||
|
|
||||||
|
|
||||||
|
def sources_config_path() -> Path:
    """Return the path of docs_sources.yaml, three directory levels above this file."""
    this_file = Path(__file__).resolve()
    base_dir = this_file.parents[2]
    return base_dir / "docs_sources.yaml"
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/health")
async def health_check():
    # Liveness endpoint: always answers with the same constant payload.
    payload = {"status": "ok", "service": "docs-api"}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/collections")
async def collections():
    # Reports the vector count of the configured collection. Any failure is
    # returned as an empty mapping plus a "warning" string, not an HTTP error.
    try:
        vector_client = get_client()
        details = vector_client.get_collection(get_collection_name())

        # Prefer vectors_count, then points_count, then 0
        count = getattr(details, "vectors_count", None)
        if not count:
            count = getattr(details, "points_count", 0)
        if not count:
            count = 0

        return {"collections": {get_collection_name(): {"vectors": count}}}
    except Exception as e:
        return {"collections": {}, "warning": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/libraries")
async def list_libraries_api():
    # Returns whatever list_libraries() yields together with its length.
    # A dict result flagged "success": False is turned into a 500 response.
    libs = list_libraries()
    lookup_failed = isinstance(libs, dict) and not libs.get("success", True)
    if lookup_failed:
        raise HTTPException(status_code=500, detail=libs.get("error", "Failed to list libraries"))
    return {"libraries": libs, "count": len(libs)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/libraries/search")
async def search_libraries_api(q: str = Query(..., min_length=1)):
    # Passes the query string to resolve_library_id and reports the matches.
    found = resolve_library_id(q)
    return dict(matches=found, count=len(found))
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/search")
async def search_docs_api(payload: SearchRequest):
    # Runs search_docs with the request values and echoes the query and
    # library filter back next to the hits.
    hits = search_docs(payload.query, library_id=payload.library_id, limit=payload.limit)
    response = {
        "query": payload.query,
        "library_id": payload.library_id,
        "results": hits,
        "count": len(hits),
    }
    return response
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/docs/{library_id}")
@app.get("/libraries/{library_id}/docs")
async def get_library_docs_api(
    library_id: str,
    topic: Optional[str] = Query(None),
    tokens: int = Query(8000, ge=1),
):
    # Served under two paths. "tokens" is handed to get_library_docs as its
    # token_limit; the result is returned under "content".
    rendered = get_library_docs(library_id=library_id, topic=topic, token_limit=tokens)
    return {"library_id": library_id, "content": rendered}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/ingest/all")
async def ingest_all_api():
    # Runs ingest_all() with its defaults and returns its summary unchanged.
    summary = await ingest_all()
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/ingest/{library_id}")
async def ingest_library_api(library_id: str):
    # The validated ID doubles as the library name and as its source folder.
    clean_id = safe_library_id(library_id)
    return await ingest_library(library_id=clean_id, name=clean_id, source_path=clean_id)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/v1/libraries/{library_id}")
async def api_create_library(
    library_id: str,
    name: Optional[str] = Form(None),
    description: Optional[str] = Form(None),
):
    # Creates the library folder under docs_root() (no error if it exists) and
    # upserts the library record; the name falls back to the library ID.
    # A failed upsert becomes a 500 response.
    clean_id = safe_library_id(library_id)
    display_name = name or clean_id

    target_dir = docs_root() / clean_id
    target_dir.mkdir(parents=True, exist_ok=True)

    outcome = upsert_library(clean_id, display_name, description, clean_id)
    if not outcome.get("success"):
        raise HTTPException(status_code=500, detail=outcome.get("error", "Failed to create library"))

    return {
        "success": True,
        "created": not outcome.get("exists", False),
        "library_id": clean_id,
        "name": display_name,
        "description": description,
        "path": str(target_dir),
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.delete("/api/v1/libraries/{library_id}")
async def api_delete_library(library_id: str):
    """Delete a library: its folder, its document chunks, its vectors and its record.

    The folder under docs_root() is removed first (the files inside it are
    counted beforehand), then clear_library_documents, delete_library_vectors
    and delete_library are called. All three are always called; if any of
    them returns a dict with "success": False, a 500 response carrying the
    collected error messages is raised afterwards.

    Returns a dict with "success", "library_id" and "deleted_files".
    """
    library_id = safe_library_id(library_id)
    lib_dir = docs_root() / library_id
    deleted_files = 0

    if lib_dir.exists():
        # Count before removal so the response can report how many files went
        deleted_files = sum(1 for path in lib_dir.rglob("*") if path.is_file())
        shutil.rmtree(lib_dir)

    docs_result = clear_library_documents(library_id)
    vectors_result = await delete_library_vectors(library_id)
    library_result = delete_library(library_id)

    # A failed result without an "error" entry would put None into the list
    # and make "; ".join() raise TypeError, so every entry is forced to text.
    failures = [
        str(r.get("error") or "unknown error")
        for r in (docs_result, vectors_result, library_result)
        if isinstance(r, dict) and not r.get("success", True)
    ]
    if failures:
        raise HTTPException(status_code=500, detail="; ".join(failures))

    return {"success": True, "library_id": library_id, "deleted_files": deleted_files}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/v1/upload/{library_id}")
async def api_upload(library_id: str, file: UploadFile = File(...)):
    """Store one uploaded file in a library folder and upsert the library record.

    The library ID and file name are validated with safe_library_id and
    safe_upload_filename (a missing file name is treated as "upload.txt").
    Uploads larger than 5 MB are rejected with a 400 response before anything
    is written to disk. An existing file with the same sanitised name is
    overwritten. A failed library upsert becomes a 500 response.

    Returns a dict with "success", "library_id", "filename", "path"
    (relative to docs_root()) and "size_bytes".
    """
    max_upload_bytes = 5 * 1024 * 1024

    library_id = safe_library_id(library_id)
    safe_name = safe_upload_filename(file.filename or "upload.txt")

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory.
    contents = await file.read(max_upload_bytes + 1)
    if len(contents) > max_upload_bytes:
        raise HTTPException(status_code=400, detail="File too large (max 5MB)")

    # Create the folder only after validation so rejected uploads leave no trace
    lib_dir = docs_root() / library_id
    lib_dir.mkdir(parents=True, exist_ok=True)

    target = lib_dir / safe_name
    target.write_bytes(contents)

    # NOTE(review): this passes the library ID as name and None as description
    # on every upload; whether that overwrites values set through the create
    # endpoint depends on upsert_library - confirm.
    record = upsert_library(library_id, library_id, None, library_id)
    if isinstance(record, dict) and not record.get("success", True):
        raise HTTPException(status_code=500, detail=record.get("error") or "Failed to register library")

    return {
        "success": True,
        "library_id": library_id,
        "filename": safe_name,
        "path": str(target.relative_to(docs_root())),
        "size_bytes": len(contents),
    }
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/v1/sources")
@app.get("/sources/config")
async def api_list_sources():
    """List the git sources configured in docs_sources.yaml.

    The file may hold either a mapping with a "sources" list or a bare
    top-level list of sources. A missing file, an empty file, or a "sources"
    value that is not a list all yield an empty result.

    Returns a dict with "success", "sources" and "count".
    """
    path = sources_config_path()
    if not path.exists():
        return {"success": True, "sources": [], "count": 0}

    with path.open() as f:
        # safe_load returns None for an empty file
        data = yaml.safe_load(f) or {}

    # Decide on the document shape before touching it: calling .get() on a
    # top-level list raises AttributeError.
    if isinstance(data, dict):
        sources = data.get("sources", [])
    else:
        sources = data
    if not isinstance(sources, list):
        sources = []

    return {"success": True, "sources": sources, "count": len(sources)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/sources/sync")
async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
    """Ingest every configured git source and report one result per source.

    Sources come from api_list_sources(). Each source is ingested on its own:
    an entry that is not a mapping, lacks "library_id" or "repo_url", or
    whose ingestion raises is reported as a failed result and the remaining
    sources are still processed.

    Returns a dict with "success" (True when no source failed),
    "total_sources", "successful", "failed" and "results".
    """
    # NOTE(review): payload.override is accepted by the request model but
    # nothing in this handler uses it - confirm what it is meant to control.
    source_data = await api_list_sources()
    sources = source_data["sources"]
    results = []

    for source in sources:
        library_id = source.get("library_id") if isinstance(source, dict) else None
        try:
            if not library_id or not source.get("repo_url"):
                raise ValueError("Source entry requires 'library_id' and 'repo_url'")
            result = await ingest_git_source(
                library_id=library_id,
                name=source.get("name") or library_id,
                description=source.get("description"),
                repo_url=source["repo_url"],
                branch=source.get("branch", "main"),
                include_paths=source.get("include_paths"),
                exclude_paths=source.get("exclude_paths"),
            )
        except Exception as e:
            # One broken source must not turn the whole sync into a 500
            result = {
                "success": False,
                "library_id": library_id or "unknown",
                "error": str(e),
            }
        results.append(result)

    successful = len([r for r in results if r.get("success")])
    return {
        "success": successful == len(results),
        "total_sources": len(results),
        "successful": successful,
        "failed": len(results) - successful,
        "results": results,
    }
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
# Data Models for document processing and API responses
|
||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
class DocumentChunk:
    """Represents a chunk of text to be embedded."""

    def __init__(
        self,
        text: str,
        metadata: Optional[Dict[str, Any]] = None
    ):
        """
        Args:
            text: The chunk content
            metadata: Optional metadata; an empty dict is used when omitted
        """
        self.text = text
        self.metadata = metadata or {}

    @property
    def doc_id(self) -> str:
        """Return "doc-" followed by the SHA-256 hex digest of the chunk text.

        The built-in hash() is randomized per interpreter process for str
        values, so it cannot serve as an identifier that survives a restart;
        a content digest gives the same ID for the same text every time.
        """
        import hashlib

        digest = hashlib.sha256(self.text.encode("utf-8", errors="surrogatepass")).hexdigest()
        return f"doc-{digest}"
|
||||||
|
|
||||||
|
|
||||||
|
class IngestResponse:
    """Outcome of a document ingestion: a success flag, a chunk count and an optional error."""

    def __init__(self, success: bool, chunks_count: int = 0, error: Optional[str] = None):
        # Plain value holder; the arguments are stored unchanged.
        self.success, self.chunks_count, self.error = success, chunks_count, error
|
||||||
|
|
||||||
|
|
||||||
|
class SearchResponse:
    """Result set of a search: the result dicts, the query text and a total count."""

    def __init__(self, results: List[Dict[str, Any]], query: str, total_results: int):
        # Plain value holder; the arguments are stored unchanged.
        self.results, self.query, self.total_results = results, query, total_results
|
||||||
@@ -0,0 +1,235 @@
|
|||||||
|
# Search Operations for Semantic Query and Library Navigation
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from .config import settings
|
||||||
|
from .vector_store import get_client, _collection_name as VECTOR_COLLECTION
|
||||||
|
from .embeddings import embed_text, get_embedding_size
|
||||||
|
from .db import get_chunks_for_library, list_libraries
|
||||||
|
|
||||||
|
|
||||||
|
def search_docs(
    query: str,
    library_id: Optional[str] = None,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """
    Search documents by semantic similarity in Qdrant.

    Args:
        query: The search query string
        library_id: Optional filter to search only within a library
        limit: Maximum number of results to return

    Returns:
        List of dicts, one per hit that has a positive score and a payload,
        with format:
        {
            "id": "...",
            "score": 0.123,
            "library_id": "...",
            "path": "...",
            "title": "...",
            "chunk_index": 0
        }
        Any error is printed and results in an empty list.
    """
    try:
        # Generate embedding for the query
        query_embedding = embed_text(query)

        client = get_client()

        # Build filter if library_id is specified. A failing import is left
        # to the outer handler: running the search without the filter would
        # silently return hits from other libraries.
        search_filter = None
        if library_id:
            from qdrant_client.models import FieldCondition, Filter, MatchValue
            search_filter = Filter(
                must=[
                    FieldCondition(
                        key="library_id",
                        match=MatchValue(value=library_id),
                    )
                ]
            )

        # Perform vector search. The filter keyword of qdrant-client's
        # search() is "query_filter"; "search_filter" is not accepted, and
        # the resulting error made every search return [].
        # NOTE(review): VECTOR_COLLECTION is vector_store._collection_name;
        # it is passed as the collection name, so it is presumably a str - confirm.
        results = client.search(
            collection_name=VECTOR_COLLECTION,
            query_vector=query_embedding,
            limit=limit,
            query_filter=search_filter
        )

        # Format and return results
        formatted_results = []
        for result in results:
            payload = result.payload
            if result.score > 0 and payload:
                formatted_results.append({
                    # Fall back to the point id so one payload without "id"
                    # does not discard the whole result list
                    "id": payload.get("id", getattr(result, "id", None)),
                    "score": float(result.score),
                    "library_id": payload.get("library_id", ""),
                    "path": payload.get("path", ""),
                    "title": payload.get("title", ""),
                    "chunk_index": payload.get("chunk_index", 0)
                })

        return formatted_results

    except Exception as e:
        print(f"Search error: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
def get_library_docs(
    library_id: str,
    topic: Optional[str] = None,
    token_limit: int = 8000
) -> str:
    """
    Retrieve documentation content from a library as combined markdown.

    Args:
        library_id: The library ID to fetch docs from
        topic: Optional topic filter - if provided, only chunks matched by a
            search for the topic are used, best match first
        token_limit: Maximum (estimated) tokens to include in output

    Returns:
        Combined markdown content as string. When nothing is found, or an
        error occurs, a human-readable message is returned instead (this
        function does not raise).
    """
    try:
        if topic:
            print(f" [Search] Searching library '{library_id}' for topic: {topic}")
            search_results = search_docs(query=topic, library_id=library_id, limit=20)

            if not search_results:
                return f"No documents found in library '{library_id}' matching topic: {topic}"

            print(f" [Search] Found {len(search_results)} relevant chunks")

            # Search hits carry no "content", so load the stored chunks and keep
            # only the ones that matched, ordered by search rank.
            # NOTE(review): assumes stored chunks expose the same "id" that the
            # search hits report - confirm against get_chunks_for_library().
            rank = {}
            for position, hit in enumerate(search_results):
                rank.setdefault(hit["id"], position)
            matched = [
                chunk for chunk in (get_chunks_for_library(library_id) or [])
                if chunk.get("id") in rank
            ]
            if not matched:
                return f"No documents found in library '{library_id}' matching topic: {topic}"
            ordered_chunks = sorted(matched, key=lambda x: rank[x["id"]])
        else:
            # Fetch all chunks for the library and select most useful ones
            print(f" [Fetch] Retrieving chunks from library '{library_id}'")
            chunks_data = get_chunks_for_library(library_id)

            if not chunks_data:
                return f"No documents found in library '{library_id}'"

            # Sort by chunk_index descending and pick top ones to respect token limit
            ordered_chunks = sorted(chunks_data, key=lambda x: x.get("chunk_index", 0), reverse=True)

        selected_chunks = []
        total_tokens = 0

        for chunk in ordered_chunks:
            content = chunk.get("content", "")
            tokens = len(content) // 4  # Simple token estimate

            if total_tokens + tokens <= token_limit:
                selected_chunks.append(chunk)
                total_tokens += tokens
                continue

            # Take part of this chunk to fill the remaining space, then stop:
            # the budget is spent, so later chunks must not be appended too.
            remaining = token_limit - total_tokens
            if remaining > 0:
                selected_chunks.append({"content": content[:remaining * 4], "title": chunk.get("title", "")})
                total_tokens += remaining
            break

        print(f" [Fetch] Selected {len(selected_chunks)} chunks ({total_tokens} tokens)")

        # Combine chunks into markdown
        md_parts = []
        for chunk in selected_chunks:
            title = chunk.get("title")
            content = chunk.get("content", "")

            if title and content.strip():
                # Add a heading while no paragraph break exists yet, or when
                # none of the last five parts is already a heading
                if not md_parts or "\n\n" not in "".join(md_parts):
                    md_parts.append(f"# {title}")
                elif not any(part.startswith("#") for part in md_parts[-5:]):
                    md_parts.append(f"\n# {title}\n")

            md_parts.append(content)

        result = "\n\n".join(md_parts)

        # If no headings were added, prepend library title
        if not any(part.startswith("#") for part in result.split("\n")[:3]):
            result = f"# {library_id.upper().replace('_', ' ')}\n\n" + result

        return result.rstrip()

    except Exception as e:
        print(f"Error getting library docs: {e}")
        return f"Error retrieving documents from library '{library_id}': {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Look up libraries whose name or ID contains the given text (Context7-style).

    Matching is a case-insensitive substring test; at most five candidates
    are returned, in the order list_libraries() yields them.

    Args:
        library_name: Partial or full library name to search for

    Returns:
        List of candidate dicts shaped like:
        {
            "id": "/local/foundryvtt",
            "name": "foundryvtt",
            "description": "...",
            "source": "local"
        }
        An empty list is returned when nothing matches or on any error.
    """
    try:
        known = list_libraries()
        if not known:
            return []

        matches = []
        for entry in known:
            haystacks = (entry.get("name", "").lower(), entry.get("id", "").lower())
            needle = library_name.lower()
            if not any(needle in text for text in haystacks):
                continue
            matches.append({
                "id": f"/local/{entry['id']}",
                "name": entry["name"],
                "description": entry.get("description", ""),
                "source": "local"
            })

        # Keep only the first five matches
        matches = matches[:5]

        print(f" [Resolve] Found {len(matches)} candidate(s) for: {library_name}")
        return matches

    except Exception as e:
        print(f"Error resolving library ID: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":

    def test_search():
        """Smoke-test the search helpers.

        resolve_library_id() and search_docs() are plain synchronous
        functions, so they are called directly; awaiting their list results
        would raise TypeError.
        """
        print("Testing search module...\n")

        # Test 1: resolve a library name to candidates
        print("1. Testing resolve_library_id()...")
        results = resolve_library_id("foundryvtt")
        print(f" Results: {len(results)} candidates\n")

        # Test 2: Empty query should return empty list
        print("2. Testing search_docs() with empty query...")
        results = search_docs("")
        print(f" Results: {len(results)} chunks\n")

        print("✅ All tests completed!")

    test_search()
|
||||||
@@ -0,0 +1,361 @@
|
|||||||
|
# Vector Store Operations for Qdrant
|
||||||
|
import asyncio
|
||||||
|
import uuid
|
||||||
|
from typing import List, Dict, Any, Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
from qdrant_client import QdrantClient
|
||||||
|
from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
|
||||||
|
except ImportError:
|
||||||
|
QdrantClient = None
|
||||||
|
Distance = VectorParams = PointStruct = Filter = FieldCondition = MatchValue = None
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton client instance; created lazily by get_client()
_client: Optional[Any] = None
# Collection name comes from the project settings when they can be loaded;
# any failure while loading them falls back to a fixed default name.
try:
    from .config import settings
    _collection_name = settings.collection_name
except Exception:
    _collection_name = "local_context7_docs"
|
||||||
|
|
||||||
|
|
||||||
|
def get_client() -> Any:
    """Return the shared Qdrant client, building it on first use.

    The connection target is the QDRANT_URL environment variable when set
    (a .env file is loaded first if python-dotenv is available); otherwise
    the host and port from the project settings are used.

    Raises:
        RuntimeError: if qdrant-client is not installed.
    """
    global _client

    # Fast path: already connected
    if _client is not None:
        return _client

    if QdrantClient is None:
        raise RuntimeError("qdrant-client is not installed")

    # python-dotenv is optional
    try:
        from dotenv import load_dotenv
        load_dotenv()
    except ImportError:
        pass

    import os
    url_from_env = os.getenv("QDRANT_URL")

    if url_from_env:
        _client = QdrantClient(url=url_from_env)
        return _client

    from .config import settings
    _client = QdrantClient(
        host=settings.vector_store_host,
        port=settings.vector_store_port,
    )
    return _client
|
||||||
|
|
||||||
|
|
||||||
|
def get_collection_name() -> str:
    """Get the collection name for vector storage.

    Returns:
        The module-level collection name resolved at import time.
    """
    return _collection_name
|
||||||
|
|
||||||
|
|
||||||
|
def get_embedding_size() -> int:
    """Report the embedding dimension used for stored vectors.

    Delegates to the embeddings module; when that module cannot be imported
    or raises RuntimeError, a default of 384 is returned.
    """
    fallback_dim = 384
    try:
        from .embeddings import get_embedding_size as _embedding_dim
        return _embedding_dim()
    except (ImportError, RuntimeError):
        # Default fallback if embeddings module not loaded yet
        return fallback_dim
|
||||||
|
|
||||||
|
|
||||||
|
async def ensure_collection(vector_size: Optional[int] = None) -> Dict[str, Any]:
    """
    Ensure the Qdrant collection exists with proper schema.

    A missing collection is created with cosine distance. An existing
    collection whose vector size differs from the expected one is deleted
    and recreated, which discards its stored vectors.

    Args:
        vector_size: Override embedding dimension (uses get_embedding_size() if not provided)

    Returns:
        Dict with operation result: "success" plus, on success,
        "collection", "vector_size", "created" and (after a recreate)
        "resized"; on failure, "error". This function does not raise.
    """
    try:
        if QdrantClient is None:
            return {"success": False, "error": "qdrant-client is not installed"}

        client = get_client()
        size = vector_size or get_embedding_size()
        # create_collection() takes the vector schema as `vectors_config`;
        # unknown keyword arguments are rejected by the client.
        vectors_config = VectorParams(size=size, distance=Distance.COSINE)

        # Check if collection exists
        try:
            collections = client.get_collections().collections
            collection_exists = any(c.name == _collection_name for c in collections)
        except Exception:
            collection_exists = False

        if not collection_exists:
            client.create_collection(
                collection_name=_collection_name,
                vectors_config=vectors_config,
            )
            return {
                "success": True,
                "collection": _collection_name,
                "vector_size": size,
                "created": True
            }

        # Only the size lookup is best-effort; errors from the delete and
        # recreate below must surface instead of being reported as success.
        try:
            collection_info = client.get_collection(_collection_name)
            current_size = collection_info.config.params.vectors.size
        except Exception:
            current_size = None  # size unknown, leave the collection as is

        if current_size is not None and current_size != size:
            # Collection exists with wrong size - delete and recreate
            client.delete_collection(_collection_name)
            client.create_collection(
                collection_name=_collection_name,
                vectors_config=vectors_config,
            )
            return {
                "success": True,
                "collection": _collection_name,
                "vector_size": size,
                "created": False,
                "resized": True
            }

        return {
            "success": True,
            "collection": _collection_name,
            "vector_size": size,
            "created": False
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def upsert_chunks(chunks: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Write chunks (with their embeddings) into the vector collection.

    Each point ID is a UUID5 derived from "<library_id>:<id>", so writing
    the same chunk again replaces its point rather than adding a new one.

    Args:
        chunks: List of chunk dicts with format:
            {
                "id": "...",
                "library_id": "...",
                "path": "...",
                "title": "...",
                "chunk_index": 0,
                "content": "...",
                "embedding": [...]
            }

    Returns:
        Dict with "success" and either "points_added" or "error".
    """
    try:
        if QdrantClient is None:
            return {"success": False, "error": "qdrant-client is not installed"}

        if not chunks:
            return {"success": True, "points_added": 0}

        client = get_client()

        def as_point(item):
            # Stable ID per (library, chunk) pair
            stable_key = f"{item['library_id']}:{item['id']}"
            return PointStruct(
                id=str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key)),
                vector=item["embedding"],
                payload={
                    "id": item["id"],
                    "library_id": item["library_id"],
                    "path": item.get("path", ""),
                    "title": item.get("title", ""),
                    "chunk_index": item.get("chunk_index", 0),
                    "content": item.get("content", "")
                },
            )

        batch = [as_point(item) for item in chunks]
        client.upsert(_collection_name, points=batch)

        return {"success": True, "points_added": len(batch)}

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
async def search_vectors(
    query_vector: List[float],
    library_id: Optional[str] = None,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """
    Search for semantically similar vectors.

    Args:
        query_vector: The embedding vector to search against
        library_id: Optional filter by library ID
        limit: Maximum results to return

    Returns:
        List of result dicts (only hits with a positive score and a payload):
        {
            "id": "...",
            "score": 0.123,
            "library_id": "...",
            "path": "...",
            "title": "...",
            "chunk_index": 0
        }
        An empty list is returned when qdrant-client is missing or the
        search fails; failures are printed rather than raised.
    """
    try:
        if QdrantClient is None:
            return []

        client = get_client()

        # Build filter if library_id is specified
        search_filter = None
        if library_id:
            search_filter = Filter(
                must=[
                    FieldCondition(
                        key="library_id",
                        match=MatchValue(value=library_id),
                    )
                ]
            )

        # The client's keyword for the payload filter is `query_filter`;
        # an unknown keyword makes the call fail.
        results = client.search(
            collection_name=_collection_name,
            query_vector=query_vector,
            limit=limit,
            query_filter=search_filter
        )

        # Format results
        formatted_results = []
        for result in results:
            if result.score > 0 and result.payload:
                formatted_results.append({
                    "id": result.payload["id"],
                    "score": float(result.score),
                    "library_id": result.payload["library_id"],
                    "path": result.payload.get("path", ""),
                    "title": result.payload.get("title", ""),
                    "chunk_index": result.payload.get("chunk_index", 0)
                })

        return formatted_results

    except Exception as e:
        # Still best-effort for callers, but no longer silent
        print(f"Vector search error: {e}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
async def delete_library_vectors(library_id: str) -> Dict[str, Any]:
    """
    Delete all vectors for a given library.

    The deletion is one filtered delete request, so a failure is reported
    to the caller instead of being hidden behind a partial scroll loop.

    Args:
        library_id: The library ID to delete vectors for

    Returns:
        Dict with operation result: {"success": True, "library_id": ...}
        (plus "skipped" when there was nothing to do), or
        {"success": False, "error": ...}. This function does not raise.
    """
    try:
        if QdrantClient is None:
            return {"success": True, "library_id": library_id, "skipped": "qdrant-client is not installed"}

        client = get_client()

        # Nothing to delete before the collection has been created
        existing = client.get_collections().collections
        if not any(c.name == _collection_name for c in existing):
            return {"success": True, "library_id": library_id, "skipped": "collection does not exist"}

        # Use filter to delete only vectors matching the library_id
        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="library_id",
                    match=MatchValue(value=library_id),
                )
            ]
        )

        # The client accepts a Filter directly as the points selector
        client.delete(
            collection_name=_collection_name,
            points_selector=filter_condition
        )

        return {
            "success": True,
            "library_id": library_id
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run of the vector store helpers against the configured Qdrant
    import os

    print("Testing vector store module...\n")

    # Step 1: make sure the collection is present
    print("1. Testing ensure_collection()...")
    collection_status = asyncio.run(ensure_collection())
    print(f" Result: {collection_status}\n")

    # Step 2: query with a constant 384-dimensional vector
    print("2. Testing search_vectors() with dummy vector...")
    hits = asyncio.run(search_vectors([0.1] * 384, limit=5))
    print(f" Results count: {len(hits)}\n")

    # Step 3: remove vectors for a library used only by this check
    print("3. Testing delete_library_vectors()...")
    deletion_status = asyncio.run(delete_library_vectors("test-library"))
    print(f" Result: {deletion_status}\n")

    print("✅ All tests completed!")
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
"""WebUI module for Context7 Docs."""
|
||||||
@@ -0,0 +1,166 @@
|
|||||||
|
/* Page shell: centered column, at most 1000px wide */
.container {
    max-width: 1000px;
    margin: 0 auto;
    padding: 20px;
}

/* Page header, separated from the content by a bottom rule */
header {
    border-bottom: 1px solid #ccc;
    padding-bottom: 15px;
    margin-bottom: 20px;
}

header h1 {
    margin: 0 0 10px 0;
    font-size: 1.5rem;
}

/* Navigation: links laid out in a row */
nav {
    display: flex;
    gap: 15px;
}

nav a {
    text-decoration: none;
    color: #0066cc;
    font-size: 0.9rem;
}

/* Link carrying the "active" class */
nav a.active {
    font-weight: bold;
    text-decoration: underline;
}

main h2 {
    margin-bottom: 15px;
}

/* Page footer, separated from the content by a top rule */
footer {
    margin-top: 40px;
    padding-top: 15px;
    border-top: 1px solid #ccc;
    font-size: 0.8rem;
    color: #666;
}

/* Status cards */
.status-card {
    background: #f5f5f5;
    padding: 20px;
    border-radius: 8px;
    border-left: 4px solid #00c467;
}

.status-message {
    background: #e8f4fd;
    padding: 10px;
    border-radius: 4px;
    margin: 5px 0;
}

/* Tables */
.library-table {
    width: 100%;
    border-collapse: collapse;
    margin-top: 10px;
}

.library-table th, .library-table td {
    padding: 10px;
    text-align: left;
    border-bottom: 1px solid #ddd;
}

.library-table th {
    background: #f5f5f5;
    font-weight: bold;
}

/* Forms */
form input[type="text"], form textarea, form select {
    padding: 8px;
    border: 1px solid #ccc;
    border-radius: 4px;
    margin-right: 10px;
    margin-bottom: 10px;
}

button {
    background: #0066cc;
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 4px;
    cursor: pointer;
}

button:hover {
    background: #0055aa;
}

/* Pre formatting: long lines wrap instead of overflowing */
pre {
    background: #f5f5f5;
    padding: 15px;
    border-radius: 4px;
    overflow-x: auto;
    white-space: pre-wrap;
    word-break: break-word;
}

/* Search results */
.result-card {
    background: #fff;
    border: 1px solid #ddd;
    padding: 15px;
    margin: 10px 0;
    border-radius: 4px;
}

.result-card h3 {
    margin: 0 0 8px 0;
}

.hint {
    color: #666;
    font-size: 0.85rem;
    margin-top: 15px;
}

/* Status colors */
.status-ok {
    color: #00c467;
    font-weight: bold;
}

/* Scrollable preview box, capped at 300px high */
.content-preview {
    max-height: 300px;
    overflow-y: auto;
}

.results-count {
    background: #e8f4fd;
    padding: 10px;
    border-radius: 4px;
    margin-bottom: 15px;
}

.source-card {
    background: #f5f5f5;
    padding: 15px;
    margin: 10px 0;
    border-radius: 4px;
}

.actions-bar {
    margin-top: 15px;
}

.actions-bar form {
    display: inline-flex;
}

/* Scrollable document box, capped at 600px high */
.doc-content {
    max-height: 600px;
    overflow-y: auto;
}
|
||||||
@@ -0,0 +1,568 @@
|
|||||||
|
"""WebUI Views for Context7 Docs using Jinja2 templates."""
|
||||||
|
import json
import os
from pathlib import Path
from typing import Any, Optional

import requests
from fastapi import Request
# fastapi.responses exports HTMLResponse (there is no `HTML`); the alias keeps
# the existing `HTML(...)` call sites and annotations working.
from fastapi.responses import HTMLResponse as HTML, JSONResponse
|
||||||
|
|
||||||
|
# Internal API base URL; override with the DOCS_API_URL environment variable
DOCS_API_URL = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
|
||||||
|
|
||||||
|
|
||||||
|
def api_request(method: str, endpoint: str, data: Optional[dict] = None) -> dict:
    """Make internal API request to docs-api.

    Args:
        method: HTTP method, e.g. "GET" or "POST"
        endpoint: Path appended to DOCS_API_URL, e.g. "/health"
        data: Optional JSON body

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    url = f"{DOCS_API_URL}{endpoint}"
    headers = {}
    api_key = os.environ.get("WEBUI_API_KEY")
    if api_key:
        headers["X-API-Key"] = api_key

    # Without a timeout an unresponsive docs-api would block this call forever.
    # The read timeout is generous because ingestion requests can run long;
    # WEBUI_API_TIMEOUT (seconds) overrides it.
    read_timeout = float(os.environ.get("WEBUI_API_TIMEOUT", "600"))
    resp = requests.request(method, url, headers=headers, json=data, timeout=(5, read_timeout))
    return resp.json()
|
||||||
|
|
||||||
|
|
||||||
|
def navbar_html(current: str) -> str:
    """Build the <nav> element for the page header.

    Args:
        current: Path of the page being rendered; the link whose path equals
            it gets class "active", every other link gets an empty class.

    Returns:
        The navigation markup as a string.
    """
    nav_links = (
        ("/health", "Health"),
        ("/libraries", "Libraries"),
        ("/upload", "Upload"),
        ("/ingest/all", "Ingest All"),
        ("/sources/git", "Git Sources"),
        ("/search", "Search"),
    )

    def anchor(href, text):
        css_class = "active" if current == href else ""
        return f'<a href="{href}" class="{css_class}">{text}</a>'

    anchors = " ".join(anchor(href, text) for href, text in nav_links)
    return ("<nav>\n" + anchors + "\n</nav>").strip()
|
||||||
|
|
||||||
|
|
||||||
|
def footer_html() -> str:
    """Generate footer HTML.

    Returns:
        A fixed <footer> element naming the WebUI.
    """
    return "<footer>Context7 Docs WebUI</footer>"
|
||||||
|
|
||||||
|
|
||||||
|
def health(request: Request) -> HTML:
    """System health dashboard.

    Asks docs-api for /health and renders the reported service name and
    status. If the request fails, the page shows status "error" with the
    exception text in place of the service name.

    Args:
        request: Incoming request (not used by this view)

    Returns:
        The rendered HTML page.
    """
    from html import escape

    try:
        data = api_request("GET", "/health")
        status = data.get("status", "unknown")
        service = data.get("service", "Service")
    except Exception as e:
        status = "error"
        service = str(e)

    # Both values originate outside this module (API payload or exception
    # text), so escape them before placing them in markup and attributes.
    status_html = escape(str(status), quote=True)
    service_html = escape(str(service), quote=True)

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Health</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/health")}</header>
<main><h2>System Health</h2>
<div class="status-card" data-status="{status_html}"><h3>{service_html}</h3>
<p>Status: <span class="status-ok">{status_html}</span></p></div>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def libraries(request: Request) -> HTML:
    """List all libraries.

    Renders one table row per library reported by docs-api, plus a form for
    creating a new library folder. If the API call fails, the table is empty
    and the error text is shown above it.

    Args:
        request: Incoming request (not used by this view)

    Returns:
        The rendered HTML page.
    """
    from html import escape
    from urllib.parse import quote

    error_html = ""
    try:
        data = api_request("GET", "/libraries")
        libs = data.get("libraries", [])
    except Exception as e:
        # Show the failure instead of hiding it in a filtered-out sentinel row
        libs = []
        error_html = f'<div class="status-message">Could not load libraries: {escape(str(e))}</div>'

    table_rows = []
    for lib in libs:
        lib_id = str(lib.get('id'))
        name_html = escape(str(lib.get('name', '')))
        description_html = escape(str(lib.get('description', '') or '(no description)'))
        # Library fields are data, not markup: escape text, URL-quote the link
        docs_href = "/docs/" + quote(lib_id, safe="")
        table_rows.append(
            f"""<tr><td>{escape(lib_id)}</td>
<td>{name_html}</td>
<td>{description_html}</td>
<td><a href="{docs_href}">View Docs</a></td></tr>"""
        )

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Libraries</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/libraries")}</header>
<main>
<h2>Libraries ({len(libs)})</h2>
{error_html}
<div class="actions-bar">
<form action="/folders/create" method="post" style="display:inline;">
<input type="text" name="name" placeholder="New library folder name" required>
<button type="submit">Create Folder</button>
</form>
</div>
<table class="library-table">
<thead><tr><th>ID</th><th>Name</th><th>Description</th><th>Actions</th></tr></thead>
<tbody>{"".join(table_rows)}</tbody>
</table>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def upload(request: Request) -> HTML:
    """File upload form.

    Without an uploaded file the upload form is rendered. With one, the
    first 5000 characters are decoded as UTF-8, previewed (first 1000
    characters) and embedded in a follow-up form posting to /ingest/uploaded.

    Args:
        request: Incoming request carrying the optional "file" upload

    Returns:
        The rendered HTML page.
    """
    from html import escape

    def page(main_html: str) -> HTML:
        # Shared page frame for the three upload screens
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Upload</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
<main>
{main_html}
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")

    # NOTE(review): Starlette's Request has no `.files` attribute; uploads are
    # read with `await request.form()`. Fixing that means making this view
    # async, which depends on how it is registered - confirm and follow up.
    if "file" not in request.files:
        return page("""<h2>Upload Documentation Files</h2>
<form method="post" enctype="multipart/form-data">
<label for="file">Select file:</label>
<input type="file" name="file" id="file" accept=".txt,.md,.json,.py,.js,.html,.css,.yaml,.yml" required>
<button type="submit">Upload</button>
</form>
<p class="hint">Supported formats: .txt, .md, .json, .py, .js, .html, .css, .yaml</p>""")

    uploaded_file = request.files["file"]
    try:
        content = uploaded_file.read().decode("utf-8")[:5000]
    except Exception:
        # Reading or decoding failed; no size limit is enforced here, so
        # report what actually went wrong.
        return page("""<h2>Upload failed</h2>
<p>The file could not be read as UTF-8 text.</p>""")

    # Truncate the raw text first, then escape, so an entity is never cut in half
    preview = escape(content[:1000]) + ("..." if len(content) > 1000 else "")
    # quote=True also escapes quotes, which the value="..." attribute needs
    hidden_value = escape(content, quote=True)

    return page(f"""<h2>Upload Complete!</h2>
<pre class="content-preview">{preview}</pre>
<form method="post" action="/ingest/uploaded">
<input type="hidden" name="content" value="{hidden_value}">
<label for="library_id">Library (optional):</label>
<input type="text" id="library_id" name="library_id" placeholder="e.g., my-docs">
<button type="submit">Ingest</button>
</form>""")
|
||||||
|
|
||||||
|
|
||||||
|
def ingest_all(request: Request) -> JSONResponse:
    """Ask docs-api to ingest every library and report the chunk count.

    Returns a JSON body with "status" and "message" on success, or a 500
    response with an "error" field when anything in the call fails.
    """
    try:
        outcome = api_request("POST", "/ingest")
        chunk_total = outcome.get('chunks', 0)
        payload = {"status": "ok", "message": f"Processed {chunk_total} chunks"}
        return JSONResponse(content=payload)
    except Exception as e:
        failure = {"error": str(e)}
        return JSONResponse(status_code=500, content=failure)
|
||||||
|
|
||||||
|
|
||||||
|
def ingest_library(request: Request, library_id: str) -> HTML:
    """Ingest for specific library.

    When the submitted form contains "content", the ingest form for the
    library is rendered; otherwise docs-api is asked to ingest the library
    and the result (or the error) is rendered.

    Args:
        request: Incoming request
        library_id: Library to ingest; escaped before being placed in markup

    Returns:
        The rendered HTML page.
    """
    from html import escape
    from urllib.parse import quote

    # Build the nav outside the template so library_id is really interpolated
    nav = navbar_html(f"/ingest/{library_id}")
    library_html = escape(library_id)
    library_path = quote(library_id, safe="")

    def page(title: str, main_html: str) -> HTML:
        # Shared page frame for the three ingest screens
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{title}</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
{main_html}
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")

    # NOTE(review): on a Starlette Request, `form` is an async method, so this
    # membership test cannot work as written; reading the form requires
    # `await request.form()` in an async view - confirm and follow up.
    # NOTE(review): the branches look inverted (submitted content shows the
    # empty form, no content triggers ingestion) - confirm the intent.
    if "content" in request.form:
        return page("Context7 Docs - Ingest", f"""<h2>Ingest for Library: {library_html}</h2>
<form method="post" action="/ingest/{library_path}">
<label for="content">Content (text):</label>
<textarea id="content" name="content" rows="10" maxlength="10000"></textarea>
<button type="submit">Ingest</button>
</form>""")

    try:
        result = api_request("POST", f"/ingest/{library_id}")
        # API output is data, not markup
        safe_msg = escape(str(result.get('message', '') or ''))
        safe_json = escape(json.dumps(result, indent=2))
    except Exception as e:
        return page("Context7 Docs - Error", f"""<h2>Error</h2>
<pre>{escape(str(e))}</pre>""")

    return page("Context7 Docs - Ingest Result", f"""<h2>Ingestion Complete!</h2>
<p>{safe_msg}</p>
<pre>{safe_json}</pre>
<a href="/libraries">← Back to Libraries</a>""")
|
||||||
|
|
||||||
|
|
||||||
|
async def folders_create(request: Request) -> JSONResponse:
    """Create a new library folder.

    Reads the ``name`` form field, strips surrounding whitespace and registers
    a library whose id and display name are both ``name`` and whose source
    path is ``/docs/<name>``.

    Args:
        request: Incoming request carrying the ``name`` form field.

    Returns:
        JSONResponse: ``{"status": "ok", "message": ...}`` on success; a 400
        response with ``{"error": ...}`` when the name is blank or is not a
        single path segment; a 500 response with ``{"error": ...}`` when the
        database call raises.
    """
    # NOTE(review): assumes request.form is an already-parsed mapping -- confirm for the framework in use.
    name = request.form.get("name", "").strip()

    # The name becomes a path segment under /docs, so refuse blanks and
    # anything that could point outside that directory.
    if not name or name in (".", "..") or "/" in name or "\\" in name:
        return JSONResponse(status_code=400, content={"error": "Invalid folder name"})

    try:
        from backend.app.db import upsert_library

        await upsert_library(library_id=name, name=name, description=None, source_path=f"/docs/{name}")
        return JSONResponse(content={"status": "ok", "message": f"Created folder '{name}'"})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
|
||||||
|
|
||||||
|
|
||||||
|
async def folders_delete(request: Request) -> JSONResponse:
    """Delete a library.

    Reads the library id from the ``id`` query parameter (whitespace-stripped)
    and passes it to ``delete_library``.

    Args:
        request: Incoming request carrying the ``id`` query parameter.

    Returns:
        JSONResponse: ``{"status": "ok", "message": ...}`` on success; a 400
        response with ``{"error": ...}`` when no id was supplied; a 500
        response with ``{"error": ...}`` when the database call raises.
    """
    library_id = request.query_params.get("id", "").strip()

    # Without an id there is nothing to delete; do not report a bogus success.
    if not library_id:
        return JSONResponse(status_code=400, content={"error": "Missing library id"})

    try:
        from backend.app.db import delete_library

        await delete_library(library_id)
        return JSONResponse(content={"status": "ok", "message": f"Deleted library '{library_id}'"})
    except Exception as e:
        return JSONResponse(status_code=500, content={"error": str(e)})
|
||||||
|
|
||||||
|
|
||||||
|
async def ingest_uploaded(request: Request) -> HTML:
    """Ingest uploaded file content.

    Takes the ``content`` form field (capped at 10000 characters) and the
    ``library_id`` form field (default ``"uploaded"``), posts the content to
    ``/ingest/<library_id>`` on the docs API and renders the outcome.

    Args:
        request: Incoming request carrying the ``content`` and ``library_id``
            form fields.

    Returns:
        HTML: A result page showing the API message and the full JSON reply,
        or a minimal error page when anything in the request/render path
        raises. All dynamic values are HTML-escaped before being embedded.
    """
    from html import escape
    from urllib.parse import quote

    content = request.form.get("content", "")[:10000]
    library_id = request.form.get("library_id", "uploaded")

    try:
        # library_id is free-form user input: encode it so it stays a single
        # path segment of the API URL.
        result = api_request("POST", f"/ingest/{quote(library_id, safe='')}", data={"content": content})
        safe_msg = escape(str(result.get('message', '') or ''))
        safe_json = escape(json.dumps(result, indent=2))
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Upload Result</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/upload")}</header>
<main>
<h2>Ingestion Complete!</h2>
<p>{safe_msg}</p>
<pre>{safe_json}</pre>
<a href="/upload">← Upload Another</a>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
    except Exception as e:
        safe_error = escape(str(e))
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Upload Ingest Error</h1><pre>{safe_error}</pre><a href="/upload">← Try Again</a></body>
</html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def docs(request: Request, library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> HTML:
    """View docs from a library.

    Fetches ``/libraries/<library_id>/docs`` from the docs API with the given
    ``topic`` and ``tokens`` parameters and renders the returned ``content``.
    If the request raises, the exception text is shown in place of the content.

    Args:
        request: Incoming request (not read by this handler).
        library_id: Library whose documentation is requested.
        topic: Optional topic passed through to the API; shown as ``(all)``
            when empty.
        tokens: Token budget passed through to the API.

    Returns:
        HTML: The rendered page. The content is truncated to 10000 characters
        and every dynamic value is HTML-escaped before being embedded.
    """
    from html import escape

    try:
        data = api_request("GET", f"/libraries/{library_id}/docs", params={"topic": topic, "tokens": tokens})
        # Guard against an explicit null so the slicing below cannot fail.
        content = data.get("content", "") or ""
    except Exception as e:
        content = str(e)

    # Truncate first, then escape: cutting escaped text could split an entity.
    safe_content = escape(str(content)[:10000])
    safe_library = escape(str(library_id))
    safe_topic = escape(str(topic)) if topic else "(all)"
    safe_tokens = escape(str(tokens))

    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Library: {safe_library}</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/docs/{}".format(library_id))}</header>
<main>
<h2>Library: {safe_library}</h2>
<p><strong>Topic:</strong> {safe_topic} | <strong>Tokens:</strong> {safe_tokens}</p>
<pre class="docs-content">{safe_content}</pre>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def search_redirect(request: Request) -> JSONResponse:
    """Point the caller at the search form.

    Returns a JSON body naming the form URL; no HTTP redirect status is set
    by this handler.
    """
    target = {"redirect": "/search/form"}
    return JSONResponse(content=target)
|
||||||
|
|
||||||
|
|
||||||
|
def search_form(request: Request) -> HTML:
    """Render the search form page.

    The form posts ``query``, ``library_id`` and ``limit`` fields to
    ``/search``; ``limit`` defaults to 10.
    """
    nav = navbar_html("/search")
    footer = footer_html()
    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Search</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Search Docs</h2>
<form method="post" action="/search">
<label for="query">Query:</label>
<input type="text" id="query" name="query" required placeholder="Enter your search query...">
<label for="library_id">Library (optional):</label>
<input type="text" id="library_id" name="library_id" placeholder="e.g., foundryvtt">
<label for="limit">Limit results:</label>
<select id="limit" name="limit">
<option value="5">5</option>
<option value="10" selected>10</option>
<option value="20">20</option>
<option value="50">50</option>
</select>
<button type="submit">Search</button>
</form>
</main>{footer}</div>
</body></html>"""
    return HTML(page, media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def search_results(request: Request) -> HTML:
    """Display search results.

    Reads ``q`` and ``limit`` (default ``"10"``) from the query string, posts
    ``{"query", "library_id": None, "limit"}`` to ``/search`` on the docs API
    and renders one card per entry of the reply's ``results`` list.

    Args:
        request: Incoming request carrying the ``q`` and ``limit`` query
            parameters.

    Returns:
        HTML: The results page, or a minimal error page when reading the
        parameters (e.g. a non-numeric ``limit``) or calling the API raises.
        Every value taken from the request or the API reply is HTML-escaped
        before being embedded.
    """
    from html import escape
    from urllib.parse import quote

    try:
        query = request.query_params.get("q", "")
        limit = int(request.query_params.get("limit", "10"))
        payload = {"query": query, "library_id": None, "limit": limit}
        result = api_request("POST", "/search", data=payload)
        # Treat an explicit null the same as a missing list.
        results = result.get("results", []) or []
    except Exception as e:
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Error</h1><pre>{escape(str(e))}</pre><a href="/search/form">← Try Again</a></body>
</html>""", media_type="text/html")

    cards = []
    for r in results:
        raw_content = str(r.get("content", "") or "")
        # A missing title reads "Untitled"; an empty one falls back to the
        # first 100 characters of the content.
        title = r.get("title", "Untitled") or (raw_content[:100] + "...")
        snippet = (raw_content or str(r.get("chunk", "") or ""))[:500]
        library = str(r.get('library_id'))

        # Truncate before escaping so an entity is never cut in half.
        safe_id = escape(str(r.get('id')))
        safe_title = escape(str(title))
        safe_snippet = escape(snippet)
        safe_href = escape("/docs/" + quote(library, safe=""))
        cards.append(f"""<div class="result-card" data-id="{safe_id}"><h3>{safe_title}</h3>
<p>{safe_snippet}...</p><a href="{safe_href}">View Full</a></div>""")

    safe_query = escape(str(query))
    return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Search Results</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/search")}</header>
<main>
<h2>Search Results for "{safe_query}"</h2>
<div class="results-count">{len(results)} results found</div>
{''.join(cards)}
<a href="/search/form">← New Search</a>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def sync_sources(request: Request) -> HTML:
    """Sync git sources.

    On POST, calls ``POST /sources/sync`` on the docs API and renders the JSON
    reply (or an error page if the call raises). On any other method, renders
    the sync form together with the ids of the libraries reported by
    ``GET /libraries``.

    Args:
        request: Incoming request; only ``request.method`` is read.

    Returns:
        HTML: The sync result page, the sync error page, or the sync form.
        All values taken from the API are HTML-escaped before being embedded.
    """
    from html import escape

    if request.method == "POST":
        # NOTE(review): the form's "override" checkbox is not forwarded to the API here.
        try:
            data = api_request("POST", "/sources/sync")
            safe_json = escape(json.dumps(data, indent=2))
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Sync Result</title></head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/sync/sources")}</header>
<main><h2>Git Sync Complete!</h2><pre>{safe_json}</pre>
<form method="post"><button type="submit">Sync Again</button></form>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
        except Exception as e:
            safe_error = escape(str(e))
            return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Sync Error</h1><pre>{safe_error}</pre><a href="/sources/git">← Try Again</a></body>
</html>""", media_type="text/html")
    else:
        try:
            data = api_request("GET", "/libraries")
            # Skip entries without an id and the "error" placeholder; stringify
            # so the join below cannot fail on a non-string id.
            libs = [
                str(lib.get("id"))
                for lib in data.get("libraries", [])
                if lib.get("id") and lib.get("id") != "error"
            ]
        except Exception:
            # Best effort: the form is still usable without the library list.
            libs = []

        lib_list = escape(", ".join(libs)) if libs else "(none)"
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Git Sync</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/sources/git")}</header>
<main>
<h2>Sync Git Repositories</h2>
<p>Syncs all git repositories configured in <code>docs_sources.yaml</code>.</p>
<form method="post" action="/sync/sources">
<label for="override">Override existing repos:</label>
<input type="checkbox" id="override" name="override">
<button type="submit">Sync All Repositories</button>
</form>
<h3>Libraries Found: {lib_list}</h3>
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def git_sources(request: Request) -> HTML:
    """List configured git sources.

    Loads ``docs_sources.yaml`` from three directories above this file and
    renders one card per entry of its ``sources`` list, showing the library
    id, repository URL (shortened to 50 characters plus ``...`` when longer),
    branch (default ``main``), include paths (default ``*``) and, when
    present, exclude paths.

    Args:
        request: Incoming request (not read by this handler).

    Returns:
        HTML: The sources page, or a minimal error page when the file cannot
        be read or parsed. An empty file or an empty ``sources`` key renders
        as zero sources. All config values are HTML-escaped before being
        embedded.
    """
    from html import escape

    import yaml
    config_path = Path(__file__).parent.parent.parent / "docs_sources.yaml"

    try:
        with open(config_path) as f:
            # safe_load returns None for an empty document.
            data = yaml.safe_load(f) or {}
        sources = data.get("sources", []) or []

        source_blocks = []
        for src in sources:
            repo_url = str(src.get("repo_url", "") or "")
            url = repo_url[:50] + "..." if len(repo_url) > 50 else repo_url
            branch = src.get("branch", "main")
            include = src.get("include_paths", ["*"]) or []
            exclude = src.get("exclude_paths", []) or []

            # Stringify path entries so a non-string YAML scalar cannot break join().
            include_text = ', '.join(str(p) for p in include)
            exclude_text = ' | Exclude: ' + ', '.join(str(p) for p in exclude) if exclude else ''

            source_blocks.append(f"""<div class="source-card">
<strong>{escape(str(src.get('library_id', 'unknown')))}</strong><br>
URL: {escape(url)}<br>
Branch: {escape(str(branch))}<br>
Include: {escape(include_text)}{escape(exclude_text)}
</div>""")

        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Git Sources</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{navbar_html("/sources/git")}</header>
<main>
<h2>Configured Git Sources ({len(sources)})</h2>
{''.join(source_blocks)}
</main>{footer_html()}</div>
</body></html>""", media_type="text/html")
    except Exception as e:
        safe_error = escape(str(e))
        return HTML(f"""<!DOCTYPE html>
<html lang="en">
<head><meta charset="UTF-8"><title>Error</title></head>
<body><h1>Git Sources Error</h1><pre>{safe_error}</pre></body>
</html>""", media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
def logs(request: Request) -> HTML:
    """Render the logs/status page.

    Shows the configured ``DOCS_API_URL`` and a fixed status line.
    """
    nav = navbar_html("/logs")
    footer = footer_html()
    # The Qdrant/MCP status line below is literal text, not a live probe.
    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Context7 Docs - Logs</title>
<link rel="stylesheet" href="/static/css/main.css">
</head>
<body>
<div class="container">
<header><h1>Context7 Docs UI</h1>{nav}</header>
<main>
<h2>Status Messages</h2>
<div class="status-message">Docs API: {DOCS_API_URL}</div>
<div class="status-message">Qdrant Health: healthy | MCP OK: yes</div>
<p class="hint">Logs are printed to container stdout/stderr. For full logs, inspect Docker containers directly.</p>
</main>{footer}</div>
</body></html>"""
    return HTML(page, media_type="text/html")
|
||||||
|
|
||||||
|
|
||||||
|
# Register all routes
|
||||||
|
# Public route handlers exported by this module.
# "ingest_uploaded" is defined in this module and is listed here alongside the
# other handlers so a star-import picks it up as well.
__all__ = [
    "health", "libraries", "upload", "ingest_all", "ingest_library",
    "ingest_uploaded", "folders_create", "folders_delete", "docs",
    "search_redirect", "search_form", "search_results", "sync_sources",
    "git_sources", "logs",
]
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
# Backend API Dependencies
|
||||||
|
fastapi==0.109.0
|
||||||
|
uvicorn[standard]==0.27.0
|
||||||
|
pydantic==2.5.3
|
||||||
|
python-dotenv==1.0.0
|
||||||
|
python-multipart==0.0.6
|
||||||
|
|
||||||
|
# Qdrant Vector Store Client
|
||||||
|
qdrant-client==1.7.0
|
||||||
|
|
||||||
|
# Text Processing for token estimation
|
||||||
|
tiktoken==0.7.0
|
||||||
|
|
||||||
|
# Local Embeddings using FastEmbed
|
||||||
|
fastembed==0.3.0
|
||||||
|
|
||||||
|
# PDF support for document ingestion
|
||||||
|
pypdf==5.0.0
|
||||||
|
|
||||||
|
# HTTP client for MCP server communication
|
||||||
|
httpx==0.26.0
|
||||||
|
|
||||||
|
# HTTP client for WebUI (used to call docs-api from WebUI)
|
||||||
|
requests==2.31.0
|
||||||
|
|
||||||
|
# FastMCP for MCP server integration (also used by backend)
|
||||||
|
fastmcp==0.6.0
|
||||||
|
|
||||||
|
# YAML parser for sources configuration
|
||||||
|
PyYAML==6.0.1
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# TEST DEPENDENCIES
|
||||||
|
# =============================================================================
|
||||||
|
pytest==8.3.2
|
||||||
|
pytest-mock==3.14.0
|
||||||
|
pytest-asyncio==0.23.7
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# This directory is intentionally left empty to preserve the folder structure for Docker volumes.
|
||||||
|
# Data from Qdrant will be mounted here via docker-compose.yml.
|
||||||
@@ -0,0 +1,99 @@
|
|||||||
|
# Context7-style MCP System - Docker Compose (Production/Home Server Hardened)
|
||||||
|
services:
|
||||||
|
# Qdrant Vector Database Service
|
||||||
|
qdrant:
|
||||||
|
image: qdrant/qdrant:latest
|
||||||
|
container_name: qdrant
|
||||||
|
ports:
|
||||||
|
- "${QDRANT_PORT:-6333}:6333"
|
||||||
|
volumes:
|
||||||
|
- ./data/qdrant:/qdrant/storage
|
||||||
|
environment:
|
||||||
|
- QDRANT__MEMORY_MAPPED_INDEXES=1
|
||||||
|
restart: unless-stopped
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
networks:
|
||||||
|
- context7-network
|
||||||
|
|
||||||
|
# Docs API Backend Service (FastAPI)
|
||||||
|
docs-api:
|
||||||
|
build:
|
||||||
|
context: ./backend
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: docs-api
|
||||||
|
ports:
|
||||||
|
- "${HOST_PORT:-8787}:8787"
|
||||||
|
environment:
|
||||||
|
- VECTOR_STORE_HOST=qdrant
|
||||||
|
- VECTOR_STORE_PORT=6333
|
||||||
|
- DOCS_PATH=/docs
|
||||||
|
- DB_PATH=/data/db.sqlite
|
||||||
|
- LOG_LEVEL=INFO
|
||||||
|
- API_KEY_DOCS_API=${DOCS_API_KEY:-}
|
||||||
|
volumes:
|
||||||
|
- ./docs:/docs
|
||||||
|
- ./data:/data
|
||||||
|
depends_on:
|
||||||
|
- qdrant
|
||||||
|
networks:
|
||||||
|
- context7-network
|
||||||
|
healthcheck:
  # The check runs inside the container, where the service listens on the
  # fixed container port 8787. HOST_PORT only changes the host side of the
  # port mapping, so it must not be used here.
  test: ["CMD", "curl", "-f", "http://localhost:8787/health"]
  interval: 30s
  timeout: 10s
  retries: 3
  start_period: 15s
|
||||||
|
|
||||||
|
# MCP Server Service (FastMCP via streamable HTTP)
|
||||||
|
docs-mcp:
|
||||||
|
build:
|
||||||
|
context: ./mcp-server
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: docs-mcp
|
||||||
|
ports:
|
||||||
|
- "${MCP_HOST_PORT:-8788}:8788"
|
||||||
|
environment:
|
||||||
|
- DOCS_API_URL=http://docs-api:8787
|
||||||
|
- MCP_API_KEY=${MCP_API_KEY:-}
|
||||||
|
volumes:
|
||||||
|
- ./docs:/docs:ro
|
||||||
|
- ./data:/data
|
||||||
|
restart: unless-stopped
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
depends_on:
|
||||||
|
docs-api:
|
||||||
|
condition: service_healthy
|
||||||
|
networks:
|
||||||
|
- context7-network
|
||||||
|
|
||||||
|
# WebUI Service (HTML interface)
|
||||||
|
webui:
|
||||||
|
build:
|
||||||
|
context: ./webui
|
||||||
|
dockerfile: Dockerfile
|
||||||
|
container_name: webui
|
||||||
|
ports:
|
||||||
|
- "${WEBUI_PORT:-8790}:8790"
|
||||||
|
environment:
|
||||||
|
- DOCS_API_URL=http://docs-api:8787
|
||||||
|
- WEBUI_API_KEY=${DOCS_WEBUI_API_KEY:-}
|
||||||
|
volumes:
|
||||||
|
- ./docs:/docs
|
||||||
|
- ./data:/data
|
||||||
|
depends_on:
|
||||||
|
docs-api:
|
||||||
|
condition: service_healthy
|
||||||
|
networks:
|
||||||
|
- context7-network
|
||||||
|
|
||||||
|
networks:
|
||||||
|
context7-network:
|
||||||
|
driver: bridge
|
||||||
@@ -0,0 +1,143 @@
|
|||||||
|
# Getting Started
|
||||||
|
|
||||||
|
Welcome to the Context7-style MCP System documentation!
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This system provides a self-hosted, local context7-compatible MCP (Model Context Protocol) solution using Docker containers. It enables you to:
|
||||||
|
|
||||||
|
- Ingest and index your own documents
|
||||||
|
- Perform semantic search on vector embeddings
|
||||||
|
- Integrate with MCP-enabled IDEs for intelligent tool interactions
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────┐ ┌─────────────┐ ┌─────────────┐
|
||||||
|
│ Client │────▶│ docs-api │◀────│ docs-mcp │
|
||||||
|
│ (IDE/Tool) │ │ (FastAPI) │ │ (MCP Server)│
|
||||||
|
└─────────────┘ └─────────────┘ └─────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────────────┐
|
||||||
|
│ Qdrant │
|
||||||
|
│ (Vector DB) │
|
||||||
|
└─────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Start All Services
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Verify Services Are Running
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose ps
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see all four services in "Up" status:
- `qdrant` (port 6333)
- `docs-api` (port 8787)
- `docs-mcp` (port 8788)
- `webui` (port 8790)
|
||||||
|
|
||||||
|
### 3. Access the API
|
||||||
|
|
||||||
|
Open your browser and navigate to:
|
||||||
|
```
|
||||||
|
http://localhost:8787/docs
|
||||||
|
```
|
||||||
|
|
||||||
|
You should see the FastAPI documentation page.
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
### Health Check
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8787/health
|
||||||
|
```
|
||||||
|
|
||||||
|
Expected response:
|
||||||
|
```json
|
||||||
|
{"status":"ok"}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Ingest Document
|
||||||
|
|
||||||
|
Upload a text document to be processed and indexed:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -X POST "http://localhost:8787/api/v1/ingest" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"content": "This is sample document content for semantic search testing.",
|
||||||
|
"metadata": {"source": "example", "type": "text"}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### Search Documents
|
||||||
|
|
||||||
|
Perform a similarity search on ingested documents:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl "http://localhost:8787/api/v1/search" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"query": "semantic search",
|
||||||
|
"top_k": 5,
|
||||||
|
"threshold": 0.7
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Environment Variables
|
||||||
|
|
||||||
|
Copy the example environment file and customize:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
Key variables:
|
||||||
|
- `VECTOR_STORE_HOST`: Internal hostname of Qdrant (default: qdrant)
|
||||||
|
- `VECTOR_STORE_PORT`: Qdrant port (default: 6333)
|
||||||
|
|
||||||
|
### Docker Compose
|
||||||
|
|
||||||
|
All services are defined in `docker-compose.yml`. Key networking details:
|
||||||
|
- Services communicate internally via `context7-network`
|
||||||
|
- Qdrant uses service name `qdrant` for internal connections
|
||||||
|
- Vector store is exposed externally on port 6333 for debugging
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
1. Review the project structure to understand component roles
|
||||||
|
2. Customize the backend API endpoints in `backend/app/main.py`
|
||||||
|
3. Implement MCP tools in `mcp-server/server.py`
|
||||||
|
4. Add more example documents in the `docs/` directory
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Check Logs
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose logs -f docs-api
|
||||||
|
docker compose logs -f qdrant
|
||||||
|
docker compose logs -f docs-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
### Reset All Services
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose down -v
|
||||||
|
docker compose up -d --build
|
||||||
|
```
|
||||||
|
|
||||||
|
## Support
|
||||||
|
|
||||||
|
For issues, refer to the `README.md` or check the Qdrant documentation.
|
||||||
@@ -0,0 +1,27 @@
|
|||||||
|
# Git Repository Sources Configuration
|
||||||
|
# Each source defines a library to ingest from a git repository
|
||||||
|
# Paths are relative to the cloned repo root
|
||||||
|
|
||||||
|
sources:
|
||||||
|
- library_id: foundryvtt
|
||||||
|
name: Foundry VTT
|
||||||
|
description: Foundry Virtual Tabletop system documentation
|
||||||
|
repo_url: https://github.com/foundryvtt/foundryvtt.git
|
||||||
|
branch: main
|
||||||
|
include_paths:
|
||||||
|
- docs
|
||||||
|
- src
|
||||||
|
exclude_paths:
|
||||||
|
- node_modules
|
||||||
|
- .git
|
||||||
|
|
||||||
|
# Add more sources here following the same structure:
|
||||||
|
# - library_id: my-repo
|
||||||
|
# name: My Repository
|
||||||
|
# description: My documentation
|
||||||
|
# repo_url: https://github.com/user/my-repo.git
|
||||||
|
# branch: main
|
||||||
|
# include_paths:
|
||||||
|
# - docs
|
||||||
|
# exclude_paths:
|
||||||
|
# - node_modules
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# MCP Server Service
|
||||||
|
FROM python:3.11-slim
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install Python dependencies cleanly
|
||||||
|
COPY requirements.txt .
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy server code
|
||||||
|
COPY server.py .
|
||||||
|
|
||||||
|
# Mount volumes at these paths (configured in docker-compose)
|
||||||
|
# ./docs -> /docs
|
||||||
|
# ./data -> /data
|
||||||
|
# /data holds db.sqlite, the SQLite database file
|
||||||
|
|
||||||
|
# Expose MCP port
|
||||||
|
EXPOSE 8788
|
||||||
|
|
||||||
|
# Healthcheck
|
||||||
|
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||||
|
CMD python -c "import socket; s=socket.create_connection(('127.0.0.1', 8788), 5); s.close()"
|
||||||
|
|
||||||
|
# Run the MCP server using streamable HTTP transport
|
||||||
|
CMD ["python", "server.py"]
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
# MCP Server Dependencies
|
||||||
|
fastmcp==0.6.0
|
||||||
|
httpx==0.26.0
|
||||||
|
|
||||||
|
# For Qdrant vector store operations
|
||||||
|
qdrant-client==1.7.0
|
||||||
|
|
||||||
|
# Text processing for token estimation
|
||||||
|
tiktoken==0.7.0
|
||||||
|
|
||||||
|
# Local embeddings using FastEmbed
|
||||||
|
fastembed==0.3.0
|
||||||
|
|
||||||
|
# PDF support for document ingestion
|
||||||
|
pypdf==5.0.0
|
||||||
|
|
||||||
|
# Environment variables loader
|
||||||
|
python-dotenv==1.0.0
|
||||||
|
|
||||||
|
# YAML parser for sources configuration
|
||||||
|
PyYAML==6.0.1
|
||||||
@@ -0,0 +1,337 @@
|
|||||||
|
# MCP Server for local-context7 Docs API with Git Sources Support
|
||||||
|
"""
|
||||||
|
MCP server providing Context7-style tools for interacting with the local docs API.
|
||||||
|
|
||||||
|
This server exposes 6 tools:
|
||||||
|
- resolve-library-id: Find libraries matching a name (with /local/ prefix)
|
||||||
|
- get-library-docs: Retrieve documentation from a library
|
||||||
|
- list-libraries: List all discovered libraries
|
||||||
|
- search-docs: Semantic search across documents
|
||||||
|
- refresh-library: Re-ingest documents for a library or all libraries
|
||||||
|
- sync-sources: Sync git repositories from configuration file
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
from typing import Optional, List, Dict, Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
import httpx
|
||||||
|
except ImportError:
|
||||||
|
httpx = None
|
||||||
|
|
||||||
|
# Prefer the real FastMCP; when the package is missing, define a minimal
# stand-in so this module can still be imported.
try:
    from fastmcp import FastMCP
except ImportError:
    class _Tool:
        """Record of a registered tool; stores only the tool's name."""

        def __init__(self, name: str):
            self.name = name

    class FastMCP:
        """Import-time fallback used by tests when fastmcp is not installed."""

        def __init__(self, *args, **kwargs):
            # Constructor arguments are accepted and ignored.
            self.tools = []

        def tool(self):
            """Return a decorator that records the function's name and returns it unchanged."""
            def decorator(func):
                self.tools.append(_Tool(func.__name__))
                return func
            return decorator

        def run(self, *args, **kwargs):
            """Always raise: the fallback cannot serve requests."""
            raise RuntimeError("fastmcp is not installed")
|
||||||
|
|
||||||
|
|
||||||
|
# Environment configuration
|
||||||
|
# Base URL of the docs API. The default targets the docs-api service on port
# 8787. It must be a literal URL: shell-style "${VAR:-default}" placeholders
# are not expanded inside a Python string.
DOCS_API_URL = os.getenv("DOCS_API_URL", "http://docs-api:8787")
# API key for the MCP server, read from the environment; empty when unset.
MCP_API_KEY = os.getenv("MCP_API_KEY", "")
|
||||||
|
|
||||||
|
|
||||||
|
def strip_local_prefix(lib_id: str) -> str:
    """Return ``lib_id`` without a leading ``/local/``; other ids pass through unchanged."""
    prefix = "/local/"
    return lib_id[len(prefix):] if lib_id.startswith(prefix) else lib_id
|
||||||
|
|
||||||
|
|
||||||
|
# Create FastMCP instance with tools
|
||||||
|
mcp = FastMCP("context7-docs", root_path="/app")
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Resolve a library name to Context7-style candidates.

    Sends ``GET /libraries/search?q=<library_name>`` to the docs API and
    returns the ``matches`` list from the JSON reply.

    Args:
        library_name: The library name to search for (e.g., "foundryvtt")

    Returns:
        The ``matches`` list from the API reply (``[]`` when the key is
        absent). Entries are expected to look like:
        [
            {
                "id": "/local/foundryvtt",
                "name": "Foundry VTT",
                "description": "Fantasy tabletop virtual table...",
                "source": "local"
            },
            ...
        ]
        Any failure (httpx missing, non-200 status, transport or decoding
        error) is printed and yields ``[]``.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")

        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as http:
            reply = await http.get("/libraries/search", params={"q": library_name})

            # Anything but 200 is routed through the common error path below.
            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")

            return reply.json().get("matches", [])
    except Exception as exc:
        print(f"Error resolving library '{library_name}': {exc}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def get_library_docs(context7_compatible_library_id: str, topic: Optional[str] = None, tokens: int = 8000) -> str:
    """
    Retrieve documentation content from a library.

    Strips a leading ``/local/`` from the id, then sends
    ``GET /libraries/<id>/docs`` to the docs API with ``tokens`` and, when
    given, ``topic`` as query parameters.

    Args:
        context7_compatible_library_id: The Context7-style library ID (with /local/ prefix)
        topic: Optional topic to search within the library; omitted from the request when empty
        tokens: Maximum tokens to include in response (default: 8000)

    Returns:
        The ``content`` field of the API reply (``""`` when absent). On any
        failure the error is printed and a string starting with
        ``Error retrieving documentation:`` is returned instead.

    Example:
        get_library_docs("/local/foundryvtt", topic="hooks", tokens=8000)
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")

        # The API expects the bare id, without the /local/ prefix.
        bare_id = strip_local_prefix(context7_compatible_library_id)

        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as http:
            query_args = {"tokens": tokens}
            if topic:
                query_args["topic"] = topic

            reply = await http.get(f"/libraries/{bare_id}/docs", params=query_args)

            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")

            return reply.json().get("content", "")
    except Exception as exc:
        print(f"Error getting library docs for '{context7_compatible_library_id}': {exc}")
        return f"Error retrieving documentation: {str(exc)}"
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def list_libraries() -> List[Dict[str, Any]]:
    """
    List all discovered libraries in the system.

    Sends ``GET /libraries`` to the docs API and returns the ``libraries``
    list from the JSON reply.

    Returns:
        The ``libraries`` list from the API reply (``[]`` when the key is
        absent). Entries are expected to look like:
        [
            {
                "id": "/local/foundryvtt",
                "name": "Foundry VTT",
                "description": "...",
                "source": "local"
            },
            ...
        ]
        Any failure is printed and yields ``[]``.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")

        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as http:
            reply = await http.get("/libraries")

            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")

            return reply.json().get("libraries", [])
    except Exception as exc:
        print(f"Error listing libraries: {exc}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def search_docs(query: str, library_id: Optional[str] = None, limit: int = 10) -> List[Dict[str, Any]]:
    """
    Perform semantic search across documents.

    Posts ``{"query", "limit"}`` to ``/search`` on the docs API, adding
    ``library_id`` (with any leading ``/local/`` removed) when one is given.

    Args:
        query: The search query string
        library_id: Optional library ID filter (with /local/ prefix). If None, searches all libraries.
        limit: Maximum number of results to return (default: 10)

    Returns:
        The ``results`` list from the API reply (``[]`` when the key is
        absent). Entries are expected to look like:
        [
            {
                "id": "...",
                "score": 0.123,
                "library_id": "...",
                "path": "...",
                "title": "...",
                "chunk_index": 0
            },
            ...
        ]
        Any failure is printed and yields ``[]``.
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")

        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as http:
            body = {"query": query, "limit": limit}
            if library_id:
                # The API expects the bare id, without the /local/ prefix.
                body["library_id"] = strip_local_prefix(library_id)

            reply = await http.post("/search", json=body)

            if reply.status_code != 200:
                raise Exception(f"API error: {reply.status_code} - {reply.text}")

            return reply.json().get("results", [])
    except Exception as exc:
        print(f"Error searching for query '{query}': {exc}")
        return []
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def refresh_library(library_id: Optional[str] = None) -> Dict[str, Any]:
    """
    Re-ingest documents by calling the Docs API ``POST /ingest/all`` endpoint.

    Args:
        library_id: Library the caller would like refreshed (with /local/ prefix).
            NOTE(review): this value is not forwarded to the API; the request
            always targets ``/ingest/all``, so every library is re-ingested.
            A warning is printed when a value is supplied so the caller is
            not misled. Wire this to a per-library endpoint once its route
            is confirmed.

    Returns:
        On success, the ingestion summary:
        {
            "success": True,
            "total_libraries": 2,
            "successful": 2,
            "failed": 0,
            "total_chunks": 150
        }
        On failure: {"success": False, "error": "<message>"}
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")

        if library_id:
            # Make the scope mismatch visible instead of silently ignoring it.
            print(
                f"Warning: refresh_library was given library_id '{library_id}', "
                "but targeted re-ingestion is not implemented; re-ingesting all libraries"
            )

        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            response = await client.post("/ingest/all")

            if response.status_code == 200:
                data = response.json()
                return {
                    "success": True,
                    "total_libraries": data.get("total_libraries", 0),
                    "successful": data.get("successful", 0),
                    "failed": data.get("failed", 0),
                    "total_chunks": data.get("total_chunks", 0)
                }
            else:
                raise Exception(f"API error: {response.status_code} - {response.text}")

    except Exception as e:
        print(f"Error refreshing library '{library_id or 'all'}': {e}")
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
@mcp.tool()
async def sync_sources(override: bool = False) -> Dict[str, Any]:
    """
    Ask the Docs API to sync the git repositories from the sources configuration.

    Sends ``POST /sources/sync``. Per the API contract described here, each
    configured repository is cloned or updated and matching files are
    ingested into the vector store; with ``override`` set, the existing repo
    is cleared before cloning.

    Args:
        override: If true, clears existing repo before cloning. Default: false.
            The flag is only included in the request body when it is truthy.

    Returns:
        Sync result summary:
        {
            "success": true,
            "total_sources": 2,
            "successful": 1,
            "failed": 1,
            "results": [
                {
                    "library_id": "foundryvtt",
                    "success": true,
                    "message": "...",
                    "files_discovered": 450,
                    "chunks_created": 2340,
                    "vectors_added": 2340
                },
                ...
            ]
        }
        On failure: {"success": False, "error": "<message>"}
    """
    try:
        if httpx is None:
            raise RuntimeError("httpx is not installed")

        async with httpx.AsyncClient(base_url=DOCS_API_URL, timeout=60.0) as client:
            body = {}
            if override:
                body["override"] = override

            response = await client.post("/sources/sync", json=body)

            if response.status_code != 200:
                raise Exception(f"API error: {response.status_code} - {response.text}")

            summary = response.json()
            result = {"success": True}
            for key, fallback in (("total_sources", 0), ("successful", 0), ("failed", 0)):
                result[key] = summary.get(key, fallback)
            result["results"] = summary.get("results", [])
            return result

    except Exception as e:
        print(f"Error syncing git sources: {e}")
        return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Launch the MCP server over the streamable HTTP transport.
    # Bind address and port come from MCP_HOST / MCP_PORT.
    host = os.getenv("MCP_HOST", "0.0.0.0")
    port = int(os.getenv("MCP_PORT", 8788))

    startup_banner = (
        f"Starting MCP server on http://{host}:{port}",
        "Tools available:",
        " - resolve-library-id(libraryName)",
        " - get-library-docs(context7_compatible_library_id, topic=None, tokens=8000)",
        " - list-libraries()",
        " - search_docs(query, library_id=None, limit=10)",
        " - refresh_library(library_id=None)",
        " - sync_sources(override=false)",
    )
    for banner_line in startup_banner:
        print(banner_line)

    if not hasattr(mcp, "run"):
        # Fallback for an ``mcp`` object without ``run``: serve it with uvicorn.
        import uvicorn

        uvicorn.run(mcp, host=host, port=port)
    else:
        mcp.run(transport="streamable-http", host=host, port=port)
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
"""Compatibility package for importing the mcp-server source tree in tests."""
|
||||||
@@ -0,0 +1,13 @@
|
|||||||
|
"""Import wrapper for ../mcp-server/server.py."""
|
||||||
|
import importlib.util
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# The real server lives in ../mcp-server/server.py. A hyphenated directory
# name cannot be used in an ``import`` statement, so the file is loaded by path.
_source = Path(__file__).resolve().parents[1] / "mcp-server" / "server.py"
_spec = importlib.util.spec_from_file_location("_local_context7_mcp_server", _source)

# Validate the spec BEFORE it is used. An explicit raise (rather than assert)
# keeps the check alive under ``python -O``.
if _spec is None or _spec.loader is None:
    raise ImportError(f"Cannot load MCP server module from {_source}")

_module = importlib.util.module_from_spec(_spec)
_spec.loader.exec_module(_module)

# Re-export every name that does not start with a double underscore so that
# importing this wrapper exposes the server module's contents.
for _name, _value in vars(_module).items():
    if not _name.startswith("__"):
        globals()[_name] = _value
|
||||||
+35
@@ -0,0 +1,35 @@
|
|||||||
|
# Pytest configuration for local-context7 tests

[pytest]
# Test discovery pattern (where to look for tests)
testpaths = tests

# Pattern of test files to discover
python_files = test_*.py

# Pattern of test functions to run
python_functions = test_*

# Markers for test categorization
# (continuation lines must be indented to belong to the "markers" value)
markers =
    slow: marks tests as slow (deselect with '-m "not slow"')
    integration: marks tests as integration tests requiring external services
    unit: marks tests as pure unit tests

# Style of the progress output printed while tests run
console_output_style = classic

# Test execution options (both keys are provided by the pytest-asyncio plugin)
asyncio_mode = auto
# Replaces "testsessionstartfixturesscope", which is not a pytest option.
# NOTE(review): assumes the intent was pytest-asyncio's fixture loop scope - confirm.
asyncio_default_fixture_loop_scope = function

# Logging configuration
log_cli = true
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)s] %(name)s: %(message)s
log_cli_date_format = %Y-%m-%d %H:%M:%S

# Ignore specific warnings during tests
filterwarnings =
    ignore::DeprecationWarning
    ignore::PendingDeprecationWarning
|
||||||
@@ -0,0 +1,2 @@
|
|||||||
|
# Tests package for local-context7
|
||||||
|
# Contains unit tests for chunking, database operations, search, and MCP server modules
|
||||||
@@ -0,0 +1,191 @@
|
|||||||
|
"""
|
||||||
|
Pytest configuration and fixtures for local-context7 tests.
|
||||||
|
|
||||||
|
This module provides:
|
||||||
|
- Mocks for external dependencies (Qdrant, FastEmbed)
|
||||||
|
- Database fixtures for SQLite operations
|
||||||
|
- Common test utilities
|
||||||
|
"""
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
import pytest
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from backend.app.db import init_db, upsert_library, insert_document_chunk, get_chunks_for_library, list_libraries, clear_library_documents, get_connection
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FIXTURES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture(scope="function")
def test_database():
    """
    Provide a freshly initialised SQLite database for one test.

    Any stale ``backend/data/test_db.sqlite`` is deleted, ``init_db()`` is
    called to create the tables, and the file is deleted again after the test.

    Yields:
        None. The fixture is used only for its setup/teardown side effects.
    """
    # NOTE(review): init_db() is called without db_path, so which file it
    # initialises is decided inside backend.app.db - confirm it is this one.
    data_dir = Path(__file__).parent.parent / "backend" / "data"
    db_path = data_dir / "test_db.sqlite"

    # Ensure data directory exists
    data_dir.mkdir(parents=True, exist_ok=True)

    def _remove_db_file():
        if db_path.exists():
            db_path.unlink()

    # Start from a clean slate, then build the schema.
    _remove_db_file()
    result = init_db()
    assert result["success"], f"Failed to initialize test DB: {result.get('error')}"

    yield

    # Teardown: drop the database file created for this test.
    _remove_db_file()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope="function")
def sample_text():
    """Markdown sample with nested headings, used by the chunking tests."""
    # Paragraphs and headings are separated by one blank line each.
    blocks = [
        "# Introduction",
        "This is the introduction section.",
        "## Background",
        "Background information goes here to make this longer and test chunking.",
        "This paragraph has more content about the background topic.",
        "### Details",
        "Specific details about the background are provided in this subsection.",
        "More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    ]
    return "\n\n".join(blocks)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MOCKS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_embedding_model():
    """
    Stand-in for the FastEmbed model that never loads real weights.

    Its ``embed(texts)`` returns one 384-dimensional all-zero vector per
    input text, so tests need not download or run an embedding model.
    """
    zero_model = MagicMock()
    # One zero vector per input, as a list of lists of floats.
    zero_model.embed = lambda texts: [[0.0] * 384 for _ in texts]
    return zero_model
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_qdrant_client():
    """
    Stand-in for the Qdrant client so search logic runs without a server.

    ``search(...)`` always returns an empty list (no hits) and
    ``delete_collection`` is a mock returning True.
    """
    def _no_hits(collection_name, query_vector, limit=10, search_filter=None):
        # Simulate a query that matches nothing.
        return []

    client = MagicMock()
    client.search = _no_hits
    # Used by cleanup code paths.
    client.delete_collection = MagicMock(return_value=True)
    return client
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_embedding_model_batch():
    """
    Batch embedding model mock that returns deterministic fake vectors.

    Each text maps to a 384-dimensional vector derived from a CRC32 of its
    UTF-8 bytes, so different texts usually get different vectors and the
    same text gets the same vector in every run.
    """
    # Local import: zlib is only needed by this fixture.
    import zlib

    def hash_text(text):
        # CRC32 is stable across processes, unlike the built-in hash() on str,
        # which is salted per interpreter run.
        text_hash = zlib.crc32(text.encode("utf-8")) % 1000000
        base = text_hash / 1000000
        # Use the computed value; the previous code called hash_text() here
        # and recursed without end.
        return [base + (i * 0.001) for i in range(384)]

    mock_model = MagicMock()
    mock_model.embed = lambda texts: [hash_text(t) for t in texts]

    return mock_model
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# SETUP TEARDOWN FIXTURES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def clear_test_database(test_database):
    """
    Apply ``test_database`` to every test automatically.

    This autouse fixture does nothing itself. Requesting ``test_database``
    is what triggers database setup before each test and cleanup after it.
    """
    return None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def empty_vector():
    """A 384-dimensional all-zero embedding vector for tests."""
    return [0.0 for _ in range(384)]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def fake_embeddings(sample_text):
    """
    One fake 384-dimensional vector per non-blank paragraph of ``sample_text``.

    Every component of a vector holds the same value, derived from the
    paragraph's built-in hash and its length.
    """
    vectors = []
    for paragraph in sample_text.split("\n\n"):
        if not paragraph.strip():
            continue
        component = (hash(paragraph) + len(paragraph)) % 1000 / 10000
        vectors.append([component] * 384)
    return vectors
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# UTILITY FUNCTIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def temp_file(tmp_path):
    """Return the path ``tmp_path / "test.txt"``; the file itself is not created."""
    return tmp_path / "test.txt"
|
||||||
|
|
||||||
|
|
||||||
|
# Register custom marker for slow tests (if needed)
|
||||||
|
def pytest_configure(config):
    """Register the ``slow`` marker with pytest."""
    slow_marker = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    config.addinivalue_line("markers", slow_marker)
|
||||||
|
|
||||||
|
|
||||||
|
def pytest_runtest_setup(item):
    """Per-test setup hook; currently a deliberate no-op placeholder."""
    return None
|
||||||
@@ -0,0 +1,238 @@
|
|||||||
|
"""
|
||||||
|
Tests for backend/app/chunking.py
|
||||||
|
|
||||||
|
These are pure unit tests that don't require any external dependencies.
|
||||||
|
They test text chunking logic, token estimation, and heading-aware splitting.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestEstimateTokens:
|
||||||
|
"""Tests for the estimate_tokens() function."""
|
||||||
|
|
||||||
|
def test_empty_text(self):
|
||||||
|
"""Empty text should return 0 tokens."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
assert estimate_tokens("") == 0
|
||||||
|
|
||||||
|
def test_single_char(self):
|
||||||
|
"""Single character = 1 token (using 4 chars per token approximation)."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
assert estimate_tokens("a") == 0 # 1 char // 4 = 0 tokens
|
||||||
|
|
||||||
|
def test_4_chars(self):
|
||||||
|
"""4 characters = 1 token."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
assert estimate_tokens("abcd") == 1
|
||||||
|
|
||||||
|
def test_400_chars(self):
|
||||||
|
"""400 characters = 100 tokens."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
text = "a" * 400
|
||||||
|
assert estimate_tokens(text) == 100
|
||||||
|
|
||||||
|
def test_whitespace_only(self):
|
||||||
|
"""Whitespace-only text should be counted."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
assert estimate_tokens(" ") == 0 # 3 chars // 4 = 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestChunkText:
    """Tests for the chunk_text() function."""

    def test_empty_input(self):
        """Empty input should return empty list."""
        from backend.app.chunking import chunk_text
        assert chunk_text("") == []

    def test_small_text_single_chunk(self):
        """Small text under limit should be single chunk."""
        from backend.app.chunking import chunk_text
        small = "This is a very short text that should be returned as a single chunk."
        chunks = chunk_text(small, max_tokens=500)
        assert len(chunks) == 1
        assert chunks[0] == small

    def test_exact_token_limit(self):
        """Text exactly at limit should be one chunk."""
        from backend.app.chunking import chunk_text, estimate_tokens
        # Create text that is exactly 500 tokens (2000 chars)
        text = "a" * 2000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) == 1
        assert estimate_tokens(chunks[0]) == 500

    def test_over_limit_splits(self):
        """Text over limit should be split into multiple chunks."""
        from backend.app.chunking import chunk_text
        # Create text that is 2500 tokens (10000 chars)
        text = "b" * 10000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) >= 2  # Should be split

    def test_preserves_content(self):
        """Text under the limit comes back as one chunk, unchanged."""
        from backend.app.chunking import chunk_text
        original = "Hello world! This is a test of chunking functionality."
        chunks = chunk_text(original, max_tokens=100)
        combined = "".join(chunks)
        assert len(chunks) == 1
        assert combined == original

    def test_headings_split(self):
        """With a small limit, at least one chunk should start with a heading."""
        from backend.app.chunking import chunk_text
        markdown_with_headings = """# Introduction

This is the introduction section.

## Background

Background information goes here."""

        # With very small token limit, headings should cause splits
        chunks = chunk_text(markdown_with_headings, max_tokens=20)
        heading_chunks = [c for c in chunks if c.strip().startswith('#')]
        assert len(heading_chunks) >= 1  # At least some heading preserved

    def test_paragraph_split(self):
        """Paragraph splitting should respect paragraph boundaries."""
        from backend.app.chunking import chunk_text
        text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
        chunks = chunk_text(text, max_tokens=15)  # Small limit forces splits
        assert len(chunks) >= 3  # At least as many paragraphs

    def test_no_empty_chunks(self):
        """Should not return empty chunks."""
        from backend.app.chunking import chunk_text
        text = "Hello world"
        chunks = chunk_text(text, max_tokens=10)
        for chunk in chunks:
            assert chunk.strip() != ""
|
||||||
|
|
||||||
|
|
||||||
|
class TestTokenEstimationBoundaries:
|
||||||
|
"""Tests for token estimation boundaries."""
|
||||||
|
|
||||||
|
def test_boundary_precision(self):
|
||||||
|
"""Test boundary conditions around the 4-char-per-token limit."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
|
||||||
|
# Edge cases around boundary
|
||||||
|
assert estimate_tokens("abcd") == 1 # exactly 4 chars
|
||||||
|
assert estimate_tokens("abcde") == 1 # 5 chars still 1 token
|
||||||
|
assert estimate_tokens("abcdef") == 1 # 6 chars still 1 token
|
||||||
|
assert estimate_tokens("abcdefg") == 1 # 7 chars still 1 token
|
||||||
|
assert estimate_tokens("abcdefgh") == 2 # 8 chars = 2 tokens
|
||||||
|
|
||||||
|
def test_various_languages_chars(self):
|
||||||
|
"""Token estimation uses character count, not unicode complexity."""
|
||||||
|
from backend.app.chunking import estimate_tokens
|
||||||
|
|
||||||
|
# Chinese characters (each counts as 1 char)
|
||||||
|
chinese = "你好世界" # 4 characters
|
||||||
|
assert estimate_tokens(chinese) == 1
|
||||||
|
|
||||||
|
# Emoji
|
||||||
|
emoji = "Hello 🎉 world" # Spaces + letters + emoji
|
||||||
|
# emoji count varies by implementation, just check it's counted
|
||||||
|
assert isinstance(estimate_tokens(emoji), int)
|
||||||
|
|
||||||
|
|
||||||
|
class TestChunkOverlapBehavior:
    """Checks on how much text neighbouring chunks share."""

    def test_overlap_not_exceeded(self):
        """The first chunk must not exceed half of the total chunked length."""
        from backend.app.chunking import chunk_text

        sentence = "The quick brown fox jumps over the lazy dog. "
        chunks = chunk_text(sentence * 10, max_tokens=30, overlap_tokens=5)

        # Nothing to compare when the text was not split.
        if len(chunks) <= 1:
            return

        # Rough check only: total length counts any overlapped text twice.
        total_length = sum(len(chunk) for chunk in chunks)
        assert len(chunks[0]) <= total_length // 2
|
||||||
|
|
||||||
|
|
||||||
|
class TestChunkEdgeCases:
    """Edge cases and invalid input for chunk_text()."""

    def test_whitespace_only_text(self):
        """Whitespace-only input must not crash and must yield a list."""
        from backend.app.chunking import chunk_text

        result = chunk_text(" \n\n ", max_tokens=100)
        # An empty list or a whitespace chunk are both acceptable here.
        assert isinstance(result, list)

    def test_very_long_paragraph(self):
        """A long paragraph with no blank lines is still split."""
        from backend.app.chunking import chunk_text

        sentence = "The quick brown fox jumps over the lazy dog. "
        pieces = chunk_text(sentence * 100, max_tokens=50)
        assert len(pieces) > 1  # Should be split

    def test_none_input_raises(self):
        """Passing None must raise TypeError or AssertionError."""
        from backend.app.chunking import chunk_text

        accepted_errors = (TypeError, AssertionError)
        with pytest.raises(accepted_errors):
            chunk_text(None, max_tokens=100)

    def test_unicode_text(self):
        """Short mixed-script text stays in a single chunk."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text("Hello 世界 مرحبا 🎉", max_tokens=50)
        assert len(pieces) == 1  # Small enough to be single chunk
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# SAMPLE TEXT FIXTURE
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def heading_markdown():
    """Markdown document with H1-H3 headings for the heading-aware chunking tests."""
    # Headings and paragraphs are separated by one blank line each.
    sections = (
        "# Introduction",
        "This is the introduction section. It contains some introductory text here.",
        "## Background",
        "Background information goes here to make this longer and test chunking. This paragraph has more content about the background topic. It provides context.",
        "### Details",
        "Specific details about the background are provided in this subsection. More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    )
    return "\n\n".join(sections)
|
||||||
|
|
||||||
|
|
||||||
|
class TestHeadingPreservation:
    """Heading-aware chunking checks that use the ``heading_markdown`` fixture."""

    def test_headings_in_separate_chunks(self, heading_markdown):
        """With a tight limit, at least one chunk must start with a heading."""
        from backend.app.chunking import chunk_text

        # A very small token limit forces splits at headings.
        pieces = chunk_text(heading_markdown, max_tokens=30)

        starts_with_heading = [p for p in pieces if p.strip().startswith('#')]
        assert len(starts_with_heading) >= 1

    def test_all_content_present(self, heading_markdown):
        """The main heading titles must survive chunking."""
        from backend.app.chunking import chunk_text

        combined = "".join(chunk_text(heading_markdown, max_tokens=500))

        # Content shouldn't be truncated or corrupted
        for title in ("Introduction", "Background", "Conclusion"):
            assert title in combined
|
||||||
@@ -0,0 +1,316 @@
|
|||||||
|
"""
|
||||||
|
Tests for backend/app/db.py
|
||||||
|
|
||||||
|
These tests verify SQLite database operations including:
|
||||||
|
- Table creation (init_db)
|
||||||
|
- Library CRUD operations
|
||||||
|
- Document chunk storage and retrieval
|
||||||
|
- Full-text search functionality
|
||||||
|
|
||||||
|
All tests use a temporary test database file.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
class TestInitDatabase:
    """Tests for init_db() - table creation."""

    def test_init_db_creates_tables(self, test_database):
        """After init, some table whose name contains "libraries" must exist."""
        from backend.app.db import get_connection

        conn = get_connection()
        try:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
            tables = [row[0] for row in cursor.fetchall()]
        finally:
            # Close even when the query fails so the connection is not leaked.
            conn.close()

        # The case-insensitive substring match also covers an exact
        # "libraries" table name.
        assert any("libraries" in t.lower() for t in tables)

    def test_init_db_returns_success(self, test_database):
        """init_db should return success indicator."""
        from backend.app.db import init_db

        result = init_db()
        assert result["success"] is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestLibraryOperations:
    """Tests for library CRUD operations."""

    def test_upsert_library_new(self, test_database):
        """Upsert should create new library."""
        from backend.app.db import upsert_library

        result = upsert_library(
            library_id="/local/testlib",
            name="Test Library",
            description="A test library for unit tests"
        )

        assert result["success"] is True
        assert result["id"] == "/local/testlib"

    def test_upsert_library_update(self, test_database):
        """Upsert should update existing library."""
        from backend.app.db import upsert_library

        # Insert first library
        upsert_library(
            library_id="/local/upsertlib",
            name="Original Name",
            description="Original description"
        )

        # Update it
        result = upsert_library(
            library_id="/local/upsertlib",
            name="Updated Name",
            description="Updated description"
        )

        assert result["success"] is True

    def test_upsert_library_id_normalization(self, test_database):
        """Upsert should succeed for IDs with and without the /local/ prefix."""
        from backend.app.db import upsert_library

        # Test various ID formats
        test_ids = [
            "/local/foundryvtt",
            "foundryvtt",
            "/local/mydocs",
        ]

        for lib_id in test_ids:
            result = upsert_library(library_id=lib_id, name="Test", description="Desc")
            # Only the success flag is checked; the stored ID is not read back.
            assert result["success"] is True

    def test_list_libraries(self, test_database):
        """list_libraries should return list of libraries."""
        from backend.app.db import upsert_library, list_libraries

        # Create some libraries
        for i in range(3):
            upsert_library(
                library_id=f"/local/lib{i}",
                name=f"Library {i}",
                description=f"Description {i}"
            )

        libs = list_libraries()
        assert isinstance(libs, list)
        assert len(libs) >= 3

    def test_search_libraries(self, test_database):
        """search_libraries should return a list for a name/description query."""
        from backend.app.db import upsert_library, search_libraries

        # Create libraries with searchable names
        upsert_library(library_id="/local/foo1", name="Foo Library", description="Bar baz")
        upsert_library(library_id="/local/foo2", name="Other Library", description="Different content")

        results = search_libraries("foo")
        # Only the return type is checked here, not which libraries match.
        assert isinstance(results, list)
|
||||||
|
|
||||||
|
|
||||||
|
class TestDocumentChunkOperations:
|
||||||
|
"""Tests for document chunk storage and retrieval."""
|
||||||
|
|
||||||
|
def test_insert_document_chunk_new(self, test_database):
    """Inserting a chunk with a new doc_id should report success."""
    from backend.app.db import insert_document_chunk

    chunk_fields = {
        "doc_id": "doc-1",
        "library_id": "/local/testlib",
        "path": "docs/example.md",
        "title": "Example Document",
        "content": "# Example\n\nThis is the content.",
        "chunk_index": 0,
        "token_estimate": 100,
    }
    outcome = insert_document_chunk(**chunk_fields)

    assert outcome["success"] is True
|
||||||
|
|
||||||
|
def test_insert_document_chunk_update(self, test_database):
    """Inserting again under the same doc_id should report success."""
    from backend.app.db import insert_document_chunk

    shared_id = {"doc_id": "doc-update-test", "library_id": "/local/uplib"}

    # First write: the original version of the record.
    insert_document_chunk(
        path="old-path.md",
        title="Old Title",
        content="# Old\nContent here.",
        chunk_index=0,
        token_estimate=50,
        **shared_id
    )

    # Second write under the same doc_id with every other field changed.
    outcome = insert_document_chunk(
        path="new-path.md",
        title="New Title",
        content="# New\nUpdated content.",
        chunk_index=1,
        token_estimate=75,
        **shared_id
    )

    assert outcome["success"] is True
|
||||||
|
|
||||||
|
def test_get_document_by_id(self, test_database):
|
||||||
|
"""get_document_by_id should retrieve document by ID."""
|
||||||
|
from backend.app.db import insert_document_chunk, get_document_by_id
|
||||||
|
|
||||||
|
# Insert document
|
||||||
|
doc_id = "unique-doc-id-12345"
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id=doc_id,
|
||||||
|
library_id="/local/testlib",
|
||||||
|
path="docs/test.md",
|
||||||
|
title="Test Document",
|
||||||
|
content="# Test\n\nTest content here.",
|
||||||
|
chunk_index=None,
|
||||||
|
token_estimate=200
|
||||||
|
)
|
||||||
|
|
||||||
|
# Retrieve it
|
||||||
|
doc = get_document_by_id(doc_id)
|
||||||
|
assert doc is not None
|
||||||
|
assert doc["id"] == doc_id
|
||||||
|
|
||||||
|
def test_get_chunks_for_library(self, test_database):
|
||||||
|
"""get_chunks_for_library should return all chunks for a library."""
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk, get_chunks_for_library
|
||||||
|
|
||||||
|
# Create library
|
||||||
|
upsert_library(library_id="/local/chunktest", name="Chunk Test", description="Test")
|
||||||
|
|
||||||
|
# Add some chunks
|
||||||
|
for i in range(3):
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id=f"chunk-{i}",
|
||||||
|
library_id="/local/chunktest",
|
||||||
|
path=f"path{i}.md",
|
||||||
|
title=f"Section {i}",
|
||||||
|
content=f"Content section {i}.",
|
||||||
|
chunk_index=i,
|
||||||
|
token_estimate=50
|
||||||
|
)
|
||||||
|
|
||||||
|
chunks = get_chunks_for_library("/local/chunktest")
|
||||||
|
assert isinstance(chunks, list)
|
||||||
|
assert len(chunks) >= 3
|
||||||
|
|
||||||
|
def test_clear_library_documents(self, test_database):
|
||||||
|
"""clear_library_documents should delete all docs for a library."""
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk, clear_library_documents, get_chunks_for_library
|
||||||
|
|
||||||
|
# Create and populate library
|
||||||
|
upsert_library(library_id="/local/cleartest", name="Clear Test", description="Test")
|
||||||
|
for i in range(5):
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id=f"clear-{i}",
|
||||||
|
library_id="/local/cleartest",
|
||||||
|
path=f"path{i}.md",
|
||||||
|
content=f"Content {i}.",
|
||||||
|
token_estimate=20
|
||||||
|
)
|
||||||
|
|
||||||
|
# Clear it
|
||||||
|
result = clear_library_documents("/local/cleartest")
|
||||||
|
assert result["success"] is True
|
||||||
|
|
||||||
|
# Verify cleared
|
||||||
|
remaining = get_chunks_for_library("/local/cleartest")
|
||||||
|
assert len(remaining) == 0
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatabaseEdgeCases:
|
||||||
|
"""Tests for edge cases and error handling."""
|
||||||
|
|
||||||
|
def test_empty_library_id(self, test_database):
|
||||||
|
"""Operations with empty ID should handle gracefully."""
|
||||||
|
from backend.app.db import upsert_library
|
||||||
|
|
||||||
|
result = upsert_library(library_id="", name="Test", description="Desc")
|
||||||
|
# Should not crash, though may not be a valid operation
|
||||||
|
|
||||||
|
def test_special_characters_in_content(self, test_database):
|
||||||
|
"""Content with special characters should be stored."""
|
||||||
|
from backend.app.db import insert_document_chunk
|
||||||
|
|
||||||
|
content = "Hello \"world\" <tag /> & amp; 'apostrophe'"
|
||||||
|
result = insert_document_chunk(
|
||||||
|
doc_id="special-test",
|
||||||
|
library_id="/local/speciallib",
|
||||||
|
path="special.md",
|
||||||
|
content=content,
|
||||||
|
token_estimate=100
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
|
||||||
|
def test_very_long_content(self, test_database):
|
||||||
|
"""Long content should be stored."""
|
||||||
|
from backend.app.db import insert_document_chunk
|
||||||
|
|
||||||
|
long_content = "a" * 5000
|
||||||
|
result = insert_document_chunk(
|
||||||
|
doc_id="long-test",
|
||||||
|
library_id="/local/longlib",
|
||||||
|
path="long.md",
|
||||||
|
content=long_content,
|
||||||
|
token_estimate=1000
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
|
||||||
|
def test_none_description(self, test_database):
|
||||||
|
"""Library with None description should work."""
|
||||||
|
from backend.app.db import upsert_library
|
||||||
|
|
||||||
|
result = upsert_library(
|
||||||
|
library_id="/local/nonedesc",
|
||||||
|
name="No Description Lib",
|
||||||
|
description=None
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["success"] is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestDatabaseInitialization:
|
||||||
|
"""Tests for database initialization state."""
|
||||||
|
|
||||||
|
def test_database_is_empty_after_init(self, test_database):
|
||||||
|
"""Database should be empty right after init."""
|
||||||
|
from backend.app.db import list_libraries
|
||||||
|
|
||||||
|
libs = list_libraries()
|
||||||
|
assert isinstance(libs, list)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FIXTURES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_doc():
    """Provide one document-chunk record as a plain dict for tests to reuse."""
    return dict(
        doc_id="sample-doc-1",
        library_id="/local/samplelib",
        path="docs/guide.md",
        title="Getting Started Guide",
        content="# Getting Started\n\nWelcome to the guide. This is a sample document for testing.\n\n## Installation\n\nInstall with pip.",
        chunk_index=0,
        token_estimate=500,
    )
|
||||||
@@ -0,0 +1,262 @@
|
|||||||
|
"""
|
||||||
|
Tests for mcp-server/server.py
|
||||||
|
|
||||||
|
These are pure unit tests that don't require any external dependencies.
|
||||||
|
They test:
|
||||||
|
- The strip_local_prefix() function directly (no network)
|
||||||
|
- MCP server tool definitions and structure
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestStripLocalPrefix:
|
||||||
|
"""Tests for the strip_local_prefix() function."""
|
||||||
|
|
||||||
|
def test_strips_prefix_from_full_id(self):
|
||||||
|
"""Should strip /local/ prefix from full library ID."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = "/local/foundryvtt"
|
||||||
|
expected_output = "foundryvtt"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == expected_output
|
||||||
|
|
||||||
|
def test_preserves_id_without_prefix(self):
|
||||||
|
"""Should preserve ID that doesn't have /local/ prefix."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = "foundryvtt"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == input_id # Should be unchanged
|
||||||
|
|
||||||
|
def test_strips_from_multiple_local_prefixes(self):
|
||||||
|
"""Should handle edge case of multiple prefixes."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = "/local//local/foundryvtt"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
# Should only strip first occurrence
|
||||||
|
assert result == "/local/foundryvtt"
|
||||||
|
|
||||||
|
def test_empty_string(self):
|
||||||
|
"""Empty string should remain empty."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = ""
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == input_id # Should be unchanged
|
||||||
|
|
||||||
|
def test_whitespace_only(self):
|
||||||
|
"""Whitespace only should remain whitespace (no /local/ to strip)."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = " \t\n"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == input_id
|
||||||
|
|
||||||
|
def test_case_sensitive_prefix(self):
|
||||||
|
"""Prefix matching is case-sensitive."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
# Lowercase - should strip
|
||||||
|
result1 = strip_local_prefix("/local/test")
|
||||||
|
assert result1 == "test"
|
||||||
|
|
||||||
|
# Uppercase - should NOT strip (not a match)
|
||||||
|
result2 = strip_local_prefix("/LOCAL/test")
|
||||||
|
assert result2 == "/LOCAL/test" # Unchanged
|
||||||
|
|
||||||
|
def test_partial_match_does_not_strip(self):
|
||||||
|
"""Only exact /local/ prefix is stripped, not partial matches."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
# Partial match - should NOT strip
|
||||||
|
input_id = "/local-docs/test"
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == input_id # Unchanged
|
||||||
|
|
||||||
|
# Different separator - should NOT strip
|
||||||
|
input_id2 = "/localdocs/test"
|
||||||
|
result2 = strip_local_prefix(input_id2)
|
||||||
|
assert result2 == input_id2
|
||||||
|
|
||||||
|
def test_prefix_with_trailing_slash(self):
|
||||||
|
"""Should handle trailing slash in ID."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = "/local/foundryvtt/"
|
||||||
|
expected_output = "foundryvtt/"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == expected_output
|
||||||
|
|
||||||
|
|
||||||
|
class TestMcpServerStructure:
    """Tests for MCP server tool structure (without starting the server)."""

    def test_import_fastmcp(self):
        """The ``fastmcp`` package exposes ``FastMCP`` when it is installed.

        The test is skipped when ``fastmcp`` cannot be imported. It fails,
        rather than skips, when the package imports but lacks ``FastMCP``.
        """
        # importorskip replaces the manual try/except ImportError + pytest.skip
        # and avoids binding an import that is never used.
        fastmcp = pytest.importorskip("fastmcp", reason="fastmcp not installed")

        assert hasattr(fastmcp, "FastMCP")
|
||||||
|
|
||||||
|
|
||||||
|
class TestMcpServerToolsExistence:
|
||||||
|
"""Tests to verify MCP server has expected tools defined."""
|
||||||
|
|
||||||
|
def test_mcp_instance_created(self):
|
||||||
|
"""MCP instance should be created with tools."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
assert mcp is not None
|
||||||
|
|
||||||
|
def test_resolve_library_id_tool_exists(self):
|
||||||
|
"""resolve-library-id tool should be registered."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
# Check if the tool exists by trying to access it
|
||||||
|
if hasattr(mcp, 'tools'):
|
||||||
|
tool_names = [t.name for t in mcp.tools]
|
||||||
|
assert "resolve_library_id" in tool_names
|
||||||
|
|
||||||
|
def test_get_library_docs_tool_exists(self):
|
||||||
|
"""get-library-docs tool should be registered."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
if hasattr(mcp, 'tools'):
|
||||||
|
tool_names = [t.name for t in mcp.tools]
|
||||||
|
assert "get_library_docs" in tool_names
|
||||||
|
|
||||||
|
def test_list_libraries_tool_exists(self):
|
||||||
|
"""list-libraries tool should be registered."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
if hasattr(mcp, 'tools'):
|
||||||
|
tool_names = [t.name for t in mcp.tools]
|
||||||
|
assert "list_libraries" in tool_names
|
||||||
|
|
||||||
|
def test_search_docs_tool_exists(self):
|
||||||
|
"""search-docs tool should be registered."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
if hasattr(mcp, 'tools'):
|
||||||
|
tool_names = [t.name for t in mcp.tools]
|
||||||
|
assert "search_docs" in tool_names
|
||||||
|
|
||||||
|
def test_refresh_library_tool_exists(self):
|
||||||
|
"""refresh-library tool should be registered."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
if hasattr(mcp, 'tools'):
|
||||||
|
tool_names = [t.name for t in mcp.tools]
|
||||||
|
assert "refresh_library" in tool_names
|
||||||
|
|
||||||
|
def test_sync_sources_tool_exists(self):
|
||||||
|
"""sync-sources tool should be registered."""
|
||||||
|
from mcp_server.server import mcp
|
||||||
|
|
||||||
|
if hasattr(mcp, 'tools'):
|
||||||
|
tool_names = [t.name for t in mcp.tools]
|
||||||
|
assert "sync_sources" in tool_names
|
||||||
|
|
||||||
|
|
||||||
|
class TestMcpServerStripPrefixIntegration:
|
||||||
|
"""Integration tests for strip_prefix usage in MCP server functions."""
|
||||||
|
|
||||||
|
def test_resolve_library_id_calls_strip_prefix(self):
|
||||||
|
"""resolve_library_id should handle /local/ prefix in responses."""
|
||||||
|
# This test verifies that the tool is available and uses the prefix correctly
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
# Verify the function exists and works
|
||||||
|
assert callable(strip_local_prefix)
|
||||||
|
|
||||||
|
# Test with sample IDs
|
||||||
|
test_ids = [
|
||||||
|
"/local/foundryvtt",
|
||||||
|
"/local/pytest",
|
||||||
|
"/local/mydocs/reference",
|
||||||
|
]
|
||||||
|
|
||||||
|
for lib_id in test_ids:
|
||||||
|
stripped = strip_local_prefix(lib_id)
|
||||||
|
assert not stripped.startswith("/local/")
|
||||||
|
|
||||||
|
|
||||||
|
class TestMcpServerPrefixHandlingVariations:
|
||||||
|
"""Additional tests for prefix handling variations."""
|
||||||
|
|
||||||
|
def test_long_library_id(self):
|
||||||
|
"""Should handle long library IDs with /local/ prefix."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = "/local/very-long-library-id-with-many-chars-in-name"
|
||||||
|
expected_output = "very-long-library-id-with-many-chars-in-name"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == expected_output
|
||||||
|
|
||||||
|
def test_special_characters_in_id(self):
|
||||||
|
"""Should handle special characters in library ID."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
# IDs can have underscores, dashes, numbers
|
||||||
|
input_id = "/local/my-doc_v2-3_test"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == "my-doc_v2-3_test"
|
||||||
|
|
||||||
|
def test_unicode_in_stripped_name(self):
|
||||||
|
"""Stripped name should preserve unicode characters."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
# Library IDs sometimes have unicode in them
|
||||||
|
input_id = "/local/世界文档" # Chinese characters
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == "世界文档"
|
||||||
|
|
||||||
|
def test_mixed_case_stripped_name(self):
|
||||||
|
"""Stripped name can have mixed case."""
|
||||||
|
from mcp_server.server import strip_local_prefix
|
||||||
|
|
||||||
|
input_id = "/local/FoundryVTT"
|
||||||
|
|
||||||
|
result = strip_local_prefix(input_id)
|
||||||
|
assert result == "FoundryVTT"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FIXTURES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def sample_library_ids():
    """Provide library IDs that all begin with the ``/local/`` prefix."""
    unprefixed = (
        "foundryvtt",
        "pytest",
        "mydocs/reference/guide.md",
        "my-app",
        "documentation/tutorial/getting-started",
    )
    return ["/local/" + name for name in unprefixed]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def expected_stripped_ids(sample_library_ids):
    """Provide the prefix-free counterparts of the sample library IDs.

    The values are written out literally rather than derived from
    ``sample_library_ids``; that fixture is requested but not read.
    """
    stripped = []
    stripped.append("foundryvtt")
    stripped.append("pytest")
    stripped.append("mydocs/reference/guide.md")
    stripped.append("my-app")
    stripped.append("documentation/tutorial/getting-started")
    return stripped
|
||||||
@@ -0,0 +1,368 @@
|
|||||||
|
"""
|
||||||
|
Tests for backend/app/search.py
|
||||||
|
|
||||||
|
These tests verify search functionality without requiring:
|
||||||
|
- A running Qdrant vector database (mocked)
|
||||||
|
- Loaded embedding models (mocked)
|
||||||
|
|
||||||
|
The tests focus on:
|
||||||
|
- Response shape validation
|
||||||
|
- Library filtering
|
||||||
|
- Error handling
|
||||||
|
- Async function behavior
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveLibraryId:
|
||||||
|
"""Tests for resolve_library_id() - Context7-style resolution."""
|
||||||
|
|
||||||
|
def test_returns_candidates_list(self, test_database):
|
||||||
|
"""resolve_library_id should return a list of candidates."""
|
||||||
|
from backend.app.search import resolve_library_id
|
||||||
|
|
||||||
|
# Create some libraries first
|
||||||
|
from backend.app.db import upsert_library
|
||||||
|
for i in range(3):
|
||||||
|
upsert_library(
|
||||||
|
library_id=f"/local/searchtest{i}",
|
||||||
|
name=f"Search Test Library {i}",
|
||||||
|
description=f"Description for search test {i}"
|
||||||
|
)
|
||||||
|
|
||||||
|
candidates = resolve_library_id("search")
|
||||||
|
|
||||||
|
assert isinstance(candidates, list)
|
||||||
|
|
||||||
|
def test_captures_matching_names(self, test_database):
|
||||||
|
"""Should capture libraries where query matches name."""
|
||||||
|
from backend.app.db import upsert_library
|
||||||
|
from backend.app.search import resolve_library_id
|
||||||
|
|
||||||
|
# Create a library that should match "search"
|
||||||
|
upsert_library(
|
||||||
|
library_id="/local/searchlib",
|
||||||
|
name="Search Library",
|
||||||
|
description="Main search documentation"
|
||||||
|
)
|
||||||
|
|
||||||
|
candidates = resolve_library_id("search")
|
||||||
|
|
||||||
|
assert isinstance(candidates, list)
|
||||||
|
|
||||||
|
def test_context7_style_prefix(self, test_database):
|
||||||
|
"""Candidates should have /local/ prefix added to ID."""
|
||||||
|
from backend.app.db import upsert_library
|
||||||
|
from backend.app.search import resolve_library_id
|
||||||
|
|
||||||
|
upsert_library(
|
||||||
|
library_id="foundryvtt", # Without /local/
|
||||||
|
name="Foundry VTT",
|
||||||
|
description="Fantasy tabletop virtual table"
|
||||||
|
)
|
||||||
|
|
||||||
|
candidates = resolve_library_id("foundry")
|
||||||
|
|
||||||
|
for candidate in candidates:
|
||||||
|
assert candidate.get("source") == "local"
|
||||||
|
|
||||||
|
def test_partial_name_match(self, test_database):
|
||||||
|
"""Should match on partial name."""
|
||||||
|
from backend.app.db import upsert_library
|
||||||
|
from backend.app.search import resolve_library_id
|
||||||
|
|
||||||
|
upsert_library(
|
||||||
|
library_id="/local/gamefoundry",
|
||||||
|
name="Foundry Game Module",
|
||||||
|
description="Module for foundry games"
|
||||||
|
)
|
||||||
|
|
||||||
|
candidates = resolve_library_id("game")
|
||||||
|
assert isinstance(candidates, list)
|
||||||
|
|
||||||
|
def test_empty_result_on_no_matches(self, test_database):
|
||||||
|
"""Should return empty list when no matches."""
|
||||||
|
from backend.app.search import resolve_library_id
|
||||||
|
|
||||||
|
# No libraries matching "xyznonexistent123"
|
||||||
|
candidates = resolve_library_id("xyznonexistent123")
|
||||||
|
|
||||||
|
assert isinstance(candidates, list)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchDocs:
|
||||||
|
"""Tests for search_docs() - semantic search with mocked vector store."""
|
||||||
|
|
||||||
|
def test_returns_results_list(self, mock_qdrant_client, test_database):
|
||||||
|
"""search_docs should return a list of results."""
|
||||||
|
from backend.app.search import search_docs
|
||||||
|
|
||||||
|
# Create some chunks first
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk
|
||||||
|
upsert_library(library_id="/local/searchdocslib", name="Search Docs Lib", description="Test")
|
||||||
|
|
||||||
|
for i in range(5):
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id=f"searchdoc-{i}",
|
||||||
|
library_id="/local/searchdocslib",
|
||||||
|
path=f"path{i}.md",
|
||||||
|
title=f"Section {i}",
|
||||||
|
content=f"# Section {i}\n\nContent about section {i} that matches search queries.",
|
||||||
|
chunk_index=i,
|
||||||
|
token_estimate=100
|
||||||
|
)
|
||||||
|
|
||||||
|
results = search_docs("section")
|
||||||
|
|
||||||
|
assert isinstance(results, list)
|
||||||
|
|
||||||
|
def test_empty_query_returns_empty_list(self):
|
||||||
|
"""Empty query should return empty results."""
|
||||||
|
from backend.app.search import search_docs
|
||||||
|
|
||||||
|
results = search_docs("")
|
||||||
|
assert isinstance(results, list)
|
||||||
|
|
||||||
|
def test_limit_parameter(self, mock_qdrant_client):
|
||||||
|
"""Limit parameter should affect result count."""
|
||||||
|
from backend.app.search import search_docs
|
||||||
|
|
||||||
|
results_10 = search_docs("test", limit=10)
|
||||||
|
results_5 = search_docs("test", limit=5)
|
||||||
|
|
||||||
|
assert isinstance(results_10, list)
|
||||||
|
assert isinstance(results_5, list)
|
||||||
|
|
||||||
|
def test_response_shape_matches_spec(self):
|
||||||
|
"""Verify response shape when mocked returns data."""
|
||||||
|
from unittest.mock import patch
|
||||||
|
from backend.app.search import search_docs
|
||||||
|
|
||||||
|
# Mock client to return formatted results
|
||||||
|
mock_results = [
|
||||||
|
{
|
||||||
|
"id": "test-id-1",
|
||||||
|
"score": 0.95,
|
||||||
|
"library_id": "/local/testlib",
|
||||||
|
"path": "docs/example.md",
|
||||||
|
"title": "Example Document",
|
||||||
|
"chunk_index": 0
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
with patch('backend.app.vector_store.get_client') as mock_get_client:
|
||||||
|
# Setup mock client to return our test data
|
||||||
|
mock_client = mock_get_client.return_value
|
||||||
|
mock_point = type('ScoredPoint', (), {
|
||||||
|
'score': 0.95,
|
||||||
|
'payload': {
|
||||||
|
"id": "test-id-1",
|
||||||
|
"library_id": "/local/testlib",
|
||||||
|
"path": "docs/example.md",
|
||||||
|
"title": "Example Document",
|
||||||
|
"chunk_index": 0
|
||||||
|
}
|
||||||
|
})()
|
||||||
|
mock_client.search.return_value = [mock_point]
|
||||||
|
|
||||||
|
results = search_docs("test query")
|
||||||
|
|
||||||
|
assert isinstance(results, list)
|
||||||
|
if results:
|
||||||
|
# Verify each result has expected fields
|
||||||
|
result = results[0]
|
||||||
|
assert "id" in result
|
||||||
|
assert "score" in result
|
||||||
|
assert "library_id" in result
|
||||||
|
assert "path" in result
|
||||||
|
assert "title" in result
|
||||||
|
assert "chunk_index" in result
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetLibraryDocs:
|
||||||
|
"""Tests for get_library_docs() - document retrieval."""
|
||||||
|
|
||||||
|
def test_returns_empty_string_when_no_documents(self, mock_qdrant_client):
|
||||||
|
"""Should return empty/error when no docs exist."""
|
||||||
|
from backend.app.search import get_library_docs
|
||||||
|
|
||||||
|
result = get_library_docs("/local/nonexistent")
|
||||||
|
|
||||||
|
# Either returns empty string or error message
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
def test_returns_content_when_documents_exist(self, mock_qdrant_client):
|
||||||
|
"""Should return combined document content."""
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk
|
||||||
|
from backend.app.search import get_library_docs
|
||||||
|
|
||||||
|
# Create library with chunks
|
||||||
|
upsert_library(library_id="/local/docretrievetest", name="Doc Retrieve", description="Test")
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id="doc-retrieve-1",
|
||||||
|
library_id="/local/docretrievetest",
|
||||||
|
path="docs/getting-started.md",
|
||||||
|
title="Getting Started",
|
||||||
|
content="# Getting Started\n\nWelcome to the documentation. This is a test document.",
|
||||||
|
chunk_index=0,
|
||||||
|
token_estimate=200
|
||||||
|
)
|
||||||
|
|
||||||
|
result = get_library_docs("/local/docretrievetest")
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
# Should contain at least library title or content
|
||||||
|
|
||||||
|
def test_topic_filter_searches(self, mock_qdrant_client):
|
||||||
|
"""With topic filter, should search for relevant chunks."""
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk
|
||||||
|
from backend.app.search import get_library_docs
|
||||||
|
|
||||||
|
upsert_library(library_id="/local/topicsearchlib", name="Topic Search", description="Test")
|
||||||
|
|
||||||
|
# Add documents with different topics
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id="topic-install",
|
||||||
|
library_id="/local/topicsearchlib",
|
||||||
|
path="docs/install.md",
|
||||||
|
title="Installation Guide",
|
||||||
|
content="# Installation\n\nInstall with pip install mypackage.",
|
||||||
|
chunk_index=0,
|
||||||
|
token_estimate=150
|
||||||
|
)
|
||||||
|
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id="topic-usage",
|
||||||
|
library_id="/local/topicsearchlib",
|
||||||
|
path="docs/usage.md",
|
||||||
|
title="Usage Guide",
|
||||||
|
content="# Usage\n\nUse mycommand --help for help.",
|
||||||
|
chunk_index=0,
|
||||||
|
token_estimate=150
|
||||||
|
)
|
||||||
|
|
||||||
|
# Search for "install" topic
|
||||||
|
result = get_library_docs("/local/topicsearchlib", topic="install")
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
def test_token_limit_respected(self):
|
||||||
|
"""Token limit should truncate content appropriately."""
|
||||||
|
from backend.app.search import get_library_docs
|
||||||
|
|
||||||
|
# Create a library with lots of content
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk
|
||||||
|
|
||||||
|
upsert_library(library_id="/local/tokenlimittest", name="Token Limit", description="Test")
|
||||||
|
|
||||||
|
long_content = "# Long Content\n\n" + " ".join(["word"] * 500)
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id="long-doc",
|
||||||
|
library_id="/local/tokenlimittest",
|
||||||
|
path="docs/long.md",
|
||||||
|
title="Long Document",
|
||||||
|
content=long_content,
|
||||||
|
chunk_index=0,
|
||||||
|
token_estimate=2000
|
||||||
|
)
|
||||||
|
|
||||||
|
# Request with small token limit
|
||||||
|
result = get_library_docs("/local/tokenlimittest", token_limit=100)
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
|
||||||
|
class TestGetLibraryDocsWithMock:
|
||||||
|
"""Tests that verify content retrieval when mocked data is available."""
|
||||||
|
|
||||||
|
def test_retrieves_chunks_by_library_id(self, mock_qdrant_client):
|
||||||
|
"""get_library_docs without topic should fetch all chunks for library."""
|
||||||
|
from backend.app.db import upsert_library, insert_document_chunk
|
||||||
|
from backend.app.search import get_library_docs
|
||||||
|
|
||||||
|
upsert_library(library_id="/local/mockretrievetest", name="Mock Retrieve", description="Test")
|
||||||
|
|
||||||
|
for i in range(3):
|
||||||
|
insert_document_chunk(
|
||||||
|
doc_id=f"mock-retrieve-{i}",
|
||||||
|
library_id="/local/mockretrievetest",
|
||||||
|
path=f"path{i}.md",
|
||||||
|
title=f"Path {i}",
|
||||||
|
content=f"Content for path {i}.",
|
||||||
|
chunk_index=i,
|
||||||
|
token_estimate=50
|
||||||
|
)
|
||||||
|
|
||||||
|
result = get_library_docs("/local/mockretrievetest")
|
||||||
|
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchErrorHandling:
|
||||||
|
"""Tests for error handling in search functions."""
|
||||||
|
|
||||||
|
def test_search_handles_missing_library(self):
|
||||||
|
"""Should handle missing library gracefully."""
|
||||||
|
from backend.app.search import search_docs
|
||||||
|
|
||||||
|
results = search_docs("test", library_id="/local/missing_lib_xyz123")
|
||||||
|
assert isinstance(results, list)
|
||||||
|
|
||||||
|
def test_resolve_handles_no_libraries_in_db(self):
|
||||||
|
"""Should handle empty database gracefully."""
|
||||||
|
from backend.app.db import init_db
|
||||||
|
from backend.app.search import resolve_library_id
|
||||||
|
|
||||||
|
# Initialize fresh DB (empty)
|
||||||
|
from backend.app.db import get_connection, get_chunks_for_library
|
||||||
|
# The test_database fixture already does this
|
||||||
|
|
||||||
|
def test_get_library_docs_handles_empty_library(self):
|
||||||
|
"""Should handle library with no chunks."""
|
||||||
|
from backend.app.search import get_library_docs
|
||||||
|
|
||||||
|
result = get_library_docs("/local/emptylib")
|
||||||
|
assert isinstance(result, str)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# FIXTURES FOR SEARCH TESTS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
@pytest.fixture
def search_sample_text():
    """Provide a multi-section markdown document with headings and code fences."""
    # One entry per line of the document; joined with "\n" and with no
    # trailing newline after the last line.
    lines = [
        '# Installation Guide',
        '',
        'To install the package:',
        '```bash',
        'pip install mypackage',
        '```',
        '',
        '## Configuration',
        '',
        'Configure your environment by setting these variables:',
        '- MY_VAR=123',
        '- DEBUG=true',
        '',
        '## Usage Examples',
        '',
        'Example 1: Basic usage',
        '```python',
        'import mymodule',
        'module = mymodule.Module()',
        'result = module.run()',
        'print(result)',
        '```',
        '',
        'Example 2: Advanced usage with options',
        '```python',
        'options = {"verbose": True, "output": "stdout"}',
        'result = module.run(options=options)',
        '```',
        '',
        '## Troubleshooting',
        '',
        'Common issues and their solutions:',
        '- ImportError: Ensure package is installed',
        '- AttributeError: Check that attributes exist on object',
    ]
    return "\n".join(lines)
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
# Context7 Docs WebUI Configuration
|
||||||
|
# Copy this file to .env and configure for your environment
|
||||||
|
|
||||||
|
# === Ports (optional - use if you need custom ports) ===
|
||||||
|
# docs-api port (default: 8787)
HOST_PORT=8787
|
||||||
|
# docs-mcp port (default: 8788)
MCP_HOST_PORT=8788
|
||||||
|
# WebUI port (default: 8790)
WEBUI_PORT=8790
|
||||||
|
|
||||||
|
# === API Keys (optional - uncomment to enable auth) ===
|
||||||
|
# Docs API key for protecting endpoints like /search, /ingest, etc.
|
||||||
|
# DOCS_API_KEY=your-secret-docs-api-key
|
||||||
|
|
||||||
|
# WebUI API key (optional - separate from docs-api for UI authentication)
|
||||||
|
# DOCS_WEBUI_API_KEY=your-webui-api-key
|
||||||
|
|
||||||
|
# === Application Configuration ===
|
||||||
|
# Path to documentation files (relative to service container)
|
||||||
|
DOCS_PATH=/docs
|
||||||
|
|
||||||
|
# SQLite database path
|
||||||
|
DB_PATH=/data/db.sqlite
|
||||||
|
|
||||||
|
# Logging level: DEBUG, INFO, WARNING, ERROR
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# === Vector Store ===
|
||||||
|
# Qdrant host and port (internal Docker network)
|
||||||
|
VECTOR_STORE_HOST=qdrant
|
||||||
|
VECTOR_STORE_PORT=6333
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
# WebUI Dockerfile
# Builds the image that serves the WebUI with uvicorn on port 8790.
FROM python:3.12-slim

# Python runtime switches: no .pyc files, unbuffered stdout/stderr.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Default location of the docs-api backend.
ENV DOCS_API_URL=http://docs-api:8787

WORKDIR /app

# Install dependencies before copying the application code.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# The application package lands in /app/webui, importable as `webui` from WORKDIR.
COPY app /app/webui

# Make sure the uploads directory exists inside the image.
RUN mkdir -p /app/webui/templates/uploads

EXPOSE 8790

CMD ["uvicorn", "webui.main:app", "--host", "0.0.0.0", "--port", "8790"]
|
||||||
@@ -0,0 +1,72 @@
|
|||||||
|
"""Async docs-api client for the WebUI."""
|
||||||
|
import os
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
|
|
||||||
|
from httpx import AsyncClient, Timeout
|
||||||
|
|
||||||
|
|
||||||
|
class DocsAPIClient:
    """Small async HTTP client for the docs-api backend.

    The underlying ``httpx.AsyncClient`` is created lazily on first use and is
    re-created if it has been closed.
    """

    def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None):
        """Configure the client.

        Args:
            base_url: Backend base URL. Falls back to ``DOCS_API_URL`` and then
                to ``http://docs-api:8787``. Trailing slashes are stripped.
            api_key: Value for the ``X-API-Key`` header. When ``None`` it is
                read from ``WEBUI_API_KEY`` and, if that is unset or empty,
                from ``DOCS_WEBUI_API_KEY``.
        """
        self.base_url = (base_url or os.environ.get("DOCS_API_URL", "http://docs-api:8787")).rstrip("/")
        if api_key is None:
            api_key = os.environ.get("WEBUI_API_KEY")
            if not api_key:
                # The WebUI .env template documents the key as DOCS_WEBUI_API_KEY,
                # so honour that name too instead of silently running without auth.
                api_key = os.environ.get("DOCS_WEBUI_API_KEY", api_key)
        self.api_key = api_key
        # No header at all when the key is missing or empty.
        self.headers = {"X-API-Key": self.api_key} if self.api_key else {}
        self._client: Optional[AsyncClient] = None

    async def _get_client(self) -> "AsyncClient":
        """Return the shared AsyncClient, building a new one if absent or closed."""
        if self._client is None or self._client.is_closed:
            self._client = AsyncClient(
                base_url=self.base_url,
                headers=self.headers,
                timeout=Timeout(120.0),
            )
        return self._client

    async def request(self, method: str, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Send a request and normalise the response to a dict.

        Args:
            method: HTTP method name.
            path: Request path, resolved against ``base_url``.
            **kwargs: Passed through to ``AsyncClient.request``.

        Returns:
            The decoded JSON object when the response is JSON and a dict;
            otherwise ``{"data": <decoded JSON or response text>}``.

        Raises:
            RuntimeError: If the response status code is 400 or higher.
        """
        client = await self._get_client()
        resp = await client.request(method, path, **kwargs)
        if resp.status_code >= 400:
            raise RuntimeError(f"{method} {path} failed: {resp.status_code} {resp.text}")
        if resp.headers.get("content-type", "").startswith("application/json"):
            data = resp.json()
            return data if isinstance(data, dict) else {"data": data}
        return {"data": resp.text}

    async def get(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a GET request; see ``request``."""
        return await self.request("GET", path, **kwargs)

    async def post(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a POST request; see ``request``."""
        return await self.request("POST", path, **kwargs)

    async def delete(self, path: str, **kwargs: Any) -> Dict[str, Any]:
        """Issue a DELETE request; see ``request``."""
        return await self.request("DELETE", path, **kwargs)

    async def health(self) -> Dict[str, Any]:
        """Query ``/health``; never raises.

        Returns:
            The backend response, or ``{"status": "error", "message": ...}``
            when the request fails for any reason.
        """
        try:
            return await self.get("/health")
        except Exception as e:
            return {"status": "error", "message": str(e)}

    async def upload_file(self, library_id: str, filename: str, content: bytes) -> Dict[str, Any]:
        """Upload one file as multipart form field ``file`` to ``/api/v1/upload/{library_id}``."""
        files = {"file": (filename, content)}
        return await self.post(f"/api/v1/upload/{library_id}", files=files)

    async def close(self) -> None:
        """Close the underlying AsyncClient if it exists and is still open."""
        if self._client is not None and not self._client.is_closed:
            await self._client.aclose()
|
||||||
|
|
||||||
|
|
||||||
|
# Module-wide client shared by get_client() and close_client(); created on first use.
_client_instance: Optional[DocsAPIClient] = None


async def get_client() -> DocsAPIClient:
    """Return the shared DocsAPIClient, constructing it on the first call."""
    global _client_instance
    if _client_instance is not None:
        return _client_instance
    _client_instance = DocsAPIClient()
    return _client_instance
|
||||||
|
|
||||||
|
|
||||||
|
async def close_client() -> None:
    """Close the shared client, if one has been created."""
    shared = _client_instance
    if shared is None:
        return
    await shared.close()
|
||||||
@@ -0,0 +1,17 @@
|
|||||||
|
"""WebUI configuration."""
|
||||||
|
import os
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
class Settings:
    """WebUI settings.

    The connection values are read from the process environment once, when
    this module is imported; the remaining values are fixed defaults.
    """

    # Core API connection
    # Base URL of the docs-api backend (env: DOCS_API_URL).
    DOCS_API_URL: str = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
    # API key sent to the backend, or None when unset (env: WEBUI_API_KEY).
    WEBUI_API_KEY: Optional[str] = os.environ.get("WEBUI_API_KEY")

    # Default parameters for common operations
    DEFAULT_SEARCH_LIMIT: int = 10
    DEFAULT_RESULT_TOKENS: int = 8000


# Shared settings instance for importers of this module.
settings = Settings()
|
||||||
@@ -0,0 +1,259 @@
|
|||||||
|
"""WebUI FastAPI application."""
|
||||||
|
import html
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from fastapi import FastAPI, File, Form, Request, UploadFile
|
||||||
|
from fastapi.responses import HTMLResponse, RedirectResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
|
||||||
|
from .api_client import DocsAPIClient
|
||||||
|
|
||||||
|
|
||||||
|
# Directory of this package; templates/ and static/ live next to this module.
_PACKAGE_DIR = os.path.dirname(__file__)

app = FastAPI(
    title="Context7 Docs WebUI",
    description="Web dashboard for managing documentation system",
    version="1.0.0",
)


def _escape_for_template(value):
    """HTML-escape ``value`` after converting it to text; falsy values become ""."""
    return html.escape(str(value or ""))


templates = Jinja2Templates(directory=os.path.join(_PACKAGE_DIR, "templates"))
# Exposed to templates under the name "escapeHtml".
templates.env.globals["escapeHtml"] = _escape_for_template

app.mount(
    "/static",
    StaticFiles(directory=os.path.join(_PACKAGE_DIR, "static")),
    name="static",
)

# Lazily created backend client; see get_client().
_client: Optional[DocsAPIClient] = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_client() -> DocsAPIClient:
    """Return the module-wide DocsAPIClient, creating it on first use.

    The URL comes from ``DOCS_API_URL`` (default ``http://docs-api:8787``) and
    the key from ``WEBUI_API_KEY``.
    """
    global _client
    if _client is not None:
        return _client
    api_url = os.environ.get("DOCS_API_URL", "http://docs-api:8787")
    api_key = os.environ.get("WEBUI_API_KEY")
    _client = DocsAPIClient(api_url, api_key)
    return _client
|
||||||
|
|
||||||
|
|
||||||
|
@app.on_event("shutdown")
async def shutdown() -> None:
    """Close the backend client on application shutdown, if one was created."""
    active = _client
    if active is None:
        return
    await active.close()
|
||||||
|
|
||||||
|
|
||||||
|
def page(title: str, body: str) -> HTMLResponse:
    """Wrap an HTML fragment in a minimal standalone document.

    Args:
        title: Page title; HTML-escaped here.
        body: HTML inserted verbatim into ``<body>``; callers must escape any
            untrusted text before passing it in.
    """
    safe_title = html.escape(title)
    document = (
        "<!DOCTYPE html>\n"
        f'<html><head><meta charset="UTF-8"><title>{safe_title}</title></head>\n'
        f'<body style="font-family:sans-serif;padding:20px;">{body}</body></html>'
    )
    return HTMLResponse(document)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/")
async def dashboard(request: Request):
    """Render the dashboard: backend health, total vector count, library list.

    A failure while fetching collections or libraries is swallowed and shown
    as 0 vectors / no libraries, so the page still renders.
    """
    api = get_client()
    health = await api.health()

    try:
        collections_data = await api.get("/collections")
        per_collection = collections_data.get("collections", {}).values()
        # Entries that are not dicts are skipped.
        total_vectors = sum(
            entry.get("vectors", 0) for entry in per_collection if isinstance(entry, dict)
        )
    except Exception:
        total_vectors = 0

    try:
        libraries = (await api.get("/libraries")).get("libraries", [])
    except Exception:
        libraries = []

    context = {
        "request": request,
        "health": health,
        "vectors": total_vectors,
        "libraries": libraries,
    }
    return templates.TemplateResponse("dashboard.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/actions/ingest-all")
async def ingest_all():
    """Trigger ingestion of every library and show the backend's reply or error."""
    api = get_client()
    try:
        outcome = await api.post("/ingest/all")
    except Exception as exc:
        heading, detail = "Ingestion Failed", str(exc)
    else:
        heading, detail = "Ingestion Complete", str(outcome)
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/'>Back</a>"
    return page("Ingestion", body)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/actions/sync-sources")
async def sync_sources_action():
    """Dashboard shortcut: request a source sync with override disabled."""
    api = get_client()
    try:
        outcome = await api.post("/sources/sync", json={"override": False})
    except Exception as exc:
        heading, detail = "Git Sync Failed", str(exc)
    else:
        heading, detail = "Git Sync Complete", str(outcome)
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/'>Back</a>"
    return page("Git Sync", body)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/libraries")
async def libraries(request: Request):
    """Render the libraries page; a backend failure yields an empty list."""
    api = get_client()
    try:
        libraries_data = (await api.get("/libraries")).get("libraries", [])
    except Exception:
        libraries_data = []
    context = {"request": request, "data": libraries_data}
    return templates.TemplateResponse("libraries.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/libraries/create")
async def create_library(
    library_id: str = Form(...),
    name: str = Form(...),
    description: Optional[str] = Form(None),
):
    """Create a library from the submitted form and show the outcome.

    The library id is stripped of surrounding whitespace; a missing
    description is sent as an empty string.
    """
    api = get_client()
    endpoint = f"/api/v1/libraries/{library_id.strip()}"
    form_fields = {"name": name, "description": description or ""}
    try:
        outcome = await api.post(endpoint, data=form_fields)
    except Exception as exc:
        heading, detail = "Create Failed", str(exc)
    else:
        heading, detail = "Library Created", str(outcome)
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/libraries'>Back</a>"
    return page("Library Created", body)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/libraries/{library_id}/ingest")
async def ingest_library(library_id: str):
    """Trigger ingestion of one library and show the backend's reply or error."""
    api = get_client()
    try:
        outcome = await api.post(f"/ingest/{library_id}")
    except Exception as exc:
        heading, detail = "Ingestion Failed", str(exc)
    else:
        heading, detail = "Ingestion Complete", str(outcome)
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/libraries'>Back</a>"
    return page("Ingest Library", body)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/libraries/{library_id}/delete")
async def delete_library(library_id: str):
    """Delete one library via the backend and show the reply or error."""
    api = get_client()
    try:
        outcome = await api.delete(f"/api/v1/libraries/{library_id}")
    except Exception as exc:
        heading, detail = "Delete Failed", str(exc)
    else:
        heading, detail = "Library Deleted", str(outcome)
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/libraries'>Back</a>"
    return page("Delete Library", body)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/libraries/{library_id}/docs")
async def view_library_docs(library_id: str):
    """Show the documentation text the backend returns for one library.

    On a backend failure the error message is shown in place of the content.
    """
    client = get_client()
    try:
        result = await client.get(f"/docs/{library_id}")
        content = result.get("content")
        if content is None:
            # DocsAPIClient.request wraps non-JSON and non-dict responses under
            # "data"; show that rather than an empty page.
            content = result.get("data", "")
    except Exception as e:
        content = str(e)
    if not isinstance(content, str):
        # html.escape() only accepts str; a structured payload used to raise here.
        content = str(content)
    return page(
        f"Docs: {library_id}",
        f"<h1>{html.escape(library_id)}</h1><pre>{html.escape(content)}</pre><a href='/libraries'>Back</a>",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/upload")
async def upload_form(request: Request):
    """Render the upload form; a backend failure yields an empty library list."""
    api = get_client()
    try:
        libraries = (await api.get("/libraries")).get("libraries", [])
    except Exception:
        libraries = []
    context = {"request": request, "libraries": libraries}
    return templates.TemplateResponse("upload.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/upload")
async def upload_file(
    request: Request,
    library_id: str = Form(""),
    ingest_after_upload: Optional[str] = Form(None),
    files: List[UploadFile] = File(...),
):
    """Forward uploaded files to the backend and optionally ingest them.

    Args:
        request: Incoming request, passed to the template.
        library_id: Target library. When blank, each file goes to a library
            named after its own filename stem (lower-cased, spaces replaced
            with "-"), or "uploaded" if that is empty.
        ingest_after_upload: The string "on" (a ticked checkbox) requests
            ingestion after the uploads.
        files: The uploaded files.

    Each file produces one result entry. Ingestion runs once per distinct
    library that received at least one successful upload, and produces one
    "__INGEST__" entry per library.
    """
    client = get_client()
    results = []
    total_size = 0
    requested_library = library_id.strip()
    # Libraries with a successful upload, in first-seen order, without repeats.
    libraries_to_ingest: List[str] = []

    for upload in files:
        filename = upload.filename or "upload.txt"
        target_library = (
            requested_library
            or Path(filename).stem.lower().replace(" ", "-")
            or "uploaded"
        )

        try:
            contents = await upload.read()
            total_size += len(contents)
            result = await client.upload_file(target_library, filename, contents)
        except Exception as e:
            results.append({"filename": filename, "status": "error", "message": str(e)})
            continue

        results.append({"filename": filename, "status": "success", "message": result})

        # Prefer the id the backend reports. Fall back to the id the file was
        # sent to, so a response without "library_id" no longer raises KeyError.
        reported = result.get("library_id") if isinstance(result, dict) else None
        ingest_target = str(reported or target_library)
        if ingest_target not in libraries_to_ingest:
            libraries_to_ingest.append(ingest_target)

    if ingest_after_upload == "on":
        for ingest_target in libraries_to_ingest:
            try:
                ingest_result = await client.post(f"/ingest/{ingest_target}")
                results.append({"filename": "__INGEST__", "status": "success", "message": ingest_result})
            except Exception as e:
                results.append({"filename": "__INGEST__", "status": "error", "message": str(e)})

    return templates.TemplateResponse(
        "upload.html",
        {"request": request, "libraries": [], "results": results, "total_size_bytes": total_size},
    )
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/search")
async def search_form(request: Request):
    """Render the search page with an empty query and no results."""
    empty_context = {"request": request, "query": "", "results": []}
    return templates.TemplateResponse("search.html", empty_context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/search/results")
async def search_results(request: Request, q: str = "", limit: int = 10):
    """Run a search across all libraries and render the hits.

    An empty query skips the backend call; a backend failure is rendered as
    an empty result list.
    """
    api = get_client()
    hits = []
    if q:
        search_request = {"query": q, "library_id": None, "limit": limit}
        try:
            hits = (await api.post("/search", json=search_request)).get("results", [])
        except Exception:
            hits = []
    context = {"request": request, "query": q, "results": hits, "limit": limit}
    return templates.TemplateResponse("search.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/sources")
async def sources_page(request: Request):
    """Render the sources page; a backend failure yields an empty list."""
    api = get_client()
    try:
        sources = (await api.get("/api/v1/sources")).get("sources", [])
    except Exception:
        sources = []
    context = {"request": request, "sources": sources}
    return templates.TemplateResponse("sources.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/sources/sync")
async def sync_sources(override: bool = Form(False)):
    """Request a source sync, forwarding the submitted ``override`` flag."""
    api = get_client()
    try:
        outcome = await api.post("/sources/sync", json={"override": override})
    except Exception as exc:
        heading, detail = "Git Sync Failed", str(exc)
    else:
        heading, detail = "Git Sync Complete", str(outcome)
    body = f"<h1>{heading}</h1><pre>{html.escape(detail)}</pre><a href='/sources'>Back</a>"
    return page("Git Sync", body)
|
||||||
@@ -0,0 +1,159 @@
|
|||||||
|
// WebUI Static JavaScript Utilities
|
||||||
|
// Simple helper functions shared across templates
|
||||||
|
|
||||||
|
/**
 * Escape text for safe insertion into HTML, to prevent XSS when displaying
 * user content.
 *
 * Replaces & < > " ' with their entities, so the result is safe in element
 * content and in quoted attribute values. Uses no DOM APIs.
 *
 * @param {string} text - Text to escape.
 * @returns {string} The escaped text, or "" when `text` is not a string.
 */
function escapeHtml(text) {
    if (typeof text !== 'string') return "";
    var entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#039;'
    };
    return text.replace(/[&<>"']/g, function(ch) {
        return entities[ch];
    });
}
|
||||||
|
|
||||||
|
/**
 * Format a number with the default locale's grouping separators.
 * The value is rounded down to an integer first.
 *
 * @param {number|null|undefined} num - Value to format.
 * @returns {string} Formatted integer, or "N/A" for null/undefined.
 */
function formatNumber(num) {
    var missing = (num === null || num === undefined);
    if (missing) {
        return "N/A";
    }
    var whole = Math.floor(num);
    var formatter = new Intl.NumberFormat();
    return formatter.format(whole);
}
|
||||||
|
|
||||||
|
/**
 * Replace an element's contents with a "Loading..." placeholder.
 * Does nothing when no element has the given id.
 *
 * @param {string} elementId - id of the container element.
 */
function showLoading(elementId) {
    var target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = '<div class="loading-spinner">Loading...</div>';
}
|
||||||
|
|
||||||
|
/**
 * Clear an element's contents, removing any loading placeholder.
 * Does nothing when no element has the given id.
 *
 * @param {string} elementId - id of the container element.
 */
function hideLoading(elementId) {
    var target = document.getElementById(elementId);
    if (!target) {
        return;
    }
    target.innerHTML = "";
}
|
||||||
|
|
||||||
|
/**
 * Show a toast notification in the bottom-right corner.
 * The toast fades after 3 seconds and is removed 200 ms later.
 *
 * @param {string} message - Text to display (set via textContent).
 * @param {string} [type] - CSS class suffix added after "toast"; defaults to "info".
 */
function showToast(message, type) {
    var kind = type || 'info';
    var node = document.createElement('div');
    node.className = 'toast ' + kind;
    node.textContent = message;
    node.style.cssText = [
        'position:fixed', 'bottom:20px', 'right:20px',
        'padding:12px 20px', 'border-radius:4px', 'margin-bottom:10px',
        'background:#333', 'color:white', 'font-size:0.9rem', 'z-index:1000'
    ].join(';');
    document.body.appendChild(node);

    function removeToast() {
        node.remove();
    }

    function fadeOut() {
        node.style.opacity = '0';
        setTimeout(removeToast, 200);
    }

    setTimeout(fadeOut, 3000);
}
|
||||||
|
|
||||||
|
/**
 * Show an error toast; the message is prefixed with "Error: ".
 *
 * @param {string} message - Error description.
 */
function showError(message) {
    var text = "Error: " + message;
    showToast(text, "error");
}
|
||||||
|
|
||||||
|
/**
 * Show a success toast; the message is prefixed with "Success: ".
 *
 * @param {string} message - Success description.
 */
function showSuccess(message) {
    var text = "Success: " + message;
    showToast(text, "success");
}
|
||||||
|
|
||||||
|
/**
 * Make an API request with error handling.
 *
 * Reads `window.webuiConfig.apiUrl` (base URL) and, when set,
 * `window.webuiConfig.apiKey` (sent as the X-API-Key header).
 *
 * @param {string} endpoint - Path relative to the base URL; leading slashes
 *     are dropped so the joined URL never contains "//".
 * @param {string} [method='GET'] - HTTP method. Only 'POST' sends a body.
 * @param {*} [data=null] - Value serialised as the JSON body for POST.
 * @returns {Promise<*>} Parsed JSON for JSON responses, otherwise the text.
 * @throws {Error} When the config is missing, the request fails, or the
 *     response status is not OK.
 */
async function apiRequest(endpoint, method = 'GET', data = null) {
    const config = window.webuiConfig;
    if (!config || typeof config.apiUrl !== 'string') {
        // Fail with a clear message instead of a TypeError on `config.apiUrl`.
        throw new Error('window.webuiConfig.apiUrl is not configured');
    }

    const base = config.apiUrl.endsWith('/') ? config.apiUrl : config.apiUrl + '/';
    const url = base + String(endpoint).replace(/^\/+/, '');

    const headers = {};
    if (config.apiKey) {
        headers['X-API-Key'] = config.apiKey;
    }

    const options = { method: method, headers: headers };
    if (method === 'POST') {
        // Without this header fetch labels a string body as text/plain.
        headers['Content-Type'] = 'application/json';
        options.body = JSON.stringify(data);
    }

    try {
        const response = await fetch(url, options);

        if (!response.ok) {
            // statusText can be empty; fall back to the numeric status.
            throw new Error(response.statusText || ('HTTP ' + response.status));
        }

        const contentType = response.headers.get('content-type');
        if (contentType && contentType.includes('application/json')) {
            return await response.json();
        }
        return await response.text();
    } catch (err) {
        console.error('API request failed:', err);
        throw err;
    }
}
|
||||||
|
|
||||||
|
/**
 * Placeholder for tooltip initialisation; currently does nothing.
 */
function initTooltips() {
    // Add tooltip functionality here if needed
    return;
}
|
||||||
|
|
||||||
|
/**
 * Debounce function for input handling.
 *
 * Returns a wrapper that delays calling `func` until `wait` milliseconds
 * have passed since the wrapper was last called. Only the last call in a
 * burst runs, with that call's `this` and arguments.
 *
 * @param {Function} func - Function to debounce.
 * @param {number} wait - Quiet period in milliseconds.
 * @returns {Function} The debounced wrapper.
 */
function debounce(func, wait) {
    var timeout;
    return function executedFunction(...args) {
        // Capture the caller's `this`; inside the timer callback it would be lost.
        var context = this;
        // Cancel the pending call so only the latest one fires.
        clearTimeout(timeout);
        timeout = setTimeout(function() {
            timeout = undefined;
            func.apply(context, args);
        }, wait);
    };
}
|
||||||
|
|
||||||
|
// Export to window for use in templates
Object.assign(window, {
    escapeHtml: escapeHtml,
    formatNumber: formatNumber,
    showToast: showToast,
    showError: showError,
    showSuccess: showSuccess
});
|
||||||
@@ -0,0 +1,395 @@
|
|||||||
|
/* Page shell */
.container {
  margin: 0 auto;
  max-width: 1000px;
  padding: 20px;
}

header {
  margin-bottom: 20px;
  padding-bottom: 15px;
  border-bottom: 1px solid #ccc;
}

header h1 {
  font-size: 1.5rem;
  margin: 0 0 10px;
}

/* Top navigation */
nav {
  display: flex;
  gap: 15px;
}

nav a {
  color: #0066cc;
  font-size: 0.9rem;
  text-decoration: none;
}

nav a.active {
  text-decoration: underline;
  font-weight: bold;
}

main h2 {
  margin-bottom: 15px;
}

footer {
  margin-top: 40px;
  padding-top: 15px;
  border-top: 1px solid #ccc;
  color: #666;
  font-size: 0.8rem;
}

/* Status blocks */
.status-card {
  margin-bottom: 15px;
  padding: 20px;
  background: #f5f5f5;
  border-left: 4px solid #00c467;
  border-radius: 8px;
}

.status-message {
  margin: 5px 0;
  padding: 10px;
  background: #e8f4fd;
  border-radius: 4px;
}

/* Wrapped code listing; scrolls horizontally if a line still overflows */
pre.code-block {
  padding: 15px;
  background: #f5f5f5;
  border-radius: 4px;
  white-space: pre-wrap;
  word-break: break-word;
  overflow-x: auto;
}
|
||||||
|
|
||||||
|
/* Tables */
.library-table {
  margin-top: 10px;
  width: 100%;
  border-collapse: collapse;
}

.library-table th, .library-table td {
  text-align: left;
  padding: 10px;
  border-bottom: 1px solid #ddd;
}

.library-table th {
  font-weight: bold;
  background: #f5f5f5;
}

/* Forms */
form input[type="text"], form textarea, form select {
  margin-right: 10px;
  margin-bottom: 10px;
  padding: 8px;
  border: 1px solid #ccc;
  border-radius: 4px;
}

button {
  padding: 10px 20px;
  color: white;
  background: #0066cc;
  border: none;
  border-radius: 4px;
  cursor: pointer;
}

button:hover {
  background: #0055aa;
}

/* Upload form */
.upload-form, .search-form, .sync-form {
  max-width: 600px;
}

/* Search results */
.results-count {
  margin-bottom: 15px;
  padding: 10px;
  background: #e8f4fd;
  border-radius: 4px;
}

.result-card {
  margin: 10px 0;
  padding: 15px;
  background: #fff;
  border: 1px solid #ddd;
  border-radius: 4px;
}

.result-card h3 {
  margin: 0 0 8px;
}

/* Results box */
.results-box {
  max-height: 600px;
  overflow-y: auto;
}

.results-box .new-search-link {
  display: block;
  margin-top: 15px;
  text-align: center;
}

/* Source cards */
.source-cards {
  display: grid;
  gap: 10px;
}

.source-card {
  padding: 15px;
  background: #f5f5f5;
  border-left: 4px solid #666;
  border-radius: 4px;
}

.status-message code {
  padding: 2px 6px;
  color: #fff;
  background: #333;
  border-radius: 3px;
}

.hint {
  margin-top: 15px;
  color: #666;
  font-size: 0.85rem;
}

.results-box .error {
  color: #cc0000;
  font-weight: bold;
}

/* Applies to every <pre>, not only those inside source lists */
.source-list, .source-cards, pre {
  white-space: normal;
}
|
||||||
|
|
||||||
|
/* Status cards grid */
.status-cards {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
  gap: 15px;
  margin-bottom: 20px;
}

.status-card h3 {
  margin: 0 0 8px 0;
  font-size: 0.9rem;
  color: #555;
}

.status-card p {
  margin: 0;
  font-size: 1.2rem;
  font-weight: bold;
}

/* Message box */
.message-box {
  background: #e8f4fd;
  padding: 12px;
  border-radius: 6px;
  margin-bottom: 20px;
  border-left: 4px solid #3b82f6;
}

/* Action buttons */
.action-buttons {
  display: flex;
  gap: 15px;
  margin-bottom: 20px;
}

/* Shared button shape; colours come from the .btn-* modifier classes */
.btn {
  padding: 10px 20px;
  border: none;
  border-radius: 4px;
  cursor: pointer;
  text-decoration: none;
  display: inline-block;
  font-size: 0.9rem;
}

/* .btn-primary and .btn-primary:hover are defined once, in the
   "Additional action button colors" section further down. The green
   definitions that used to sit here were fully overridden by those later
   rules (same specificity, later in the file) and never took effect. */

.btn-secondary {
  background: #2563eb;
  color: white;
}

.btn-secondary:hover {
  background: #1d4ed8;
}

/* Links section */
.links-section h2 {
  font-size: 1rem;
  margin-bottom: 10px;
}

.links-section a {
  color: #0066cc;
  text-decoration: none;
  padding: 5px 10px;
}

.links-section a:hover {
  text-decoration: underline;
}
|
||||||
|
|
||||||
|
/* Create library form */
.create-form {
  margin-bottom: 20px;
  padding: 15px;
  background: #f9f9f9;
  border-left: 4px solid #00c467;
  border-radius: 6px;
}

.create-form label {
  display: block;
  margin-bottom: 8px;
  color: #333;
  font-weight: bold;
}

.create-form input[type="text"] {
  box-sizing: border-box;
  width: 100%;
  margin-bottom: 12px;
  padding: 8px;
  border: 1px solid #ccc;
  border-radius: 4px;
}

/* Table actions column */
.actions {
  white-space: nowrap;
}

/* Button sizes */
.btn-sm {
  font-size: 0.8rem;
  padding: 5px 12px;
}

/* Additional action button colors */
.btn-info {
  color: white;
  background: #17a2b8;
}

.btn-info:hover {
  background: #138496;
}

.btn-warning {
  color: black;
  background: #ffc107;
}

.btn-warning:hover {
  background: #ffa000;
}

.btn-danger {
  color: white;
  background: #dc3545;
}

.btn-danger:hover {
  background: #c82333;
}

.btn-primary {
  color: white;
  background: #007bff;
}

.btn-primary:hover {
  background: #0056b3;
}

/* Highlight row for popular libraries */
tr.highlight {
  background: #f0fdf4;
}

/* Upload form specific styles */
#library_id, #files {
  box-sizing: border-box;
  width: 100%;
  margin-bottom: 12px;
  padding: 8px;
  border: 1px solid #ccc;
  border-radius: 4px;
}

#files {
  font-family: sans-serif;
}

/* Results box for upload */
.result-box {
  min-height: 100px;
  margin-top: 20px;
  padding: 10px;
  background: #fff;
  border: 1px solid #ddd;
  border-radius: 4px;
}

.result-box.error {
  background: #fff5f5;
  border-color: #dc3545;
}

/* Result items */
.result-item {
  margin: 4px 0;
  padding: 6px;
  border-radius: 3px;
  font-family: monospace;
  font-size: 0.85rem;
  word-break: break-word;
}

.result-item.success {
  color: #155724;
  background: #d4edda;
  border-left: 3px solid #28a745;
}

.result-item.error {
  color: #721c24;
  background: #f8d7da;
  border-left: 3px solid #dc3545;
}

.result-item.info {
  color: #0c5460;
  background: #d1ecf1;
  border-left: 3px solid #17a2b8;
}
|
||||||
@@ -0,0 +1,32 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{% block title %}Context7 Docs{% endblock %}</title>
    <link rel="stylesheet" href="{{ url_for('static', path='style.css') }}">
</head>
<body>
    <div class="container">
        <header>
            <h1>Context7 Docs UI</h1>
            <nav>
                {#- Dashboard is active only on an exact "/" match; every other
                    entry is active when the current path starts with its href. -#}
                {%- set current_path = request.url.path %}
                <a href="/" {% if current_path == '/' %}class="active"{% endif %}>Dashboard</a>
                {%- for href, label in [('/libraries', 'Libraries'), ('/upload', 'Upload'), ('/search', 'Search'), ('/sources', 'Sources')] %}
                <a href="{{ href }}" {% if current_path.startswith(href) %}class="active"{% endif %}>{{ label }}</a>
                {%- endfor %}
            </nav>
        </header>

        <main>
            {% block content %}{% endblock %}
        </main>

        <footer>Context7 Docs WebUI</footer>
    </div>

    {# Shared helpers load before any page-specific scripts. #}
    <script src="{{ url_for('static', path='app.js') }}"></script>
    {% block scripts %}{% endblock %}
</body>
</html>
|
||||||
@@ -0,0 +1,83 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Dashboard - Context7 Docs{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h1>Dashboard</h1>
|
||||||
|
|
||||||
|
<!-- Status Cards -->
|
||||||
|
<div class="status-cards">
|
||||||
|
<div class="status-card" style="{% if health.status == 'ok' %}border-left-color: #00c467{% else %}border-left-color: #f53800{% endif %}">
|
||||||
|
<h3>Docs API Service</h3>
|
||||||
|
{% if health.status and health.status == 'ok' %}
|
||||||
|
<p style="color: #00c467;"><strong>Status:</strong> Online ✓</p>
|
||||||
|
{% else %}
|
||||||
|
<p style="color: #f53800;"><strong>Status:</strong> {% if health.status == 'error' %}Error{% else %}Offline{% endif %}</p>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="status-card">
|
||||||
|
<h3>Vectors Stored</h3>
|
||||||
|
<p>{{ vectors|default(0) }}</p>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="status-card">
|
||||||
|
<h3>Libraries Registered</h3>
|
||||||
|
<p>{{ libraries|length }}</p>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Recent Messages -->
|
||||||
|
{% if libraries and libraries|length > 0 %}
|
||||||
|
<div class="message-box" style="background: #e8f4fd;">
|
||||||
|
<strong>Libraries:</strong> {{ escapeHtml(libraries) }}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<!-- Action Buttons -->
|
||||||
|
<div class="action-buttons">
|
||||||
|
<form method="post" action="/actions/ingest-all" style="display: inline;">
|
||||||
|
<button type="submit" name="ingest-all" class="btn btn-primary">
|
||||||
|
🔄 Ingest All Libraries
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<form method="post" action="/actions/sync-sources" style="display: inline;">
|
||||||
|
<input type="hidden" name="override" value="false">
|
||||||
|
<button type="submit" name="sync-sources" class="btn btn-secondary">
|
||||||
|
📦 Sync Git Sources
|
||||||
|
</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Links -->
|
||||||
|
<div class="links-section">
|
||||||
|
<h2>Navigate to Other Pages</h2>
|
||||||
|
<a href="/libraries" style="display: inline-block; margin-right: 15px;">View Libraries →</a>
|
||||||
|
<a href="/upload" style="display: inline-block; margin-right: 15px;">Upload Files →</a>
|
||||||
|
<a href="/search" style="display: inline-block; margin-right: 15px;">Search Docs →</a>
|
||||||
|
<a href="/sources" style="display: inline-block;">Git Sources →</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Script for health refresh on reload -->
|
||||||
|
<script>
|
||||||
|
// After the DOM is ready, ask the Docs API for its current health and tag the
// first status card's <p> with an "online" or "error" class accordingly.
// Any failure (client missing, request rejected) is logged and otherwise ignored.
document.addEventListener("DOMContentLoaded", async () => {
    const FIRST_CARD = '.status-cards .status-card:first-of-type';
    const firstCard = () => document.querySelector(FIRST_CARD);

    try {
        const client = window.docsApiClient;

        // Clear a server-rendered error marker on the card before re-checking.
        firstCard()?.classList.remove('error');

        const health = await client.get("/health");
        const stateClass = health.status === 'ok' ? 'online' : 'error';
        firstCard()?.querySelector('p')?.classList.add(stateClass);
    } catch (failure) {
        console.log('Health refresh skipped:', failure);
    }
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Libraries - Context7 Docs{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h1>Libraries</h1>
|
||||||
|
|
||||||
|
<!-- Create Library Form -->
|
||||||
|
<div class="create-form">
|
||||||
|
<form method="post" action="/libraries/create">
|
||||||
|
<label for="new_library_id">Library ID:</label>
|
||||||
|
<input type="text" id="new_library_id" name="library_id" placeholder="e.g., foundryvtt" required>
|
||||||
|
|
||||||
|
<label for="new_name">Name:</label>
|
||||||
|
<input type="text" id="new_name" name="name" placeholder="Display name for this library" required>
|
||||||
|
|
||||||
|
<label for="new_description">Description (optional):</label>
|
||||||
|
<input type="text" id="new_description" name="description" placeholder="Brief description...">
|
||||||
|
|
||||||
|
<button type="submit" class="btn btn-primary">Create Library</button>
|
||||||
|
</form>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<hr>
|
||||||
|
|
||||||
|
<!-- Libraries Table -->
|
||||||
|
<table class="library-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>ID</th>
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Description</th>
|
||||||
|
<th>Source Path</th>
|
||||||
|
<th>Updated At</th>
|
||||||
|
<th>Actions</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody id="libraries-body">
|
||||||
|
{% if data|length > 0 %}
|
||||||
|
{% for lib in data %}
|
||||||
|
<tr class="{% if lib.source_path and 'foundry' in (lib.source_path or '').lower() %}highlight{% endif %}">
|
||||||
|
<td><code>{{ escapeHtml(lib.id) }}</code></td>
|
||||||
|
<td><strong>{{ escapeHtml(lib.name) }}</strong></td>
|
||||||
|
<td>{{ escapeHtml(lib.description) or '-' }}</td>
|
||||||
|
<td><small>{{ escapeHtml(lib.source_path) or '-' }}</small></td>
|
||||||
|
<td><small>{{ lib.updated_at|default('N/A') }}</small></td>
|
||||||
|
<td class="actions">
|
||||||
|
<a href="/libraries/{{ lib.id }}/docs" class="btn btn-sm btn-info">View Docs</a> |
|
||||||
|
<form method="post" action="/libraries/{{ lib.id }}/ingest" style="display:inline;"
|
||||||
|
onsubmit="return confirm('Trigger ingestion for this library?');">
|
||||||
|
<button type="submit" class="btn btn-sm btn-warning">Ingest</button>
|
||||||
|
</form> |
|
||||||
|
<form method="post" action="/libraries/{{ lib.id }}/delete"
|
||||||
|
onsubmit="return confirm('Delete this library and all its contents? This cannot be undone.');">
|
||||||
|
<button type="submit" class="btn btn-sm btn-danger">Delete</button>
|
||||||
|
</form>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
{% else %}
|
||||||
|
<tr>
|
||||||
|
<td colspan="6" style="text-align:center;">No libraries found. Create one above.</td>
|
||||||
|
</tr>
|
||||||
|
{% endif %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
|
||||||
|
{# Docs view mode: rendered only when the first entry of `data` carries a
   non-empty `content` value. The loop reads from that same entry; the guard
   indexes `data` as a list, and a list has no `.get`.
   NOTE(review): confirm the view really passes the docs payload as data[0]. #}
{% if data and data[0] and data[0].get('content') %}
<!-- Docs view mode -->
{# Each chunk prints its `text`, falling back to `content`, then to the chunk itself. #}
<pre class="code-block">{% for chunk in data[0].get('content', []) %}{% if chunk|length > 0 %}{{ chunk.text | default(chunk.content) | default(chunk) }}{% endif %}{% endfor %}</pre>
<a href="/libraries" style="display:block;margin-top:20px;">← Back to Libraries</a>
{% endif %}
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@@ -0,0 +1,71 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Search - Context7 Docs{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h2>Search Documentation</h2>
|
||||||
|
|
||||||
|
<form method="get" action="/search/results" class="search-form">
|
||||||
|
<label for="query">Query:</label>
|
||||||
|
<input type="text" id="query" name="q" required placeholder="Enter your search query..." value="{{ query or '' }}">
|
||||||
|
|
||||||
|
<label for="limit">Limit results:</label>
|
||||||
|
<select id="limit" name="limit">
|
||||||
|
<option value="5">5</option>
|
||||||
|
<option value="10" selected>10</option>
|
||||||
|
<option value="20">20</option>
|
||||||
|
<option value="50">50</option>
|
||||||
|
</select>
|
||||||
|
|
||||||
|
<button type="submit">Search</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div id="search-results" class="results-box"></div>
|
||||||
|
|
||||||
|
{% if results %}
|
||||||
|
<div class="results-count">{{ results|length }} results found</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<script>
|
||||||
|
async function loadResults(query, limit) {
|
||||||
|
const searchBox = document.getElementById("search-results");
|
||||||
|
|
||||||
|
try {
|
||||||
|
const payload = { query: query || "{{ initial_query or '' }}", library_id: null, limit: parseInt(limit) };
|
||||||
|
const api = window.docsApiClient;
|
||||||
|
|
||||||
|
const result = await api.post("/search", payload);
|
||||||
|
|
||||||
|
if (result.results && Array.isArray(result.results)) {
|
||||||
|
searchBox.className = "results-box";
|
||||||
|
let html = '<div class="results-count">' + result.results.length + ' results found</div>';
|
||||||
|
|
||||||
|
for (const r of result.results) {
|
||||||
|
const title = r.title || (r.content || '').substring(0, 100);
|
||||||
|
const content = (r.content || '').substring(0, 500);
|
||||||
|
html += '<div class="result-card">' +
|
||||||
|
'<h3>' + escapeHtml(title) + '</h3>' +
|
||||||
|
'<p>' + escapeHtml(content) + '...</p>' +
|
||||||
|
'<a href="/docs/' + (r.library_id || '') + '">View Full</a></div>';
|
||||||
|
}
|
||||||
|
|
||||||
|
html += '<a href="/search/form" class="new-search-link">← New Search</a>';
|
||||||
|
searchBox.innerHTML = html;
|
||||||
|
}
|
||||||
|
} catch (err) {
|
||||||
|
searchBox.innerHTML = '<p class="error">Error loading results: ' + escapeHtml(err.message) + '</p>';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load initial results if query parameter exists in URL
|
||||||
|
var urlParams = new URLSearchParams(window.location.search);
|
||||||
|
{% if query %}loadResults(urlParams.get('q') || urlParams.get('q'), urlParams.get('limit'));{% endif %}
|
||||||
|
|
||||||
|
/**
 * Escape a value for safe insertion into HTML text or a quoted attribute.
 *
 * Replaces &, <, >, " and ' with their entity forms. Works on the string
 * form of the input, so numbers such as 0 are rendered rather than dropped.
 *
 * @param {*} str  Value to escape; null, undefined and "" yield "".
 * @returns {string} The escaped text.
 */
function escapeHtml(str) {
    if (str === null || str === undefined || str === "") return "";
    return String(str)
        .replace(/&/g, '&amp;')   // must run first so later entities are not re-escaped
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');
}
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
@@ -0,0 +1,34 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Sources - Context7 Docs{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h2>Git Repository Sync</h2>
|
||||||
|
|
||||||
|
<div class="status-message">Syncs all git repositories configured in <code>docs_sources.yaml</code>.</div>
|
||||||
|
|
||||||
|
<form method="post" action="/sources/sync" class="sync-form">
|
||||||
|
<label for="override">Override existing repos:</label>
|
||||||
|
<input type="checkbox" id="override" name="override">
|
||||||
|
<button type="submit">Sync All Repositories</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<div id="source-list"></div>
|
||||||
|
|
||||||
|
{# One card per configured git source; a hint is shown when `sources` is empty. #}
{% if sources %}
<h3>Configured Sources</h3>
<div class="source-cards">
{% for src in sources %}
<div class="source-card">
<strong>{{ src.library_id | default('unknown') }}</strong><br>
{# Parentheses are required: a subscript cannot directly follow a filter call,
   so the default is applied first and the result is then cut to 60 characters. #}
URL: {{ (src.repo_url | default('N/A'))[:60] }}<br>
Branch: {{ src.branch | default('main') }}<br>
Include: {{ (src.include_paths | default(['*']) | join(', ')) }}
</div>
{% endfor %}
</div>
{% else %}
<p>No git sources configured. Add repositories to <code>docs_sources.yaml</code>.</p>
{% endif %}
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@@ -0,0 +1,48 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
|
||||||
|
{% block title %}Upload - Context7 Docs{% endblock %}
|
||||||
|
|
||||||
|
{% block content %}
|
||||||
|
<h2>Upload Documentation Files</h2>
|
||||||
|
|
||||||
|
<form method="post" enctype="multipart/form-data" class="upload-form">
|
||||||
|
<!-- Library Selector -->
{# The select is deliberately not `required`: its first option has an empty
   value, and browsers refuse to submit a required select while such an option
   is chosen, which would make the "New library" choice unusable. #}
<label for="library_id">Select Library:</label>
<select id="library_id" name="library_id">
<option value="">(New library - will be created from filename)</option>
{% for lib in libraries %}
<option value="{{ lib.id }}" data-name="{{ lib.name or lib.id }}">{{ lib.name or lib.id }}</option>
{% endfor %}
</select>
|
||||||
|
|
||||||
|
<!-- File Input (multiple files allowed) -->
|
||||||
|
<label for="files">Select Files:</label>
|
||||||
|
<input type="file" name="files" id="files" multiple accept=".md,.txt,.py,.js,.ts,.json,.yaml,.yml,.html,.css,.pdf" required>
|
||||||
|
|
||||||
|
<!-- Ingest Checkbox -->
|
||||||
|
<div style="margin-top: 10px;">
|
||||||
|
<label>
|
||||||
|
<input type="checkbox" name="ingest_after_upload" value="on">
|
||||||
|
Trigger ingestion after upload
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<button type="submit" class="btn btn-primary">Upload Files</button>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<!-- Allowed extensions hint -->
|
||||||
|
<p class="hint">Allowed: .md, .txt, .py, .js, .ts, .json, .yaml, .yml, .html, .css, .pdf (max 5MB each)</p>
|
||||||
|
|
||||||
|
<!-- Results Display -->
|
||||||
|
<div id="upload-result" class="result-box"></div>
|
||||||
|
|
||||||
|
{% if results %}
|
||||||
|
<h3>Upload Results</h3>
|
||||||
|
<ul>
|
||||||
|
{% for result in results %}
|
||||||
|
<li><strong>{{ result.filename }}</strong>: {{ result.status }} - {{ escapeHtml(result.message) }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% endblock %}
|
||||||
@@ -0,0 +1,7 @@
|
|||||||
|
# WebUI Dependencies
|
||||||
|
fastapi==0.109.0
|
||||||
|
uvicorn[standard]==0.27.0
|
||||||
|
pydantic==2.5.3
|
||||||
|
python-multipart==0.0.6
|
||||||
|
httpx==0.26.0
|
||||||
|
PyYAML==6.0.1
|
||||||
Reference in New Issue
Block a user