Initial DocsMCP stack
This commit is contained in:
@@ -0,0 +1,299 @@
|
||||
"""Context7 Docs API."""
|
||||
import asyncio
|
||||
import shutil
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import FastAPI, File, Form, HTTPException, Query, Request, UploadFile
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from .config import settings
|
||||
from .db import (
|
||||
clear_library_documents,
|
||||
delete_library,
|
||||
init_db,
|
||||
list_libraries,
|
||||
search_libraries,
|
||||
upsert_library,
|
||||
)
|
||||
from .git_source import ingest_git_source
|
||||
from .ingest import ingest_all, ingest_library
|
||||
from .search import get_library_docs, resolve_library_id, search_docs
|
||||
from .vector_store import delete_library_vectors, ensure_collection, get_client, get_collection_name
|
||||
|
||||
|
||||
app = FastAPI(
|
||||
title="Context7 Docs API",
|
||||
description="Document ingestion and semantic search API for local-context7",
|
||||
version="1.0.0",
|
||||
)
|
||||
|
||||
|
||||
class SearchRequest(BaseModel):
|
||||
query: str = Field(..., min_length=1)
|
||||
library_id: Optional[str] = None
|
||||
limit: int = Field(10, ge=1, le=50)
|
||||
|
||||
|
||||
class SyncSourcesRequest(BaseModel):
|
||||
override: bool = False
|
||||
|
||||
|
||||
ALLOWED_EXTENSIONS = {
|
||||
".md",
|
||||
".txt",
|
||||
".py",
|
||||
".js",
|
||||
".ts",
|
||||
".json",
|
||||
".yaml",
|
||||
".yml",
|
||||
".html",
|
||||
".css",
|
||||
".pdf",
|
||||
}
|
||||
|
||||
|
||||
@app.middleware("http")
|
||||
async def auth_middleware(request: Request, call_next):
|
||||
"""Require X-API-Key for mutating endpoints when API_KEY_DOCS_API is set."""
|
||||
if not settings.is_auth_enabled:
|
||||
return await call_next(request)
|
||||
|
||||
public_prefixes = ("/health", "/libraries", "/docs/")
|
||||
if request.method == "GET" and request.url.path.startswith(public_prefixes):
|
||||
return await call_next(request)
|
||||
|
||||
if request.headers.get("X-API-Key") != settings.api_key_docs_api:
|
||||
return JSONResponse(status_code=401, content={"detail": "Invalid or missing API key"})
|
||||
|
||||
return await call_next(request)
|
||||
|
||||
|
||||
@app.on_event("startup")
|
||||
async def startup() -> None:
|
||||
init_result = init_db()
|
||||
if not init_result.get("success"):
|
||||
raise RuntimeError(f"Failed to initialize SQLite database: {init_result.get('error')}")
|
||||
|
||||
last_error = None
|
||||
for _ in range(20):
|
||||
collection_result = await ensure_collection()
|
||||
if collection_result.get("success"):
|
||||
return
|
||||
last_error = collection_result.get("error")
|
||||
await asyncio.sleep(1)
|
||||
raise RuntimeError(f"Failed to initialize Qdrant collection: {last_error}")
|
||||
|
||||
|
||||
def safe_library_id(library_id: str) -> str:
|
||||
"""Normalize user-provided library IDs to a single path segment."""
|
||||
base = Path(library_id).name.strip()
|
||||
if not base or base in {".", ".."} or ".." in library_id or "/" in library_id or "\\" in library_id:
|
||||
raise HTTPException(status_code=400, detail="Invalid library ID")
|
||||
return base
|
||||
|
||||
|
||||
def safe_upload_filename(filename: str) -> str:
|
||||
ext = Path(filename).suffix.lower()
|
||||
if ext not in ALLOWED_EXTENSIONS:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Unsafe extension: {ext}. Allowed extensions: {', '.join(sorted(ALLOWED_EXTENSIONS))}",
|
||||
)
|
||||
|
||||
stem = "".join(c for c in Path(filename).stem if c.isalnum() or c in "-_ ").strip()
|
||||
if not stem:
|
||||
raise HTTPException(status_code=400, detail="Filename contains only unsafe characters")
|
||||
return f"{stem}{ext}"
|
||||
|
||||
|
||||
def docs_root() -> Path:
|
||||
return Path(settings.docs_path)
|
||||
|
||||
|
||||
def sources_config_path() -> Path:
|
||||
return Path(__file__).resolve().parents[2] / "docs_sources.yaml"
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
async def health_check():
|
||||
return {"status": "ok", "service": "docs-api"}
|
||||
|
||||
|
||||
@app.get("/collections")
|
||||
async def collections():
|
||||
try:
|
||||
client = get_client()
|
||||
info = client.get_collection(get_collection_name())
|
||||
vectors = getattr(info, "vectors_count", None) or getattr(info, "points_count", 0) or 0
|
||||
return {"collections": {get_collection_name(): {"vectors": vectors}}}
|
||||
except Exception as e:
|
||||
return {"collections": {}, "warning": str(e)}
|
||||
|
||||
|
||||
@app.get("/libraries")
|
||||
async def list_libraries_api():
|
||||
libs = list_libraries()
|
||||
if isinstance(libs, dict) and not libs.get("success", True):
|
||||
raise HTTPException(status_code=500, detail=libs.get("error", "Failed to list libraries"))
|
||||
return {"libraries": libs, "count": len(libs)}
|
||||
|
||||
|
||||
@app.get("/libraries/search")
|
||||
async def search_libraries_api(q: str = Query(..., min_length=1)):
|
||||
matches = resolve_library_id(q)
|
||||
return {"matches": matches, "count": len(matches)}
|
||||
|
||||
|
||||
@app.post("/search")
|
||||
async def search_docs_api(payload: SearchRequest):
|
||||
results = search_docs(payload.query, library_id=payload.library_id, limit=payload.limit)
|
||||
return {
|
||||
"query": payload.query,
|
||||
"library_id": payload.library_id,
|
||||
"results": results,
|
||||
"count": len(results),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/docs/{library_id}")
|
||||
@app.get("/libraries/{library_id}/docs")
|
||||
async def get_library_docs_api(
|
||||
library_id: str,
|
||||
topic: Optional[str] = Query(None),
|
||||
tokens: int = Query(8000, ge=1),
|
||||
):
|
||||
docs = get_library_docs(library_id=library_id, topic=topic, token_limit=tokens)
|
||||
return {"library_id": library_id, "content": docs}
|
||||
|
||||
|
||||
@app.post("/ingest/all")
|
||||
async def ingest_all_api():
|
||||
return await ingest_all()
|
||||
|
||||
|
||||
@app.post("/ingest/{library_id}")
|
||||
async def ingest_library_api(library_id: str):
|
||||
library_id = safe_library_id(library_id)
|
||||
source_path = library_id
|
||||
return await ingest_library(library_id=library_id, name=library_id, source_path=source_path)
|
||||
|
||||
|
||||
@app.post("/api/v1/libraries/{library_id}")
|
||||
async def api_create_library(
|
||||
library_id: str,
|
||||
name: Optional[str] = Form(None),
|
||||
description: Optional[str] = Form(None),
|
||||
):
|
||||
library_id = safe_library_id(library_id)
|
||||
lib_dir = docs_root() / library_id
|
||||
lib_dir.mkdir(parents=True, exist_ok=True)
|
||||
result = upsert_library(library_id, name or library_id, description, library_id)
|
||||
if not result.get("success"):
|
||||
raise HTTPException(status_code=500, detail=result.get("error", "Failed to create library"))
|
||||
return {
|
||||
"success": True,
|
||||
"created": not result.get("exists", False),
|
||||
"library_id": library_id,
|
||||
"name": name or library_id,
|
||||
"description": description,
|
||||
"path": str(lib_dir),
|
||||
}
|
||||
|
||||
|
||||
@app.delete("/api/v1/libraries/{library_id}")
|
||||
async def api_delete_library(library_id: str):
|
||||
library_id = safe_library_id(library_id)
|
||||
lib_dir = docs_root() / library_id
|
||||
deleted_files = 0
|
||||
|
||||
if lib_dir.exists():
|
||||
for path in lib_dir.rglob("*"):
|
||||
if path.is_file():
|
||||
deleted_files += 1
|
||||
shutil.rmtree(lib_dir)
|
||||
|
||||
docs_result = clear_library_documents(library_id)
|
||||
vectors_result = await delete_library_vectors(library_id)
|
||||
library_result = delete_library(library_id)
|
||||
|
||||
failures = [
|
||||
r.get("error")
|
||||
for r in (docs_result, vectors_result, library_result)
|
||||
if isinstance(r, dict) and not r.get("success", True)
|
||||
]
|
||||
if failures:
|
||||
raise HTTPException(status_code=500, detail="; ".join(failures))
|
||||
|
||||
return {"success": True, "library_id": library_id, "deleted_files": deleted_files}
|
||||
|
||||
|
||||
@app.post("/api/v1/upload/{library_id}")
|
||||
async def api_upload(library_id: str, file: UploadFile = File(...)):
|
||||
library_id = safe_library_id(library_id)
|
||||
safe_name = safe_upload_filename(file.filename or "upload.txt")
|
||||
lib_dir = docs_root() / library_id
|
||||
lib_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
contents = await file.read()
|
||||
if len(contents) > 5 * 1024 * 1024:
|
||||
raise HTTPException(status_code=400, detail="File too large (max 5MB)")
|
||||
|
||||
target = lib_dir / safe_name
|
||||
target.write_bytes(contents)
|
||||
|
||||
upsert_library(library_id, library_id, None, library_id)
|
||||
return {
|
||||
"success": True,
|
||||
"library_id": library_id,
|
||||
"filename": safe_name,
|
||||
"path": str(target.relative_to(docs_root())),
|
||||
"size_bytes": len(contents),
|
||||
}
|
||||
|
||||
|
||||
@app.get("/api/v1/sources")
|
||||
@app.get("/sources/config")
|
||||
async def api_list_sources():
|
||||
path = sources_config_path()
|
||||
if not path.exists():
|
||||
return {"success": True, "sources": [], "count": 0}
|
||||
|
||||
with path.open() as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
sources = data.get("sources", data if isinstance(data, list) else [])
|
||||
if not isinstance(sources, list):
|
||||
sources = []
|
||||
return {"success": True, "sources": sources, "count": len(sources)}
|
||||
|
||||
|
||||
@app.post("/sources/sync")
|
||||
async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
|
||||
source_data = await api_list_sources()
|
||||
sources = source_data["sources"]
|
||||
override = payload.override if payload else False
|
||||
results = []
|
||||
|
||||
for source in sources:
|
||||
result = await ingest_git_source(
|
||||
library_id=source["library_id"],
|
||||
name=source.get("name") or source["library_id"],
|
||||
description=source.get("description"),
|
||||
repo_url=source["repo_url"],
|
||||
branch=source.get("branch", "main"),
|
||||
include_paths=source.get("include_paths"),
|
||||
exclude_paths=source.get("exclude_paths"),
|
||||
)
|
||||
results.append(result)
|
||||
|
||||
successful = len([r for r in results if r.get("success")])
|
||||
return {
|
||||
"success": successful == len(results),
|
||||
"total_sources": len(results),
|
||||
"successful": successful,
|
||||
"failed": len(results) - successful,
|
||||
"results": results,
|
||||
}
|
||||
Reference in New Issue
Block a user