Fix git sync and add repo browser with path selection

- Fix discover_files: rel_path is now always computed (it was stuck at '.' at
  the root), include_path_match now uses the relative path, and 'return' was
  changed to 'continue'
- Fix ingest_git_source: files were cloned but ingested from wrong path
  (docs/repo-id instead of data/repos/repo-id). Now stages filtered files
  into DOCS_PATH/library_id before calling ingest_library.
- Add browse_repo_tree() for interactive repo exploration
- Add POST /api/v1/sources/browse endpoint to backend
- Add /sources/browse proxy route to webui
- Rewrite sources.html: browse repo, expand/collapse tree, check paths to
  include, then save source and sync

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
george
2026-06-06 01:28:10 +01:00
parent 1b61af8873
commit ff4da0cb9e
4 changed files with 394 additions and 128 deletions
+26 -1
View File
@@ -22,7 +22,7 @@ from .db import (
search_libraries,
upsert_library,
)
from .git_source import ingest_git_source
from .git_source import browse_repo_tree, clone_or_update_repo, ingest_git_source
from .ingest import ingest_all, ingest_library
from .search import get_library_docs, resolve_library_id, search_docs
from .vector_store import delete_library_vectors, ensure_collection, get_client, get_collection_name
@@ -55,6 +55,11 @@ class GitSourceRequest(BaseModel):
exclude_paths: Optional[list[str]] = None
class BrowseRepoRequest(BaseModel):
    """Request body for the repository browse endpoint.

    Carries the repository URL to browse and the branch to check out.
    """

    # Required; an empty string is rejected by the min_length constraint.
    repo_url: str = Field(..., min_length=1)
    # Optional; falls back to "main" when the client omits it.
    branch: str = "main"
DOCUMENT_EXTENSIONS = {
".md",
".txt",
@@ -424,6 +429,26 @@ async def api_add_source(source: GitSourceRequest):
return {"success": True, "created": created, "source": source_entry}
@app.post("/api/v1/sources/browse")
async def browse_repo_api(payload: BrowseRepoRequest):
    """Shallow-clone a repo and return its directory tree for path selection.

    Args:
        payload: Repository URL and branch to browse.

    Returns:
        A dict with ``success``, the ``tree`` produced by
        ``browse_repo_tree``, and the echoed ``repo_url`` and ``branch``.

    Raises:
        HTTPException: Status 400 carrying the error text when cloning or
            listing the repository fails.
    """
    import asyncio
    import hashlib

    # Derive a short, stable id from the URL so that repeated browse requests
    # for the same URL map to the same repo_id.
    # NOTE(review): the id ignores the branch, so two branches of one repo
    # share a repo_id -- confirm clone_or_update_repo switches branches.
    repo_hash = hashlib.md5(payload.repo_url.encode()).hexdigest()[:10]
    repo_id = f"browse-{repo_hash}"

    def _clone_and_list():
        """Clone/update the repo and build its tree (blocking work)."""
        clone_result = clone_or_update_repo(
            repo_id=repo_id,
            repo_url=payload.repo_url,
            branch=payload.branch,
        )
        return browse_repo_tree(Path(clone_result["repo_path"]))

    try:
        # The clone and the tree listing are synchronous calls; run them in a
        # worker thread so this coroutine does not stall the event loop.
        tree = await asyncio.to_thread(_clone_and_list)
    except Exception as exc:
        # Chain the cause so the original traceback is preserved in logs.
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    return {
        "success": True,
        "tree": tree,
        "repo_url": payload.repo_url,
        "branch": payload.branch,
    }
@app.post("/sources/sync")
async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
source_data = await api_list_sources()