diff --git a/backend/Dockerfile b/backend/Dockerfile index d2efcda..113d935 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -6,6 +6,7 @@ WORKDIR /app # Install system dependencies for PDF parsing and embeddings RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ + git \ libgl1 \ libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* diff --git a/backend/app/git_source.py b/backend/app/git_source.py index 1aeb753..c51c461 100644 --- a/backend/app/git_source.py +++ b/backend/app/git_source.py @@ -1,6 +1,7 @@ # Git Source Operations for Repository Cloning and File Discovery import os import shutil +from subprocess import run from pathlib import Path from typing import List, Optional, Dict, Any @@ -27,6 +28,13 @@ class GitCloneError(Exception): pass +def run_git(command: List[str]) -> None: + result = run(command, capture_output=True, text=True) + if result.returncode != 0: + error = (result.stderr or result.stdout or "unknown git error").strip() + raise GitCloneError(error) + + def clone_or_update_repo( repo_id: str, repo_url: str, @@ -55,37 +63,26 @@ def clone_or_update_repo( if repo_path.exists(): # Update existing clone print(f" [Git] Updating existing clone at {repo_path}") - - from subprocess import run, CalledProcessError - import subprocess - + # Fetch latest changes - result = run( - ["git", "-C", str(repo_path), "fetch", "origin"], - capture_output=True, - text=True - ) - - if result.returncode != 0: - raise GitCloneError(f"Failed to fetch: {result.stderr}") - + run_git(["git", "-C", str(repo_path), "fetch", "origin"]) + # Reset to branch - run( - ["git", "-C", str(repo_path), "reset", "--hard", "origin/" + branch], - capture_output=True, - text=True - ) + run_git(["git", "-C", str(repo_path), "reset", "--hard", "origin/" + branch]) else: # Clone new repository print(f" [Git] Cloning {repo_url} to {repo_path}") - - run( - ["git", "-C", str(repo_path.parent), "clone", - "--branch", branch, - "--single-branch", - repo_url, "."], - capture_output=True, - text=True + + run_git( + [ + "git", + "clone", + "--branch", + branch, + "--single-branch", + repo_url, + str(repo_path), + ] ) print(f" [Git] Checked out branch: {branch}") @@ -97,8 +94,8 @@ def clone_or_update_repo( "branch": branch } - except CalledProcessError as e: - raise GitCloneError(f"Git command failed: {e.stderr}") from e + except GitCloneError: + raise except Exception as e: raise GitCloneError(f"Failed to clone/update repo: {e}") from e @@ -386,4 +383,4 @@ async def sync_sources( results.append(result) - return results \ No newline at end of file + return results diff --git a/backend/app/main.py b/backend/app/main.py index efb9c11..b6ff0e7 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -432,15 +432,24 @@ async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None): results = [] for source in sources: - result = await ingest_git_source( - library_id=source["library_id"], - name=source.get("name") or source["library_id"], - description=source.get("description"), - repo_url=source["repo_url"], - branch=source.get("branch", "main"), - include_paths=source.get("include_paths"), - exclude_paths=source.get("exclude_paths"), - ) + library_id = source.get("library_id", "unknown") + try: + result = await ingest_git_source( + library_id=library_id, + name=source.get("name") or library_id, + description=source.get("description"), + repo_url=source["repo_url"], + branch=source.get("branch", "main"), + include_paths=source.get("include_paths"), + exclude_paths=source.get("exclude_paths"), + ) + except Exception as exc: + result = { + "success": False, + "library_id": library_id, + "repo_url": source.get("repo_url"), + "error": str(exc), + } results.append(result) successful = len([r for r in results if r.get("success")]) diff --git a/tests/test_git_sources.py b/tests/test_git_sources.py new file mode 100644 index 0000000..e847e5b --- /dev/null +++ b/tests/test_git_sources.py @@ -0,0 +1,66 @@ +import pytest + +from backend.app import git_source, main + + +def test_clone_or_update_repo_clones_into_repo_path(monkeypatch, tmp_path): + commands = [] + + def fake_run(command, capture_output=True, text=True): + commands.append(command) + + class Result: + returncode = 0 + stdout = "" + stderr = "" + + return Result() + + monkeypatch.setattr(git_source, "run", fake_run) + + result = git_source.clone_or_update_repo( + repo_id="neoforge-git", + repo_url="https://github.com/neoforged/Documentation.git", + branch="main", + repos_base=tmp_path, + ) + + assert result["success"] is True + assert commands == [ + [ + "git", + "clone", + "--branch", + "main", + "--single-branch", + "https://github.com/neoforged/Documentation.git", + str(tmp_path / "neoforge-git"), + ] + ] + + +@pytest.mark.asyncio +async def test_sync_sources_returns_failed_result_for_source_exception(monkeypatch): + async def fake_list_sources(): + return { + "sources": [ + { + "library_id": "neoforge", + "repo_url": "https://github.com/neoforged/Documentation.git", + "branch": "main", + } + ] + } + + async def fake_ingest_git_source(**kwargs): + raise RuntimeError("git is unavailable") + + monkeypatch.setattr(main, "api_list_sources", fake_list_sources) + monkeypatch.setattr(main, "ingest_git_source", fake_ingest_git_source) + + result = await main.sync_sources_api() + + assert result["success"] is False + assert result["failed"] == 1 + assert result["results"][0]["library_id"] == "neoforge" + assert result["results"][0]["error"] == "git is unavailable"