Fix Git source sync failures

This commit is contained in:
george
2026-06-06 00:48:21 +01:00
parent 54986cda99
commit fc683b2803
4 changed files with 111 additions and 38 deletions
+1
View File
@@ -6,6 +6,7 @@ WORKDIR /app
# Install system dependencies for PDF parsing and embeddings
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
git \
libgl1 \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
+21 -24
View File
@@ -1,6 +1,7 @@
# Git Source Operations for Repository Cloning and File Discovery
import os
import shutil
from subprocess import run
from pathlib import Path
from typing import List, Optional, Dict, Any
@@ -27,6 +28,13 @@ class GitCloneError(Exception):
pass
def run_git(command: List[str]) -> None:
    """Run a git command and raise ``GitCloneError`` if it does not succeed.

    Args:
        command: Full argument vector to execute, including the leading
            ``"git"``.

    Raises:
        GitCloneError: If the process cannot be started at all (for example
            the git executable is not installed) or if it exits with a
            non-zero status. For a non-zero exit the message is the
            process's stderr, falling back to stdout, and finally to
            ``"unknown git error"`` when both are blank.
    """
    try:
        result = run(command, capture_output=True, text=True)
    except OSError as exc:
        # A missing or non-executable binary never produces a return code;
        # subprocess raises OSError instead. Report it through the same
        # exception type as every other git failure.
        raise GitCloneError(f"Failed to start git: {exc}") from exc

    if result.returncode != 0:
        # Strip each stream before choosing one, so whitespace-only stderr
        # does not mask useful stdout or produce an empty message.
        error = (
            (result.stderr or "").strip()
            or (result.stdout or "").strip()
            or "unknown git error"
        )
        raise GitCloneError(error)
def clone_or_update_repo(
repo_id: str,
repo_url: str,
@@ -56,36 +64,25 @@ def clone_or_update_repo(
# Update existing clone
print(f" [Git] Updating existing clone at {repo_path}")
from subprocess import run, CalledProcessError
import subprocess
# Fetch latest changes
result = run(
["git", "-C", str(repo_path), "fetch", "origin"],
capture_output=True,
text=True
)
if result.returncode != 0:
raise GitCloneError(f"Failed to fetch: {result.stderr}")
run_git(["git", "-C", str(repo_path), "fetch", "origin"])
# Reset to branch
run(
["git", "-C", str(repo_path), "reset", "--hard", "origin/" + branch],
capture_output=True,
text=True
)
run_git(["git", "-C", str(repo_path), "reset", "--hard", "origin/" + branch])
else:
# Clone new repository
print(f" [Git] Cloning {repo_url} to {repo_path}")
run(
["git", "-C", str(repo_path.parent), "clone",
"--branch", branch,
run_git(
[
"git",
"clone",
"--branch",
branch,
"--single-branch",
repo_url, "."],
capture_output=True,
text=True
repo_url,
str(repo_path),
]
)
print(f" [Git] Checked out branch: {branch}")
@@ -97,8 +94,8 @@ def clone_or_update_repo(
"branch": branch
}
except CalledProcessError as e:
raise GitCloneError(f"Git command failed: {e.stderr}") from e
except GitCloneError:
raise
except Exception as e:
raise GitCloneError(f"Failed to clone/update repo: {e}") from e
+11 -2
View File
@@ -432,15 +432,24 @@ async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
results = []
for source in sources:
library_id = source.get("library_id", "unknown")
try:
result = await ingest_git_source(
library_id=source["library_id"],
name=source.get("name") or source["library_id"],
library_id=library_id,
name=source.get("name") or library_id,
description=source.get("description"),
repo_url=source["repo_url"],
branch=source.get("branch", "main"),
include_paths=source.get("include_paths"),
exclude_paths=source.get("exclude_paths"),
)
except Exception as exc:
result = {
"success": False,
"library_id": library_id,
"repo_url": source.get("repo_url"),
"error": str(exc),
}
results.append(result)
successful = len([r for r in results if r.get("success")])
+66
View File
@@ -0,0 +1,66 @@
import pytest
from backend.app import git_source, main
def test_clone_or_update_repo_clones_into_repo_path(monkeypatch, tmp_path):
    """A first-time sync issues exactly one ``git clone`` aimed at the repo path.

    The ``run`` name inside ``git_source`` is swapped for a recorder that
    always reports success, so no real git process is started. With
    ``repos_base=tmp_path`` and ``repo_id="neoforge-git"``, the only recorded
    command must be a single-branch clone whose final argument is
    ``tmp_path / "neoforge-git"``.
    """
    repo_url = "https://github.com/neoforged/Documentation.git"
    recorded = []

    class _Completed:
        # Minimal stand-in exposing the attributes read from a finished process.
        returncode = 0
        stdout = ""
        stderr = ""

    def fake_run(command, capture_output=True, text=True):
        recorded.append(command)
        return _Completed()

    monkeypatch.setattr(git_source, "run", fake_run)

    outcome = git_source.clone_or_update_repo(
        repo_id="neoforge-git",
        repo_url=repo_url,
        branch="main",
        repos_base=tmp_path,
    )

    expected_clone = [
        "git",
        "clone",
        "--branch",
        "main",
        "--single-branch",
        repo_url,
        str(tmp_path / "neoforge-git"),
    ]
    assert outcome["success"] is True
    assert recorded == [expected_clone]
@pytest.mark.asyncio
async def test_sync_sources_returns_failed_result_for_source_exception(monkeypatch):
    """An exception raised while ingesting a source becomes a failed result.

    ``main.api_list_sources`` is replaced with a stub returning one source and
    ``main.ingest_git_source`` with a stub that always raises. The sync call
    must return normally, report the failure in its summary, and carry the
    exception text in that source's result entry.
    """

    async def fake_list_sources():
        neoforge_source = {
            "library_id": "neoforge",
            "repo_url": "https://github.com/neoforged/Documentation.git",
            "branch": "main",
        }
        return {"sources": [neoforge_source]}

    async def fake_ingest_git_source(**kwargs):
        raise RuntimeError("git is unavailable")

    for attr_name, replacement in (
        ("api_list_sources", fake_list_sources),
        ("ingest_git_source", fake_ingest_git_source),
    ):
        monkeypatch.setattr(main, attr_name, replacement)

    response = await main.sync_sources_api()

    assert response["success"] is False
    assert response["failed"] == 1
    first_entry = response["results"][0]
    assert first_entry["library_id"] == "neoforge"
    assert first_entry["error"] == "git is unavailable"