Fix Git source sync failures
This commit is contained in:
@@ -6,6 +6,7 @@ WORKDIR /app
|
|||||||
# Install system dependencies for PDF parsing and embeddings
|
# Install system dependencies for PDF parsing and embeddings
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
curl \
|
curl \
|
||||||
|
git \
|
||||||
libgl1 \
|
libgl1 \
|
||||||
libglib2.0-0 \
|
libglib2.0-0 \
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|||||||
+21
-24
@@ -1,6 +1,7 @@
|
|||||||
# Git Source Operations for Repository Cloning and File Discovery
|
# Git Source Operations for Repository Cloning and File Discovery
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
from subprocess import run
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Optional, Dict, Any
|
from typing import List, Optional, Dict, Any
|
||||||
|
|
||||||
@@ -27,6 +28,13 @@ class GitCloneError(Exception):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def run_git(command: List[str]) -> None:
    """Run a git command and raise ``GitCloneError`` if it fails.

    Args:
        command: Full argument vector, e.g.
            ``["git", "-C", path, "fetch", "origin"]``. It is handed to
            ``subprocess.run`` as a list, so no shell is involved.

    Raises:
        GitCloneError: If the process cannot be started (for example the
            git executable is not installed) or if it exits with a
            non-zero status. For a non-zero exit the message is git's
            stderr, falling back to stdout, then to a generic placeholder.
    """
    try:
        result = run(command, capture_output=True, text=True)
    except OSError as exc:
        # A missing or non-executable binary fails before any return code
        # exists; report it with the same exception type as other failures.
        raise GitCloneError(f"unable to run git command: {exc}") from exc

    if result.returncode != 0:
        # Strip each stream before choosing so that whitespace-only stderr
        # does not hide a useful stdout message or yield an empty error.
        error = (
            (result.stderr or "").strip()
            or (result.stdout or "").strip()
            or "unknown git error"
        )
        raise GitCloneError(error)
|
||||||
|
|
||||||
|
|
||||||
def clone_or_update_repo(
|
def clone_or_update_repo(
|
||||||
repo_id: str,
|
repo_id: str,
|
||||||
repo_url: str,
|
repo_url: str,
|
||||||
@@ -56,36 +64,25 @@ def clone_or_update_repo(
|
|||||||
# Update existing clone
|
# Update existing clone
|
||||||
print(f" [Git] Updating existing clone at {repo_path}")
|
print(f" [Git] Updating existing clone at {repo_path}")
|
||||||
|
|
||||||
from subprocess import run, CalledProcessError
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
# Fetch latest changes
|
# Fetch latest changes
|
||||||
result = run(
|
run_git(["git", "-C", str(repo_path), "fetch", "origin"])
|
||||||
["git", "-C", str(repo_path), "fetch", "origin"],
|
|
||||||
capture_output=True,
|
|
||||||
text=True
|
|
||||||
)
|
|
||||||
|
|
||||||
if result.returncode != 0:
|
|
||||||
raise GitCloneError(f"Failed to fetch: {result.stderr}")
|
|
||||||
|
|
||||||
# Reset to branch
|
# Reset to branch
|
||||||
run(
|
run_git(["git", "-C", str(repo_path), "reset", "--hard", "origin/" + branch])
|
||||||
["git", "-C", str(repo_path), "reset", "--hard", "origin/" + branch],
|
|
||||||
capture_output=True,
|
|
||||||
text=True
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
# Clone new repository
|
# Clone new repository
|
||||||
print(f" [Git] Cloning {repo_url} to {repo_path}")
|
print(f" [Git] Cloning {repo_url} to {repo_path}")
|
||||||
|
|
||||||
run(
|
run_git(
|
||||||
["git", "-C", str(repo_path.parent), "clone",
|
[
|
||||||
"--branch", branch,
|
"git",
|
||||||
|
"clone",
|
||||||
|
"--branch",
|
||||||
|
branch,
|
||||||
"--single-branch",
|
"--single-branch",
|
||||||
repo_url, "."],
|
repo_url,
|
||||||
capture_output=True,
|
str(repo_path),
|
||||||
text=True
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f" [Git] Checked out branch: {branch}")
|
print(f" [Git] Checked out branch: {branch}")
|
||||||
@@ -97,8 +94,8 @@ def clone_or_update_repo(
|
|||||||
"branch": branch
|
"branch": branch
|
||||||
}
|
}
|
||||||
|
|
||||||
except CalledProcessError as e:
|
except GitCloneError:
|
||||||
raise GitCloneError(f"Git command failed: {e.stderr}") from e
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise GitCloneError(f"Failed to clone/update repo: {e}") from e
|
raise GitCloneError(f"Failed to clone/update repo: {e}") from e
|
||||||
|
|
||||||
|
|||||||
+11
-2
@@ -432,15 +432,24 @@ async def sync_sources_api(payload: Optional[SyncSourcesRequest] = None):
|
|||||||
results = []
|
results = []
|
||||||
|
|
||||||
for source in sources:
|
for source in sources:
|
||||||
|
library_id = source.get("library_id", "unknown")
|
||||||
|
try:
|
||||||
result = await ingest_git_source(
|
result = await ingest_git_source(
|
||||||
library_id=source["library_id"],
|
library_id=library_id,
|
||||||
name=source.get("name") or source["library_id"],
|
name=source.get("name") or library_id,
|
||||||
description=source.get("description"),
|
description=source.get("description"),
|
||||||
repo_url=source["repo_url"],
|
repo_url=source["repo_url"],
|
||||||
branch=source.get("branch", "main"),
|
branch=source.get("branch", "main"),
|
||||||
include_paths=source.get("include_paths"),
|
include_paths=source.get("include_paths"),
|
||||||
exclude_paths=source.get("exclude_paths"),
|
exclude_paths=source.get("exclude_paths"),
|
||||||
)
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
result = {
|
||||||
|
"success": False,
|
||||||
|
"library_id": library_id,
|
||||||
|
"repo_url": source.get("repo_url"),
|
||||||
|
"error": str(exc),
|
||||||
|
}
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
successful = len([r for r in results if r.get("success")])
|
successful = len([r for r in results if r.get("success")])
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from backend.app import git_source, main
|
||||||
|
|
||||||
|
|
||||||
|
def test_clone_or_update_repo_clones_into_repo_path(monkeypatch, tmp_path):
    """Check that the only git command issued is a clone into ``repos_base/repo_id``."""
    url = "https://github.com/neoforged/Documentation.git"
    recorded = []

    class Completed:
        # Minimal stand-in for the attributes read from a finished process.
        returncode = 0
        stdout = ""
        stderr = ""

    def fake_run(command, capture_output=True, text=True):
        # Record the argument vector instead of spawning git.
        recorded.append(command)
        return Completed()

    monkeypatch.setattr(git_source, "run", fake_run)

    result = git_source.clone_or_update_repo(
        repo_id="neoforge-git",
        repo_url=url,
        branch="main",
        repos_base=tmp_path,
    )

    expected_clone = [
        "git",
        "clone",
        "--branch",
        "main",
        "--single-branch",
        url,
        str(tmp_path / "neoforge-git"),
    ]

    assert result["success"] is True
    assert recorded == [expected_clone]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_sync_sources_returns_failed_result_for_source_exception(monkeypatch):
    """Check that an exception raised during ingestion is reported as a failed result."""

    async def fake_list_sources():
        # One configured source with no optional fields set.
        configured = [
            {
                "library_id": "neoforge",
                "repo_url": "https://github.com/neoforged/Documentation.git",
                "branch": "main",
            }
        ]
        return {"sources": configured}

    async def fake_ingest_git_source(**kwargs):
        # Simulate ingestion blowing up for this source.
        raise RuntimeError("git is unavailable")

    monkeypatch.setattr(main, "api_list_sources", fake_list_sources)
    monkeypatch.setattr(main, "ingest_git_source", fake_ingest_git_source)

    response = await main.sync_sources_api()

    assert response["success"] is False
    assert response["failed"] == 1

    first = response["results"][0]
    assert first["library_id"] == "neoforge"
    assert first["error"] == "git is unavailable"
|
||||||
Reference in New Issue
Block a user