Preserve ingestion data across rebuilds

This commit is contained in:
george
2026-06-06 12:44:02 +01:00
parent f3509a363e
commit 7707a6306d
6 changed files with 194 additions and 38 deletions
+70
View File
@@ -233,6 +233,76 @@ class TestDocumentChunkOperations:
remaining = get_chunks_for_library("/local/cleartest")
assert len(remaining) == 0
def test_replace_library_documents_is_atomic(self, test_database):
    """Swapping a library's chunks drops the old rows and stores the new ones.

    Seeds a library with one chunk, replaces its documents with a single
    different chunk, and checks both the reported counters and the rows
    that remain afterwards.
    """
    from backend.app.db import (
        get_chunks_for_library,
        insert_document_chunk,
        replace_library_documents,
        upsert_library,
    )

    lib_id = "/local/replacetest"

    # Arrange: a library that already holds exactly one chunk.
    upsert_library(lib_id, "Replace test", source_path=lib_id)
    insert_document_chunk(
        "old-chunk", lib_id, "old.md", content="old content", chunk_index=0
    )

    # Act: replace the library's documents with one brand-new chunk.
    replacement = {
        "id": "new-chunk",
        "path": "new.md",
        "title": "new",
        "content": "new content",
        "chunk_index": 0,
        "token_estimate": 2,
    }
    outcome = replace_library_documents(lib_id, [replacement])
    stored = get_chunks_for_library(lib_id)

    # Assert: the call reports success and only the new chunk is left.
    assert outcome["success"] is True
    # Lower bound only: at least the seeded chunk must have been removed.
    assert outcome["deleted"] >= 1
    assert outcome["inserted"] == 1
    stored_ids = [row["id"] for row in stored]
    assert stored_ids == ["new-chunk"]
def test_failed_replacement_keeps_existing_chunks(self, test_database):
    """A replacement that fails must leave the existing chunks in place.

    Seeds a library with one chunk, submits a replacement that is expected
    to fail, and checks that the original chunk is still the only one stored.
    """
    from backend.app.db import (
        get_chunks_for_library,
        insert_document_chunk,
        replace_library_documents,
        upsert_library,
    )

    lib_id = "/local/rollbacktest"

    # Arrange: a library that already holds exactly one chunk.
    upsert_library(lib_id, "Rollback test", source_path=lib_id)
    insert_document_chunk(
        "old-chunk", lib_id, "old.md", content="old content", chunk_index=0
    )

    # Act: submit the very same chunk twice in one replacement.
    # NOTE(review): presumably this fails on a duplicate chunk id -- confirm
    # against the schema / replace_library_documents implementation.
    clashing_chunk = {
        "id": "duplicate",
        "path": "new.md",
        "content": "new content",
        "chunk_index": 0,
    }
    outcome = replace_library_documents(lib_id, [clashing_chunk] * 2)
    stored = get_chunks_for_library(lib_id)

    # Assert: failure is reported and the seeded chunk survived.
    assert outcome["success"] is False
    surviving_ids = [row["id"] for row in stored]
    assert surviving_ids == ["old-chunk"]
class TestDatabaseEdgeCases:
"""Tests for edge cases and error handling."""