Preserve ingestion data across rebuilds

This commit is contained in:
george
2026-06-06 12:44:02 +01:00
parent f3509a363e
commit 7707a6306d
6 changed files with 194 additions and 38 deletions
+7 -2
View File
@@ -1,5 +1,6 @@
# Local Embedding Generation using FastEmbed
import asyncio
import os
from typing import List
from functools import lru_cache
@@ -20,7 +21,11 @@ def _load_model():
print("Loading embedding model (this may take a few minutes on first run)...")
# Use BAAI/bge-small-en-v1.5 - lightweight (~90MB), works offline
_embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5", cache_dir=".embed_cache")
cache_dir = os.getenv("EMBEDDING_CACHE_DIR", ".embed_cache")
_embedding_model = TextEmbedding(
model_name="BAAI/bge-small-en-v1.5",
cache_dir=cache_dir,
)
print("Embedding model loaded successfully.")
return _embedding_model
@@ -178,4 +183,4 @@ if __name__ == "__main__":
assert embed_texts([]) == [], "Empty list should return empty list"
print("✓ Empty input handling works")
print("\n✅ All tests passed!")
print("\n✅ All tests passed!")