Preserve ingestion data across rebuilds
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
# Local Embedding Generation using FastEmbed
|
||||
import asyncio
|
||||
import os
|
||||
from typing import List
|
||||
from functools import lru_cache
|
||||
|
||||
@@ -20,7 +21,11 @@ def _load_model():
|
||||
print("Loading embedding model (this may take a few minutes on first run)...")
|
||||
|
||||
# Use BAAI/bge-small-en-v1.5 - lightweight (~90MB); works offline once the model has been downloaded into the cache directory
|
||||
_embedding_model = TextEmbedding(model_name="BAAI/bge-small-en-v1.5", cache_dir=".embed_cache")
|
||||
cache_dir = os.getenv("EMBEDDING_CACHE_DIR", ".embed_cache")
|
||||
_embedding_model = TextEmbedding(
|
||||
model_name="BAAI/bge-small-en-v1.5",
|
||||
cache_dir=cache_dir,
|
||||
)
|
||||
print("Embedding model loaded successfully.")
|
||||
|
||||
return _embedding_model
|
||||
@@ -178,4 +183,4 @@ if __name__ == "__main__":
|
||||
assert embed_texts([]) == [], "Empty list should return empty list"
|
||||
print("✓ Empty input handling works")
|
||||
|
||||
print("\n✅ All tests passed!")
|
||||
print("\n✅ All tests passed!")
|
||||
|
||||
Reference in New Issue
Block a user