Initial DocsMCP stack
This commit is contained in:
@@ -0,0 +1,2 @@
|
||||
# Tests package for local-context7
|
||||
# Contains unit tests for chunking, database operations, search, and MCP server modules
|
||||
@@ -0,0 +1,191 @@
|
||||
"""
|
||||
Pytest configuration and fixtures for local-context7 tests.
|
||||
|
||||
This module provides:
|
||||
- Mocks for external dependencies (Qdrant, FastEmbed)
|
||||
- Database fixtures for SQLite operations
|
||||
- Common test utilities
|
||||
"""
|
||||
from unittest.mock import MagicMock, patch
|
||||
import pytest
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from backend.app.db import init_db, upsert_library, insert_document_chunk, get_chunks_for_library, list_libraries, clear_library_documents, get_connection
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def test_database():
    """
    Prepare an initialized database for a single test.

    Computes the location ``<repo>/backend/data/test_db.sqlite``, makes sure
    the ``data`` directory exists, deletes any stale file at that location,
    then calls ``init_db()`` and asserts that it reports success.

    Yields:
        Nothing. Tests reach the database through ``backend.app.db`` helpers.

    After the test has run, the file at the computed location is removed
    again if it exists.

    NOTE(review): the computed path is never handed to ``init_db()``, so
    whether ``init_db()`` really creates the database at this location
    depends on how ``backend.app.db`` resolves its path -- confirm.
    """
    data_dir = Path(__file__).parent.parent / "backend" / "data"
    db_file = data_dir / "test_db.sqlite"

    def _discard_db_file():
        # Drop the on-disk file when one is present.
        if db_file.exists():
            db_file.unlink()

    data_dir.mkdir(parents=True, exist_ok=True)
    _discard_db_file()

    outcome = init_db()
    assert outcome["success"], f"Failed to initialize test DB: {outcome.get('error')}"

    yield

    _discard_db_file()
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def sample_text():
    """Return a small markdown document (headings plus paragraphs) for chunking tests."""
    blocks = (
        "# Introduction",
        "This is the introduction section.",
        "## Background",
        "Background information goes here to make this longer and test chunking.",
        "This paragraph has more content about the background topic.",
        "### Details",
        "Specific details about the background are provided in this subsection.",
        "More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    )
    # Every block is separated from the next by exactly one blank line.
    return "\n\n".join(blocks)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# MOCKS
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def mock_embedding_model():
    """
    Build a stand-in embedding model whose ``embed`` needs no real model.

    The returned ``MagicMock`` has its ``embed`` attribute replaced by a
    plain function that yields one 384-element all-zero vector per input
    text, so tests never download or load an actual embedding model.
    """
    vector_size = 384

    def _zero_vectors(texts):
        # One all-zero placeholder vector for each supplied text.
        vectors = []
        for _ in texts:
            vectors.append([0.0] * vector_size)
        return vectors

    model = MagicMock()
    model.embed = _zero_vectors
    return model
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_qdrant_client():
    """
    Build a stand-in Qdrant client for tests that must not hit a server.

    ``search`` always reports no hits (an empty list) regardless of its
    arguments, and ``delete_collection`` is a ``MagicMock`` returning
    ``True`` so cleanup code can call it freely.
    """

    def _no_hits(collection_name, query_vector, limit=10, search_filter=None):
        # Simulates a search that matched nothing.
        return list()

    client = MagicMock()
    client.delete_collection = MagicMock(return_value=True)
    client.search = _no_hits
    return client
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_embedding_model_batch():
    """
    Batch embedding model mock that returns deterministic fake vectors.

    The returned ``MagicMock`` exposes ``embed(texts)``, which produces one
    384-element vector per input text. Each vector starts at a base value
    in ``[0, 1)`` derived from a CRC32 checksum of the text and increases
    by ``0.001`` per component, so different texts generally get different
    vectors while the same text always gets the same one.
    """
    # Imported locally so this fixture does not depend on a module-level import.
    import zlib

    def fake_vector(text):
        # CRC32 is stable across interpreter runs, unlike the built-in
        # hash(), which is salted per process for str inputs.
        text_hash = zlib.crc32(text.encode("utf-8")) % 1000000
        base = text_hash / 1000000
        # The original helper called itself here instead of using the
        # computed hash, which recursed without end.
        return [base + (i * 0.001) for i in range(384)]

    mock_model = MagicMock()
    mock_model.embed = lambda texts: [fake_vector(t) for t in texts]

    return mock_model
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SETUP TEARDOWN FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def clear_test_database(test_database):
    """
    Tie this fixture to ``test_database`` so that fixture is set up for it.

    The body does no work of its own: requesting ``test_database`` as a
    parameter is the entire effect. Database file removal before and after
    the test is performed by ``test_database`` itself.
    """
    return None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def empty_vector():
    """Provide a 384-element list of ``0.0`` values as a placeholder embedding."""
    return [0.0 for _ in range(384)]
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def fake_embeddings(sample_text):
    """
    Fake embedding vectors for the paragraphs of ``sample_text``.

    The text is split on blank lines (``"\\n\\n"``); whitespace-only pieces
    are skipped. Every remaining paragraph maps to a 384-element vector
    whose components all share one value in ``[0, 0.1)`` derived from the
    paragraph's CRC32 checksum and length.

    Returns:
        A list with one constant-valued vector per non-empty paragraph.
    """
    # Imported locally so this fixture does not depend on a module-level import.
    import zlib

    def constant_vector(paragraph):
        # CRC32 replaces the built-in hash(), which is salted per process
        # for str inputs and therefore changed the vectors on every run.
        checksum = zlib.crc32(paragraph.encode("utf-8"))
        value = (checksum + len(paragraph)) % 1000 / 10000
        return [value] * 384

    return [constant_vector(p) for p in sample_text.split("\n\n") if p.strip()]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# UTILITY FUNCTIONS
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def temp_file(tmp_path):
    """
    Create an empty temporary file and return its path.

    Args:
        tmp_path: Directory in which the file is created (pytest's
            per-test ``tmp_path`` when used as a fixture).

    Returns:
        Path to ``test.txt`` inside ``tmp_path``; the file exists and is
        empty unless it was already present.
    """
    test_file = tmp_path / "test.txt"
    # Previously only the path was built, so the file promised by the
    # docstring never existed on disk.
    test_file.touch()
    return test_file
|
||||
|
||||
|
||||
# Register custom marker for slow tests (if needed)
|
||||
def pytest_configure(config):
    """Add the ``slow`` marker description to the ``markers`` ini setting."""
    slow_marker = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    config.addinivalue_line("markers", slow_marker)
|
||||
|
||||
|
||||
def pytest_runtest_setup(item):
    """Per-test setup hook; currently a placeholder that does nothing."""
    return None
|
||||
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
Tests for backend/app/chunking.py
|
||||
|
||||
These are pure unit tests that don't require any external dependencies.
|
||||
They test text chunking logic, token estimation, and heading-aware splitting.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
|
||||
class TestEstimateTokens:
    """Tests for the estimate_tokens() function."""

    def test_empty_text(self):
        """Empty text should return 0 tokens."""
        from backend.app.chunking import estimate_tokens

        assert estimate_tokens("") == 0

    def test_single_char(self):
        """A single character rounds down to 0 tokens (4 chars per token)."""
        from backend.app.chunking import estimate_tokens

        assert estimate_tokens("a") == 0  # 1 char // 4 = 0 tokens

    def test_4_chars(self):
        """4 characters = 1 token."""
        from backend.app.chunking import estimate_tokens

        assert estimate_tokens("abcd") == 1

    def test_400_chars(self):
        """400 characters = 100 tokens."""
        from backend.app.chunking import estimate_tokens

        text = "a" * 400
        assert estimate_tokens(text) == 100

    def test_whitespace_only(self):
        """Whitespace counts like any other character; 3 spaces give 0 tokens."""
        from backend.app.chunking import estimate_tokens

        # Written as a repetition so the length (3) is unambiguous.
        assert estimate_tokens(" " * 3) == 0  # 3 chars // 4 = 0
|
||||
|
||||
|
||||
class TestChunkText:
    """Tests for the chunk_text() function."""

    def test_empty_input(self):
        """Empty input should return empty list."""
        from backend.app.chunking import chunk_text

        assert chunk_text("") == []

    def test_small_text_single_chunk(self):
        """Small text under limit should be single chunk."""
        from backend.app.chunking import chunk_text

        small = "This is a very short text that should be returned as a single chunk."
        chunks = chunk_text(small, max_tokens=500)
        assert len(chunks) == 1
        assert chunks[0] == small

    def test_exact_token_limit(self):
        """Text exactly at limit should be one chunk."""
        from backend.app.chunking import chunk_text, estimate_tokens

        # Create text that is exactly 500 tokens (2000 chars)
        text = "a" * 2000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) == 1
        assert estimate_tokens(chunks[0]) == 500

    def test_over_limit_splits(self):
        """Text over limit should be split into multiple chunks."""
        from backend.app.chunking import chunk_text

        # Create text that is 2500 tokens (10000 chars)
        text = "b" * 10000
        chunks = chunk_text(text, max_tokens=500)
        assert len(chunks) >= 2  # Should be split

    def test_preserves_content(self):
        """Text that fits in one chunk should come back unchanged."""
        from backend.app.chunking import chunk_text

        original = "Hello world! This is a test of chunking functionality."
        chunks = chunk_text(original, max_tokens=100)
        combined = "".join(chunks)
        assert len(chunks) == 1
        assert combined == original

    def test_headings_split(self):
        """With a small limit, at least one chunk should start with a heading."""
        from backend.app.chunking import chunk_text

        markdown_with_headings = (
            "# Introduction\n\n"
            "This is the introduction section.\n\n"
            "## Background\n\n"
            "Background information goes here."
        )

        # With very small token limit, headings should cause splits
        chunks = chunk_text(markdown_with_headings, max_tokens=20)
        heading_chunks = [c for c in chunks if c.strip().startswith('#')]
        assert len(heading_chunks) >= 1  # At least some heading preserved

    def test_paragraph_split(self):
        """Paragraph splitting should respect paragraph boundaries."""
        from backend.app.chunking import chunk_text

        text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
        chunks = chunk_text(text, max_tokens=15)  # Small limit forces splits
        assert len(chunks) >= 3  # At least as many paragraphs

    def test_no_empty_chunks(self):
        """Should not return empty chunks."""
        from backend.app.chunking import chunk_text

        text = "Hello world"
        chunks = chunk_text(text, max_tokens=10)
        for chunk in chunks:
            assert chunk.strip() != ""
|
||||
|
||||
|
||||
class TestTokenEstimationBoundaries:
    """Boundary checks for estimate_tokens()."""

    def test_boundary_precision(self):
        """Lengths 4 through 7 estimate to 1 token; length 8 estimates to 2."""
        from backend.app.chunking import estimate_tokens

        expectations = (
            ("abcd", 1),      # exactly 4 chars
            ("abcde", 1),     # 5 chars still 1 token
            ("abcdef", 1),    # 6 chars still 1 token
            ("abcdefg", 1),   # 7 chars still 1 token
            ("abcdefgh", 2),  # 8 chars = 2 tokens
        )
        for sample, expected_tokens in expectations:
            assert estimate_tokens(sample) == expected_tokens

    def test_various_languages_chars(self):
        """Non-ASCII text is estimated too: 4 CJK characters give 1 token."""
        from backend.app.chunking import estimate_tokens

        # Four Chinese characters
        four_cjk_chars = "你好世界"
        assert estimate_tokens(four_cjk_chars) == 1

        # Mixed ASCII and emoji: only the result type is pinned here
        with_emoji = "Hello 🎉 world"
        estimate = estimate_tokens(with_emoji)
        assert isinstance(estimate, int)
|
||||
|
||||
|
||||
class TestChunkOverlapBehavior:
    """Checks on how much text neighbouring chunks share."""

    def test_overlap_not_exceeded(self):
        """When the text is split, the first chunk is at most half of all chunk text."""
        from backend.app.chunking import chunk_text

        sentence = "The quick brown fox jumps over the lazy dog. "
        pieces = chunk_text(sentence * 10, max_tokens=30, overlap_tokens=5)

        # Nothing is checked when the text came back as a single chunk.
        if len(pieces) <= 1:
            return

        combined_length = len("".join(pieces))
        assert len(pieces[0]) <= combined_length // 2  # Rough check
|
||||
|
||||
|
||||
class TestChunkEdgeCases:
    """Edge cases and error conditions for chunk_text()."""

    def test_whitespace_only_text(self):
        """Whitespace-only input must not crash and must produce a list."""
        from backend.app.chunking import chunk_text

        result = chunk_text(" \n\n ", max_tokens=100)
        # The content of the list is deliberately not pinned.
        assert isinstance(result, list)

    def test_very_long_paragraph(self):
        """A long paragraph with no blank lines is split into several chunks."""
        from backend.app.chunking import chunk_text

        sentence = "The quick brown fox jumps over the lazy dog. "
        pieces = chunk_text(sentence * 100, max_tokens=50)
        assert len(pieces) > 1  # Should be split

    def test_none_input_raises(self):
        """Passing None must raise TypeError or AssertionError."""
        from backend.app.chunking import chunk_text

        expected_errors = (TypeError, AssertionError)
        with pytest.raises(expected_errors):
            chunk_text(None, max_tokens=100)

    def test_unicode_text(self):
        """Short mixed-script text comes back as exactly one chunk."""
        from backend.app.chunking import chunk_text

        mixed_scripts = "Hello 世界 مرحبا 🎉"
        pieces = chunk_text(mixed_scripts, max_tokens=50)
        assert len(pieces) == 1  # Small enough to be single chunk
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SAMPLE TEXT FIXTURE
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def heading_markdown():
    """Return a markdown sample with three heading levels for chunking tests."""
    sections = [
        "# Introduction",
        "This is the introduction section. It contains some introductory text here.",
        "## Background",
        "Background information goes here to make this longer and test chunking. "
        "This paragraph has more content about the background topic. "
        "It provides context.",
        "### Details",
        "Specific details about the background are provided in this subsection. "
        "More details follow here to ensure we have enough text to properly test "
        "heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    ]
    # Headings and paragraphs are each separated by one blank line.
    return "\n\n".join(sections)
|
||||
|
||||
|
||||
class TestHeadingPreservation:
    """Heading-aware chunking checks driven by the heading_markdown sample."""

    def test_headings_in_separate_chunks(self, heading_markdown):
        """With a small limit, at least one chunk starts with a heading marker."""
        from backend.app.chunking import chunk_text

        # A small token budget forces the sample to be split.
        pieces = chunk_text(heading_markdown, max_tokens=30)

        starts_with_heading = [p for p in pieces if p.strip().startswith('#')]
        assert len(starts_with_heading) >= 1

    def test_all_content_present(self, heading_markdown):
        """Key section titles survive chunking with a generous limit."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text(heading_markdown, max_tokens=500)
        rejoined = "".join(pieces)

        # Section titles must still be found in the rejoined text.
        for title in ("Introduction", "Background", "Conclusion"):
            assert title in rejoined
|
||||
@@ -0,0 +1,316 @@
|
||||
"""
|
||||
Tests for backend/app/db.py
|
||||
|
||||
These tests verify SQLite database operations including:
|
||||
- Table creation (init_db)
|
||||
- Library CRUD operations
|
||||
- Document chunk storage and retrieval
|
||||
- Full-text search functionality
|
||||
|
||||
All tests use a temporary test database file.
|
||||
"""
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class TestInitDatabase:
    """Tests for init_db() - table creation."""

    def test_init_db_creates_tables(self, test_database):
        """After init, some table whose name contains 'libraries' should exist."""
        from backend.app.db import get_connection

        conn = get_connection()
        try:
            cursor = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
            )
            tables = [row[0] for row in cursor.fetchall()]
        finally:
            # Close even if the query fails so the connection is not leaked.
            conn.close()

        # Case-insensitive substring match; an exact "libraries" table
        # name satisfies this as well.
        assert any("libraries" in t.lower() for t in tables)

    def test_init_db_returns_success(self, test_database):
        """init_db should return success indicator."""
        from backend.app.db import init_db

        result = init_db()
        assert result["success"] is True
|
||||
|
||||
|
||||
class TestLibraryOperations:
    """Tests for library CRUD operations."""

    def test_upsert_library_new(self, test_database):
        """Upsert should create new library and echo its id back."""
        from backend.app.db import upsert_library

        result = upsert_library(
            library_id="/local/testlib",
            name="Test Library",
            description="A test library for unit tests",
        )

        assert result["success"] is True
        assert result["id"] == "/local/testlib"

    def test_upsert_library_update(self, test_database):
        """A second upsert with the same id should report success."""
        from backend.app.db import upsert_library

        # Insert first library
        upsert_library(
            library_id="/local/upsertlib",
            name="Original Name",
            description="Original description",
        )

        # Update it
        result = upsert_library(
            library_id="/local/upsertlib",
            name="Updated Name",
            description="Updated description",
        )

        assert result["success"] is True

    def test_upsert_library_id_normalization(self, test_database):
        """Upsert should succeed for ids with and without the /local/ prefix."""
        from backend.app.db import upsert_library

        # Test various ID formats
        test_ids = [
            "/local/foundryvtt",
            "foundryvtt",
            "/local/mydocs",
        ]

        for lib_id in test_ids:
            result = upsert_library(library_id=lib_id, name="Test", description="Desc")
            # Only success is checked; the stored form of the id is not inspected.
            assert result["success"] is True

    def test_list_libraries(self, test_database):
        """list_libraries should return list of libraries."""
        from backend.app.db import upsert_library, list_libraries

        # Create some libraries
        for i in range(3):
            upsert_library(
                library_id=f"/local/lib{i}",
                name=f"Library {i}",
                description=f"Description {i}",
            )

        libs = list_libraries()
        assert isinstance(libs, list)
        assert len(libs) >= 3

    def test_search_libraries(self, test_database):
        """search_libraries should return a list for a name query."""
        from backend.app.db import upsert_library, search_libraries

        # Create libraries with searchable names
        upsert_library(library_id="/local/foo1", name="Foo Library", description="Bar baz")
        upsert_library(library_id="/local/foo2", name="Other Library", description="Different content")

        results = search_libraries("foo")
        assert isinstance(results, list)
|
||||
|
||||
|
||||
class TestDocumentChunkOperations:
    """Storage and retrieval of document chunks."""

    def test_insert_document_chunk_new(self, test_database):
        """Inserting a chunk under a fresh id reports success."""
        from backend.app.db import insert_document_chunk

        chunk_fields = dict(
            doc_id="doc-1",
            library_id="/local/testlib",
            path="docs/example.md",
            title="Example Document",
            content="# Example\n\nThis is the content.",
            chunk_index=0,
            token_estimate=100,
        )
        outcome = insert_document_chunk(**chunk_fields)

        assert outcome["success"] is True

    def test_insert_document_chunk_update(self, test_database):
        """Inserting again under the same id reports success."""
        from backend.app.db import insert_document_chunk

        shared_id = "doc-update-test"
        shared_library = "/local/uplib"

        # First write
        insert_document_chunk(
            doc_id=shared_id,
            library_id=shared_library,
            path="old-path.md",
            title="Old Title",
            content="# Old\nContent here.",
            chunk_index=0,
            token_estimate=50,
        )

        # Second write with the same id and different field values
        outcome = insert_document_chunk(
            doc_id=shared_id,
            library_id=shared_library,
            path="new-path.md",
            title="New Title",
            content="# New\nUpdated content.",
            chunk_index=1,
            token_estimate=75,
        )

        assert outcome["success"] is True

    def test_get_document_by_id(self, test_database):
        """A stored chunk can be fetched back by its id."""
        from backend.app.db import insert_document_chunk, get_document_by_id

        wanted_id = "unique-doc-id-12345"
        insert_document_chunk(
            doc_id=wanted_id,
            library_id="/local/testlib",
            path="docs/test.md",
            title="Test Document",
            content="# Test\n\nTest content here.",
            chunk_index=None,
            token_estimate=200,
        )

        fetched = get_document_by_id(wanted_id)
        assert fetched is not None
        assert fetched["id"] == wanted_id

    def test_get_chunks_for_library(self, test_database):
        """All chunks stored for a library are returned as a list."""
        from backend.app.db import upsert_library, insert_document_chunk, get_chunks_for_library

        library = "/local/chunktest"
        upsert_library(library_id=library, name="Chunk Test", description="Test")

        # Store three chunks under that library
        for index in range(3):
            insert_document_chunk(
                doc_id=f"chunk-{index}",
                library_id=library,
                path=f"path{index}.md",
                title=f"Section {index}",
                content=f"Content section {index}.",
                chunk_index=index,
                token_estimate=50,
            )

        stored = get_chunks_for_library(library)
        assert isinstance(stored, list)
        assert len(stored) >= 3

    def test_clear_library_documents(self, test_database):
        """Clearing a library leaves it with no chunks."""
        from backend.app.db import upsert_library, insert_document_chunk, clear_library_documents, get_chunks_for_library

        library = "/local/cleartest"
        upsert_library(library_id=library, name="Clear Test", description="Test")

        # Populate with five chunks
        for index in range(5):
            insert_document_chunk(
                doc_id=f"clear-{index}",
                library_id=library,
                path=f"path{index}.md",
                content=f"Content {index}.",
                token_estimate=20,
            )

        outcome = clear_library_documents(library)
        assert outcome["success"] is True

        # Nothing may be left afterwards
        leftovers = get_chunks_for_library(library)
        assert len(leftovers) == 0
|
||||
|
||||
|
||||
class TestDatabaseEdgeCases:
    """Edge cases and error handling for the database helpers."""

    def test_empty_library_id(self, test_database):
        """Upserting with an empty id must not raise."""
        from backend.app.db import upsert_library

        # The return value is intentionally not inspected.
        upsert_library(library_id="", name="Test", description="Desc")

    def test_special_characters_in_content(self, test_database):
        """Quotes, angle brackets and ampersands in content are accepted."""
        from backend.app.db import insert_document_chunk

        tricky_content = "Hello \"world\" <tag /> & amp; 'apostrophe'"
        outcome = insert_document_chunk(
            doc_id="special-test",
            library_id="/local/speciallib",
            path="special.md",
            content=tricky_content,
            token_estimate=100,
        )

        assert outcome["success"] is True

    def test_very_long_content(self, test_database):
        """A 5000-character content string is accepted."""
        from backend.app.db import insert_document_chunk

        outcome = insert_document_chunk(
            doc_id="long-test",
            library_id="/local/longlib",
            path="long.md",
            content="a" * 5000,
            token_estimate=1000,
        )

        assert outcome["success"] is True

    def test_none_description(self, test_database):
        """A library whose description is None is accepted."""
        from backend.app.db import upsert_library

        outcome = upsert_library(
            library_id="/local/nonedesc",
            name="No Description Lib",
            description=None,
        )

        assert outcome["success"] is True
|
||||
|
||||
|
||||
class TestDatabaseInitialization:
    """State of the database immediately after initialization."""

    def test_database_is_empty_after_init(self, test_database):
        """list_libraries() returns a list right after init (emptiness is not asserted)."""
        from backend.app.db import list_libraries

        listing = list_libraries()
        assert isinstance(listing, list)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
|
||||
def sample_doc():
    """Provide keyword fields describing one example document chunk."""
    guide_body = "\n\n".join(
        [
            "# Getting Started",
            "Welcome to the guide. This is a sample document for testing.",
            "## Installation",
            "Install with pip.",
        ]
    )
    return dict(
        doc_id="sample-doc-1",
        library_id="/local/samplelib",
        path="docs/guide.md",
        title="Getting Started Guide",
        content=guide_body,
        chunk_index=0,
        token_estimate=500,
    )
|
||||
@@ -0,0 +1,262 @@
|
||||
"""
|
||||
Tests for mcp-server/server.py
|
||||
|
||||
These are pure unit tests that don't require any external dependencies.
|
||||
They test:
|
||||
- The strip_local_prefix() function directly (no network)
|
||||
- MCP server tool definitions and structure
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestStripLocalPrefix:
    """Behaviour of strip_local_prefix() on a range of library ids."""

    def test_strips_prefix_from_full_id(self):
        """A leading /local/ is removed from a full library id."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/foundryvtt") == "foundryvtt"

    def test_preserves_id_without_prefix(self):
        """An id without the prefix comes back unchanged."""
        from mcp_server.server import strip_local_prefix

        bare_id = "foundryvtt"
        assert strip_local_prefix(bare_id) == bare_id

    def test_strips_from_multiple_local_prefixes(self):
        """Only the first of two stacked prefixes is removed."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local//local/foundryvtt")
        # The second prefix must survive.
        assert stripped == "/local/foundryvtt"

    def test_empty_string(self):
        """The empty string comes back unchanged."""
        from mcp_server.server import strip_local_prefix

        empty = ""
        assert strip_local_prefix(empty) == empty

    def test_whitespace_only(self):
        """A whitespace-only id has no prefix to strip and comes back unchanged."""
        from mcp_server.server import strip_local_prefix

        blank = " \t\n"
        assert strip_local_prefix(blank) == blank

    def test_case_sensitive_prefix(self):
        """Only the lowercase prefix is recognised."""
        from mcp_server.server import strip_local_prefix

        # Lowercase prefix is stripped
        assert strip_local_prefix("/local/test") == "test"

        # Uppercase variant is left alone
        assert strip_local_prefix("/LOCAL/test") == "/LOCAL/test"

    def test_partial_match_does_not_strip(self):
        """Look-alike prefixes are left untouched."""
        from mcp_server.server import strip_local_prefix

        # "/local-docs/" and "/localdocs/" are not the "/local/" prefix
        for look_alike in ("/local-docs/test", "/localdocs/test"):
            assert strip_local_prefix(look_alike) == look_alike

    def test_prefix_with_trailing_slash(self):
        """A trailing slash after the id is kept."""
        from mcp_server.server import strip_local_prefix

        assert strip_local_prefix("/local/foundryvtt/") == "foundryvtt/"
|
||||
|
||||
|
||||
class TestMcpServerStructure:
    """Tests for MCP server tool structure (without starting the server)."""

    def test_import_fastmcp(self):
        """fastmcp, when installed, should expose FastMCP; skipped when not installed."""
        # importorskip replaces the hand-written try/except/skip dance.
        fastmcp = pytest.importorskip("fastmcp")
        # The original test could never fail; a package that imports but
        # lacks FastMCP is now reported instead of being skipped.
        assert hasattr(fastmcp, "FastMCP")
|
||||
|
||||
|
||||
class TestMcpServerToolsExistence:
    """Tests to verify MCP server has expected tools defined."""

    @staticmethod
    def _registered_tool_names():
        """
        Collect the names of the tools registered on the server's ``mcp`` object.

        A ``tools`` attribute is used when present. Otherwise a
        ``get_tools`` or ``list_tools`` method is tried, and a coroutine
        result is run to completion. When none of these exist the calling
        test is skipped, whereas it previously passed without checking
        anything.

        NOTE(review): the accessor names are assumptions about the
        installed FastMCP release -- confirm against its documentation.
        """
        import asyncio
        import inspect

        from mcp_server.server import mcp

        tools = getattr(mcp, "tools", None)
        if tools is None:
            for accessor_name in ("get_tools", "list_tools"):
                accessor = getattr(mcp, accessor_name, None)
                if callable(accessor):
                    tools = accessor()
                    if inspect.iscoroutine(tools):
                        tools = asyncio.run(tools)
                    break

        if tools is None:
            pytest.skip("cannot introspect registered tools on the mcp object")

        # A mapping is presumed to be keyed by tool name -- TODO confirm.
        if isinstance(tools, dict):
            return set(tools)
        return {tool.name for tool in tools}

    def test_mcp_instance_created(self):
        """The server module should expose a non-None mcp object."""
        from mcp_server.server import mcp

        assert mcp is not None

    def test_resolve_library_id_tool_exists(self):
        """resolve_library_id tool should be registered."""
        assert "resolve_library_id" in self._registered_tool_names()

    def test_get_library_docs_tool_exists(self):
        """get_library_docs tool should be registered."""
        assert "get_library_docs" in self._registered_tool_names()

    def test_list_libraries_tool_exists(self):
        """list_libraries tool should be registered."""
        assert "list_libraries" in self._registered_tool_names()

    def test_search_docs_tool_exists(self):
        """search_docs tool should be registered."""
        assert "search_docs" in self._registered_tool_names()

    def test_refresh_library_tool_exists(self):
        """refresh_library tool should be registered."""
        assert "refresh_library" in self._registered_tool_names()

    def test_sync_sources_tool_exists(self):
        """sync_sources tool should be registered."""
        assert "sync_sources" in self._registered_tool_names()
|
||||
|
||||
|
||||
class TestMcpServerStripPrefixIntegration:
    """Checks on strip_local_prefix as used for library IDs in the MCP server."""

    def test_resolve_library_id_calls_strip_prefix(self):
        """strip_local_prefix is callable and removes a leading "/local/".

        Only strip_local_prefix is exercised here; resolve_library_id itself
        is not called by this test.
        """
        from mcp_server.server import strip_local_prefix

        # The helper must exist as a callable before it is applied to samples.
        assert callable(strip_local_prefix)

        prefixed_ids = (
            "/local/foundryvtt",
            "/local/pytest",
            "/local/mydocs/reference",
        )
        for prefixed in prefixed_ids:
            assert not strip_local_prefix(prefixed).startswith("/local/")
|
||||
|
||||
|
||||
class TestMcpServerPrefixHandlingVariations:
    """Further strip_local_prefix cases covering different library-ID shapes."""

    def test_long_library_id(self):
        """A long ID keeps everything that follows the "/local/" prefix."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/very-long-library-id-with-many-chars-in-name")

        assert stripped == "very-long-library-id-with-many-chars-in-name"

    def test_special_characters_in_id(self):
        """Underscores, dashes and digits in the ID are left untouched."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/my-doc_v2-3_test")

        assert stripped == "my-doc_v2-3_test"

    def test_unicode_in_stripped_name(self):
        """Non-ASCII characters after the prefix are preserved."""
        from mcp_server.server import strip_local_prefix

        # Chinese characters stand in for any unicode library name.
        stripped = strip_local_prefix("/local/世界文档")

        assert stripped == "世界文档"

    def test_mixed_case_stripped_name(self):
        """Letter case of the remaining name is not altered."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/FoundryVTT")

        assert stripped == "FoundryVTT"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def sample_library_ids():
    """Provide "/local/"-prefixed library IDs for prefix-stripping tests."""
    prefixed_ids = [
        "/local/foundryvtt",
        "/local/pytest",
        "/local/mydocs/reference/guide.md",
        "/local/my-app",
        "/local/documentation/tutorial/getting-started",
    ]
    return prefixed_ids
|
||||
|
||||
|
||||
@pytest.fixture
def expected_stripped_ids(sample_library_ids):
    """Provide the hard-coded stripped form of each sample library ID.

    The ``sample_library_ids`` fixture is requested but its value is not read;
    the returned list is a literal written in the same order.
    """
    stripped_ids = [
        "foundryvtt",
        "pytest",
        "mydocs/reference/guide.md",
        "my-app",
        "documentation/tutorial/getting-started",
    ]
    return stripped_ids
|
||||
@@ -0,0 +1,368 @@
|
||||
"""
|
||||
Tests for backend/app/search.py
|
||||
|
||||
These tests verify search functionality without requiring:
|
||||
- A running Qdrant vector database (mocked)
|
||||
- Loaded embedding models (mocked)
|
||||
|
||||
The tests focus on:
|
||||
- Response shape validation
|
||||
- Library filtering
|
||||
- Error handling
|
||||
- Async function behavior
|
||||
"""
|
||||
import pytest
|
||||
|
||||
|
||||
class TestResolveLibraryId:
    """Exercise resolve_library_id() (Context7-style resolution) after upserts."""

    def test_returns_candidates_list(self, test_database):
        """A query issued after registering three libraries yields a list."""
        from backend.app.search import resolve_library_id
        from backend.app.db import upsert_library

        # Register a few libraries so the lookup has rows to consider.
        for index in range(3):
            upsert_library(
                library_id=f"/local/searchtest{index}",
                name=f"Search Test Library {index}",
                description=f"Description for search test {index}",
            )

        assert isinstance(resolve_library_id("search"), list)

    def test_captures_matching_names(self, test_database):
        """Querying a word present in a library name still returns a list."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        # "Search Library" contains the query term used below.
        upsert_library(
            library_id="/local/searchlib",
            name="Search Library",
            description="Main search documentation",
        )

        assert isinstance(resolve_library_id("search"), list)

    def test_context7_style_prefix(self, test_database):
        """Every returned candidate reports ``source`` equal to "local".

        The library is stored without a "/local/" prefix; only the ``source``
        field of each candidate is asserted here.
        """
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        upsert_library(
            library_id="foundryvtt",  # stored without /local/
            name="Foundry VTT",
            description="Fantasy tabletop virtual table",
        )

        for entry in resolve_library_id("foundry"):
            assert entry.get("source") == "local"

    def test_partial_name_match(self, test_database):
        """A query that is only part of the library ID returns a list."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        upsert_library(
            library_id="/local/gamefoundry",
            name="Foundry Game Module",
            description="Module for foundry games",
        )

        assert isinstance(resolve_library_id("game"), list)

    def test_empty_result_on_no_matches(self, test_database):
        """A query nothing is expected to match still returns a list."""
        from backend.app.search import resolve_library_id

        # No library is registered under this made-up term.
        unmatched = resolve_library_id("xyznonexistent123")

        assert isinstance(unmatched, list)
|
||||
|
||||
|
||||
class TestSearchDocs:
    """Tests for search_docs() - semantic search with a mocked vector store."""

    def test_returns_results_list(self, mock_qdrant_client, test_database):
        """search_docs should return a list after chunks have been stored."""
        from backend.app.search import search_docs
        from backend.app.db import upsert_library, insert_document_chunk

        # Store one library with five chunks so there is content to search.
        upsert_library(library_id="/local/searchdocslib", name="Search Docs Lib", description="Test")
        for i in range(5):
            insert_document_chunk(
                doc_id=f"searchdoc-{i}",
                library_id="/local/searchdocslib",
                path=f"path{i}.md",
                title=f"Section {i}",
                content=f"# Section {i}\n\nContent about section {i} that matches search queries.",
                chunk_index=i,
                token_estimate=100,
            )

        results = search_docs("section")

        assert isinstance(results, list)

    def test_empty_query_returns_empty_list(self):
        """An empty query should still produce a list."""
        from backend.app.search import search_docs

        results = search_docs("")

        assert isinstance(results, list)

    def test_limit_parameter(self, mock_qdrant_client):
        """search_docs should accept ``limit`` and return a list for each value."""
        from backend.app.search import search_docs

        results_10 = search_docs("test", limit=10)
        results_5 = search_docs("test", limit=5)

        assert isinstance(results_10, list)
        assert isinstance(results_5, list)

    def test_response_shape_matches_spec(self):
        """Every returned result should carry the expected fields.

        ``backend.app.vector_store.get_client`` is patched so that the
        client's ``search`` returns a single fake scored point. If
        search_docs returns no results the field checks are simply not run.
        """
        from unittest.mock import patch
        from backend.app.search import search_docs

        required_fields = ("id", "score", "library_id", "path", "title", "chunk_index")

        # Minimal stand-in for a scored point: a ``score`` plus a ``payload``.
        mock_point = type('ScoredPoint', (), {
            'score': 0.95,
            'payload': {
                "id": "test-id-1",
                "library_id": "/local/testlib",
                "path": "docs/example.md",
                "title": "Example Document",
                "chunk_index": 0,
            },
        })()

        with patch('backend.app.vector_store.get_client') as mock_get_client:
            mock_client = mock_get_client.return_value
            mock_client.search.return_value = [mock_point]

            results = search_docs("test query")

        assert isinstance(results, list)
        # Check every result, not just the first, for the required fields.
        for result in results:
            missing = [field for field in required_fields if field not in result]
            assert not missing, f"result is missing fields: {missing}"
|
||||
|
||||
|
||||
class TestGetLibraryDocs:
    """Exercise get_library_docs() document retrieval; each test checks for a str."""

    def test_returns_empty_string_when_no_documents(self, mock_qdrant_client):
        """A library ID with no stored documents still yields a string."""
        from backend.app.search import get_library_docs

        # Whether this is an empty string or an error message is not pinned down.
        assert isinstance(get_library_docs("/local/nonexistent"), str)

    def test_returns_content_when_documents_exist(self, mock_qdrant_client):
        """A library holding one stored chunk yields a string."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        # One library containing a single chunk.
        upsert_library(library_id="/local/docretrievetest", name="Doc Retrieve", description="Test")
        chunk_fields = {
            "doc_id": "doc-retrieve-1",
            "library_id": "/local/docretrievetest",
            "path": "docs/getting-started.md",
            "title": "Getting Started",
            "content": "# Getting Started\n\nWelcome to the documentation. This is a test document.",
            "chunk_index": 0,
            "token_estimate": 200,
        }
        insert_document_chunk(**chunk_fields)

        docs_text = get_library_docs("/local/docretrievetest")

        # Only the return type is asserted; the content itself is not inspected.
        assert isinstance(docs_text, str)

    def test_topic_filter_searches(self, mock_qdrant_client):
        """Passing ``topic`` for a library with two chunks yields a string."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        upsert_library(library_id="/local/topicsearchlib", name="Topic Search", description="Test")

        # Two chunks on different topics, stored in this order.
        topic_chunks = (
            {
                "doc_id": "topic-install",
                "library_id": "/local/topicsearchlib",
                "path": "docs/install.md",
                "title": "Installation Guide",
                "content": "# Installation\n\nInstall with pip install mypackage.",
                "chunk_index": 0,
                "token_estimate": 150,
            },
            {
                "doc_id": "topic-usage",
                "library_id": "/local/topicsearchlib",
                "path": "docs/usage.md",
                "title": "Usage Guide",
                "content": "# Usage\n\nUse mycommand --help for help.",
                "chunk_index": 0,
                "token_estimate": 150,
            },
        )
        for chunk_fields in topic_chunks:
            insert_document_chunk(**chunk_fields)

        # Ask for the "install" topic only.
        docs_text = get_library_docs("/local/topicsearchlib", topic="install")

        assert isinstance(docs_text, str)

    def test_token_limit_respected(self):
        """Passing a small ``token_limit`` for a long chunk yields a string."""
        from backend.app.search import get_library_docs
        from backend.app.db import upsert_library, insert_document_chunk

        upsert_library(library_id="/local/tokenlimittest", name="Token Limit", description="Test")

        # A heading followed by 500 repetitions of "word".
        filler_words = " ".join(["word"] * 500)
        insert_document_chunk(
            doc_id="long-doc",
            library_id="/local/tokenlimittest",
            path="docs/long.md",
            title="Long Document",
            content="# Long Content\n\n" + filler_words,
            chunk_index=0,
            token_estimate=2000,
        )

        # Request far fewer tokens than the chunk's estimate.
        docs_text = get_library_docs("/local/tokenlimittest", token_limit=100)

        assert isinstance(docs_text, str)
|
||||
|
||||
|
||||
class TestGetLibraryDocsWithMock:
    """Content-retrieval checks run with the mock_qdrant_client fixture active."""

    def test_retrieves_chunks_by_library_id(self, mock_qdrant_client):
        """get_library_docs without a topic yields a string for a three-chunk library."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        target_library = "/local/mockretrievetest"
        upsert_library(library_id=target_library, name="Mock Retrieve", description="Test")

        # Three small chunks, indexed 0..2.
        for index in range(3):
            insert_document_chunk(
                doc_id=f"mock-retrieve-{index}",
                library_id=target_library,
                path=f"path{index}.md",
                title=f"Path {index}",
                content=f"Content for path {index}.",
                chunk_index=index,
                token_estimate=50,
            )

        assert isinstance(get_library_docs(target_library), str)
|
||||
|
||||
|
||||
class TestSearchErrorHandling:
    """Tests for error handling in search functions."""

    def test_search_handles_missing_library(self):
        """search_docs should return a list for a library ID that is not stored."""
        from backend.app.search import search_docs

        results = search_docs("test", library_id="/local/missing_lib_xyz123")

        assert isinstance(results, list)

    @pytest.mark.usefixtures("test_database")
    def test_resolve_handles_no_libraries_in_db(self):
        """resolve_library_id should return a list when no library is added.

        The ``test_database`` fixture performs the database initialization;
        this test registers no libraries of its own before resolving.
        NOTE(review): presumably the fixture leaves the library table empty -
        confirm against the fixture and init_db.
        """
        from backend.app.search import resolve_library_id

        candidates = resolve_library_id("xyznonexistent123")

        assert isinstance(candidates, list)

    def test_get_library_docs_handles_empty_library(self):
        """get_library_docs should return a string for a library with no chunks."""
        from backend.app.search import get_library_docs

        result = get_library_docs("/local/emptylib")

        assert isinstance(result, str)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# FIXTURES FOR SEARCH TESTS
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def search_sample_text():
    """Provide a Markdown sample with headings, lists and fenced code blocks."""
    markdown_lines = [
        "# Installation Guide",
        "",
        "To install the package:",
        "```bash",
        "pip install mypackage",
        "```",
        "",
        "## Configuration",
        "",
        "Configure your environment by setting these variables:",
        "- MY_VAR=123",
        "- DEBUG=true",
        "",
        "## Usage Examples",
        "",
        "Example 1: Basic usage",
        "```python",
        "import mymodule",
        "module = mymodule.Module()",
        "result = module.run()",
        "print(result)",
        "```",
        "",
        "Example 2: Advanced usage with options",
        "```python",
        'options = {"verbose": True, "output": "stdout"}',
        "result = module.run(options=options)",
        "```",
        "",
        "## Troubleshooting",
        "",
        "Common issues and their solutions:",
        "- ImportError: Ensure package is installed",
        "- AttributeError: Check that attributes exist on object",
    ]
    # Joined without a trailing newline, matching a triple-quoted literal
    # that closes on its last content line.
    return "\n".join(markdown_lines)
|
||||
Reference in New Issue
Block a user