Initial DocsMCP stack

This commit is contained in:
george
2026-06-05 23:02:55 +01:00
commit 421b6f973a
51 changed files with 7414 additions and 0 deletions
+2
View File
@@ -0,0 +1,2 @@
# Tests package for local-context7
# Contains unit tests for chunking, database operations, search, and MCP server modules
+191
View File
@@ -0,0 +1,191 @@
"""
Pytest configuration and fixtures for local-context7 tests.
This module provides:
- Mocks for external dependencies (Qdrant, FastEmbed)
- Database fixtures for SQLite operations
- Common test utilities
"""
from unittest.mock import MagicMock, patch
import pytest
import os
import json
from pathlib import Path
from backend.app.db import init_db, upsert_library, insert_document_chunk, get_chunks_for_library, list_libraries, clear_library_documents, get_connection
# =============================================================================
# FIXTURES
# =============================================================================
@pytest.fixture(scope="function")
def test_database():
    """
    Prepare an initialised SQLite database for a single test.

    The fixture removes any leftover ``backend/data/test_db.sqlite`` file,
    calls ``init_db()`` and asserts that it reports success, then hands
    control to the test. The same file is removed again during teardown.

    Yields:
        None. Tests obtain connections through the db module themselves.
    """
    sqlite_file = Path(__file__).parent.parent / "backend" / "data" / "test_db.sqlite"

    def _discard_db_file():
        # Delete the test database file if one is present on disk.
        if sqlite_file.exists():
            sqlite_file.unlink()

    # The data directory must exist before anything can be written into it.
    sqlite_file.parent.mkdir(parents=True, exist_ok=True)
    _discard_db_file()

    # NOTE(review): init_db() is called without a path, so it presumably
    # resolves its own database location -- confirm that it matches
    # ``sqlite_file``, otherwise the cleanup here does not isolate tests.
    outcome = init_db()
    assert outcome["success"], f"Failed to initialize test DB: {outcome.get('error')}"

    yield

    # Teardown: drop the database file so the next test starts clean.
    _discard_db_file()
@pytest.fixture(scope="function")
def sample_text():
    """Return a small markdown document with nested headings for chunking tests."""
    markdown_lines = (
        "# Introduction",
        "This is the introduction section.",
        "## Background",
        "Background information goes here to make this longer and test chunking.",
        "This paragraph has more content about the background topic.",
        "### Details",
        "Specific details about the background are provided in this subsection.",
        "More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    )
    # Joined without a trailing newline, matching a triple-quoted literal.
    return "\n".join(markdown_lines)
# =============================================================================
# MOCKS
# =============================================================================
@pytest.fixture
def mock_embedding_model():
    """
    Provide a stand-in embedding model whose ``embed`` yields zero vectors.

    No real model is downloaded or loaded. For every input text the stub
    produces one 384-element list of ``0.0``.
    """
    vector_size = 384

    def zero_vectors(texts):
        # One independent zero vector per input text.
        return [[0.0] * vector_size for _ in texts]

    stub_model = MagicMock()
    stub_model.embed = zero_vectors
    return stub_model
@pytest.fixture
def mock_qdrant_client():
    """
    Provide a stand-in Qdrant client for search tests.

    ``search`` accepts the same arguments as the stubbed call and always
    returns an empty list; ``delete_collection`` is a mock returning ``True``.
    No Qdrant server is contacted.
    """
    def no_hits(collection_name, query_vector, limit=10, search_filter=None):
        # Simulates a query that matched nothing.
        return []

    stub_client = MagicMock()
    stub_client.search = no_hits
    # Cleanup helper used by tests; always reports success.
    stub_client.delete_collection = MagicMock(return_value=True)
    return stub_client
@pytest.fixture
def mock_embedding_model_batch():
    """
    Provide a mock embedding model that returns deterministic fake vectors.

    Each input text maps to a 384-element vector derived from a SHA-256
    digest of the text, so equal texts always give equal vectors (also
    across interpreter runs) and different texts give different vectors.
    Element ``i`` of a vector is ``base + i * 0.001`` where ``base`` lies in
    ``[0, 1)``.

    Returns:
        A ``MagicMock`` whose ``embed(texts)`` returns one vector per text.
    """
    import hashlib

    vector_size = 384

    def hash_text(text):
        # A stable digest is used instead of the builtin hash(), which is
        # salted per process and would make the vectors non-reproducible.
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        text_hash = int.from_bytes(digest[:8], "big") % 1000000
        # Use the computed hash value; calling hash_text() again here would
        # recurse without end.
        base = text_hash / 1000000
        return [base + (i * 0.001) for i in range(vector_size)]

    mock_model = MagicMock()
    mock_model.embed = lambda texts: [hash_text(t) for t in texts]
    return mock_model
# =============================================================================
# SETUP TEARDOWN FIXTURES
# =============================================================================
@pytest.fixture(autouse=True)
def clear_test_database(test_database):
    """
    Request ``test_database`` automatically for every test.

    This fixture does no work of its own: because it is autouse and depends
    on ``test_database``, that fixture's setup and teardown run around each
    test function, which is where the database file is created and removed.
    """
    return None
@pytest.fixture
def empty_vector():
    """Return a 384-element list of ``0.0`` to use as a dummy embedding."""
    return [0.0 for _ in range(384)]
@pytest.fixture
def fake_embeddings(sample_text):
    """
    Build one fake 384-element vector per non-blank paragraph of ``sample_text``.

    Paragraphs are the pieces obtained by splitting on blank lines
    (``"\\n\\n"``). Every element of a paragraph's vector holds the same
    value, derived from the builtin ``hash`` of the paragraph plus its length.
    """
    vectors = []
    for paragraph in sample_text.split("\n\n"):
        if not paragraph.strip():
            # Whitespace-only pieces get no vector.
            continue
        component = (hash(paragraph) + len(paragraph)) % 1000 / 10000
        vectors.append([component] * 384)
    return vectors
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
@pytest.fixture
def temp_file(tmp_path):
    """
    Return the path ``test.txt`` inside pytest's ``tmp_path`` directory.

    Only the path is built; the file itself is not created here.
    """
    return tmp_path.joinpath("test.txt")
# Register custom marker for slow tests (if needed)
def pytest_configure(config):
    """Register the custom ``slow`` marker on the given pytest config object."""
    slow_marker = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    config.addinivalue_line("markers", slow_marker)
def pytest_runtest_setup(item):
    """Per-test setup hook; currently a placeholder that does nothing."""
    return None
+238
View File
@@ -0,0 +1,238 @@
"""
Tests for backend/app/chunking.py
These are pure unit tests that don't require any external dependencies.
They test text chunking logic, token estimation, and heading-aware splitting.
"""
import pytest
class TestEstimateTokens:
    """Checks estimate_tokens() against the values these tests expect."""

    def test_empty_text(self):
        """An empty string is expected to yield 0 tokens."""
        from backend.app.chunking import estimate_tokens

        tokens = estimate_tokens("")
        assert tokens == 0

    def test_single_char(self):
        """A single character is expected to yield 0 tokens."""
        from backend.app.chunking import estimate_tokens

        # Fewer than four characters floors to zero (1 // 4 == 0).
        tokens = estimate_tokens("a")
        assert tokens == 0

    def test_4_chars(self):
        """Four characters are expected to yield exactly 1 token."""
        from backend.app.chunking import estimate_tokens

        tokens = estimate_tokens("abcd")
        assert tokens == 1

    def test_400_chars(self):
        """Four hundred characters are expected to yield 100 tokens."""
        from backend.app.chunking import estimate_tokens

        payload = "a" * 400
        tokens = estimate_tokens(payload)
        assert tokens == 100

    def test_whitespace_only(self):
        """A short whitespace-only string is expected to yield 0 tokens."""
        from backend.app.chunking import estimate_tokens

        # Still below the four-character threshold, so the estimate is zero.
        tokens = estimate_tokens(" ")
        assert tokens == 0
class TestChunkText:
    """Checks chunk_text() for sizing, content preservation and split points."""

    def test_empty_input(self, sample_text):
        """An empty string is expected to produce an empty list."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text("")
        assert pieces == []

    def test_small_text_single_chunk(self, sample_text):
        """Text well under the limit is expected to come back as one chunk."""
        from backend.app.chunking import chunk_text

        short_text = "This is a very short text that should be returned as a single chunk."
        pieces = chunk_text(short_text, max_tokens=500)

        assert len(pieces) == 1
        assert pieces[0] == short_text

    def test_exact_token_limit(self, sample_text):
        """Text sized exactly at the limit is expected to stay in one chunk."""
        from backend.app.chunking import chunk_text, estimate_tokens

        # 2000 characters corresponds to the 500-token limit used below.
        at_limit = "a" * 2000
        pieces = chunk_text(at_limit, max_tokens=500)

        assert len(pieces) == 1
        assert estimate_tokens(pieces[0]) == 500

    def test_over_limit_splits(self, sample_text):
        """Text far over the limit is expected to be split."""
        from backend.app.chunking import chunk_text, estimate_tokens

        # 10000 characters is five times the 500-token limit.
        oversized = "b" * 10000
        pieces = chunk_text(oversized, max_tokens=500)

        assert len(pieces) >= 2

    def test_preserves_content(self, sample_text):
        """A short sentence is expected to survive chunking unchanged."""
        from backend.app.chunking import chunk_text

        sentence = "Hello world! This is a test of chunking functionality."
        pieces = chunk_text(sentence, max_tokens=100)
        rejoined = "".join(pieces)

        assert len(pieces) == 1
        assert rejoined == sentence

    def test_headings_split(self, sample_text):
        """With a tiny limit, at least one chunk is expected to start with '#'."""
        from backend.app.chunking import chunk_text

        markdown_with_headings = "\n".join(
            [
                "# Introduction",
                "This is the introduction section.",
                "## Background",
                "Background information goes here.",
            ]
        )
        pieces = chunk_text(markdown_with_headings, max_tokens=20)

        assert any(piece.strip().startswith('#') for piece in pieces)

    def test_paragraph_split(self):
        """Three paragraphs under a small limit are expected to give >= 3 chunks."""
        from backend.app.chunking import chunk_text

        three_paragraphs = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
        pieces = chunk_text(three_paragraphs, max_tokens=15)

        assert len(pieces) >= 3

    def test_no_empty_chunks(self):
        """No returned chunk may be empty or whitespace-only."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text("Hello world", max_tokens=10)

        assert all(piece.strip() != "" for piece in pieces)
class TestTokenEstimationBoundaries:
    """Checks estimate_tokens() around multiples of four characters."""

    def test_boundary_precision(self):
        """Lengths 4-7 are expected to give 1 token and length 8 to give 2."""
        from backend.app.chunking import estimate_tokens

        expectations = [
            ("abcd", 1),      # exactly 4 chars
            ("abcde", 1),     # 5 chars
            ("abcdef", 1),    # 6 chars
            ("abcdefg", 1),   # 7 chars
            ("abcdefgh", 2),  # 8 chars
        ]
        for sample, expected_tokens in expectations:
            assert estimate_tokens(sample) == expected_tokens

    def test_various_languages_chars(self):
        """Non-ASCII text is expected to be counted per character."""
        from backend.app.chunking import estimate_tokens

        # Four Chinese characters are expected to count as one token.
        four_cjk_chars = "你好世界"
        assert estimate_tokens(four_cjk_chars) == 1

        # For text containing an emoji only the return type is checked.
        with_emoji = "Hello 🎉 world"
        emoji_tokens = estimate_tokens(with_emoji)
        assert isinstance(emoji_tokens, int)
class TestChunkOverlapBehavior:
    """Checks that overlapping chunks do not duplicate most of the text."""

    def test_overlap_not_exceeded(self):
        """When the text is split, the first chunk must be at most half the total."""
        from backend.app.chunking import chunk_text

        repeated = "The quick brown fox jumps over the lazy dog. " * 10
        pieces = chunk_text(repeated, max_tokens=30, overlap_tokens=5)

        if len(pieces) <= 1:
            # Nothing to compare when no split happened.
            return

        # Rough bound: the first chunk may not exceed half of the combined length.
        combined_length = len("".join(pieces))
        assert len(pieces[0]) <= combined_length // 2
class TestChunkEdgeCases:
    """Checks chunk_text() on unusual inputs."""

    def test_whitespace_only_text(self):
        """Whitespace-only input must not crash and must return a list."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text(" \n\n ", max_tokens=100)

        # The content of the list is not constrained, only its type.
        assert isinstance(pieces, list)

    def test_very_long_paragraph(self):
        """A long paragraph with no blank lines is expected to be split."""
        from backend.app.chunking import chunk_text

        unbroken = "The quick brown fox jumps over the lazy dog. " * 100
        pieces = chunk_text(unbroken, max_tokens=50)

        assert len(pieces) > 1

    def test_none_input_raises(self):
        """Passing None is expected to raise TypeError or AssertionError."""
        from backend.app.chunking import chunk_text

        rejected = (TypeError, AssertionError)
        with pytest.raises(rejected):
            chunk_text(None, max_tokens=100)

    def test_unicode_text(self):
        """A short mixed-script string is expected to stay in one chunk."""
        from backend.app.chunking import chunk_text

        mixed_scripts = "Hello 世界 مرحبا 🎉"
        pieces = chunk_text(mixed_scripts, max_tokens=50)

        assert len(pieces) == 1
# =============================================================================
# SAMPLE TEXT FIXTURE
# =============================================================================
@pytest.fixture
def heading_markdown():
    """Return a markdown sample with three heading levels for chunking tests."""
    markdown_lines = (
        "# Introduction",
        "This is the introduction section. It contains some introductory text here.",
        "## Background",
        "Background information goes here to make this longer and test chunking. This paragraph has more content about the background topic. It provides context.",
        "### Details",
        "Specific details about the background are provided in this subsection. More details follow here to ensure we have enough text to properly test heading preservation.",
        "## Conclusion",
        "The conclusion wraps up everything nicely.",
    )
    # Joined without a trailing newline, matching a triple-quoted literal.
    return "\n".join(markdown_lines)
class TestHeadingPreservation:
    """Checks heading-aware chunking against the heading_markdown sample."""

    def test_headings_in_separate_chunks(self, heading_markdown):
        """With a small limit, at least one chunk is expected to start with '#'."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text(heading_markdown, max_tokens=30)

        assert any(piece.strip().startswith('#') for piece in pieces)

    def test_all_content_present(self, heading_markdown):
        """The main section titles must still appear in the joined chunks."""
        from backend.app.chunking import chunk_text

        pieces = chunk_text(heading_markdown, max_tokens=500)
        rejoined = "".join(pieces)

        # Spot-check that no section was truncated away.
        for section_title in ("Introduction", "Background", "Conclusion"):
            assert section_title in rejoined
+316
View File
@@ -0,0 +1,316 @@
"""
Tests for backend/app/db.py
These tests verify SQLite database operations including:
- Table creation (init_db)
- Library CRUD operations
- Document chunk storage and retrieval
- Full-text search functionality
All tests use a temporary test database file.
"""
import pytest
from datetime import datetime
class TestInitDatabase:
    """Tests for init_db() - table creation."""

    def test_init_db_creates_tables(self, test_database):
        """A table whose name contains 'libraries' must exist after init."""
        from backend.app.db import get_connection

        conn = get_connection()
        try:
            cursor = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
            )
            tables = [row[0] for row in cursor.fetchall()]
        finally:
            # Close even if the query fails so the connection is never
            # left open after this test.
            conn.close()

        # Case-insensitive substring match also covers an exact "libraries" name.
        assert any("libraries" in t.lower() for t in tables), (
            f"no libraries table found; tables present: {tables}"
        )

    def test_init_db_returns_success(self, test_database):
        """Calling init_db() again must report success."""
        from backend.app.db import init_db

        result = init_db()
        assert result["success"] is True
class TestLibraryOperations:
    """Checks creating, updating, listing and searching libraries."""

    def test_upsert_library_new(self, test_database):
        """Upserting an unseen ID must succeed and echo the ID back."""
        from backend.app.db import upsert_library

        outcome = upsert_library(
            library_id="/local/testlib",
            name="Test Library",
            description="A test library for unit tests",
        )

        assert outcome["success"] is True
        assert outcome["id"] == "/local/testlib"

    def test_upsert_library_update(self, test_database):
        """Upserting the same ID a second time must also succeed."""
        from backend.app.db import upsert_library

        # First write creates the record.
        upsert_library(
            library_id="/local/upsertlib",
            name="Original Name",
            description="Original description",
        )
        # Second write targets the same ID with new values.
        outcome = upsert_library(
            library_id="/local/upsertlib",
            name="Updated Name",
            description="Updated description",
        )

        assert outcome["success"] is True

    def test_upsert_library_id_normalization(self, test_database):
        """IDs with and without the /local/ prefix must both be accepted."""
        from backend.app.db import upsert_library

        candidate_ids = ("/local/foundryvtt", "foundryvtt", "/local/mydocs")
        for candidate in candidate_ids:
            outcome = upsert_library(library_id=candidate, name="Test", description="Desc")
            assert outcome["success"] is True

        # Only checks that the retrieval helper is importable; it is not called.
        from backend.app.db import get_chunks_for_library

    def test_list_libraries(self, test_database):
        """list_libraries() must return a list holding at least the new entries."""
        from backend.app.db import upsert_library, list_libraries

        for index in range(3):
            upsert_library(
                library_id=f"/local/lib{index}",
                name=f"Library {index}",
                description=f"Description {index}",
            )

        listed = list_libraries()

        assert isinstance(listed, list)
        assert len(listed) >= 3

    def test_search_libraries(self, test_database):
        """search_libraries() must return a list for a simple query."""
        from backend.app.db import upsert_library, search_libraries

        upsert_library(library_id="/local/foo1", name="Foo Library", description="Bar baz")
        upsert_library(library_id="/local/foo2", name="Other Library", description="Different content")

        matches = search_libraries("foo")

        # Only the return type is verified, not which libraries matched.
        assert isinstance(matches, list)
class TestDocumentChunkOperations:
    """Checks storing, fetching and clearing document chunks."""

    def test_insert_document_chunk_new(self, test_database):
        """Inserting a chunk with a new doc_id must report success."""
        from backend.app.db import insert_document_chunk

        outcome = insert_document_chunk(
            doc_id="doc-1",
            library_id="/local/testlib",
            path="docs/example.md",
            title="Example Document",
            content="# Example\n\nThis is the content.",
            chunk_index=0,
            token_estimate=100,
        )

        assert outcome["success"] is True

    def test_insert_document_chunk_update(self, test_database):
        """Inserting twice under the same doc_id must report success."""
        from backend.app.db import insert_document_chunk

        shared_id = "doc-update-test"
        shared_library = "/local/uplib"

        # Initial version of the record.
        insert_document_chunk(
            doc_id=shared_id,
            library_id=shared_library,
            path="old-path.md",
            title="Old Title",
            content="# Old\nContent here.",
            chunk_index=0,
            token_estimate=50,
        )
        # Same doc_id again with different field values.
        outcome = insert_document_chunk(
            doc_id=shared_id,
            library_id=shared_library,
            path="new-path.md",
            title="New Title",
            content="# New\nUpdated content.",
            chunk_index=1,
            token_estimate=75,
        )

        assert outcome["success"] is True

    def test_get_document_by_id(self, test_database):
        """A stored document must be retrievable through its ID."""
        from backend.app.db import insert_document_chunk, get_document_by_id

        stored_id = "unique-doc-id-12345"
        insert_document_chunk(
            doc_id=stored_id,
            library_id="/local/testlib",
            path="docs/test.md",
            title="Test Document",
            content="# Test\n\nTest content here.",
            chunk_index=None,
            token_estimate=200,
        )

        fetched = get_document_by_id(stored_id)

        assert fetched is not None
        assert fetched["id"] == stored_id

    def test_get_chunks_for_library(self, test_database):
        """All chunks stored for a library must be returned as a list."""
        from backend.app.db import upsert_library, insert_document_chunk, get_chunks_for_library

        target_library = "/local/chunktest"
        upsert_library(library_id=target_library, name="Chunk Test", description="Test")

        for index in range(3):
            insert_document_chunk(
                doc_id=f"chunk-{index}",
                library_id=target_library,
                path=f"path{index}.md",
                title=f"Section {index}",
                content=f"Content section {index}.",
                chunk_index=index,
                token_estimate=50,
            )

        stored = get_chunks_for_library(target_library)

        assert isinstance(stored, list)
        assert len(stored) >= 3

    def test_clear_library_documents(self, test_database):
        """Clearing a library must leave it with zero chunks."""
        from backend.app.db import upsert_library, insert_document_chunk, clear_library_documents, get_chunks_for_library

        target_library = "/local/cleartest"
        upsert_library(library_id=target_library, name="Clear Test", description="Test")

        # Populate with five chunks; title and chunk_index are left at defaults.
        for index in range(5):
            insert_document_chunk(
                doc_id=f"clear-{index}",
                library_id=target_library,
                path=f"path{index}.md",
                content=f"Content {index}.",
                token_estimate=20,
            )

        outcome = clear_library_documents(target_library)
        assert outcome["success"] is True

        leftovers = get_chunks_for_library(target_library)
        assert len(leftovers) == 0
class TestDatabaseEdgeCases:
    """Checks database helpers on unusual inputs."""

    def test_empty_library_id(self, test_database):
        """An empty library ID must not raise; the result is not inspected."""
        from backend.app.db import upsert_library

        # Passing means only that the call completed without an exception.
        result = upsert_library(library_id="", name="Test", description="Desc")

    def test_special_characters_in_content(self, test_database):
        """Content with quotes, angle brackets and ampersands must be stored."""
        from backend.app.db import insert_document_chunk

        tricky_content = "Hello \"world\" <tag /> & amp; 'apostrophe'"
        outcome = insert_document_chunk(
            doc_id="special-test",
            library_id="/local/speciallib",
            path="special.md",
            content=tricky_content,
            token_estimate=100,
        )

        assert outcome["success"] is True

    def test_very_long_content(self, test_database):
        """A 5000-character body must be stored."""
        from backend.app.db import insert_document_chunk

        big_body = "a" * 5000
        outcome = insert_document_chunk(
            doc_id="long-test",
            library_id="/local/longlib",
            path="long.md",
            content=big_body,
            token_estimate=1000,
        )

        assert outcome["success"] is True

    def test_none_description(self, test_database):
        """A library whose description is None must be accepted."""
        from backend.app.db import upsert_library

        outcome = upsert_library(
            library_id="/local/nonedesc",
            name="No Description Lib",
            description=None,
        )

        assert outcome["success"] is True
class TestDatabaseInitialization:
    """Checks the state reported by the database right after setup."""

    def test_database_is_empty_after_init(self, test_database):
        """list_libraries() must return a list on a freshly prepared database."""
        from backend.app.db import list_libraries

        listed = list_libraries()

        # Only the return type is asserted; emptiness is not checked here.
        assert isinstance(listed, list)
# =============================================================================
# FIXTURES
# =============================================================================
@pytest.fixture
def sample_doc():
    """Return keyword arguments describing one sample document chunk."""
    body = (
        "# Getting Started\n\n"
        "Welcome to the guide. This is a sample document for testing.\n\n"
        "## Installation\n\n"
        "Install with pip."
    )
    return dict(
        doc_id="sample-doc-1",
        library_id="/local/samplelib",
        path="docs/guide.md",
        title="Getting Started Guide",
        content=body,
        chunk_index=0,
        token_estimate=500,
    )
+262
View File
@@ -0,0 +1,262 @@
"""
Tests for mcp-server/server.py
These are pure unit tests that don't require any external dependencies.
They test:
- The strip_local_prefix() function directly (no network)
- MCP server tool definitions and structure
"""
import pytest
class TestStripLocalPrefix:
    """Checks strip_local_prefix() on prefixed, unprefixed and odd inputs."""

    def test_strips_prefix_from_full_id(self):
        """A leading /local/ must be removed from a full library ID."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/foundryvtt")
        assert stripped == "foundryvtt"

    def test_preserves_id_without_prefix(self):
        """An ID without the prefix must come back unchanged."""
        from mcp_server.server import strip_local_prefix

        bare_id = "foundryvtt"
        assert strip_local_prefix(bare_id) == bare_id

    def test_strips_from_multiple_local_prefixes(self):
        """Only the first of two stacked prefixes must be removed."""
        from mcp_server.server import strip_local_prefix

        doubled = "/local//local/foundryvtt"
        stripped = strip_local_prefix(doubled)

        # The second prefix survives.
        assert stripped == "/local/foundryvtt"

    def test_empty_string(self):
        """An empty string must come back unchanged."""
        from mcp_server.server import strip_local_prefix

        blank = ""
        assert strip_local_prefix(blank) == blank

    def test_whitespace_only(self):
        """Whitespace-only input has no prefix and must come back unchanged."""
        from mcp_server.server import strip_local_prefix

        whitespace = " \t\n"
        assert strip_local_prefix(whitespace) == whitespace

    def test_case_sensitive_prefix(self):
        """The prefix match must be case-sensitive."""
        from mcp_server.server import strip_local_prefix

        # Lower-case prefix is recognised and removed.
        lower_result = strip_local_prefix("/local/test")
        assert lower_result == "test"

        # Upper-case variant is not a match and stays intact.
        upper_result = strip_local_prefix("/LOCAL/test")
        assert upper_result == "/LOCAL/test"

    def test_partial_match_does_not_strip(self):
        """Near-miss prefixes must be left alone."""
        from mcp_server.server import strip_local_prefix

        # "/local-docs/..." only shares the first characters with the prefix.
        hyphenated = "/local-docs/test"
        assert strip_local_prefix(hyphenated) == hyphenated

        # "/localdocs/..." lacks the separating slash.
        run_together = "/localdocs/test"
        assert strip_local_prefix(run_together) == run_together

    def test_prefix_with_trailing_slash(self):
        """A trailing slash on the ID must be preserved after stripping."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/foundryvtt/")
        assert stripped == "foundryvtt/"
class TestMcpServerStructure:
    """Checks prerequisites of the MCP server without starting it."""

    def test_import_fastmcp(self):
        """FastMCP must be importable; the test is skipped when it is not installed."""
        try:
            from fastmcp import FastMCP
        except ImportError as import_error:
            reason = f"fastmcp not installed: {import_error}"
            pytest.skip(reason)
class TestMcpServerToolsExistence:
    """Tests to verify MCP server has expected tools defined."""

    @staticmethod
    def _registered_tool_names():
        """
        Return the names of the tools found on the server's ``mcp`` object.

        The names are read from ``mcp.tools`` (each entry's ``name``). When
        the object has no ``tools`` attribute the calling test is skipped
        with an explanatory reason, so a check that could not run is reported
        as skipped instead of silently passing.
        """
        from mcp_server.server import mcp

        if not hasattr(mcp, 'tools'):
            pytest.skip("mcp object exposes no 'tools' attribute; cannot verify tool registration")
        return [tool.name for tool in mcp.tools]

    def test_mcp_instance_created(self):
        """The module-level mcp instance must exist."""
        from mcp_server.server import mcp

        assert mcp is not None

    def test_resolve_library_id_tool_exists(self):
        """resolve-library-id tool should be registered."""
        assert "resolve_library_id" in self._registered_tool_names()

    def test_get_library_docs_tool_exists(self):
        """get-library-docs tool should be registered."""
        assert "get_library_docs" in self._registered_tool_names()

    def test_list_libraries_tool_exists(self):
        """list-libraries tool should be registered."""
        assert "list_libraries" in self._registered_tool_names()

    def test_search_docs_tool_exists(self):
        """search-docs tool should be registered."""
        assert "search_docs" in self._registered_tool_names()

    def test_refresh_library_tool_exists(self):
        """refresh-library tool should be registered."""
        assert "refresh_library" in self._registered_tool_names()

    def test_sync_sources_tool_exists(self):
        """sync-sources tool should be registered."""
        assert "sync_sources" in self._registered_tool_names()
class TestMcpServerStripPrefixIntegration:
    """Checks that the prefix helper exported by the server module is usable."""

    def test_resolve_library_id_calls_strip_prefix(self):
        """strip_local_prefix must be callable and remove /local/ from sample IDs."""
        from mcp_server.server import strip_local_prefix

        # The helper has to exist as a callable before it can be exercised.
        assert callable(strip_local_prefix)

        prefixed_ids = (
            "/local/foundryvtt",
            "/local/pytest",
            "/local/mydocs/reference",
        )
        results = [strip_local_prefix(prefixed) for prefixed in prefixed_ids]

        for stripped in results:
            assert not stripped.startswith("/local/")
class TestMcpServerPrefixHandlingVariations:
    """Checks strip_local_prefix() on differently shaped library names."""

    def test_long_library_id(self):
        """A long name after the prefix must be returned in full."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/very-long-library-id-with-many-chars-in-name")
        assert stripped == "very-long-library-id-with-many-chars-in-name"

    def test_special_characters_in_id(self):
        """Underscores, dashes and digits in the name must be kept."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/my-doc_v2-3_test")
        assert stripped == "my-doc_v2-3_test"

    def test_unicode_in_stripped_name(self):
        """Non-ASCII characters in the name must be kept."""
        from mcp_server.server import strip_local_prefix

        # Name written in Chinese characters.
        stripped = strip_local_prefix("/local/世界文档")
        assert stripped == "世界文档"

    def test_mixed_case_stripped_name(self):
        """Letter case of the name must be kept."""
        from mcp_server.server import strip_local_prefix

        stripped = strip_local_prefix("/local/FoundryVTT")
        assert stripped == "FoundryVTT"
# =============================================================================
# FIXTURES
# =============================================================================
@pytest.fixture
def sample_library_ids():
    """Return sample library IDs that all carry the /local/ prefix."""
    names = (
        "foundryvtt",
        "pytest",
        "mydocs/reference/guide.md",
        "my-app",
        "documentation/tutorial/getting-started",
    )
    return ["/local/" + name for name in names]
@pytest.fixture
def expected_stripped_ids(sample_library_ids):
    """
    Return the sample library IDs as they should look without /local/.

    The list is fixed; ``sample_library_ids`` is requested but not read.
    """
    stripped_ids = []
    stripped_ids.append("foundryvtt")
    stripped_ids.append("pytest")
    stripped_ids.append("mydocs/reference/guide.md")
    stripped_ids.append("my-app")
    stripped_ids.append("documentation/tutorial/getting-started")
    return stripped_ids
+368
View File
@@ -0,0 +1,368 @@
"""
Tests for backend/app/search.py
These tests verify search functionality without requiring:
- A running Qdrant vector database (mocked)
- Loaded embedding models (mocked)
The tests focus on:
- Response shape validation
- Library filtering
- Error handling
- Async function behavior
"""
import pytest
class TestResolveLibraryId:
    """Checks resolve_library_id() against libraries stored in the test database."""

    def test_returns_candidates_list(self, test_database):
        """The resolver must return a list when several libraries exist."""
        from backend.app.search import resolve_library_id
        from backend.app.db import upsert_library

        for index in range(3):
            upsert_library(
                library_id=f"/local/searchtest{index}",
                name=f"Search Test Library {index}",
                description=f"Description for search test {index}",
            )

        found = resolve_library_id("search")

        assert isinstance(found, list)

    def test_captures_matching_names(self, test_database):
        """The resolver must return a list when a library name contains the query."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        upsert_library(
            library_id="/local/searchlib",
            name="Search Library",
            description="Main search documentation",
        )

        found = resolve_library_id("search")

        # Only the return type is verified, not the matched entries.
        assert isinstance(found, list)

    def test_context7_style_prefix(self, test_database):
        """Every returned candidate must report its source as 'local'."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        # Stored without the /local/ prefix on purpose.
        upsert_library(
            library_id="foundryvtt",
            name="Foundry VTT",
            description="Fantasy tabletop virtual table",
        )

        found = resolve_library_id("foundry")

        # Holds trivially when no candidate is returned.
        assert all(entry.get("source") == "local" for entry in found)

    def test_partial_name_match(self, test_database):
        """The resolver must return a list for a query that is part of a name."""
        from backend.app.db import upsert_library
        from backend.app.search import resolve_library_id

        upsert_library(
            library_id="/local/gamefoundry",
            name="Foundry Game Module",
            description="Module for foundry games",
        )

        found = resolve_library_id("game")

        assert isinstance(found, list)

    def test_empty_result_on_no_matches(self, test_database):
        """The resolver must return a list for a query that matches nothing."""
        from backend.app.search import resolve_library_id

        found = resolve_library_id("xyznonexistent123")

        # Emptiness itself is not asserted, only the type.
        assert isinstance(found, list)
class TestSearchDocs:
"""Tests for search_docs() - semantic search with mocked vector store."""
def test_returns_results_list(self, mock_qdrant_client, test_database):
    """search_docs() must return a list after chunks have been stored."""
    from backend.app.search import search_docs
    from backend.app.db import upsert_library, insert_document_chunk

    target_library = "/local/searchdocslib"
    upsert_library(library_id=target_library, name="Search Docs Lib", description="Test")

    for index in range(5):
        insert_document_chunk(
            doc_id=f"searchdoc-{index}",
            library_id=target_library,
            path=f"path{index}.md",
            title=f"Section {index}",
            content=f"# Section {index}\n\nContent about section {index} that matches search queries.",
            chunk_index=index,
            token_estimate=100,
        )

    # NOTE(review): mock_qdrant_client is requested but never patched into
    # the search module here -- confirm search_docs does not reach a real client.
    hits = search_docs("section")

    assert isinstance(hits, list)
def test_empty_query_returns_empty_list(self):
"""Empty query should return empty results."""
from backend.app.search import search_docs
results = search_docs("")
assert isinstance(results, list)
def test_limit_parameter(self, mock_qdrant_client):
"""Limit parameter should affect result count."""
from backend.app.search import search_docs
results_10 = search_docs("test", limit=10)
results_5 = search_docs("test", limit=5)
assert isinstance(results_10, list)
assert isinstance(results_5, list)
def test_response_shape_matches_spec(self):
"""Verify response shape when mocked returns data."""
from unittest.mock import patch
from backend.app.search import search_docs
# Mock client to return formatted results
mock_results = [
{
"id": "test-id-1",
"score": 0.95,
"library_id": "/local/testlib",
"path": "docs/example.md",
"title": "Example Document",
"chunk_index": 0
}
]
with patch('backend.app.vector_store.get_client') as mock_get_client:
# Setup mock client to return our test data
mock_client = mock_get_client.return_value
mock_point = type('ScoredPoint', (), {
'score': 0.95,
'payload': {
"id": "test-id-1",
"library_id": "/local/testlib",
"path": "docs/example.md",
"title": "Example Document",
"chunk_index": 0
}
})()
mock_client.search.return_value = [mock_point]
results = search_docs("test query")
assert isinstance(results, list)
if results:
# Verify each result has expected fields
result = results[0]
assert "id" in result
assert "score" in result
assert "library_id" in result
assert "path" in result
assert "title" in result
assert "chunk_index" in result
class TestGetLibraryDocs:
    """Exercise get_library_docs() - retrieving documentation text for a library."""

    def test_returns_empty_string_when_no_documents(self, mock_qdrant_client):
        """A library ID with no stored documents still produces a string."""
        from backend.app.search import get_library_docs

        # The text may be empty or an error message; only the type is checked.
        docs = get_library_docs("/local/nonexistent")
        assert isinstance(docs, str)

    def test_returns_content_when_documents_exist(self, mock_qdrant_client):
        """A library holding one chunk yields a string result."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        # Store a library together with a single chunk.
        upsert_library(library_id="/local/docretrievetest", name="Doc Retrieve", description="Test")
        chunk = dict(
            doc_id="doc-retrieve-1",
            library_id="/local/docretrievetest",
            path="docs/getting-started.md",
            title="Getting Started",
            content="# Getting Started\n\nWelcome to the documentation. This is a test document.",
            chunk_index=0,
            token_estimate=200,
        )
        insert_document_chunk(**chunk)

        docs = get_library_docs("/local/docretrievetest")
        # Only the type is verified; the content itself is not inspected.
        assert isinstance(docs, str)

    def test_topic_filter_searches(self, mock_qdrant_client):
        """Passing ``topic`` for a library with two chunks yields a string."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        upsert_library(library_id="/local/topicsearchlib", name="Topic Search", description="Test")

        # Two documents on different topics: (doc_id, path, title, content).
        topic_chunks = (
            (
                "topic-install",
                "docs/install.md",
                "Installation Guide",
                "# Installation\n\nInstall with pip install mypackage.",
            ),
            (
                "topic-usage",
                "docs/usage.md",
                "Usage Guide",
                "# Usage\n\nUse mycommand --help for help.",
            ),
        )
        for doc_id, path, title, content in topic_chunks:
            insert_document_chunk(
                doc_id=doc_id,
                library_id="/local/topicsearchlib",
                path=path,
                title=title,
                content=content,
                chunk_index=0,
                token_estimate=150,
            )

        # Ask only for the "install" topic.
        docs = get_library_docs("/local/topicsearchlib", topic="install")
        assert isinstance(docs, str)

    def test_token_limit_respected(self):
        """A small ``token_limit`` on a long document still yields a string."""
        from backend.app.search import get_library_docs
        from backend.app.db import upsert_library, insert_document_chunk

        # Store one long chunk: a heading followed by 500 repeated words.
        upsert_library(library_id="/local/tokenlimittest", name="Token Limit", description="Test")
        filler_words = ["word"] * 500
        long_content = "# Long Content\n\n" + " ".join(filler_words)
        insert_document_chunk(
            doc_id="long-doc",
            library_id="/local/tokenlimittest",
            path="docs/long.md",
            title="Long Document",
            content=long_content,
            chunk_index=0,
            token_estimate=2000,
        )

        # Retrieve with a limit far below the chunk's token estimate.
        docs = get_library_docs("/local/tokenlimittest", token_limit=100)
        assert isinstance(docs, str)
class TestGetLibraryDocsWithMock:
    """Check document retrieval for a library that has stored chunks."""

    def test_retrieves_chunks_by_library_id(self, mock_qdrant_client):
        """Without a topic, get_library_docs returns a string for a 3-chunk library."""
        from backend.app.db import upsert_library, insert_document_chunk
        from backend.app.search import get_library_docs

        upsert_library(library_id="/local/mockretrievetest", name="Mock Retrieve", description="Test")

        # Insert three small chunks, one per path.
        for index in range(3):
            chunk = dict(
                doc_id=f"mock-retrieve-{index}",
                library_id="/local/mockretrievetest",
                path=f"path{index}.md",
                title=f"Path {index}",
                content=f"Content for path {index}.",
                chunk_index=index,
                token_estimate=50,
            )
            insert_document_chunk(**chunk)

        docs = get_library_docs("/local/mockretrievetest")
        assert isinstance(docs, str)
class TestSearchErrorHandling:
    """Tests for error handling in search functions."""

    def test_search_handles_missing_library(self):
        """search_docs should return a list even for an unknown library_id."""
        from backend.app.search import search_docs

        results = search_docs("test", library_id="/local/missing_lib_xyz123")
        assert isinstance(results, list)

    def test_resolve_handles_no_libraries_in_db(self, test_database):
        """resolve_library_id should return a list when this test adds no library.

        The ``test_database`` fixture is requested so the lookup runs after
        the database has been re-initialised; nothing is inserted here.
        Only the return type is asserted, matching the other resolve tests.
        """
        from backend.app.search import resolve_library_id

        # Previously this test only imported names and asserted nothing.
        candidates = resolve_library_id("anything")
        assert isinstance(candidates, list)

    def test_get_library_docs_handles_empty_library(self):
        """get_library_docs should return a string for a library with no chunks."""
        from backend.app.search import get_library_docs

        result = get_library_docs("/local/emptylib")
        assert isinstance(result, str)
# =============================================================================
# FIXTURES FOR SEARCH TESTS
# =============================================================================
@pytest.fixture
def search_sample_text():
    """Provide a markdown guide with several headings for search chunking tests."""
    # Sections: installation, configuration, usage examples, troubleshooting.
    markdown_guide = """# Installation Guide
To install the package:
```bash
pip install mypackage
```
## Configuration
Configure your environment by setting these variables:
- MY_VAR=123
- DEBUG=true
## Usage Examples
Example 1: Basic usage
```python
import mymodule
module = mymodule.Module()
result = module.run()
print(result)
```
Example 2: Advanced usage with options
```python
options = {"verbose": True, "output": "stdout"}
result = module.run(options=options)
```
## Troubleshooting
Common issues and their solutions:
- ImportError: Ensure package is installed
- AttributeError: Check that attributes exist on object"""
    return markdown_guide