Initial DocsMCP stack

2026-06-05 23:02:55 +01:00
commit 421b6f973a
51 changed files with 7414 additions and 0 deletions
@@ -0,0 +1,2 @@
+# Tests package for local-context7
+# Contains unit tests for chunking, database operations, search, and MCP server modules
@@ -0,0 +1,191 @@
+"""
+Pytest configuration and fixtures for local-context7 tests.
+
+This module provides:
+- Mocks for external dependencies (Qdrant, FastEmbed)
+- Database fixtures for SQLite operations
+- Common test utilities
+"""
+from unittest.mock import MagicMock, patch
+import pytest
+import os
+import json
+from pathlib import Path
+
+from backend.app.db import init_db, upsert_library, insert_document_chunk, get_chunks_for_library, list_libraries, clear_library_documents, get_connection
+
+
+# =============================================================================
+# FIXTURES
+# =============================================================================
+
+@pytest.fixture(scope="function")
+def test_database():
+    """
+    Provide a freshly initialized SQLite database for a single test.
+
+    Any leftover ``backend/data/test_db.sqlite`` file is deleted, ``init_db()``
+    is run (and must report success), and the file is deleted again once the
+    test has finished.
+
+    Yields:
+        None. Tests obtain connections through ``backend.app.db`` themselves.
+    """
+    # NOTE(review): init_db() is called without this path, so it is assumed to
+    # resolve to the same file on its own -- confirm against backend.app.db.
+    data_dir = Path(__file__).parent.parent / "backend" / "data"
+    sqlite_file = data_dir / "test_db.sqlite"
+
+    # Make sure the containing directory is there before touching the file
+    data_dir.mkdir(parents=True, exist_ok=True)
+
+    def _discard_db_file():
+        # Drop the database file if a previous run (or this test) left one
+        if sqlite_file.exists():
+            sqlite_file.unlink()
+
+    _discard_db_file()
+
+    # Build the schema; stop right here if that did not work
+    outcome = init_db()
+    assert outcome["success"], f"Failed to initialize test DB: {outcome.get('error')}"
+
+    yield
+
+    _discard_db_file()
+
+
+@pytest.fixture(scope="function")
+def sample_text():
+    """Markdown sample with nested headings, used by the chunking tests."""
+    blocks = [
+        "# Introduction",
+        "This is the introduction section.",
+        "## Background",
+        "Background information goes here to make this longer and test chunking.",
+        "This paragraph has more content about the background topic.",
+        "### Details",
+        "Specific details about the background are provided in this subsection.",
+        "More details follow here to ensure we have enough text to properly test heading preservation.",
+        "## Conclusion",
+        "The conclusion wraps up everything nicely.",
+    ]
+    # Consecutive blocks are separated by exactly one blank line
+    return "\n\n".join(blocks)
+
+
+# =============================================================================
+# MOCKS
+# =============================================================================
+
+@pytest.fixture
+def mock_embedding_model():
+    """
+    Stand-in for the FastEmbed model.
+
+    ``embed(texts)`` produces one 384-element all-zero vector per input text,
+    so tests never download or load a real embedding model.
+    """
+    vector_size = 384
+    fake_model = MagicMock()
+    # A plain callable replaces the auto-generated MagicMock attribute
+    fake_model.embed = lambda texts: [[0.0] * vector_size for _ in texts]
+    return fake_model
+
+
+@pytest.fixture
+def mock_qdrant_client():
+    """
+    Stand-in for the Qdrant client.
+
+    ``search`` always reports no hits and ``delete_collection`` is a
+    ``MagicMock`` returning ``True``, so no Qdrant server is required.
+    """
+    def _no_hits(collection_name, query_vector, limit=10, search_filter=None):
+        # Simulate a search that matched nothing
+        return list()
+
+    fake_client = MagicMock()
+    fake_client.delete_collection = MagicMock(return_value=True)
+    fake_client.search = _no_hits
+    return fake_client
+
+
+@pytest.fixture
+def mock_embedding_model_batch():
+    """
+    Batch embedding model mock that returns deterministic fake vectors.
+
+    ``embed(texts)`` returns one 384-element vector per text. Each vector is
+    ``base + i * 0.001`` for ``i`` in ``0..383``, where ``base`` is derived
+    from a CRC32 of the text, so equal texts always map to equal vectors.
+    """
+    import zlib  # local import: only this fixture needs it
+
+    def fake_vector(text):
+        # CRC32 instead of hash(): str hashes are salted per process, which
+        # would make the vectors differ from one test run to the next.
+        base = (zlib.crc32(text.encode("utf-8")) % 1000000) / 1000000
+        return [base + (i * 0.001) for i in range(384)]
+
+    mock_model = MagicMock()
+    mock_model.embed = lambda texts: [fake_vector(t) for t in texts]
+
+    return mock_model
+
+
+# =============================================================================
+# SETUP TEARDOWN FIXTURES
+# =============================================================================
+
+@pytest.fixture(autouse=True)
+def clear_test_database(test_database):
+    """
+    Apply the ``test_database`` fixture to every test automatically.
+
+    This fixture does no work of its own: being ``autouse`` and depending on
+    ``test_database`` is what triggers that fixture's setup and teardown
+    around each test function.
+    """
+    pass  # Cleanup handled in test_database fixture
+
+
+@pytest.fixture
+def empty_vector():
+    """All-zero placeholder embedding with 384 components."""
+    return [0.0 for _ in range(384)]
+
+
+@pytest.fixture
+def fake_embeddings(sample_text):
+    """One constant-valued 384-element vector per paragraph of ``sample_text``."""
+    paragraphs = [part for part in sample_text.split("\n\n") if part.strip()]
+
+    vectors = []
+    for paragraph in paragraphs:
+        # Every component of a vector carries the same hash-derived value
+        component = (hash(paragraph) + len(paragraph)) % 1000 / 10000
+        vectors.append([component] * 384)
+    return vectors
+
+
+# =============================================================================
+# UTILITY FUNCTIONS
+# =============================================================================
+
+@pytest.fixture
+def temp_file(tmp_path):
+    """Return the path ``tmp_path / "test.txt"``; the file itself is not created."""
+    test_file = tmp_path / "test.txt"
+    return test_file
+
+
+# Register the custom "slow" marker so it can be selected or deselected with -m
+def pytest_configure(config):
+    """Declare the ``slow`` marker through the ``markers`` ini setting."""
+    slow_marker = "slow: marks tests as slow (deselect with '-m \"not slow\"')"
+    config.addinivalue_line("markers", slow_marker)
+
+
+def pytest_runtest_setup(item):
+    """Per-test setup hook; currently a no-op placeholder."""
+    pass
@@ -0,0 +1,238 @@
+"""
+Tests for backend/app/chunking.py
+
+These are pure unit tests that don't require any external dependencies.
+They test text chunking logic, token estimation, and heading-aware splitting.
+"""
+import pytest
+
+
+class TestEstimateTokens:
+    """Tests for the estimate_tokens() function."""
+
+    def test_empty_text(self):
+        """Empty text should return 0 tokens."""
+        from backend.app.chunking import estimate_tokens
+        assert estimate_tokens("") == 0
+
+    def test_single_char(self):
+        """A single character rounds down to 0 tokens (4 chars per token approximation)."""
+        from backend.app.chunking import estimate_tokens
+        assert estimate_tokens("a") == 0  # 1 char // 4 = 0 tokens
+
+    def test_4_chars(self):
+        """4 characters = 1 token."""
+        from backend.app.chunking import estimate_tokens
+        assert estimate_tokens("abcd") == 1
+
+    def test_400_chars(self):
+        """400 characters = 100 tokens."""
+        from backend.app.chunking import estimate_tokens
+        text = "a" * 400
+        assert estimate_tokens(text) == 100
+
+    def test_whitespace_only(self):
+        """Three spaces should estimate to 0 tokens."""
+        from backend.app.chunking import estimate_tokens
+        assert estimate_tokens("   ") == 0  # 3 chars // 4 = 0
+
+
+class TestChunkText:
+    """Tests for the chunk_text() function."""
+
+    def test_empty_input(self):
+        """Empty input should return empty list."""
+        from backend.app.chunking import chunk_text
+        assert chunk_text("") == []
+
+    def test_small_text_single_chunk(self):
+        """Small text under limit should be single chunk."""
+        from backend.app.chunking import chunk_text
+        small = "This is a very short text that should be returned as a single chunk."
+        chunks = chunk_text(small, max_tokens=500)
+        assert len(chunks) == 1
+        assert chunks[0] == small
+
+    def test_exact_token_limit(self):
+        """Text exactly at limit should be one chunk."""
+        from backend.app.chunking import chunk_text, estimate_tokens
+        # Create text that is exactly 500 tokens (2000 chars)
+        text = "a" * 2000
+        chunks = chunk_text(text, max_tokens=500)
+        assert len(chunks) == 1
+        assert estimate_tokens(chunks[0]) == 500
+
+    def test_over_limit_splits(self):
+        """Text over limit should be split into multiple chunks."""
+        from backend.app.chunking import chunk_text
+        # Create text that is 2500 tokens (10000 chars)
+        text = "b" * 10000
+        chunks = chunk_text(text, max_tokens=500)
+        assert len(chunks) >= 2  # Should be split
+
+    def test_preserves_content(self):
+        """A short text should come back unchanged as a single chunk."""
+        from backend.app.chunking import chunk_text
+        original = "Hello world! This is a test of chunking functionality."
+        chunks = chunk_text(original, max_tokens=100)
+        combined = "".join(chunks)
+        assert len(chunks) == 1
+        assert combined == original
+
+    def test_headings_split(self):
+        """With a small limit, at least one chunk should start with a heading."""
+        from backend.app.chunking import chunk_text
+        markdown_with_headings = """# Introduction
+
+This is the introduction section.
+
+## Background
+
+Background information goes here."""
+
+        # With very small token limit, headings should cause splits
+        chunks = chunk_text(markdown_with_headings, max_tokens=20)
+        heading_chunks = [c for c in chunks if c.strip().startswith('#')]
+        assert len(heading_chunks) >= 1  # At least some heading preserved
+
+    def test_paragraph_split(self):
+        """Paragraph splitting should respect paragraph boundaries."""
+        from backend.app.chunking import chunk_text
+        text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
+        chunks = chunk_text(text, max_tokens=15)  # Small limit forces splits
+        assert len(chunks) >= 3  # At least as many paragraphs
+
+    def test_no_empty_chunks(self):
+        """Should not return empty chunks."""
+        from backend.app.chunking import chunk_text
+        text = "Hello world"
+        chunks = chunk_text(text, max_tokens=10)
+        for chunk in chunks:
+            assert chunk.strip() != ""
+
+
+class TestTokenEstimationBoundaries:
+    """Boundary checks for the 4-characters-per-token estimate."""
+
+    def test_boundary_precision(self):
+        """Lengths 4 through 7 estimate to 1 token; length 8 estimates to 2."""
+        from backend.app.chunking import estimate_tokens
+
+        expectations = {
+            "abcd": 1,
+            "abcde": 1,
+            "abcdef": 1,
+            "abcdefg": 1,
+            "abcdefgh": 2,
+        }
+        for sample, tokens in expectations.items():
+            assert estimate_tokens(sample) == tokens
+
+    def test_various_languages_chars(self):
+        """Non-ASCII input is estimated too; four Chinese characters give 1 token."""
+        from backend.app.chunking import estimate_tokens
+
+        # Four Chinese characters
+        assert estimate_tokens("你好世界") == 1
+
+        # Mixed ASCII and emoji: only the result type is pinned down
+        token_count = estimate_tokens("Hello 🎉 world")
+        assert isinstance(token_count, int)
+
+
+class TestChunkOverlapBehavior:
+    """Checks on how much text neighbouring chunks share."""
+
+    def test_overlap_not_exceeded(self):
+        """When splitting occurs, the first chunk is at most half of all chunk text."""
+        from backend.app.chunking import chunk_text
+
+        sentence = "The quick brown fox jumps over the lazy dog. "
+        chunks = chunk_text(sentence * 10, max_tokens=30, overlap_tokens=5)
+
+        # A single chunk means nothing was split, so there is nothing to compare
+        if len(chunks) <= 1:
+            return
+
+        total_length = len("".join(chunks))
+        assert len(chunks[0]) <= total_length // 2  # Rough check
+
+
+class TestChunkEdgeCases:
+    """Tests for edge cases and error conditions."""
+
+    def test_whitespace_only_text(self):
+        """Whitespace-only text should handle gracefully."""
+        from backend.app.chunking import chunk_text
+        chunks = chunk_text("   \n\n   ", max_tokens=100)
+        # May return empty or whitespace chunk, shouldn't crash
+        assert isinstance(chunks, list)
+
+    def test_very_long_paragraph(self):
+        """Long paragraph without breaks should be split."""
+        from backend.app.chunking import chunk_text
+        
+        long_para = "The quick brown fox jumps over the lazy dog. " * 100
+        chunks = chunk_text(long_para, max_tokens=50)
+        assert len(chunks) > 1  # Should be split
+
+    def test_none_input_raises(self):
+        """None input must raise TypeError or AssertionError."""
+        from backend.app.chunking import chunk_text
+        with pytest.raises((TypeError, AssertionError)):
+            chunk_text(None, max_tokens=100)
+
+    def test_unicode_text(self):
+        """Short mixed-script text should come back as a single chunk."""
+        from backend.app.chunking import chunk_text
+        unicode_text = "Hello 世界 مرحبا 🎉"
+        chunks = chunk_text(unicode_text, max_tokens=50)
+        assert len(chunks) == 1  # Small enough to be single chunk
+
+
+# =============================================================================
+# SAMPLE TEXT FIXTURE
+# =============================================================================
+
+@pytest.fixture
+def heading_markdown():
+    """Sample markdown with headings for chunking tests."""
+    return """# Introduction
+
+This is the introduction section. It contains some introductory text here.
+
+## Background
+
+Background information goes here to make this longer and test chunking. This paragraph has more content about the background topic. It provides context.
+
+### Details
+
+Specific details about the background are provided in this subsection. More details follow here to ensure we have enough text to properly test heading preservation.
+
+## Conclusion
+
+The conclusion wraps up everything nicely."""
+
+
+class TestHeadingPreservation:
+    """Heading-aware chunking checks driven by the heading_markdown fixture."""
+
+    def test_headings_in_separate_chunks(self, heading_markdown):
+        """With a 30-token limit, at least one chunk should open with a heading."""
+        from backend.app.chunking import chunk_text
+
+        pieces = chunk_text(heading_markdown, max_tokens=30)
+
+        # Count chunks whose first non-blank character starts a markdown heading
+        heading_count = sum(1 for piece in pieces if piece.strip().startswith('#'))
+        assert heading_count >= 1
+
+    def test_all_content_present(self, heading_markdown):
+        """The section titles should all survive chunking."""
+        from backend.app.chunking import chunk_text
+
+        rejoined = "".join(chunk_text(heading_markdown, max_tokens=500))
+
+        # Spot-check that no section title was dropped
+        for title in ("Introduction", "Background", "Conclusion"):
+            assert title in rejoined
@@ -0,0 +1,316 @@
+"""
+Tests for backend/app/db.py
+
+These tests verify SQLite database operations including:
+- Table creation (init_db)
+- Library CRUD operations
+- Document chunk storage and retrieval
+- Full-text search functionality
+
+All tests use a temporary test database file.
+"""
+import pytest
+from datetime import datetime
+
+
+class TestInitDatabase:
+    """Tests for init_db() - table creation."""
+
+    def test_init_db_creates_tables(self, test_database):
+        """A table whose name contains "libraries" should exist after init."""
+        from backend.app.db import get_connection
+
+        conn = get_connection()
+        try:
+            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
+            tables = [row[0] for row in cursor.fetchall()]
+        finally:
+            # Release the handle even if the query itself fails
+            conn.close()
+
+        # Case-insensitive substring match also covers the exact name "libraries"
+        assert any("libraries" in t.lower() for t in tables)
+
+    def test_init_db_returns_success(self, test_database):
+        """init_db should return success indicator."""
+        from backend.app.db import init_db
+
+        result = init_db()
+        assert result["success"] is True
+
+
+class TestLibraryOperations:
+    """Tests for library CRUD operations."""
+
+    def test_upsert_library_new(self, test_database):
+        """Upsert should create new library."""
+        from backend.app.db import upsert_library
+
+        result = upsert_library(
+            library_id="/local/testlib",
+            name="Test Library",
+            description="A test library for unit tests"
+        )
+
+        assert result["success"] is True
+        assert result["id"] == "/local/testlib"
+
+    def test_upsert_library_update(self, test_database):
+        """Upsert should update existing library."""
+        from backend.app.db import upsert_library
+
+        # Insert first library
+        upsert_library(
+            library_id="/local/upsertlib",
+            name="Original Name",
+            description="Original description"
+        )
+
+        # Update it
+        result = upsert_library(
+            library_id="/local/upsertlib",
+            name="Updated Name",
+            description="Updated description"
+        )
+
+        assert result["success"] is True
+
+    def test_upsert_library_id_normalization(self, test_database):
+        """upsert_library should accept IDs with and without the /local/ prefix."""
+        from backend.app.db import upsert_library
+
+        # Test various ID formats
+        test_ids = [
+            "/local/foundryvtt",
+            "foundryvtt",
+            "/local/mydocs",
+        ]
+
+        for lib_id in test_ids:
+            result = upsert_library(library_id=lib_id, name="Test", description="Desc")
+            # Only success is checked; the stored ID is not read back here
+            assert result["success"] is True
+
+    def test_list_libraries(self, test_database):
+        """list_libraries should return list of libraries."""
+        from backend.app.db import upsert_library, list_libraries
+
+        # Create some libraries
+        for i in range(3):
+            upsert_library(
+                library_id=f"/local/lib{i}",
+                name=f"Library {i}",
+                description=f"Description {i}"
+            )
+
+        libs = list_libraries()
+        assert isinstance(libs, list)
+        assert len(libs) >= 3
+
+    def test_search_libraries(self, test_database):
+        """search_libraries should return a list for a name/description query."""
+        from backend.app.db import upsert_library, search_libraries
+
+        # Create libraries with searchable names
+        upsert_library(library_id="/local/foo1", name="Foo Library", description="Bar baz")
+        upsert_library(library_id="/local/foo2", name="Other Library", description="Different content")
+
+        results = search_libraries("foo")
+        assert isinstance(results, list)
+
+
+class TestDocumentChunkOperations:
+    """Tests for document chunk storage and retrieval."""
+
+    def test_insert_document_chunk_new(self, test_database):
+        """insert_document_chunk should create new chunk record."""
+        from backend.app.db import insert_document_chunk
+        
+        result = insert_document_chunk(
+            doc_id="doc-1",
+            library_id="/local/testlib",
+            path="docs/example.md",
+            title="Example Document",
+            content="# Example\n\nThis is the content.",
+            chunk_index=0,
+            token_estimate=100
+        )
+        
+        assert result["success"] is True
+
+    def test_insert_document_chunk_update(self, test_database):
+        """insert_document_chunk should update existing record."""
+        from backend.app.db import insert_document_chunk
+        
+        # Insert first
+        insert_document_chunk(
+            doc_id="doc-update-test",
+            library_id="/local/uplib",
+            path="old-path.md",
+            title="Old Title",
+            content="# Old\nContent here.",
+            chunk_index=0,
+            token_estimate=50
+        )
+        
+        # Update it
+        result = insert_document_chunk(
+            doc_id="doc-update-test",
+            library_id="/local/uplib",
+            path="new-path.md",
+            title="New Title",
+            content="# New\nUpdated content.",
+            chunk_index=1,
+            token_estimate=75
+        )
+        
+        assert result["success"] is True
+
+    def test_get_document_by_id(self, test_database):
+        """get_document_by_id should retrieve document by ID."""
+        from backend.app.db import insert_document_chunk, get_document_by_id
+        
+        # Insert document
+        doc_id = "unique-doc-id-12345"
+        insert_document_chunk(
+            doc_id=doc_id,
+            library_id="/local/testlib",
+            path="docs/test.md",
+            title="Test Document",
+            content="# Test\n\nTest content here.",
+            chunk_index=None,
+            token_estimate=200
+        )
+        
+        # Retrieve it
+        doc = get_document_by_id(doc_id)
+        assert doc is not None
+        assert doc["id"] == doc_id
+
+    def test_get_chunks_for_library(self, test_database):
+        """get_chunks_for_library should return all chunks for a library."""
+        from backend.app.db import upsert_library, insert_document_chunk, get_chunks_for_library
+        
+        # Create library
+        upsert_library(library_id="/local/chunktest", name="Chunk Test", description="Test")
+        
+        # Add some chunks
+        for i in range(3):
+            insert_document_chunk(
+                doc_id=f"chunk-{i}",
+                library_id="/local/chunktest",
+                path=f"path{i}.md",
+                title=f"Section {i}",
+                content=f"Content section {i}.",
+                chunk_index=i,
+                token_estimate=50
+            )
+        
+        chunks = get_chunks_for_library("/local/chunktest")
+        assert isinstance(chunks, list)
+        assert len(chunks) >= 3
+
+    def test_clear_library_documents(self, test_database):
+        """clear_library_documents should delete all docs for a library."""
+        from backend.app.db import upsert_library, insert_document_chunk, clear_library_documents, get_chunks_for_library
+        
+        # Create and populate library
+        upsert_library(library_id="/local/cleartest", name="Clear Test", description="Test")
+        for i in range(5):
+            insert_document_chunk(
+                doc_id=f"clear-{i}",
+                library_id="/local/cleartest",
+                path=f"path{i}.md",
+                content=f"Content {i}.",
+                token_estimate=20
+            )
+        
+        # Clear it
+        result = clear_library_documents("/local/cleartest")
+        assert result["success"] is True
+        
+        # Verify cleared
+        remaining = get_chunks_for_library("/local/cleartest")
+        assert len(remaining) == 0
+
+
+class TestDatabaseEdgeCases:
+    """Tests for edge cases and error handling."""
+
+    def test_empty_library_id(self, test_database):
+        """upsert_library with an empty ID should not raise."""
+        from backend.app.db import upsert_library
+
+        # The return value is deliberately not inspected: an empty ID may or
+        # may not be accepted, the call just must not crash.
+        upsert_library(library_id="", name="Test", description="Desc")
+
+    def test_special_characters_in_content(self, test_database):
+        """Content with special characters should be stored."""
+        from backend.app.db import insert_document_chunk
+
+        content = "Hello \"world\" <tag /> & amp; 'apostrophe'"
+        result = insert_document_chunk(
+            doc_id="special-test",
+            library_id="/local/speciallib",
+            path="special.md",
+            content=content,
+            token_estimate=100
+        )
+
+        assert result["success"] is True
+
+    def test_very_long_content(self, test_database):
+        """Long content should be stored."""
+        from backend.app.db import insert_document_chunk
+
+        long_content = "a" * 5000
+        result = insert_document_chunk(
+            doc_id="long-test",
+            library_id="/local/longlib",
+            path="long.md",
+            content=long_content,
+            token_estimate=1000
+        )
+
+        assert result["success"] is True
+
+    def test_none_description(self, test_database):
+        """Library with None description should work."""
+        from backend.app.db import upsert_library
+
+        result = upsert_library(
+            library_id="/local/nonedesc",
+            name="No Description Lib",
+            description=None
+        )
+
+        assert result["success"] is True
+
+
+class TestDatabaseInitialization:
+    """Tests for database initialization state."""
+
+    def test_database_is_empty_after_init(self, test_database):
+        """list_libraries() should return a list right after init."""
+        from backend.app.db import list_libraries
+        
+        libs = list_libraries()
+        # NOTE(review): only the type is checked; emptiness (per the test name)
+        # is not asserted -- confirm whether `libs == []` is intended.
+        assert isinstance(libs, list)
+
+
+# =============================================================================
+# FIXTURES
+# =============================================================================
+
+@pytest.fixture
+def sample_doc():
+    """Sample document chunk for testing."""
+    return {
+        "doc_id": "sample-doc-1",
+        "library_id": "/local/samplelib",
+        "path": "docs/guide.md",
+        "title": "Getting Started Guide",
+        "content": "# Getting Started\n\nWelcome to the guide. This is a sample document for testing.\n\n## Installation\n\nInstall with pip.",
+        "chunk_index": 0,
+        "token_estimate": 500
+    }
@@ -0,0 +1,262 @@
+"""
+Tests for mcp-server/server.py
+
+These are pure unit tests that don't require any external dependencies.
+They test:
+- The strip_local_prefix() function directly (no network)
+- MCP server tool definitions and structure
+"""
+
+import pytest
+
+
+class TestStripLocalPrefix:
+    """Tests for the strip_local_prefix() function."""
+
+    def test_strips_prefix_from_full_id(self):
+        """Should strip /local/ prefix from full library ID."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "/local/foundryvtt"
+        expected_output = "foundryvtt"
+        
+        result = strip_local_prefix(input_id)
+        assert result == expected_output
+
+    def test_preserves_id_without_prefix(self):
+        """Should preserve ID that doesn't have /local/ prefix."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "foundryvtt"
+        
+        result = strip_local_prefix(input_id)
+        assert result == input_id  # Should be unchanged
+
+    def test_strips_from_multiple_local_prefixes(self):
+        """Should handle edge case of multiple prefixes."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "/local//local/foundryvtt"
+        
+        result = strip_local_prefix(input_id)
+        # Should only strip first occurrence
+        assert result == "/local/foundryvtt"
+
+    def test_empty_string(self):
+        """Empty string should remain empty."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = ""
+        
+        result = strip_local_prefix(input_id)
+        assert result == input_id  # Should be unchanged
+
+    def test_whitespace_only(self):
+        """Whitespace only should remain whitespace (no /local/ to strip)."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "   \t\n"
+        
+        result = strip_local_prefix(input_id)
+        assert result == input_id
+
+    def test_case_sensitive_prefix(self):
+        """Prefix matching is case-sensitive."""
+        from mcp_server.server import strip_local_prefix
+        
+        # Lowercase - should strip
+        result1 = strip_local_prefix("/local/test")
+        assert result1 == "test"
+        
+        # Uppercase - should NOT strip (not a match)
+        result2 = strip_local_prefix("/LOCAL/test")
+        assert result2 == "/LOCAL/test"  # Unchanged
+
+    def test_partial_match_does_not_strip(self):
+        """Only exact /local/ prefix is stripped, not partial matches."""
+        from mcp_server.server import strip_local_prefix
+        
+        # Partial match - should NOT strip
+        input_id = "/local-docs/test"
+        result = strip_local_prefix(input_id)
+        assert result == input_id  # Unchanged
+        
+        # Different separator - should NOT strip
+        input_id2 = "/localdocs/test"
+        result2 = strip_local_prefix(input_id2)
+        assert result2 == input_id2
+
+    def test_prefix_with_trailing_slash(self):
+        """Should handle trailing slash in ID."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "/local/foundryvtt/"
+        expected_output = "foundryvtt/"
+        
+        result = strip_local_prefix(input_id)
+        assert result == expected_output
+
+
+class TestMcpServerStructure:
+    """Tests for MCP server tool structure (without starting the server)."""
+
+    def test_import_fastmcp(self):
+        """fastmcp should expose FastMCP; skipped when fastmcp is not installed."""
+        # importorskip reports a skip (not a failure) if the package is missing
+        fastmcp = pytest.importorskip("fastmcp")
+        assert hasattr(fastmcp, "FastMCP")
+
+
+class TestMcpServerToolsExistence:
+    """Tests to verify MCP server has expected tools defined."""
+
+    @staticmethod
+    def _registered_tool_names():
+        """Return the names found in ``mcp.tools``, skipping the test if absent."""
+        from mcp_server.server import mcp
+
+        # Without a `tools` attribute there is nothing to inspect; report a
+        # skip rather than letting the test pass without checking anything.
+        if not hasattr(mcp, 'tools'):
+            pytest.skip("mcp instance has no 'tools' attribute to inspect")
+        return [t.name for t in mcp.tools]
+
+    def test_mcp_instance_created(self):
+        """The server module should expose a non-None mcp instance."""
+        from mcp_server.server import mcp
+
+        assert mcp is not None
+
+    def test_resolve_library_id_tool_exists(self):
+        """resolve_library_id tool should be registered."""
+        assert "resolve_library_id" in self._registered_tool_names()
+
+    def test_get_library_docs_tool_exists(self):
+        """get_library_docs tool should be registered."""
+        assert "get_library_docs" in self._registered_tool_names()
+
+    def test_list_libraries_tool_exists(self):
+        """list_libraries tool should be registered."""
+        assert "list_libraries" in self._registered_tool_names()
+
+    def test_search_docs_tool_exists(self):
+        """search_docs tool should be registered."""
+        assert "search_docs" in self._registered_tool_names()
+
+    def test_refresh_library_tool_exists(self):
+        """refresh_library tool should be registered."""
+        assert "refresh_library" in self._registered_tool_names()
+
+    def test_sync_sources_tool_exists(self):
+        """sync_sources tool should be registered."""
+        assert "sync_sources" in self._registered_tool_names()
+
+
+class TestMcpServerStripPrefixIntegration:
+    """Checks strip_local_prefix() against several sample library IDs."""
+
+    def test_resolve_library_id_calls_strip_prefix(self):
+        """strip_local_prefix() is callable and removes a leading /local/."""
+        from mcp_server.server import strip_local_prefix
+
+        assert callable(strip_local_prefix)
+
+        prefixed_ids = (
+            "/local/foundryvtt",
+            "/local/pytest",
+            "/local/mydocs/reference",
+        )
+        # None of the results may still begin with the prefix
+        for prefixed in prefixed_ids:
+            assert not strip_local_prefix(prefixed).startswith("/local/")
+
+
+class TestMcpServerPrefixHandlingVariations:
+    """Additional tests for prefix handling variations."""
+
+    def test_long_library_id(self):
+        """Should handle long library IDs with /local/ prefix."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "/local/very-long-library-id-with-many-chars-in-name"
+        expected_output = "very-long-library-id-with-many-chars-in-name"
+        
+        result = strip_local_prefix(input_id)
+        assert result == expected_output
+
+    def test_special_characters_in_id(self):
+        """Should handle special characters in library ID."""
+        from mcp_server.server import strip_local_prefix
+        
+        # IDs can have underscores, dashes, numbers
+        input_id = "/local/my-doc_v2-3_test"
+        
+        result = strip_local_prefix(input_id)
+        assert result == "my-doc_v2-3_test"
+
+    def test_unicode_in_stripped_name(self):
+        """Stripped name should preserve unicode characters."""
+        from mcp_server.server import strip_local_prefix
+        
+        # Library IDs sometimes have unicode in them
+        input_id = "/local/世界文档"  # Chinese characters
+        
+        result = strip_local_prefix(input_id)
+        assert result == "世界文档"
+
+    def test_mixed_case_stripped_name(self):
+        """Stripped name can have mixed case."""
+        from mcp_server.server import strip_local_prefix
+        
+        input_id = "/local/FoundryVTT"
+        
+        result = strip_local_prefix(input_id)
+        assert result == "FoundryVTT"
+
+
+# =============================================================================
+# FIXTURES
+# =============================================================================
+
+@pytest.fixture
+def sample_library_ids():
+    """Sample library IDs for testing prefix stripping."""
+    return [
+        "/local/foundryvtt",
+        "/local/pytest",
+        "/local/mydocs/reference/guide.md",
+        "/local/my-app",
+        "/local/documentation/tutorial/getting-started",
+    ]
+
+
+@pytest.fixture
+def expected_stripped_ids(sample_library_ids):
+    """Sample library IDs with the leading ``/local/`` removed, in the same order."""
+    prefix_length = len("/local/")
+    # Every entry of sample_library_ids begins with "/local/"
+    return [library_id[prefix_length:] for library_id in sample_library_ids]
@@ -0,0 +1,368 @@
+"""
+Tests for backend/app/search.py
+
+These tests verify search functionality without requiring:
+- A running Qdrant vector database (mocked)
+- Loaded embedding models (mocked)
+
+The tests focus on:
+- Response shape validation
+- Library filtering
+- Error handling
+- Async function behavior
+"""
+import pytest
+
+
+class TestResolveLibraryId:
+    """Exercise resolve_library_id(), the Context7-style library lookup."""
+
+    def test_returns_candidates_list(self, test_database):
+        """Looking up "search" with three libraries registered yields a list."""
+        from backend.app.search import resolve_library_id
+        from backend.app.db import upsert_library
+
+        # Register three libraries whose IDs all contain "search".
+        for index in range(3):
+            upsert_library(
+                library_id=f"/local/searchtest{index}",
+                name=f"Search Test Library {index}",
+                description=f"Description for search test {index}",
+            )
+
+        assert isinstance(resolve_library_id("search"), list)
+
+    def test_captures_matching_names(self, test_database):
+        """A query that appears in a library's name still produces a list."""
+        from backend.app.db import upsert_library
+        from backend.app.search import resolve_library_id
+
+        library_fields = {
+            "library_id": "/local/searchlib",
+            "name": "Search Library",
+            "description": "Main search documentation",
+        }
+        upsert_library(**library_fields)
+
+        # Only the return type is checked here, not which entries come back.
+        matches = resolve_library_id("search")
+        assert isinstance(matches, list)
+
+    def test_context7_style_prefix(self, test_database):
+        """Every candidate returned for "foundry" is tagged source == "local"."""
+        from backend.app.db import upsert_library
+        from backend.app.search import resolve_library_id
+
+        # Stored deliberately without the "/local/" prefix.
+        upsert_library(
+            library_id="foundryvtt",
+            name="Foundry VTT",
+            description="Fantasy tabletop virtual table",
+        )
+
+        # NOTE(review): an empty candidate list passes this check vacuously.
+        assert all(
+            candidate.get("source") == "local"
+            for candidate in resolve_library_id("foundry")
+        )
+
+    def test_partial_name_match(self, test_database):
+        """Querying "game", a fragment of the stored library ID, returns a list."""
+        from backend.app.db import upsert_library
+        from backend.app.search import resolve_library_id
+
+        library_fields = {
+            "library_id": "/local/gamefoundry",
+            "name": "Foundry Game Module",
+            "description": "Module for foundry games",
+        }
+        upsert_library(**library_fields)
+
+        assert isinstance(resolve_library_id("game"), list)
+
+    def test_empty_result_on_no_matches(self, test_database):
+        """A query this test registers no library for still returns a list."""
+        from backend.app.search import resolve_library_id
+
+        unmatched_query = "xyznonexistent123"
+
+        # NOTE(review): emptiness of the result is not asserted, only its type.
+        assert isinstance(resolve_library_id(unmatched_query), list)
+
+
+class TestSearchDocs:
+    """Tests for search_docs() - semantic search with a mocked vector store."""
+
+    def test_returns_results_list(self, mock_qdrant_client, test_database):
+        """search_docs should return a list once a library has chunks stored."""
+        from backend.app.search import search_docs
+        from backend.app.db import upsert_library, insert_document_chunk
+
+        upsert_library(library_id="/local/searchdocslib", name="Search Docs Lib", description="Test")
+
+        # Five chunks that all mention "section", the term queried below.
+        for i in range(5):
+            insert_document_chunk(
+                doc_id=f"searchdoc-{i}",
+                library_id="/local/searchdocslib",
+                path=f"path{i}.md",
+                title=f"Section {i}",
+                content=f"# Section {i}\n\nContent about section {i} that matches search queries.",
+                chunk_index=i,
+                token_estimate=100
+            )
+
+        results = search_docs("section")
+
+        assert isinstance(results, list)
+
+    def test_empty_query_returns_empty_list(self):
+        """An empty query string should still produce a list.
+
+        NOTE(review): only the type is asserted, not that the list is empty.
+        """
+        from backend.app.search import search_docs
+
+        results = search_docs("")
+        assert isinstance(results, list)
+
+    def test_limit_parameter(self, mock_qdrant_client):
+        """search_docs should accept a ``limit`` argument and return a list.
+
+        NOTE(review): the result counts are not compared against the limits.
+        """
+        from backend.app.search import search_docs
+
+        results_10 = search_docs("test", limit=10)
+        results_5 = search_docs("test", limit=5)
+
+        assert isinstance(results_10, list)
+        assert isinstance(results_5, list)
+
+    def test_response_shape_matches_spec(self):
+        """Results built from a mocked vector-store hit expose the expected keys."""
+        from types import SimpleNamespace
+        from unittest.mock import patch
+        from backend.app.search import search_docs
+
+        # Stand-in for a scored point: only ``score`` and ``payload`` are set.
+        mock_point = SimpleNamespace(
+            score=0.95,
+            payload={
+                "id": "test-id-1",
+                "library_id": "/local/testlib",
+                "path": "docs/example.md",
+                "title": "Example Document",
+                "chunk_index": 0
+            },
+        )
+
+        with patch('backend.app.vector_store.get_client') as mock_get_client:
+            # Whatever client the vector store hands out returns our single hit.
+            mock_get_client.return_value.search.return_value = [mock_point]
+
+            results = search_docs("test query")
+
+        assert isinstance(results, list)
+        # NOTE(review): when nothing comes back the key checks are skipped.
+        if results:
+            first = results[0]
+            for key in ("id", "score", "library_id", "path", "title", "chunk_index"):
+                assert key in first
+
+
+class TestGetLibraryDocs:
+    """Tests for get_library_docs() - document retrieval.
+
+    Tests that insert rows request ``test_database`` so the SQLite tables are
+    initialised first, matching the other database-writing tests in this file.
+    """
+
+    def test_returns_empty_string_when_no_documents(self, mock_qdrant_client):
+        """An unknown library should yield a string (empty or an error message)."""
+        from backend.app.search import get_library_docs
+
+        result = get_library_docs("/local/nonexistent")
+
+        # Either returns empty string or error message
+        assert isinstance(result, str)
+
+    def test_returns_content_when_documents_exist(self, mock_qdrant_client, test_database):
+        """A library with one stored chunk should yield a string.
+
+        NOTE(review): the returned text itself is not inspected yet.
+        """
+        from backend.app.db import upsert_library, insert_document_chunk
+        from backend.app.search import get_library_docs
+
+        # Create library with a single chunk
+        upsert_library(library_id="/local/docretrievetest", name="Doc Retrieve", description="Test")
+        insert_document_chunk(
+            doc_id="doc-retrieve-1",
+            library_id="/local/docretrievetest",
+            path="docs/getting-started.md",
+            title="Getting Started",
+            content="# Getting Started\n\nWelcome to the documentation. This is a test document.",
+            chunk_index=0,
+            token_estimate=200
+        )
+
+        result = get_library_docs("/local/docretrievetest")
+
+        assert isinstance(result, str)
+
+    def test_topic_filter_searches(self, mock_qdrant_client, test_database):
+        """Passing ``topic`` should still yield a string."""
+        from backend.app.db import upsert_library, insert_document_chunk
+        from backend.app.search import get_library_docs
+
+        upsert_library(library_id="/local/topicsearchlib", name="Topic Search", description="Test")
+
+        # Two chunks on different topics: installation and usage.
+        insert_document_chunk(
+            doc_id="topic-install",
+            library_id="/local/topicsearchlib",
+            path="docs/install.md",
+            title="Installation Guide",
+            content="# Installation\n\nInstall with pip install mypackage.",
+            chunk_index=0,
+            token_estimate=150
+        )
+
+        insert_document_chunk(
+            doc_id="topic-usage",
+            library_id="/local/topicsearchlib",
+            path="docs/usage.md",
+            title="Usage Guide",
+            content="# Usage\n\nUse mycommand --help for help.",
+            chunk_index=0,
+            token_estimate=150
+        )
+
+        # Search for "install" topic
+        result = get_library_docs("/local/topicsearchlib", topic="install")
+
+        assert isinstance(result, str)
+
+    def test_token_limit_respected(self, mock_qdrant_client, test_database):
+        """Passing a small ``token_limit`` should still yield a string.
+
+        NOTE(review): the length of the result is not compared to the limit.
+        """
+        from backend.app.search import get_library_docs
+        from backend.app.db import upsert_library, insert_document_chunk
+
+        upsert_library(library_id="/local/tokenlimittest", name="Token Limit", description="Test")
+
+        # One chunk of 500 repeated words, stored with token_estimate=2000.
+        long_content = "# Long Content\n\n" + " ".join(["word"] * 500)
+        insert_document_chunk(
+            doc_id="long-doc",
+            library_id="/local/tokenlimittest",
+            path="docs/long.md",
+            title="Long Document",
+            content=long_content,
+            chunk_index=0,
+            token_estimate=2000
+        )
+
+        # Request with small token limit
+        result = get_library_docs("/local/tokenlimittest", token_limit=100)
+
+        assert isinstance(result, str)
+
+
+class TestGetLibraryDocsWithMock:
+    """Tests that verify content retrieval when mocked data is available."""
+
+    def test_retrieves_chunks_by_library_id(self, mock_qdrant_client, test_database):
+        """get_library_docs without a topic should yield a string for a populated library.
+
+        ``test_database`` is requested so the tables exist before rows are
+        inserted, matching the other database-writing tests in this file.
+        """
+        from backend.app.db import upsert_library, insert_document_chunk
+        from backend.app.search import get_library_docs
+
+        upsert_library(library_id="/local/mockretrievetest", name="Mock Retrieve", description="Test")
+
+        # Three small chunks stored under the same library.
+        for i in range(3):
+            insert_document_chunk(
+                doc_id=f"mock-retrieve-{i}",
+                library_id="/local/mockretrievetest",
+                path=f"path{i}.md",
+                title=f"Path {i}",
+                content=f"Content for path {i}.",
+                chunk_index=i,
+                token_estimate=50
+            )
+
+        result = get_library_docs("/local/mockretrievetest")
+
+        assert isinstance(result, str)
+
+
+class TestSearchErrorHandling:
+    """Tests for error handling in search functions."""
+
+    def test_search_handles_missing_library(self):
+        """search_docs with an unknown ``library_id`` should still return a list."""
+        from backend.app.search import search_docs
+
+        results = search_docs("test", library_id="/local/missing_lib_xyz123")
+        assert isinstance(results, list)
+
+    def test_resolve_handles_no_libraries_in_db(self, test_database):
+        """resolve_library_id should return a list when this test adds no libraries."""
+        from backend.app.search import resolve_library_id
+
+        # test_database initialises the tables; nothing is registered here.
+        candidates = resolve_library_id("anything")
+
+        assert isinstance(candidates, list)
+
+    def test_get_library_docs_handles_empty_library(self):
+        """get_library_docs for a library with no chunks should return a string."""
+        from backend.app.search import get_library_docs
+
+        result = get_library_docs("/local/emptylib")
+        assert isinstance(result, str)
+
+
+# =============================================================================
+# FIXTURES FOR SEARCH TESTS
+# =============================================================================
+
+@pytest.fixture
+def search_sample_text():
+    """Provide a Markdown document for search chunking tests.
+
+    The text has one top-level heading, three second-level headings, fenced
+    bash and python code blocks, and bullet lists.
+    """
+    markdown_doc = """# Installation Guide
+
+To install the package:
+```bash
+pip install mypackage
+```
+
+## Configuration
+
+Configure your environment by setting these variables:
+- MY_VAR=123
+- DEBUG=true
+
+## Usage Examples
+
+Example 1: Basic usage
+```python
+import mymodule
+module = mymodule.Module()
+result = module.run()
+print(result)
+```
+
+Example 2: Advanced usage with options
+```python
+options = {"verbose": True, "output": "stdout"}
+result = module.run(options=options)
+```
+
+## Troubleshooting
+
+Common issues and their solutions:
+- ImportError: Ensure package is installed
+- AttributeError: Check that attributes exist on object"""
+    return markdown_doc