# Search Operations for Semantic Query and Library Navigation
from typing import List, Dict, Any, Optional
from pathlib import Path

from .config import settings
from .vector_store import get_client, _collection_name as VECTOR_COLLECTION
from .embeddings import embed_text, get_embedding_size
from .db import get_chunks_for_library, list_libraries

# Rough characters-per-token ratio used for the token budget estimate.
_CHARS_PER_TOKEN = 4

# Maximum number of candidates returned by resolve_library_id().
_MAX_CANDIDATES = 5


def search_docs(
    query: str,
    library_id: Optional[str] = None,
    limit: int = 10
) -> List[Dict[str, Any]]:
    """
    Search documents by semantic similarity in Qdrant.

    Args:
        query: The search query string
        library_id: Optional filter to search only within a library
        limit: Maximum number of results to return

    Returns:
        List of dicts with format:
        {
            "id": "...",
            "score": 0.123,
            "library_id": "...",
            "path": "...",
            "title": "...",
            "chunk_index": 0
        }
        An empty list is returned for a blank query, a non-positive limit,
        or when any error occurs during the search (the error is printed).
    """
    # Nothing meaningful can be embedded or returned in these cases.
    if not query or not query.strip() or limit <= 0:
        return []

    try:
        # Generate embedding for the query
        query_embedding = embed_text(query)
        client = get_client()

        # Build filter if library_id is specified
        search_filter = None
        if library_id:
            try:
                from qdrant_client.models import FieldCondition, Filter, MatchValue

                search_filter = Filter(
                    must=[
                        FieldCondition(
                            key="library_id",
                            match=MatchValue(value=library_id),
                        )
                    ]
                )
            except ImportError:
                # Best effort: fall back to filtering the hits client-side
                # below instead of silently returning other libraries' chunks.
                print("Search warning: qdrant_client.models unavailable; filtering results client-side")
                search_filter = None

        # True only when a library was requested but no server-side filter exists.
        filter_client_side = bool(library_id) and search_filter is None

        # Perform vector search.
        # NOTE(review): VECTOR_COLLECTION aliases vector_store._collection_name and
        # is passed through unchanged - confirm it is a string, not a callable.
        results = client.search(
            collection_name=VECTOR_COLLECTION,
            query_vector=query_embedding,
            limit=limit,
            query_filter=search_filter,  # the client's keyword is query_filter
        )

        # Format and return results
        formatted_results = []
        for result in results:
            payload = result.payload
            if not (result.score > 0 and payload):
                continue
            if filter_client_side and payload.get("library_id") != library_id:
                continue

            # Prefer the id stored in the payload; fall back to the point id so
            # one payload without "id" does not abort the whole search.
            point_id = payload.get("id")
            if point_id is None:
                point_id = getattr(result, "id", None)

            formatted_results.append({
                "id": point_id,
                "score": float(result.score),
                "library_id": payload.get("library_id", ""),
                "path": payload.get("path", ""),
                "title": payload.get("title", ""),
                "chunk_index": payload.get("chunk_index", 0),
            })

        return formatted_results

    except Exception as e:
        print(f"Search error: {e}")
        return []


def _chunks_for_hits(
    library_id: str,
    hits: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    """Map search hits back to the library's stored chunks, in hit order."""
    # Search hits carry no content, so the text has to come from the chunk store.
    # NOTE(review): assumes stored chunks are dicts that expose "id" and/or
    # "path" + "chunk_index" matching the search payload - confirm against db.
    by_id: Dict[str, Dict[str, Any]] = {}
    by_position: Dict[Any, Dict[str, Any]] = {}
    for chunk in get_chunks_for_library(library_id) or []:
        if chunk.get("id") is not None:
            by_id[str(chunk["id"])] = chunk
        # Position matching is only trusted when the chunk names its document;
        # chunk_index alone could collide across documents.
        if chunk.get("path"):
            by_position[(chunk["path"], chunk.get("chunk_index"))] = chunk

    matched = []
    seen = set()
    for hit in hits:
        chunk = by_id.get(str(hit.get("id")))
        if chunk is None and hit.get("path"):
            chunk = by_position.get((hit["path"], hit.get("chunk_index")))
        if chunk is None or id(chunk) in seen:
            continue
        seen.add(id(chunk))
        matched.append(chunk)
    return matched


def _select_within_budget(chunks: List[Dict[str, Any]], token_limit: int):
    """Pick chunks in order until the estimated token budget is exhausted."""
    selected = []
    total_tokens = 0
    for chunk in chunks:
        content = chunk.get("content", "") or ""
        tokens = len(content) // _CHARS_PER_TOKEN  # Simple token estimate
        if total_tokens + tokens <= token_limit:
            selected.append(chunk)
            total_tokens += tokens
            continue

        # Take part of this chunk to fill the remaining space, then stop so the
        # combined output never exceeds the budget.
        remaining = token_limit - total_tokens
        if remaining > 0:
            selected.append({
                "content": content[:remaining * _CHARS_PER_TOKEN],
                "title": chunk.get("title", ""),
            })
            total_tokens += remaining
        break
    return selected, total_tokens


def _render_markdown(chunks: List[Dict[str, Any]]) -> str:
    """Join chunk contents into markdown, adding a heading when the title changes."""
    parts = []
    last_title = None
    for chunk in chunks:
        content = chunk.get("content", "") or ""
        if not content.strip():
            continue
        title = chunk.get("title")
        if title and title != last_title:
            parts.append(f"# {title}")
            last_title = title
        parts.append(content)
    return "\n\n".join(parts)


def get_library_docs(
    library_id: str,
    topic: Optional[str] = None,
    token_limit: int = 8000
) -> str:
    """
    Retrieve documentation content from a library.

    Args:
        library_id: The library ID to fetch docs from
        topic: Optional topic filter - if provided, searches for topic first
        token_limit: Maximum tokens to include in output (estimated as
            len(content) // 4 per chunk)

    Returns:
        Combined markdown content as string. When nothing is found, or when an
        error occurs, a human-readable message string is returned instead.
    """
    try:
        if topic:
            # If topic is specified, search for relevant chunks
            print(f" [Search] Searching library '{library_id}' for topic: {topic}")
            search_results = search_docs(query=topic, library_id=library_id, limit=20)

            if not search_results:
                return f"No documents found in library '{library_id}' matching topic: {topic}"

            print(f" [Search] Found {len(search_results)} relevant chunks")

            # Resolve hits to stored chunks, keeping relevance order.
            candidates = _chunks_for_hits(library_id, search_results)
            if not candidates:
                return f"No documents found in library '{library_id}' matching topic: {topic}"
        else:
            # Fetch all chunks for the library and select most useful ones
            print(f" [Fetch] Retrieving chunks from library '{library_id}'")
            chunks_data = get_chunks_for_library(library_id)

            if not chunks_data:
                return f"No documents found in library '{library_id}'"

            # Sort by chunk_index descending and pick top ones to respect token limit.
            # NOTE(review): descending order emits later chunks first - confirm
            # this is intended rather than reading order.
            candidates = sorted(
                chunks_data,
                key=lambda x: x.get("chunk_index", 0),
                reverse=True,
            )

        selected_chunks, total_tokens = _select_within_budget(candidates, token_limit)
        print(f" [Fetch] Selected {len(selected_chunks)} chunks ({total_tokens} tokens)")

        # Combine chunks into markdown
        result = _render_markdown(selected_chunks)

        # If no headings were added, prepend library title
        if not any(part.startswith("#") for part in result.split("\n")[:3]):
            result = f"# {library_id.upper().replace('_', ' ')}\n\n" + result

        return result.rstrip()

    except Exception as e:
        print(f"Error getting library docs: {e}")
        return f"Error retrieving documents from library '{library_id}': {str(e)}"


def resolve_library_id(library_name: str) -> List[Dict[str, Any]]:
    """
    Resolve a library name to potential matches (Context7-style).

    Args:
        library_name: Partial or full library name to search for

    Returns:
        List of at most 5 Context7-style candidate dicts:
        {
            "id": "/local/foundryvtt",
            "name": "foundryvtt",
            "description": "...",
            "source": "local"
        }
        An empty list is returned when nothing matches or an error occurs.
    """
    try:
        libraries = list_libraries()

        if not libraries:
            return []

        # Filter by name match (case-insensitive substring on name or id)
        needle = library_name.strip().lower()
        candidates = []
        for lib in libraries:
            lib_id = str(lib.get("id") or "")
            lib_name = str(lib.get("name") or "")
            if not lib_id:
                # Without an id no usable "/local/<id>" handle can be built;
                # skip instead of failing the whole lookup.
                continue
            if needle in lib_name.lower() or needle in lib_id.lower():
                candidates.append({
                    "id": f"/local/{lib_id}",
                    "name": lib_name,
                    "description": lib.get("description", ""),
                    "source": "local",
                })

        # Return at most the first few matches
        candidates = candidates[:_MAX_CANDIDATES]

        print(f" [Resolve] Found {len(candidates)} candidate(s) for: {library_name}")
        return candidates

    except Exception as e:
        print(f"Error resolving library ID: {e}")
        return []


if __name__ == "__main__":

    def test_search():
        """Smoke-test the synchronous search helpers."""
        print("Testing search module...\n")

        # Test 1: Resolve a library name to candidates
        print("1. Testing resolve_library_id()...")
        results = resolve_library_id("foundryvtt")
        print(f" Results: {len(results)} candidates\n")

        # Test 2: Empty query should return empty list
        print("2. Testing search_docs() with empty query...")
        results = search_docs("")
        print(f" Results: {len(results)} chunks\n")

        print("✅ All tests completed!")

    # The helpers are plain functions, so they are called directly (no event loop).
    test_search()