from __future__ import annotations
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import List, TypedDict, cast
from mcp.server.fastmcp import FastMCP
from .config import get_settings
from .loaders import list_docs, read_doc
from .index import build_index, search as index_search
# FastMCP server instance named "mcp-data-server". The @mcp.tool() functions
# in this module register themselves on it, and main() runs it.
mcp = FastMCP("mcp-data-server")
# ---------- Structured types ----------
@dataclass
class SearchHit:
    """One search result: a text chunk together with its origin and score."""

    # Path taken from the index metadata of the matching chunk (meta.path).
    path: str
    # Chunk index taken from the same metadata (meta.chunk_index).
    chunk_index: int
    # Score the index search reported for this chunk.
    score: float
    # Text of the matching chunk.
    text: str
class SearchHitDict(TypedDict):
    """Dictionary form of SearchHit, declaring the same four keys.

    This is the element type of the list returned by search_chunks_tool.
    """

    path: str
    chunk_index: int
    score: float
    text: str
# ---------- Tools ----------
@mcp.tool()
def list_docs_tool(path: str | None = None) -> list[str]:
    """
    List the supported documents found under a directory.

    Args:
        path: Directory to scan. When omitted (or empty), the data_dir
            from the settings is scanned instead.

    Returns:
        The file paths found by list_docs(), each converted to a string.
    """
    settings = get_settings()
    if path:
        base_dir = Path(path)
    else:
        base_dir = settings.data_dir
    return list(map(str, list_docs(base_dir)))
@mcp.tool()
def read_doc_tool(path: str) -> str:
    """
    Read a single file and return its text.

    Args:
        path: Location of the file to read.

    Returns:
        The text that read_doc() produces for that file.
    """
    target = Path(path)
    return read_doc(target)
@mcp.tool()
def index_docs_tool() -> str:
    """
    Build (or rebuild) the embeddings index from DATA_DIR.

    Returns:
        A short confirmation message naming the configured data directory.
    """
    s = get_settings()
    # Keep quiet=True: the server speaks MCP over stdio, so the build must
    # not write any noise to stdout.
    build_index(quiet=True)
    confirmation = f"Indexed documents under {s.data_dir}"
    return confirmation
@mcp.tool()
def search_chunks_tool(query: str, k: int | None = None) -> list[SearchHitDict]:
    """
    Query the embeddings index and return the top-k matching chunks.

    Args:
        query: Text to search for.
        k: Number of chunks requested. It is handed to the index search
            unchanged, including when it is None.

    Returns:
        One dict per result with the keys path, chunk_index, score and
        text, in the order the index search yielded them.
    """
    # Each result goes through SearchHit first so the dict keys always
    # mirror the dataclass fields; cast() only informs the type checker.
    return [
        cast(
            SearchHitDict,
            asdict(
                SearchHit(
                    path=meta.path,
                    chunk_index=meta.chunk_index,
                    score=score,
                    text=chunk,
                )
            ),
        )
        for meta, score, chunk in index_search(query, k)
    ]
# ---------- Entrypoint ----------
def main() -> None:
    """
    Run the MCP server over stdio. Configure your MCP client to execute:
    python -m mcp_data_server run

    This simply delegates to mcp.run() with the "stdio" transport and
    returns nothing.
    """
    mcp.run(transport="stdio")
# Start the server only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()