#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "llama-index",
# "llama-index-embeddings-huggingface",
# "llama-index-embeddings-openai",
# "sentence-transformers",
# "transformers",
# "mcp",
# ]
# ///
"""Query the persisted LlamaIndex for English docs and call CLI LLMs or MCP."""
from __future__ import annotations
import argparse
import subprocess
import sys
from pathlib import Path
from typing import List
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
try:
    from llama_index.embeddings.openai import OpenAIEmbedding
except ImportError:  # pragma: no cover
    OpenAIEmbedding = None
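# The OpenAI embedding backend is optional; it is only needed when the index was
# built with OpenAI embeddings and --embed-backend openai is selected.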
def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Query docs via LlamaIndex")
    parser.add_argument("question", type=str, help="User question to ask")
    parser.add_argument(
        "--persist-dir",
        type=Path,
        default=Path("storage/llamaindex"),
        help="Path to the persisted index directory",
    )
    parser.add_argument(
        "--k",
        type=int,
        default=4,
        help="Number of top chunks to retrieve",
    )
    parser.add_argument(
        "--model",
        type=str,
        choices=("codex", "claude", "gemini", "raw", "mcp"),
        default="raw",
        help="Output target: a CLI LLM, plain text on stdout, or MCP-compatible stdout",
    )
    parser.add_argument(
        "--embedding-model",
        type=str,
        default="BAAI/bge-base-zh-v1.5",
        help="Embedding model identifier; interpreted per backend",
    )
    parser.add_argument(
        "--embed-backend",
        type=str,
        choices=("huggingface", "openai"),
        default="huggingface",
        help="Embedding backend; must match the one used to build the stored index (default: huggingface)",
    )
    parser.add_argument(
        "--cli-path",
        type=str,
        help="Replace the default CLI command with a single executable that reads the prompt from stdin",
    )
    return parser.parse_args()
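# Prompt assembled from the retrieved chunks; it is either printed verbatim
# (--model raw or mcp) or piped to the selected CLI on stdin.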
PROMPT_TEMPLATE = """
You are assisting with documentation questions. Use ONLY the provided context.
Question: {question}
Context:
{context}
Answer in English.
""".strip()
def build_context(source_nodes) -> str:
    chunks: List[str] = []
    # Cap the context at ten chunks even if --k requested more.
    for i, node in enumerate(source_nodes[:10], start=1):
        meta = node.metadata or {}
        # Some retrievers return a None score; fall back to a placeholder.
        score = f"{node.score:.3f}" if node.score is not None else "n/a"
        meta_str = f"path={meta.get('path', 'unknown')} score={score}"
        chunks.append(f"[{i}] ({meta_str})\n{node.text.strip()}\n")
    return "\n".join(chunks)
def resolve_cli(model: str, override: str | None) -> List[str]:
    # "raw" and "mcp" print to stdout, so no CLI command is needed.
    if model in ("raw", "mcp"):
        return []
    if override:
        return [override]
    if model == "codex":
        return ["codex", "chat", "--stdin"]
    if model == "claude":
        return ["claude", "chat", "--stdin"]
    if model == "gemini":
        return ["gemini", "chat", "--stdin"]
    raise ValueError(f"Unsupported model {model}")
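# The embedding model must match the one used when the index was persisted;
# querying with a different embedding space silently degrades retrieval.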
def configure_embedding(args: argparse.Namespace) -> None:
    if args.embed_backend == "huggingface":
        Settings.embed_model = HuggingFaceEmbedding(model_name=args.embedding_model)
    elif args.embed_backend == "openai":
        if OpenAIEmbedding is None:
            raise SystemExit(
                "OpenAI backend requested but llama-index-embeddings-openai is not installed",
            )
        Settings.embed_model = OpenAIEmbedding(model=args.embedding_model)
    else:  # pragma: no cover
        raise SystemExit(f"Unsupported embed backend {args.embed_backend}")
def main() -> None:
    args = parse_args()
    persist_dir = args.persist_dir.resolve()
    if not persist_dir.exists():
        raise SystemExit(f"Persist dir {persist_dir} not found")
    configure_embedding(args)
    storage_context = StorageContext.from_defaults(persist_dir=str(persist_dir))
    index = load_index_from_storage(storage_context)
    retriever = index.as_retriever(similarity_top_k=args.k)
    source_nodes = retriever.retrieve(args.question)
    context_text = build_context(source_nodes)
    prompt = PROMPT_TEMPLATE.format(question=args.question, context=context_text)
    if args.model in ("raw", "mcp"):
        # Both targets write the assembled prompt to stdout for downstream tools.
        print(prompt)
        sys.exit(0)
    cli_cmd = resolve_cli(args.model, args.cli_path)
    try:
        # The CLI inherits stdout/stderr, so its answer streams straight through.
        subprocess.run(
            cli_cmd,
            input=prompt.encode("utf-8"),
            check=True,
        )
    except FileNotFoundError:
        raise SystemExit(f"CLI executable not found: {' '.join(cli_cmd)}") from None
    except subprocess.CalledProcessError as exc:
        raise SystemExit(f"CLI command failed with code {exc.returncode}") from exc
if __name__ == "__main__":
    main()