assess_all_tools.py
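"""Assess every registered MCP tool with both Kimi and GLM via AnalyzeTool, then
reconcile the two assessments with ConsensusTool.

Per-tool raw JSON is written to assessments/json/; the combined Markdown report
goes to assessments/ALL_TOOLS_ASSESSMENT.md.
"""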
import os
import sys
import json
import asyncio
from pathlib import Path

# Ensure project root on path regardless of cwd
PROJECT_DIR = Path(__file__).resolve().parents[1]
if str(PROJECT_DIR) not in sys.path:
    sys.path.insert(0, str(PROJECT_DIR))

from tools.registry import ToolRegistry, TOOL_MAP  # type: ignore
from mcp.types import TextContent  # type: ignore

# Load environment (.env) so providers/keys are available when running directly
try:
    from dotenv import load_dotenv  # type: ignore
    _env_path = os.getenv("ENV_FILE") or str(PROJECT_DIR / ".env")
    load_dotenv(_env_path)
except Exception:
    pass

# Ensure providers are configured similarly to server runtime
try:
    from server import _ensure_providers_configured  # type: ignore
    _ensure_providers_configured()
except Exception:
    pass

# Resolve available models and pick sensible defaults/fallbacks
try:
    from src.providers.registry import ModelProviderRegistry  # type: ignore
    from src.providers.base import ProviderType  # type: ignore
    _avail_kimi = list(ModelProviderRegistry.get_available_model_names(provider_type=ProviderType.KIMI))
    _avail_glm = list(ModelProviderRegistry.get_available_model_names(provider_type=ProviderType.GLM))
except Exception:
    _avail_kimi, _avail_glm = [], []

_req_kimi = os.getenv("ASSESS_KIMI_MODEL", "kimi-k2-0711-preview")
_req_glm = os.getenv("ASSESS_GLM_MODEL", "glm-4.5-flash")


def _choose_model(requested: str, available: list[str], fallback: str | None = None) -> str | None:
    if not available:
        return None
    if requested in available:
        return requested
    if fallback and fallback in available:
        return fallback
    return available[0]


KIMI_MODEL = _choose_model(_req_kimi, _avail_kimi, fallback="kimi-k2-0711-preview")
GLM_MODEL = _choose_model(_req_glm, _avail_glm, fallback="glm-4.5-flash")
print(f"[ASSESS] Detected Kimi models: {_avail_kimi or []}; chosen: {KIMI_MODEL}")
print(f"[ASSESS] Detected GLM models: {_avail_glm or []}; chosen: {GLM_MODEL}")
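# Precedence sketch for _choose_model (requested > fallback > first detected);
# illustrative values only:
#   _choose_model("x", [])                         -> None  (no models detected)
#   _choose_model("a", ["a", "b"])                 -> "a"   (requested is available)
#   _choose_model("x", ["a", "b"], fallback="b")   -> "b"   (fallback is available)
#   _choose_model("x", ["a", "b"])                 -> "a"   (first detected wins)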
OUT_DIR = PROJECT_DIR / "assessments"
JSON_DIR = OUT_DIR / "json"
MD_PATH = OUT_DIR / "ALL_TOOLS_ASSESSMENT.md"
JSON_DIR.mkdir(parents=True, exist_ok=True)


def get_tool_source_file(tool_name: str) -> str:
    """Resolve the python file path for a tool using TOOL_MAP."""
    mod, cls = TOOL_MAP.get(tool_name, (None, None))
    if not mod:
        return ""
    try:
        m = __import__(mod, fromlist=[cls])
        path = Path(m.__file__).resolve()
        return str(path)
    except Exception:
        return ""


async def run_tool_async(tool, arguments: dict) -> dict:
    try:
        res = await tool.execute(arguments)
        if isinstance(res, list) and res:
            # Prefer first TextContent; if none, use stringified content
            txt = None
            for item in res:
                if isinstance(item, TextContent):
                    txt = item.text
                    break
            if txt is None:
                txt = str(res[0])
            # Try strict JSON first
            try:
                return json.loads(txt)
            except Exception:
                # Heuristic: extract first JSON object substring
                try:
                    start = txt.find('{')
                    if start != -1:
                        candidate = txt[start:]
                        return json.loads(candidate)
                except Exception:
                    pass
                return {"status": "error", "error": "Response was not valid JSON", "raw": txt[:2000]}
        return {"status": "error", "error": "Unexpected tool return"}
    except Exception as e:
        return {"status": "error", "error": str(e)}


async def analyze_tool(tool_name: str, source_file: str, model: str) -> dict:
    from tools.analyze import AnalyzeTool  # lazy import
    analyze = AnalyzeTool()
    args = {
        "model": model,
        "step": f"Assess the {tool_name} tool implementation for flaws, inefficiencies, instability, and UX complexity risks.",
        "step_number": 1,
        "total_steps": 1,
        "next_step_required": False,
        "findings": "",
        "relevant_files": [source_file] if source_file else [],
        # Keep defaults for analysis_type/general
    }
    return await run_tool_async(analyze, args)


async def run_consensus(tool_name: str, kimi_json: dict, glm_json: dict) -> dict:
    """Drive ConsensusTool across required steps with both models neutral."""
    from tools.consensus import ConsensusTool  # lazy import
    # Prepare proposal step content
    proposal = {
        "tool": tool_name,
        "constraints": [
            "Keep user-facing UX clean and simple; tools are used AI-to-AI",
            "Improve effectiveness and stability",
        ],
        "kimi_assess": kimi_json,
        "glm_assess": glm_json,
    }
    step_text = (
        "Evaluate the improvement plan for tool '" + tool_name + "' based on the two assessments below. "
        "Return a concise set of improvements that balance simplicity (AI-to-AI UX) and effectiveness/stability. "
        "When relevant, propose small interface tweaks to keep inputs/outputs minimal and deterministic.\n\n"
        + json.dumps(proposal, ensure_ascii=False)
    )
    consensus = ConsensusTool()
    # Step 1: provide our analysis and seed models list
    models = [
        {"model": KIMI_MODEL, "stance": "neutral"},
        {"model": GLM_MODEL, "stance": "neutral"},
    ]
    args = {
        "step": step_text,
        "step_number": 1,
        "total_steps": len(models),
        "next_step_required": True,
        "models": models,
    }
    out = await run_tool_async(consensus, args)
    cont_id = out.get("continuation_id")
    # Iterate per-model steps until done or we reach total steps
    for i in range(2, len(models) + 1):
        args = {
            "step": f"Continue consensus for {tool_name} (step {i}).",
            "step_number": i,
            "total_steps": len(models),
            "next_step_required": (i < len(models)),
            "continuation_id": cont_id,
        }
        out = await run_tool_async(consensus, args)
        cont_id = out.get("continuation_id", cont_id)
    return out


def summarise_for_md(label: str, data: dict) -> str:
    try:
        return f"### {label}\n\n" + "```json\n" + json.dumps(data, indent=2, ensure_ascii=False) + "\n```\n\n"
    except Exception:
        return f"### {label}\n\n(Unable to serialize)\n\n"


async def main():
    reg = ToolRegistry()
    reg.build_tools()
    tools = sorted(reg.list_tools().keys())
    report_sections = []
    index_entries = []
    for name in tools:
        # Skip internal-only utilities if any slipped through
        if name in {"toolcall_log_tail"}:
            continue
        src_file = get_tool_source_file(name)
        print(f"[ASSESS] {name} -> {src_file}")
        kimi_res = await analyze_tool(name, src_file, KIMI_MODEL)
        glm_res = await analyze_tool(name, src_file, GLM_MODEL)
        # Persist raw json per tool
        with (JSON_DIR / f"{name}.kimi.json").open("w", encoding="utf-8") as f:
            json.dump(kimi_res, f, indent=2, ensure_ascii=False)
        with (JSON_DIR / f"{name}.glm.json").open("w", encoding="utf-8") as f:
            json.dump(glm_res, f, indent=2, ensure_ascii=False)
        # Run consensus for this tool
        consensus_res = await run_consensus(name, kimi_res, glm_res)
        with (JSON_DIR / f"{name}.consensus.json").open("w", encoding="utf-8") as f:
            json.dump(consensus_res, f, indent=2, ensure_ascii=False)
        # Build MD section
        index_entries.append(f"- [{name}](#{name.replace('_','-')})")
        section = [f"## {name}\n\n"]
        section.append(summarise_for_md("Kimi assessment", kimi_res))
        section.append(summarise_for_md("GLM assessment", glm_res))
        section.append(summarise_for_md("Consensus recommendations", consensus_res))
        report_sections.append("".join(section))
    md = [
        "# EXAI Assessment: All Tools (Kimi & GLM + Consensus)\n\n",
        "Note: Keep UX clean/simple (AI-to-AI) while improving effectiveness and stability.\n\n",
        "## Index\n\n",
        "\n".join(index_entries) + "\n\n",
        "---\n\n",
        "\n\n".join(report_sections),
    ]
    OUT_DIR.mkdir(parents=True, exist_ok=True)
encoding="utf-8") as f: f.write("".join(md)) print(f"Written: {MD_PATH}") if __name__ == "__main__": asyncio.run(main())
