Ronin Defense MCP

alignment.py•3.2 KiB

"""Tool call alignment verification using token overlap heuristics.""" from dataclasses import dataclass from typing import Any, Dict, Optional, Tuple import re _STOPWORDS = { "the", "a", "an", "to", "of", "for", "and", "or", "in", "on", "with", "from", "by", "is", "are", "be", "this", "that", "it", "as", "at", "your", "you", "i", "we", "our", "us", "me" } @dataclass class ToolCallContext: """Minimal view of a tool call for alignment checking.""" candidate_text: Optional[str] tool_name: str tool_description: Optional[str] arguments: Dict[str, Any] def _normalize(text: str) -> set[str]: """Lowercase, tokenize, and filter stopwords.""" text = text.lower() tokens = re.findall(r"[a-z0-9]+", text) return { t for t in tokens if len(t) > 2 and t not in _STOPWORDS } def _extract_candidate_text(arguments: Dict[str, Any]) -> Optional[str]: """ Extract user prompt from tool arguments using heuristics. Args: arguments: Tool call arguments Returns: Longest natural-language string that looks like user intent """ skip_keys = {"body", "content", "data", "payload", "html", "text"} candidates: list[str] = [] for key, value in arguments.items(): if isinstance(value, str) and key.lower() not in skip_keys: text = value.strip() if len(text) >= 20 and " " in text: candidates.append(text) if not candidates: return None candidates.sort(key=len, reverse=True) return candidates[0] def compute_alignment_score(ctx: ToolCallContext) -> float: """ Compute token overlap score between user text and tool metadata. 
Args: ctx: Tool call context Returns: Score in [0, 1] where higher means more aligned """ if not ctx.candidate_text: return 1.0 prompt_tokens = _normalize(ctx.candidate_text) if not prompt_tokens: return 1.0 tool_text_parts = [ctx.tool_name or ""] if ctx.tool_description: tool_text_parts.append(ctx.tool_description) tool_text = " ".join(tool_text_parts) tool_tokens = _normalize(tool_text) if not tool_tokens: return 0.0 overlap = prompt_tokens & tool_tokens return len(overlap) / float(len(prompt_tokens)) def is_tool_call_likely_aligned( arguments: Dict[str, Any], tool_name: str, tool_description: Optional[str], threshold: float = 0.12, ) -> Tuple[bool, float]: """ Check if tool call matches user intent using token overlap. Args: arguments: Tool call arguments tool_name: Name of the tool tool_description: Tool description if available threshold: Minimum alignment score to allow Returns: (allowed, score): Whether call is allowed and its alignment score """ candidate = _extract_candidate_text(arguments) ctx = ToolCallContext( candidate_text=candidate, tool_name=tool_name, tool_description=tool_description, arguments=arguments, ) score = compute_alignment_score(ctx) if candidate is None: return True, score allow = score >= threshold return allow, score

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/NirjharDeb/CS8803_MLSecurity_MCP_Defense_Server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

alignment.py•3.2 KiB