Daemon-MCP

summarizer.py•8.69 KiB

"""Community summarization for GraphRAG hierarchical queries.""" import logging from dataclasses import dataclass from typing import Any, Awaitable, Callable, Dict, List, Optional logger = logging.getLogger(__name__) @dataclass class SummaryConfig: """Configuration for community summarization.""" max_content_length: int = 4000 # Max chars of member content to consider max_summary_length: int = 500 # Target summary length include_categories: bool = True # Group by memory category include_stats: bool = True # Include member count, etc. llm_enabled: bool = False # Use LLM for summarization llm_temperature: float = 0.0 # Deterministic for consistency class CommunitySummarizer: """ Generates summaries for memory communities. Supports two modes: 1. Extractive (default): Concatenates key information from members 2. LLM-based (optional): Uses LLM for natural language summary Anti-hallucination measures: - Extractive prompts: "Summarize ONLY what is stated" - Source attribution: Include memory IDs in context - Temperature 0: Deterministic outputs """ def __init__( self, config: Optional[SummaryConfig] = None, llm_func: Optional[Callable[[str], Awaitable[str]]] = None, ): """ Initialize summarizer. Args: config: Summary configuration llm_func: Optional async function that takes prompt, returns summary Signature: async def llm(prompt: str) -> str """ self.config = config or SummaryConfig() self.llm_func = llm_func async def summarize_community( self, community_name: str, members: List[Dict[str, Any]], entity_names: Optional[List[str]] = None, ) -> str: """ Generate summary for a community. Args: community_name: Name of the community members: List of memory dicts with content, category, etc. entity_names: Optional list of key entities in this community Returns: Generated summary text """ if not members: return f"Empty community: {community_name}" if self.config.llm_enabled and self.llm_func: return await self._llm_summarize(community_name, members, entity_names) else: return self._extractive_summarize(community_name, members, entity_names) def _extractive_summarize( self, community_name: str, members: List[Dict[str, Any]], entity_names: Optional[List[str]] = None, ) -> str: """ Generate summary by extracting key information from members. This is the default approach - no external dependencies. """ parts = [f"Community: {community_name}"] if self.config.include_stats: parts.append(f"Contains {len(members)} memories.") if entity_names: parts.append(f"Key entities: {', '.join(entity_names[:5])}") if self.config.include_categories: # Group by category by_category: Dict[str, List[str]] = {} for mem in members: cat = mem.get("category", "other") if cat not in by_category: by_category[cat] = [] content = mem.get("content", "") # Truncate long content if len(content) > 150: content = content[:147] + "..." by_category[cat].append(content) for category, contents in by_category.items(): parts.append(f"\n{category.title()}s ({len(contents)}):") # Show up to 3 items per category for content in contents[:3]: parts.append(f" - {content}") if len(contents) > 3: parts.append(f" ... and {len(contents) - 3} more") summary = "\n".join(parts) # Truncate if too long if len(summary) > self.config.max_summary_length: summary = summary[: self.config.max_summary_length - 3] + "..." return summary async def _llm_summarize( self, community_name: str, members: List[Dict[str, Any]], entity_names: Optional[List[str]] = None, ) -> str: """ Generate summary using LLM. Uses extractive prompting to prevent hallucination. """ # Build context from members context_parts = [] total_len = 0 for mem in members: content = mem.get("content", "") category = mem.get("category", "other") mem_id = mem.get("id", "?") entry = f"[{category}:{mem_id}] {content}" if total_len + len(entry) > self.config.max_content_length: break context_parts.append(entry) total_len += len(entry) context = "\n".join(context_parts) # Build extractive prompt entity_hint = "" if entity_names: entity_hint = ( f"\nKey entities in this community: {', '.join(entity_names[:5])}" ) prompt = f"""Summarize the following memories that form a community called "{community_name}". {entity_hint} IMPORTANT: Only include information that is explicitly stated in the memories below. Do NOT add information that isn't present. Do NOT make inferences beyond what is directly stated. Memories: {context} Write a concise summary (2-4 sentences) covering: 1. The main topics or decisions in this community 2. Any patterns or recurring themes 3. Key outcomes if mentioned Summary:""" try: summary = await self.llm_func(prompt) return summary.strip() except Exception as e: logger.warning( f"LLM summarization failed: {e}, falling back to extractive" ) return self._extractive_summarize(community_name, members, entity_names) async def summarize_hierarchy( self, communities: List[Dict[str, Any]], fetch_members_func: Callable[[List[int]], Awaitable[List[Dict[str, Any]]]], ) -> List[Dict[str, Any]]: """ Generate summaries for a hierarchy of communities (bottom-up). GraphRAG pattern: Leaf communities summarize from members, higher-level communities summarize from child summaries. Args: communities: List of community dicts with level, parent_id, member_ids fetch_members_func: Async function to fetch memory dicts for a community Signature: async def fetch(member_ids: List[int]) -> List[Dict] Returns: Communities with summaries populated """ # Group by level by_level: Dict[int, List[Dict[str, Any]]] = {} for c in communities: level = c.get("level", 0) if level not in by_level: by_level[level] = [] by_level[level].append(c) # Track generated summaries by community ID summaries: Dict[int, str] = {} # Process bottom-up (level 0 first) for level in sorted(by_level.keys()): for community in by_level[level]: comm_id = community.get("id") or community.get("leiden_community_id") if level == 0: # Leaf: summarize from member memories member_ids = community.get("member_ids", []) members = ( await fetch_members_func(member_ids) if member_ids else [] ) else: # Higher level: summarize from child summaries # Find children children = [ c for c in communities if c.get("parent_id") == comm_id ] # Create pseudo-members from child summaries members = [ { "id": child.get("id"), "category": f"level-{child.get('level', 0)}-summary", "content": summaries.get( child.get("id"), child.get("summary", "") ), } for child in children ] entity_names = community.get("tags", []) summary = await self.summarize_community( community.get("name", f"Community {comm_id}"), members, entity_names, ) community["summary"] = summary if comm_id: summaries[comm_id] = summary return communities

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/DasBluEyedDevil/Daemon-MCP'

If you have feedback or need assistance with the MCP directory API, please join our Discord server

summarizer.py•8.69 KiB