Detect Knowledge Communities

graph_communities

Read-only

Find clusters of densely interconnected entities in a knowledge graph. Each entity belongs to at most one community (entities not reached from any seed stay unassigned), helping you explore related groups without specifying a starting point.

Instructions

Find clusters of densely-interconnected entities in the graph. Uses greedy seed-based BFS through edges above the weight threshold — works without GDS or APOC. Each entity is assigned to at most one community (the first that reaches it from a high-degree seed). Useful for understanding knowledge neighbourhoods (e.g. "everything related to infrastructure"). Returns {communities, coverage}: at most max_communities clusters, each shaped {id, seed_name, seed_id, member_count, dominant_type, members: [{id, name, type}]} (members capped at the first 30), ordered by seed degree desc; communities below min_size are filtered out. coverage is {total_entities, assigned, unassigned}. Use graph_query or graph_search instead when you have a specific entity to start from.

Input Schema

Table | JSON Schema

Name	Required	Description
`weight_threshold`	No	Only traverse edges with weight strictly greater than this (default 0.4).
`max_communities`	No	Maximum number of communities to return (default 10).
`max_hops`	No	BFS depth from each seed (default 3, capped at 4).
`min_size`	No	Minimum members for a community to be returned (default 2).

Implementation Reference

src/mcp-server/index.ts:1458-1471 (handler)

The handler function for graph_communities tool - calls client.findCommunities() with the parsed args and returns the result.

}, async (args) => {
  // Tool handler: forwards the four parsed arguments to client.findCommunities()
  // for the tenant returned by currentTenant() and wraps the outcome for the caller.
  try {
    const result = await client.findCommunities(currentTenant(), {
      weight_threshold: args.weight_threshold,
      max_communities: args.max_communities,
      max_hops: args.max_hops,
      min_size: args.min_size,
    });
    return toolResult(result);
  } catch (err) {
    // Normalise non-Error throwables so a message is always available.
    const e = err instanceof Error ? err : new Error(String(err));
    // Failures are returned via toolError rather than rethrown.
    return toolError(`graph_communities failed: ${e.message}`);
  }
});

src/mcp-server/index.ts:1416-1457 (schema)

The tool registration with input schema (weight_threshold, max_communities, max_hops, min_size) and description for graph_communities.

// Registers the read-only "graph_communities" tool: title, description and
// the zod input schema for its four optional tuning parameters.
server.registerTool("graph_communities", {
  title: "Detect Knowledge Communities",
  // NOTE(review): the result shape and ordering described below
  // ({community_id, seed, size, ...} sorted by size desc) do not match the
  // declared return type of findCommunities(), which is
  // {communities: [{id, seed_name, seed_id, member_count, dominant_type, members}], coverage}
  // pushed in seed order — confirm and align the description.
  description:
    "Find clusters of densely-interconnected entities in the graph. Uses greedy seed-based BFS through " +
    "edges above the weight threshold — works without GDS or APOC. Each entity is assigned to at most " +
    "one community (the first that reaches it from a high-degree seed). Useful for understanding " +
    "knowledge neighbourhoods (e.g. \"everything related to infrastructure\"). " +
    "Returns at most `max_communities` clusters, each shaped `{community_id, seed: {id, name, type}, size, members: [{id, name, type}]}`, sorted by size desc; communities below `min_size` are filtered out. " +
    "Use graph_query or graph_search instead when you have a specific entity to start from.",
  inputSchema: {
    // Edge-weight cut-off: a number in [0, 1], defaulting to 0.4.
    weight_threshold: z
      .number()
      .min(0)
      .max(1)
      .optional()
      .default(0.4)
      .describe("Only traverse edges with weight strictly greater than this (default 0.4)."),
    // Upper bound on returned communities: an integer in [1, 30], defaulting to 10.
    max_communities: z
      .number()
      .int()
      .min(1)
      .max(30)
      .optional()
      .default(10)
      .describe("Maximum number of communities to return (default 10)."),
    // Traversal depth from each seed: an integer in [1, 4], defaulting to 3.
    max_hops: z
      .number()
      .int()
      .min(1)
      .max(4)
      .optional()
      .default(3)
      .describe("BFS depth from each seed (default 3, capped at 4)."),
    // Smallest community size that is reported: an integer >= 2, defaulting to 2.
    min_size: z
      .number()
      .int()
      .min(2)
      .optional()
      .default(2)
      .describe("Minimum members for a community to be returned (default 2)."),
  },
  // Marks the tool as read-only via the readOnlyHint annotation.
  annotations: { readOnlyHint: true },

src/mcp-server/index.ts:1416-1471 (registration)

Registration of graph_communities tool via server.registerTool() with the name 'graph_communities'.

// Registers the read-only "graph_communities" tool. The description is what
// clients see, so it spells out the exact result shape produced by
// client.findCommunities(): {communities, coverage}, communities in seed order.
server.registerTool("graph_communities", {
  title: "Detect Knowledge Communities",
  description:
    "Find clusters of densely-interconnected entities in the graph. Uses greedy seed-based BFS through " +
    "edges above the weight threshold — works without GDS or APOC. Each entity is assigned to at most " +
    "one community (the first that reaches it from a high-degree seed). Useful for understanding " +
    "knowledge neighbourhoods (e.g. \"everything related to infrastructure\"). " +
    "Returns `{communities, coverage}`: at most `max_communities` clusters, each shaped " +
    "`{id, seed_name, seed_id, member_count, dominant_type, members: [{id, name, type}]}` " +
    "(`members` capped at the first 30), ordered by seed degree desc; communities below `min_size` are filtered out. " +
    "`coverage` is `{total_entities, assigned, unassigned}`. " +
    "Use graph_query or graph_search instead when you have a specific entity to start from.",
  inputSchema: {
    // Edge-weight cut-off: a number in [0, 1], defaulting to 0.4.
    weight_threshold: z
      .number()
      .min(0)
      .max(1)
      .optional()
      .default(0.4)
      .describe("Only traverse edges with weight strictly greater than this (default 0.4)."),
    // Upper bound on returned communities: an integer in [1, 30], defaulting to 10.
    max_communities: z
      .number()
      .int()
      .min(1)
      .max(30)
      .optional()
      .default(10)
      .describe("Maximum number of communities to return (default 10)."),
    // Traversal depth from each seed: an integer in [1, 4], defaulting to 3.
    max_hops: z
      .number()
      .int()
      .min(1)
      .max(4)
      .optional()
      .default(3)
      .describe("BFS depth from each seed (default 3, capped at 4)."),
    // Smallest community size that is reported: an integer >= 2, defaulting to 2.
    min_size: z
      .number()
      .int()
      .min(2)
      .optional()
      .default(2)
      .describe("Minimum members for a community to be returned (default 2)."),
  },
  annotations: { readOnlyHint: true },
}, async (args) => {
  // Forward the parsed arguments to the graph client for the tenant returned
  // by currentTenant(); failures are reported via toolError, not rethrown.
  try {
    const result = await client.findCommunities(currentTenant(), {
      weight_threshold: args.weight_threshold,
      max_communities: args.max_communities,
      max_hops: args.max_hops,
      min_size: args.min_size,
    });
    return toolResult(result);
  } catch (err) {
    // Normalise non-Error throwables so a message is always available.
    const e = err instanceof Error ? err : new Error(String(err));
    return toolError(`graph_communities failed: ${e.message}`);
  }
});

src/shared/neo4j-client.ts:2161-2297 (helper)

The findCommunities() method on Neo4jClient which implements greedy seed-based BFS clustering using pure Cypher (no GDS/APOC).

// ─── Communities ───
/**
 * Greedy seed-based BFS clustering. No GDS/APOC required — works on Aura Free.
 *
 * Algorithm:
 *   1. Rank entities by strong-edge degree (edges with weight > threshold);
 *      only the top 100 entities with at least 2 such edges are seed candidates.
 *   2. Take the highest-degree unassigned candidate as a seed.
 *   3. BFS through edges with weight > threshold up to max_hops.
 *   4. Assign every reached, not-yet-assigned entity to this community.
 *   5. Repeat until max_communities is reached or the candidates run out.
 *
 * @param tenantId - Tenant whose entities are clustered; every query is scoped to it.
 * @param options - Optional tuning knobs:
 *   - `weight_threshold` (default 0.4): only edges with weight strictly above it are followed.
 *   - `max_communities` (default 10): stop once this many communities were collected.
 *   - `max_hops` (default 3): traversal depth, truncated to an integer and clamped to 1..4.
 *   - `min_size` (default 2): smaller clusters are dropped and their seed stays unassigned.
 * @returns `communities` in seed order (highest strong-edge degree first), each with
 *   the full `member_count` but at most the first 30 `members`, plus `coverage`
 *   counting total, assigned and unassigned entities for the tenant.
 */
async findCommunities(tenantId: string, options: {
  weight_threshold?: number;
  max_communities?: number;
  max_hops?: number;
  min_size?: number;
} = {}): Promise<{
  communities: Array<{
    id: number;
    seed_name: string;
    seed_id: string;
    member_count: number;
    dominant_type: string;
    members: Array<{ id: string; name: string; type: string }>;
  }>;
  coverage: {
    total_entities: number;
    assigned: number;
    unassigned: number;
  };
}> {
  const threshold = options.weight_threshold ?? 0.4;
  const maxCommunities = options.max_communities ?? 10;
  const minSize = options.min_size ?? 2;

  // maxHops is spliced into the query text below rather than passed as a
  // parameter, so it must be a plain integer: clamp to 1..4, drop any
  // fractional part, and fall back to the default when the input is NaN.
  const clampedHops = Math.max(1, Math.min(options.max_hops ?? 3, 4));
  const maxHops = Number.isNaN(clampedHops) ? 3 : Math.trunc(clampedHops);

  // Step 1: rank nodes by strong-edge degree (tenant-scoped)
  const hubRows = await this.run(
    `
    MATCH (n:Entity {tenant_id: $tenantId})-[r]-(other:Entity {tenant_id: $tenantId})
    WHERE r.weight > $threshold
    WITH n, count(r) AS degree
    WHERE degree >= 2
    RETURN n.id AS id,
           n.name AS name,
           [l IN labels(n) WHERE l <> 'Entity'][0] AS type,
           degree
    ORDER BY degree DESC
    LIMIT 100
    `,
    { tenantId, threshold },
  );

  // Total entity count for coverage stats (tenant-scoped)
  const totalRows = await this.run(
    `MATCH (n:Entity {tenant_id: $tenantId}) RETURN count(n) AS total`,
    { tenantId },
  );
  const totalEntities = Number(totalRows[0]?.["total"] ?? 0);

  const assigned = new Set<string>();
  const communities: Array<{
    id: number;
    seed_name: string;
    seed_id: string;
    member_count: number;
    dominant_type: string;
    members: Array<{ id: string; name: string; type: string }>;
  }> = [];

  for (const hub of hubRows) {
    if (communities.length >= maxCommunities) break;
    const hubId = String(hub["id"]);
    if (assigned.has(hubId)) continue;

    // BFS: variable-length path from seed, all relationships above threshold,
    // confined to this tenant's subgraph. Path is bound to a variable so we
    // can pass it to nodes() — passing the pattern directly is List<Path>.
    // The traversal may pass through already-assigned entities; they are
    // filtered out when the member list is built below.
    const memberRows = await this.run(
      `
      MATCH (seed:Entity {tenant_id: $tenantId, id: $seedId})
      MATCH path = (seed)-[r*1..${maxHops}]-(m:Entity)
      WHERE ALL(node IN nodes(path) WHERE node.tenant_id = $tenantId)
        AND ALL(rel IN r WHERE rel.weight > $threshold)
      RETURN DISTINCT m.id AS id,
             m.name AS name,
             [l IN labels(m) WHERE l <> 'Entity'][0] AS type
      `,
      { tenantId, seedId: hubId, threshold },
    );

    const seedRow = {
      id: hubId,
      name: String(hub["name"] ?? hubId),
      type: String(hub["type"] ?? "?"),
    };

    // The seed is always the first member; skip rows already claimed by an
    // earlier community or already seen in this one.
    const members = [seedRow];
    const seenInCluster = new Set<string>([hubId]);
    for (const row of memberRows) {
      const id = String(row["id"]);
      if (assigned.has(id) || seenInCluster.has(id)) continue;
      seenInCluster.add(id);
      members.push({
        id,
        name: String(row["name"] ?? id),
        type: String(row["type"] ?? "?"),
      });
    }

    // Too small to report: nothing is marked assigned, so these entities
    // remain available to later seeds.
    if (members.length < minSize) continue;

    // Compute dominant type. A Map is used instead of a plain object so label
    // names that collide with Object.prototype members (e.g. "constructor")
    // are counted correctly. Ties go to the type encountered first.
    const typeCounts = new Map<string, number>();
    for (const m of members) typeCounts.set(m.type, (typeCounts.get(m.type) ?? 0) + 1);
    let dominantType = "?";
    let dominantCount = 0;
    for (const [type, count] of typeCounts) {
      if (count > dominantCount) {
        dominantCount = count;
        dominantType = type;
      }
    }

    // Mark these as assigned (greedy: each entity belongs to the first community that grabs it)
    for (const m of members) assigned.add(m.id);

    communities.push({
      id: communities.length + 1,
      seed_name: seedRow.name,
      seed_id: seedRow.id,
      // member_count reflects the full cluster; the members list is capped at 30.
      member_count: members.length,
      dominant_type: dominantType,
      members: members.slice(0, 30),
    });
  }

  return {
    communities,
    coverage: {
      total_entities: totalEntities,
      assigned: assigned.size,
      unassigned: totalEntities - assigned.size,
    },
  };
}

Graph-Memory

Detect Knowledge Communities

Instructions

Input Schema

Implementation Reference

Tool Definition Quality

Other Tools

Latest Blog Posts

MCP directory API