// weaviate.ts
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';
export function regWeaviateTool(server: McpServer) {
server.tool(
'Weaviate_Hybrid_Search_with_Extension',
'Hybrid search in Weaviate with context extension',
{
collection: z
.string()
.describe(
'The name of the Weaviate collection to query, i.e. the collection where your documents are stored.',
),
query: z
.string()
.describe(
'The search query or requirements from the user. This is the main input for the hybrid search.',
),
where: z.any().optional()
.describe(`Optional Weaviate "where" filter, given as strict JSON (keys quoted). Provide a plain JSON object; it is converted to GraphQL input syntax for you (the "operator" value is emitted as an unquoted enum).
- Must be valid JSON, not GraphQL syntax.
Example: {"path":["tags"],"operator":"ContainsAny","valueText":["foo","bar"]}`),
topK: z
.number()
.min(0)
.default(5)
.describe(
'The number of top results to return from the initial hybrid search query. Default is 5.',
),
extK: z
.number()
.min(0)
.default(0)
.describe(
'The number of additional chunks to include before and after each topK result, extending the context around each hit. Default is 0.',
),
},
async ({ collection, query, where, topK, extK }) => {
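// Overview:
// 1. Run a hybrid (keyword + vector) search over "content" to get the topK seed chunks.
// 2. Group the seeds by document UUID (and by page_number when the collection has that field).
// 3. If extK > 0, fetch up to extK neighboring chunks on each side of every seed chunk.
// 4. Merge each group's chunks in chunk-id order and return one text block per group.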
// Helper for GraphQL requests
const gql = async (q: string) => {
const res = await fetch('http://localhost:8080/v1/graphql', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ query: q }),
});
const json: any = await res.json();
if (json.errors) throw new Error(JSON.stringify(json.errors));
return json.data;
};
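// The URL above assumes a local Weaviate instance on the default port (8080); adjust it
// for your deployment. Illustrative call (hypothetical collection name):
//   const data = await gql('{ Get { Docs(limit: 1) { content } } }');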
// Serialize plain JSON into GraphQL input string
const toGraphQLInput = (v: any): string => {
const isEnumLike = (s: string) => /^[A-Z][A-Za-z0-9_]*$/.test(s);
if (v === null) return 'null';
const t = typeof v;
if (t === 'string') return JSON.stringify(v);
if (t === 'number' || t === 'boolean') return String(v);
if (Array.isArray(v)) return `[${v.map((x) => toGraphQLInput(x)).join(', ')}]`;
if (t === 'object') {
return (
'{' +
Object.entries(v)
.map(([k, val]) => {
if (k === 'operator' && typeof val === 'string' && isEnumLike(val)) {
return `${k}: ${val}`; // GraphQL enum (unquoted)
}
return `${k}: ${toGraphQLInput(val)}`;
})
.join(', ') +
'}'
);
}
// Fallback
return JSON.stringify(v);
};
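// Example conversion (illustrative input):
//   toGraphQLInput({ path: ['tags'], operator: 'ContainsAny', valueText: ['foo', 'bar'] })
//   -> '{path: ["tags"], operator: ContainsAny, valueText: ["foo", "bar"]}'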
// Detect page_number field
let hasPageNumber = false;
try {
const schemaRes = await fetch(`http://localhost:8080/v1/schema/${collection}`);
if (schemaRes.ok) {
const cls: any = await schemaRes.json();
hasPageNumber = (cls?.properties || []).some((p: any) => p.name === 'page_number');
}
} catch {
hasPageNumber = false;
}
const fieldList = ['content', 'source', 'doc_chunk_id'];
if (hasPageNumber) fieldList.push('page_number');
const fields = fieldList.join('\n');
// Hybrid search query
const args: string[] = [];
args.push(`hybrid:{query:${JSON.stringify(query)}, properties:["content"]}`);
if (where) args.push(`where:${toGraphQLInput(where)}`);
args.push(`limit:${topK}`);
const hybridQuery = `{
Get {
${collection}(${args.join(', ')}) { ${fields} }
}
}`;
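// Illustrative rendered query (collection "Docs", query "foo", topK 5, no filter; fields
// shown on one line for readability):
//   { Get { Docs(hybrid:{query:"foo", properties:["content"]}, limit:5) { content source doc_chunk_id } } }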
const data = await gql(hybridQuery);
const hits: any[] = data?.Get?.[collection] || [];
// Build groups from topK seed hits: group by (docUuid, page) when page_number exists; otherwise by docUuid
type GroupKey = string; // docUuid or `${docUuid}@${pageNumber}` when grouping by page
interface GroupInfo {
docUuid: string;
pageNumber?: number; // present only when hasPageNumber and value is known
source: string;
// collected chunks for this group
chunks: Map<number, string>; // chunkId -> content
// seed chunk ids (used for extension)
seedChunkIds: Set<number>;
}
const groups = new Map<GroupKey, GroupInfo>();
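// doc_chunk_id is expected to look like "<docUuid>_<chunkIndex>" (e.g. "3f2a9c_12", an
// illustrative value); the numeric suffix after the last "_" is the chunk index.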
for (const obj of hits) {
const { content, doc_chunk_id, source, page_number } = obj;
const docChunkId = String(doc_chunk_id);
const idx = docChunkId.lastIndexOf('_');
const docUuid = idx >= 0 ? docChunkId.slice(0, idx) : docChunkId;
const chunkId = idx >= 0 ? parseInt(docChunkId.slice(idx + 1), 10) : NaN;
if (!Number.isFinite(chunkId)) continue; // defensive: skip malformed ids
// Determine grouping key
let groupKey: GroupKey;
let pageNum: number | undefined = undefined;
if (hasPageNumber && page_number !== undefined && page_number !== null) {
pageNum = Number(page_number);
groupKey = `${docUuid}@${pageNum}`;
} else {
groupKey = docUuid;
}
let group = groups.get(groupKey);
if (!group) {
group = {
docUuid,
pageNumber: pageNum,
source: source ? String(source) : '',
chunks: new Map<number, string>(),
seedChunkIds: new Set<number>(),
};
groups.set(groupKey, group);
} else if (!group.source) {
group.source = source ? String(source) : '';
}
// Add the seed chunk to its group (extension chunks are added later)
group.chunks.set(chunkId, content ? String(content) : '');
group.seedChunkIds.add(chunkId);
}
// If no extension requested, aggregate per group and return
if (extK === 0) {
const results = Array.from(groups.values()).map((g) => {
const sortedIds = Array.from(g.chunks.keys()).sort((a, b) => a - b);
const mergedContent = sortedIds.map((id) => g.chunks.get(id) || '').join('');
const result: any = { content: mergedContent, source: g.source };
if (g.pageNumber !== undefined) result.page_number = g.pageNumber;
return result;
});
return {
content: results.map((r) => ({ type: 'text', text: JSON.stringify(r, null, 2) })),
};
}
// For each group, expand neighbors from seed chunks only, within the same document; don't create new groups
const chunkSpecToGroups = new Map<string, Set<GroupKey>>(); // "docUuid_chunkId" -> groups needing it
for (const [groupKey, group] of groups.entries()) {
for (const seedId of group.seedChunkIds) {
for (let i = 1; i <= extK; i++) {
const prev = seedId - i;
const next = seedId + i;
if (prev >= 0 && !group.chunks.has(prev)) {
const spec = `${group.docUuid}_${prev}`;
if (!chunkSpecToGroups.has(spec)) chunkSpecToGroups.set(spec, new Set());
chunkSpecToGroups.get(spec)!.add(groupKey);
}
if (!group.chunks.has(next)) {
// let the follow-up query decide whether this chunk exists
const spec = `${group.docUuid}_${next}`;
if (!chunkSpecToGroups.has(spec)) chunkSpecToGroups.set(spec, new Set());
chunkSpecToGroups.get(spec)!.add(groupKey);
}
}
}
}
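// Example: a seed chunk 12 with extK=2 requests the specs "<docUuid>_10", "<docUuid>_11",
// "<docUuid>_13" and "<docUuid>_14"; indices below 0 are skipped and chunks already present
// in the group are not re-requested.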
// Fetch all required neighbor chunks (deduplicated globally), then assign to requesting groups
const chunkSpecs = Array.from(chunkSpecToGroups.keys());
if (chunkSpecs.length > 0) {
// Only fetch fields required for neighbor assignment
const neighborFields = ['content', 'doc_chunk_id'].join('\n');
// Batch size to avoid overly large GraphQL payloads
const BATCH_SIZE = 256;
for (let start = 0; start < chunkSpecs.length; start += BATCH_SIZE) {
const specs = chunkSpecs.slice(start, start + BATCH_SIZE);
const orOperands = specs
.map((spec) => `{path:["doc_chunk_id"], operator:Equal, valueText:"${spec}"}`)
.join(', ');
const chunkWhere = where
? `operator:And, operands:[{operator:Or, operands:[${orOperands}]}, ${toGraphQLInput(
where,
)}]`
: `operator:Or, operands:[${orOperands}]`;
const fetchQuery = `{
Get {
${collection}(
where:{${chunkWhere}}
limit:${specs.length}
) { ${neighborFields} }
}
}`;
const fd = await gql(fetchQuery);
const objs: any[] = fd?.Get?.[collection] || [];
for (const obj of objs) {
if (!obj) continue;
const { content, doc_chunk_id } = obj;
const docChunkId = String(doc_chunk_id);
const idx = docChunkId.lastIndexOf('_');
const chunkId = idx >= 0 ? parseInt(docChunkId.slice(idx + 1), 10) : NaN;
if (!Number.isFinite(chunkId)) continue; // defensive: skip malformed ids
const spec = docChunkId; // same format as we generated earlier
const targetGroups = chunkSpecToGroups.get(spec);
if (!targetGroups) continue;
for (const gKey of targetGroups) {
const g = groups.get(gKey);
if (!g) continue;
if (!g.chunks.has(chunkId)) {
g.chunks.set(chunkId, content ? String(content) : '');
}
}
}
}
}
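// Neighbor specs that do not exist (e.g. an index past the end of a document) are simply
// not returned by the query above and are skipped without error.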
// Aggregate per group: sort by chunkId and merge content; include page_number when available
const results = Array.from(groups.values()).map((g) => {
const sortedIds = Array.from(g.chunks.keys()).sort((a, b) => a - b);
const mergedContent = sortedIds.map((id) => g.chunks.get(id) || '').join('');
const result: any = { content: mergedContent, source: g.source };
if (g.pageNumber !== undefined) result.page_number = g.pageNumber;
return result;
});
return { content: results.map((r) => ({ type: 'text', text: JSON.stringify(r, null, 2) })) };
},
);
}
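
// Usage sketch (not part of this module): a minimal example assuming the MCP TypeScript
// SDK's stdio transport; the server name and version below are placeholders.
//
//   import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
//   import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
//   import { regWeaviateTool } from './weaviate.js';
//
//   const server = new McpServer({ name: 'weaviate-tools', version: '0.1.0' });
//   regWeaviateTool(server);
//   await server.connect(new StdioServerTransport());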