Skip to main content
Glama
unified-search.tools.ts (12.5 kB)
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
import { z } from 'zod';

// Import search functions and types from both modules
import { searchDatasets, getAllDatasets, DatasetMetadata } from './datacatalogue.tools.js';
import { searchDashboards, getAllDashboards, DashboardMetadata } from './dashboards.tools.js';
import { prefixToolName } from './utils/tool-naming.js';

/** Kind of content a search result (or a ranking preference) refers to. */
type ContentType = 'dataset' | 'dashboard';

// Define result interfaces
interface SearchResult {
  type: ContentType;
  id: string;
  title: string;
  description?: string;
  url?: string;
  score: number;
}

/** Outcome of {@link intelligentSearch}. */
interface IntelligentSearchOutcome {
  /** Ranked results, truncated to the requested limit. */
  results: SearchResult[];
  /** True when results of the non-prioritized type were pulled into `results`. */
  usedFallback: boolean;
  /** The non-prioritized type that was pulled in (only set when `usedFallback` is true). */
  fallbackType?: ContentType;
  /** The type the caller asked to prioritize, if any. */
  originalType?: ContentType;
  /** Number of unique matches before `limit` was applied. */
  totalMatches: number;
}

/** One searchable text field and the score it contributes when matched. */
interface ScoredField {
  /** Lower-cased text to search in. */
  text: string;
  /** Points added when the whole query appears in `text` (0 disables the check). */
  exactBonus: number;
  /** Points added for each expanded term that appears in `text`. */
  termBonus: number;
}

/** Score added to every result of the prioritized content type. */
const PRIORITY_BOOST = 5;

/** Below this many total results, results of the other content type are ranked on equal footing. */
const MIN_RESULTS_BEFORE_FALLBACK = 3;

/**
 * Splits a query into lower-cased terms.
 *
 * Anything that is not a letter, a digit or whitespace is treated as a separator.
 * Unicode property escapes are used so that non-ASCII letters (e.g. "é") are kept
 * instead of being silently stripped; for pure-ASCII queries the result is the
 * same as with an `[^a-z0-9\s]` class.
 *
 * @param query Raw search query
 * @returns Non-empty terms in query order
 */
function tokenizeQuery(query: string): string[] {
  return query
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, ' ')
    .split(/\s+/)
    .filter(term => term.length > 0);
}

/**
 * Produces spelling variations of a single term.
 *
 * Hyphens are removed, and a leading "e" is additionally stripped so that e.g.
 * 'epayment' also yields 'payment'. The "e" heuristic applies to every term
 * starting with "e", not only to known "e-" compounds.
 *
 * @param term Term to normalize
 * @returns The normalized term plus its "e"-less variant when applicable;
 *          an empty array when nothing is left after normalization
 */
function normalizeTerm(term: string): string[] {
  const normalized = term.replace(/-/g, '').trim();

  // An empty search term matches every string via includes(''), so never emit one.
  if (normalized.length === 0) {
    return [];
  }

  if (normalized.startsWith('e') && normalized.length > 1) {
    // e.g., 'epayment' -> also try 'payment'
    return [normalized, normalized.substring(1)];
  }

  return [normalized];
}

// A small set of common synonyms for frequently used terms.
// Lookup stops at the first group that contains the term, so a word listed in
// several groups (e.g. 'data') expands using the earliest group only.
const COMMON_SYNONYMS: Record<string, string[]> = {
  'payment': ['payment', 'pay', 'transaction'],
  'electronic': ['electronic', 'digital', 'online', 'cashless'],
  'statistics': ['statistics', 'stats', 'data', 'figures', 'numbers'],
  'dashboard': ['dashboard', 'visualization', 'chart', 'graph'],
  'dataset': ['dataset', 'data set', 'database', 'data'],
};

/**
 * Expands a term into the set of strings that should count as a match for it:
 * the term itself, its normalized variations, its synonym group (if any) and a
 * naive singular/plural counterpart.
 *
 * @param term Single search term
 * @returns Unique, non-empty expansions in insertion order
 */
function expandSearchTerms(term: string): string[] {
  const normalizedTerm = term.toLowerCase().trim();
  if (normalizedTerm.length === 0) {
    return [];
  }

  // A Set keeps insertion order and removes duplicates as we go.
  const expanded = new Set<string>([normalizedTerm, ...normalizeTerm(normalizedTerm)]);

  // Check for common synonyms (first matching group wins)
  for (const [key, synonyms] of Object.entries(COMMON_SYNONYMS)) {
    if (normalizedTerm === key || synonyms.includes(normalizedTerm)) {
      for (const synonym of synonyms) {
        expanded.add(synonym);
      }
      break;
    }
  }

  // Basic stemming for plurals
  if (normalizedTerm.endsWith('s')) {
    expanded.add(normalizedTerm.slice(0, -1));
  } else {
    expanded.add(normalizedTerm + 's');
  }

  // Stemming the one-letter term 's' produces ''; an empty term would match
  // every field and inflate every score, so drop it.
  expanded.delete('');

  return [...expanded];
}

/**
 * Sums the score contributed by a set of text fields.
 *
 * @param fields Lower-cased fields with their per-field bonuses
 * @param lowerQuery Trimmed, lower-cased full query (empty disables the exact-match bonus)
 * @param terms Expanded search terms
 * @returns Total relevance score
 */
function scoreFields(
  fields: readonly ScoredField[],
  lowerQuery: string,
  terms: readonly string[],
): number {
  let score = 0;

  for (const { text, exactBonus, termBonus } of fields) {
    // Whole-query match (highest priority)
    if (exactBonus > 0 && lowerQuery.length > 0 && text.includes(lowerQuery)) {
      score += exactBonus;
    }

    // Individual term matches
    for (const term of terms) {
      if (text.includes(term)) {
        score += termBonus;
      }
    }
  }

  return score;
}

/**
 * Unified search across both datasets and dashboards
 *
 * Both sources are always queried; `prioritizeType` only adds a score boost to
 * results of that type.
 *
 * @param query Search query
 * @param prioritizeType Optional type to prioritize in results ('dataset' or 'dashboard')
 * @returns Combined search results from both sources, sorted by descending score
 */
function unifiedSearch(query: string, prioritizeType?: ContentType): SearchResult[] {
  const lowerQuery = query.trim().toLowerCase();

  // De-duplicate across tokens so a repeated or overlapping token is not scored twice.
  const expandedTerms = [
    ...new Set(tokenizeQuery(query).flatMap(term => expandSearchTerms(term))),
  ];

  // Search in datasets with improved scoring
  const datasetResults = searchDatasets(query).map((dataset: DatasetMetadata): SearchResult => {
    // NOTE(review): description is treated as possibly absent at runtime — confirm against DatasetMetadata.
    const description = (dataset.description_en ?? '').toLowerCase();

    const score = scoreFields(
      [
        { text: dataset.title_en.toLowerCase(), exactBonus: 10, termBonus: 3 },
        { text: dataset.id.toLowerCase(), exactBonus: 0, termBonus: 2 },
        { text: description, exactBonus: 5, termBonus: 1 },
      ],
      lowerQuery,
      expandedTerms,
    );

    return {
      type: 'dataset',
      id: dataset.id,
      title: dataset.title_en,
      description: dataset.description_en,
      url: `https://data.gov.my/data-catalogue/${dataset.id}`,
      score,
    };
  });

  // Search in dashboards with improved scoring
  const dashboardResults = searchDashboards(query).map((dashboard: DashboardMetadata): SearchResult => {
    const route = dashboard.route || '';

    const score = scoreFields(
      [
        { text: dashboard.dashboard_name.toLowerCase(), exactBonus: 10, termBonus: 3 },
        { text: route.toLowerCase(), exactBonus: 5, termBonus: 2 },
      ],
      lowerQuery,
      expandedTerms,
    );

    // Dashboards listed under 'opendosm' live on a different host.
    let url: string;
    if (!dashboard.route) {
      url = `/dashboard/${dashboard.dashboard_name}`;
    } else if (dashboard.sites?.includes('opendosm')) {
      url = `https://open.dosm.gov.my${dashboard.route}`;
    } else {
      url = `https://data.gov.my${dashboard.route}`;
    }

    return {
      type: 'dashboard',
      id: dashboard.dashboard_name,
      // 'some_dashboard_name' -> 'Some Dashboard Name'
      title: dashboard.dashboard_name
        .replace(/_/g, ' ')
        .replace(/\b\w/g, (l: string) => l.toUpperCase()),
      description: route,
      url,
      score,
    };
  });

  // Combine results, boosting the prioritized type if one was requested
  const combinedResults = [...datasetResults, ...dashboardResults].map(result =>
    prioritizeType && result.type === prioritizeType
      ? { ...result, score: result.score + PRIORITY_BOOST }
      : result,
  );

  // Sort by score (descending); the array is local, so sorting in place is safe.
  return combinedResults.sort((a, b) => b.score - a.score);
}

/** Substrings that suggest the user is looking for a dashboard. */
const DASHBOARD_KEYWORDS = ['dashboard', 'chart', 'graph', 'visualization', 'visualisation', 'stats', 'statistics'];

/** Substrings that suggest the user is looking for a dataset. */
const DATASET_KEYWORDS = ['dataset', 'data', 'catalogue', 'catalog', 'file', 'download', 'csv', 'excel', 'raw'];

/** Domain terms (payments / e-services) that are treated as dashboard-leaning. */
const DASHBOARD_LEANING_TERMS = ['payment', 'pay', 'transaction', 'electronic', 'digital'];

/**
 * Check if a query might be referring to a dashboard based on keywords
 *
 * Matching is by substring, not by whole word.
 *
 * @param query Search query
 * @returns True if the query likely refers to a dashboard
 */
function isDashboardQuery(query: string): boolean {
  const lowerQuery = query.toLowerCase();
  return DASHBOARD_KEYWORDS.some(keyword => lowerQuery.includes(keyword));
}

/**
 * Check if a query might be referring to a dataset based on keywords
 *
 * Matching is by substring, not by whole word.
 *
 * @param query Search query
 * @returns True if the query likely refers to a dataset
 */
function isDatasetQuery(query: string): boolean {
  const lowerQuery = query.toLowerCase();
  return DATASET_KEYWORDS.some(keyword => lowerQuery.includes(keyword));
}

/**
 * Guesses which content type to prioritize when the caller did not specify one.
 *
 * Dashboard signals (dashboard keywords or payment-related domain terms) take
 * precedence over dataset keywords.
 *
 * @param query Search query
 * @returns The inferred type, or undefined when the query gives no hint
 */
function inferPrioritizeType(query: string): ContentType | undefined {
  const lowerQuery = query.toLowerCase();

  if (isDashboardQuery(query) || DASHBOARD_LEANING_TERMS.some(term => lowerQuery.includes(term))) {
    return 'dashboard';
  }
  if (isDatasetQuery(query)) {
    return 'dataset';
  }
  return undefined;
}

/**
 * Performs a search that relaxes type prioritization when too few results are found.
 *
 * `unifiedSearch` always queries both sources, so a second "fallback" search
 * would return the very same items. Instead, when fewer than
 * `MIN_RESULTS_BEFORE_FALLBACK` results exist, results of the other type receive
 * the same boost as the prioritized type, which ranks both types on equal
 * footing without listing any item twice.
 *
 * @param query Search query
 * @param prioritizeType Optional type to prioritize in results ('dataset' or 'dashboard')
 * @param limit Maximum number of results to return
 * @returns Ranked results plus metadata describing whether the fallback was used
 */
function intelligentSearch(
  query: string,
  prioritizeType?: ContentType,
  limit: number = 10,
): IntelligentSearchOutcome {
  const initialResults = unifiedSearch(query, prioritizeType);

  const withoutFallback: IntelligentSearchOutcome = {
    results: initialResults.slice(0, limit),
    usedFallback: false,
    originalType: prioritizeType,
    totalMatches: initialResults.length,
  };

  // Enough results, or nothing was prioritized: nothing to fall back from.
  if (initialResults.length >= MIN_RESULTS_BEFORE_FALLBACK || !prioritizeType) {
    return withoutFallback;
  }

  const fallbackType: ContentType = prioritizeType === 'dataset' ? 'dashboard' : 'dataset';

  // Give the other type the same boost the prioritized type already has.
  const rebalanced = initialResults
    .map(result =>
      result.type === fallbackType
        ? { ...result, score: result.score + PRIORITY_BOOST }
        : result,
    )
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);

  // Only report a fallback when a result of the other type is actually returned.
  if (!rebalanced.some(result => result.type === fallbackType)) {
    return withoutFallback;
  }

  return {
    results: rebalanced,
    usedFallback: true,
    fallbackType,
    originalType: prioritizeType,
    totalMatches: initialResults.length,
  };
}

/**
 * Registers the unified `search_all` tool on the given MCP server.
 *
 * The tool searches datasets and dashboards together and responds with a single
 * JSON text payload. Failures are reported as a JSON error payload with
 * `isError: true` rather than by throwing.
 *
 * @param server MCP server to register the tool on
 */
export function registerUnifiedSearchTools(server: McpServer): void {
  // Unified search across datasets and dashboards
  server.tool(
    prefixToolName('search_all'),
    '⭐⭐⭐ PRIMARY SEARCH TOOL: Always use this first for any data or visualization queries. Searches across both datasets and dashboards with intelligent fallback. ⭐⭐⭐',
    {
      query: z.string().describe('Search query to match against all content'),
      limit: z.number().min(1).max(20).optional().describe('Number of results to return (1-20)'),
      prioritize: z.enum(['dataset', 'dashboard']).optional().describe('Type of content to prioritize in results'),
    },
    async ({ query, limit = 10, prioritize }) => {
      try {
        // An explicit preference wins; otherwise infer one from the query wording.
        const prioritizeType = prioritize ?? inferPrioritizeType(query);

        const {
          results: searchResults,
          usedFallback,
          fallbackType,
          originalType,
          totalMatches,
        } = intelligentSearch(query, prioritizeType, limit);

        // Group results by type for better presentation
        const groupedResults = {
          datasets: searchResults.filter(r => r.type === 'dataset'),
          dashboards: searchResults.filter(r => r.type === 'dashboard'),
        };

        return {
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                message: 'Unified search results',
                query,
                // Count before `limit` was applied, so it can exceed `showing`.
                total_matches: totalMatches,
                showing: searchResults.length,
                prioritized_type: originalType || 'none',
                used_fallback: usedFallback,
                fallback_type: fallbackType,
                search_note: usedFallback
                  ? `Limited results found in ${originalType} search, automatically included relevant ${fallbackType} results`
                  : undefined,
                results: searchResults,
                grouped_results: groupedResults,
                data_access_notes: {
                  dashboards: 'Dashboard data is visualized on the web interface. Raw data files (e.g., parquet) cannot be directly accessed through this API.',
                  datasets: 'Dataset metadata is available through this API. For downloading the actual data files, please visit the dataset page on the data portal.',
                },
                timestamp: new Date().toISOString(),
              }, null, 2),
            },
          ],
        };
      } catch (error) {
        return {
          // Flag the result as a failure so MCP clients do not treat it as a successful search.
          isError: true,
          content: [
            {
              type: 'text',
              text: JSON.stringify({
                error: 'Failed to perform unified search',
                message: error instanceof Error ? error.message : String(error),
                timestamp: new Date().toISOString(),
              }, null, 2),
            },
          ],
        };
      }
    },
  );
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/hithereiamaliff/mcp-datagovmy'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.