import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import { CallToolRequestSchema, ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
// ---------------------------------------------------------------------------
// Tool catalog
// ---------------------------------------------------------------------------
// Each tool definition pairs a name/description with a JSON-Schema
// `inputSchema` that MCP clients use to validate call arguments.

// Bing search in either general-web or news mode.
const WEB_SEARCH_TOOL = {
  name: 'web_search',
  description: 'Perform web or news search using Bing search engine. Supports both general web search and news search modes.',
  inputSchema: {
    type: 'object',
    properties: {
      query: {
        type: 'string',
        description: 'Search query, e.g., Node.js tutorial, tech news, political updates, etc.'
      },
      searchType: {
        type: 'string',
        enum: ['web', 'news'],
        description: 'Search type: web (general web search), news (news search) - required'
      },
      maxResults: {
        type: 'number',
        description: 'Maximum number of results',
        default: 10,
        minimum: 1,
        maximum: 20
      },
      timeFilter: {
        type: 'string',
        enum: ['past_hour', 'past_24_hours', 'past_7_days', 'past_30_days'],
        description: 'Time filter (only valid for news search): past 1 hour, 24 hours, 7 days, 30 days',
        default: 'past_24_hours'
      }
    },
    required: ['query', 'searchType']
  }
};

// Single-page fetch with selectable output format.
const GET_WEBPAGE_CONTENT_TOOL = {
  name: 'get_webpage_content',
  description: 'Fetch webpage content and convert to specified format. Supports Markdown, HTML, and plain text.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL of the webpage to scrape. Must be a valid HTTP/HTTPS link.'
      },
      format: {
        type: 'string',
        enum: ['markdown', 'html', 'text'],
        description: 'Output format: markdown (default), html, text',
        default: 'markdown'
      }
    },
    required: ['url']
  }
};

// Raw HTML + page metadata fetch.
const GET_WEBPAGE_SOURCE_TOOL = {
  name: 'get_webpage_source',
  description: 'Fetch the raw HTML source code and page information of a webpage.',
  inputSchema: {
    type: 'object',
    properties: {
      url: {
        type: 'string',
        description: 'The URL of the webpage to get source from. Must be a valid HTTP/HTTPS link.'
      }
    },
    required: ['url']
  }
};

// Concurrent multi-page scrape.
const BATCH_WEBPAGE_SCRAPE_TOOL = {
  name: 'batch_webpage_scrape',
  description: 'Batch scrape multiple webpages with concurrent processing support.',
  inputSchema: {
    type: 'object',
    properties: {
      urls: {
        type: 'array',
        items: {
          type: 'string'
        },
        description: 'List of webpage URLs to scrape, up to 20.',
        minItems: 1,
        maxItems: 20
      },
      maxConcurrent: {
        type: 'number',
        description: 'Maximum concurrency',
        default: 3,
        minimum: 1,
        maximum: 10
      }
    },
    required: ['urls']
  }
};

/**
 * Build the list of MCP tool definitions this server advertises.
 * @returns {Array<object>} Tool definitions in registration order.
 */
function generateTools() {
  return [
    WEB_SEARCH_TOOL,
    GET_WEBPAGE_CONTENT_TOOL,
    GET_WEBPAGE_SOURCE_TOOL,
    BATCH_WEBPAGE_SCRAPE_TOOL
  ];
}
// ---------------------------------------------------------------------------
// MCP server instance
// ---------------------------------------------------------------------------
// Identity reported to clients during the initialize handshake.
const serverInfo = {
  name: 'spider-mcp',
  version: '1.0.0'
};
// Advertise tool support only (no resources/prompts).
const serverOptions = {
  capabilities: {
    tools: {}
  }
};
const server = new Server(serverInfo, serverOptions);
// Respond to tools/list requests with the full tool catalog.
server.setRequestHandler(ListToolsRequestSchema, async () => ({
  tools: generateTools()
}));
// Dispatch tools/call requests to the matching handler. Handler results are
// serialized to pretty-printed JSON; any thrown error is converted into an
// MCP error payload rather than crashing the request.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;
  const handlers = {
    web_search: handleWebSearch,
    get_webpage_content: handleGetWebpageContent,
    get_webpage_source: handleGetWebpageSource,
    batch_webpage_scrape: handleBatchWebpageScrape
  };
  try {
    // Object.hasOwn guards against inherited keys like "constructor".
    if (!Object.hasOwn(handlers, name)) {
      throw new Error(`Unknown tool: ${name}`);
    }
    const result = await handlers[name](args);
    return {
      content: [
        {
          type: 'text',
          text: JSON.stringify(result, null, 2)
        }
      ]
    };
  } catch (error) {
    return {
      content: [
        {
          type: 'text',
          text: `Error: ${error.message}`
        }
      ],
      isError: true
    };
  }
});
/**
 * Run a Bing search in either general-web or news mode.
 * @param {object} args - Tool arguments: query (string, required),
 *   searchType ('web'|'news', required), maxResults (1-20, default 10),
 *   timeFilter (news only, default 'past_24_hours').
 * @returns {Promise<object>} Search results plus echo of the effective inputs.
 * @throws {Error} On any invalid argument, before any network work happens.
 */
async function handleWebSearch(args) {
  const { query, searchType, maxResults = 10, timeFilter = 'past_24_hours' } = args;
  const validTimeFilters = ['past_hour', 'past_24_hours', 'past_7_days', 'past_30_days'];

  // Guard clauses: reject malformed input up front.
  if (typeof query !== 'string' || query === '') {
    throw new Error('Query parameter is required and must be a string');
  }
  if (!['web', 'news'].includes(searchType)) {
    throw new Error('searchType is required and must be either "web" or "news"');
  }
  if (maxResults < 1 || maxResults > 20) {
    throw new Error('maxResults must be between 1 and 20');
  }
  // timeFilter only applies to news searches; ignored otherwise.
  if (searchType === 'news' && !validTimeFilters.includes(timeFilter)) {
    throw new Error('timeFilter must be a valid time filter option');
  }

  // Lazy import keeps server startup cheap.
  const searchService = (await import('../services/searchService.js')).default;
  const results = searchType === 'news'
    ? await searchService.searchBingNews(query, maxResults, timeFilter)
    : await searchService.searchBing(query, maxResults);

  return {
    tool: 'web_search',
    searchType,
    query,
    maxResults,
    // undefined is dropped by JSON.stringify, so web searches omit the key.
    timeFilter: searchType === 'news' ? timeFilter : undefined,
    results: results.results,
    totalResults: results.totalResults,
    timestamp: results.timestamp
  };
}
/**
 * Fetch a webpage and return its content in the requested output format.
 * @param {object} args - Tool arguments: url (string, required),
 *   format ('markdown'|'html'|'text', default 'markdown').
 * @returns {Promise<object>} Page title, description, converted content.
 * @throws {Error} If the URL is missing/unparseable or the format is unknown.
 */
async function handleGetWebpageContent(args) {
  const { url, format = 'markdown' } = args;

  if (typeof url !== 'string' || url === '') {
    throw new Error('URL parameter is required and must be a string');
  }
  try {
    new URL(url); // parse only for validation; the instance is discarded
  } catch {
    throw new Error('Invalid URL format');
  }
  if (!['markdown', 'html', 'text'].includes(format)) {
    throw new Error('format must be one of: markdown, html, text');
  }

  // Lazy import keeps server startup cheap.
  const searchService = (await import('../services/searchService.js')).default;
  const result = format === 'markdown'
    ? await searchService.getWebpageMarkdown(url)
    : await searchService.scrapeWebpage(url);

  return {
    tool: 'get_webpage_content',
    url,
    format,
    title: result.title,
    description: result.description,
    // Markdown conversion stores its text under a different key.
    content: format === 'markdown' ? result.markdown : result.content,
    timestamp: result.timestamp
  };
}
/**
 * Fetch the raw page data (content, metadata, links) for a single URL.
 * @param {object} args - Tool arguments: url (string, required).
 * @returns {Promise<object>} Scraped page fields plus a timestamp.
 * @throws {Error} If the URL is missing or cannot be parsed.
 */
async function handleGetWebpageSource(args) {
  const { url } = args;

  if (typeof url !== 'string' || url === '') {
    throw new Error('URL parameter is required and must be a string');
  }
  try {
    new URL(url); // validation only
  } catch {
    throw new Error('Invalid URL format');
  }

  // Lazy import keeps server startup cheap.
  const searchService = (await import('../services/searchService.js')).default;
  const page = await searchService.scrapeWebpage(url);

  return {
    tool: 'get_webpage_source',
    url,
    title: page.title,
    description: page.description,
    keywords: page.keywords,
    content: page.content,
    links: page.links,
    timestamp: page.timestamp
  };
}
/**
 * Scrape multiple webpages in bounded-concurrency batches.
 *
 * Fix: the original awaited `Promise.allSettled` over promises that could
 * never reject (each mapped task catches its own error and fulfills with a
 * `{ success: false, ... }` record), so the `rejected` branch and its
 * `url: 'unknown'` error entry were unreachable dead code. `Promise.all` is
 * the correct primitive here and the routing is now direct.
 *
 * @param {object} args - Tool arguments: urls (string[], 1-20, required),
 *   maxConcurrent (1-10, default 3).
 * @returns {Promise<object>} Summary with per-URL successes and failures.
 * @throws {Error} On invalid urls array or out-of-range maxConcurrent.
 */
async function handleBatchWebpageScrape(args) {
  const { urls, maxConcurrent = 3 } = args;
  if (!Array.isArray(urls) || urls.length === 0) {
    throw new Error('urls must be a non-empty array');
  }
  if (urls.length > 20) {
    throw new Error('A maximum of 20 URLs is supported');
  }
  if (maxConcurrent < 1 || maxConcurrent > 10) {
    throw new Error('maxConcurrent must be between 1 and 10');
  }
  // Lazy import keeps server startup cheap.
  const searchService = (await import('../services/searchService.js')).default;
  const results = [];
  const errors = [];
  // Process sequential batches of at most maxConcurrent URLs each.
  for (let i = 0; i < urls.length; i += maxConcurrent) {
    const batch = urls.slice(i, i + maxConcurrent);
    // Each task handles its own failure, so Promise.all cannot reject here.
    const batchResults = await Promise.all(
      batch.map(async (url) => {
        try {
          const data = await searchService.scrapeWebpage(url);
          return { success: true, url, data };
        } catch (error) {
          return { success: false, url, error: error.message };
        }
      })
    );
    for (const outcome of batchResults) {
      (outcome.success ? results : errors).push(outcome);
    }
  }
  return {
    tool: 'batch_webpage_scrape',
    totalUrls: urls.length,
    successful: results.length,
    failed: errors.length,
    maxConcurrent,
    results,
    errors,
    timestamp: new Date().toISOString()
  };
}
// Start the server.
// Fix: `server.connect(transport)` returns a Promise that was previously left
// floating — a connect failure became an unhandled rejection and the
// "started" log printed regardless. Top-level await is valid in ES modules,
// so await the connection and exit non-zero on failure.
// Logs go to stderr because stdout carries the MCP stdio protocol stream.
const transport = new StdioServerTransport();
try {
  await server.connect(transport);
} catch (error) {
  console.error('Failed to start Spider MCP server:', error.message);
  process.exit(1);
}
console.error('Spider MCP server started');
console.error('Available tools:', generateTools().map((t) => t.name).join(', '));