# MCP Server Implementation
## Overview
The YouTube KB MCP server exposes search and discovery tools via the Model Context Protocol. It is built with TypeScript, uses the official MCP SDK, and deploys to Vercel using the Streamable HTTP transport.
---
## Technology Stack
| Component | Technology | Version |
|-----------|------------|---------|
| Runtime | Node.js | 20+ |
| Language | TypeScript | 5.x |
| MCP SDK | @modelcontextprotocol/sdk | ^1.25.2 |
| Vercel Adapter | mcp-handler | ^1.0.0 |
| Database Client | @supabase/supabase-js | ^2.x |
| Validation | zod | ^3.x |
| Embeddings | openai | ^4.x |
---
## Project Structure
```
packages/server/
├── src/
│ ├── index.ts # Main entry point
│ ├── server.ts # MCP server setup
│ ├── tools/
│ │ ├── index.ts # Tool exports
│ │ ├── search.ts # search tool
│ │ ├── list-domains.ts # list_domains tool
│ │ ├── list-videos.ts # list_videos tool
│ │ └── stats.ts # stats tool
│ ├── db/
│ │ ├── client.ts # Supabase client
│ │ ├── queries.ts # Database queries
│ │ └── types.ts # TypeScript types
│ ├── embeddings/
│ │ └── openai.ts # OpenAI embedding client
│ └── utils/
│ ├── config.ts # Environment config
│ └── logger.ts # Logging utilities
├── app/
│ └── api/
│ └── mcp/
│ └── route.ts # Vercel API route
├── package.json
├── tsconfig.json
├── vercel.json
└── .env.example
```
---
## Core Implementation
### Main Server Setup (server.ts)
```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { registerSearchTool } from "./tools/search.js";
import { registerListDomainsTool } from "./tools/list-domains.js";
import { registerListVideosTool } from "./tools/list-videos.js";
import { registerStatsTool } from "./tools/stats.js";
/**
 * Builds the YouTube KB MCP server and attaches every tool to it.
 *
 * @returns A configured `McpServer` with the search, list_domains,
 *          list_videos and stats tools registered (in that order).
 */
export function createServer() {
  // Registration order matches the order tools are listed to clients.
  const toolRegistrars = [
    registerSearchTool,
    registerListDomainsTool,
    registerListVideosTool,
    registerStatsTool,
  ];

  const mcp = new McpServer({
    name: "youtube-kb",
    version: "1.0.0",
    description: "Search educational YouTube content across domains like marketing, engineering, and growth strategy.",
  });

  for (const register of toolRegistrars) {
    register(mcp);
  }

  return mcp;
}
```
### Vercel API Route (app/api/mcp/route.ts)
```typescript
import { createMcpHandler } from "mcp-handler";
import { registerSearchTool } from "../../../src/tools/search.js";
import { registerListDomainsTool } from "../../../src/tools/list-domains.js";
import { registerListVideosTool } from "../../../src/tools/list-videos.js";
import { registerStatsTool } from "../../../src/tools/stats.js";

// mcp-handler constructs the McpServer itself and passes it to the
// initializer callback below. Tools therefore have to be registered on THAT
// instance; a server created elsewhere is never connected to the transport.
const handler = createMcpHandler(
  (server) => {
    registerSearchTool(server);
    registerListDomainsTool(server);
    registerListVideosTool(server);
    registerStatsTool(server);
  },
  // Server options (none needed).
  {},
  // Adapter config.
  {
    // Must match the directory that contains this route: /api + /mcp.
    basePath: "/api",
    maxDuration: 60,
    verboseLogs: process.env.NODE_ENV === "development",
  }
);

// Streamable HTTP uses POST for messages, GET for streams and DELETE to end sessions.
export { handler as GET, handler as POST, handler as DELETE };
```
---
## Tool Implementations
### Search Tool (tools/search.ts)
```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import { getEmbedding } from "../embeddings/openai.js";
import { hybridSearch, semanticSearch } from "../db/queries.js";
/**
 * Argument schema for the `search` tool. The `.describe()` text of each field
 * is what MCP clients see as the parameter description.
 */
const SearchParamsSchema = z.object({
  // Required, non-empty free-text query.
  query: z
    .string()
    .min(1)
    .describe("Natural language search query"),

  // Optional domain filter; omitted means "search every domain".
  domain: z
    .string()
    .optional()
    .describe("Filter by domain (e.g., 'growth', 'engineering')"),

  // Integer in [1, 20]; 5 when the caller does not specify it.
  max_results: z
    .number()
    .int()
    .min(1)
    .max(20)
    .default(5)
    .describe("Maximum results to return"),

  // Hybrid (semantic + keyword) search is on unless explicitly disabled.
  use_hybrid: z
    .boolean()
    .default(true)
    .describe("Enable hybrid search (semantic + keyword)"),
});
/**
 * Registers the `search` tool on the given MCP server.
 *
 * The tool embeds the query, runs either the hybrid or the purely semantic
 * search, and returns the hits as a single Markdown text block with one
 * quoted excerpt and citation per result.
 *
 * @param server MCP server to attach the tool to.
 */
export function registerSearchTool(server: McpServer) {
  server.tool(
    "search",
    "Search the YouTube knowledge base for educational content. Returns relevant excerpts with citations.",
    SearchParamsSchema.shape,
    async (params) => {
      // Re-parse so schema defaults (max_results, use_hybrid) are applied.
      const args = SearchParamsSchema.parse(params);
      const { query, domain } = args;

      // The query vector is needed by both search modes.
      const vector = await getEmbedding(query);

      const hits = await (args.use_hybrid
        ? hybridSearch(vector, query, domain, args.max_results)
        : semanticSearch(vector, domain, args.max_results));

      if (hits.length === 0) {
        const scope = domain ? ` in domain '${domain}'` : "";
        return {
          content: [{ type: "text", text: `No results found for: "${query}"${scope}` }],
        };
      }

      // One Markdown section per hit: heading, quoted excerpt, citation, metadata.
      const sections: string[] = [];
      hits.forEach((hit, index) => {
        const metaParts = [`Domain: ${hit.domain}`];
        if (hit.view_count) {
          metaParts.push(`Views: ${formatViews(hit.view_count)}`);
        }
        const meta = metaParts.join(" | ");

        const heading = `### Result ${index + 1} (relevance: ${(hit.score * 100).toFixed(0)}%)`;
        const excerpt = `> ${hit.text}`;
        const citation = `— [${hit.video_title}](${hit.video_url}) by ${hit.channel_name}`;
        const metaLine = meta ? `*${meta}*` : "";

        sections.push([heading, excerpt, ``, citation, metaLine].join("\n"));
      });

      const title = `## Search Results for: "${query}"`;
      const domainLine = domain ? `*Domain: ${domain}*` : "";

      return {
        content: [{
          type: "text",
          text: [title, domainLine, "", ...sections].join("\n"),
        }],
      };
    }
  );
}
/**
 * Formats a view count compactly: millions with one decimal ("1.5M"),
 * thousands rounded to a whole number ("12K"), smaller counts verbatim.
 *
 * @param count Non-negative view count.
 * @returns Human-readable abbreviation of `count`.
 */
function formatViews(count: number): string {
  // 999_500 is the smallest count whose thousands value rounds to "1000",
  // so switch to the millions form from there to avoid printing "1000K".
  if (count >= 999_500) return `${(count / 1_000_000).toFixed(1)}M`;
  if (count >= 1_000) return `${(count / 1_000).toFixed(0)}K`;
  return count.toString();
}
```
### List Domains Tool (tools/list-domains.ts)
```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { getDomainStats } from "../db/queries.js";
/**
 * Registers the parameterless `list_domains` tool, which reports every
 * knowledge domain together with its video and chunk counts as a Markdown
 * bullet list.
 *
 * @param server MCP server to attach the tool to.
 */
export function registerListDomainsTool(server: McpServer) {
  server.tool(
    "list_domains",
    "List all available knowledge domains in the YouTube KB.",
    {},
    async () => {
      const domainStats = await getDomainStats();

      if (domainStats.length === 0) {
        return {
          content: [{ type: "text", text: "No domains found. The knowledge base may be empty." }],
        };
      }

      // Heading, a blank line, then one bullet per domain.
      let report = "## Available Domains\n";
      for (const entry of domainStats) {
        report += `\n- **${entry.domain}**: ${entry.video_count} videos, ${entry.chunk_count} searchable chunks`;
      }

      return {
        content: [{ type: "text", text: report }],
      };
    }
  );
}
```
### Stats Tool (tools/stats.ts)
```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { getStats } from "../db/queries.js";
/**
 * Registers the parameterless `stats` tool, which renders the aggregate
 * knowledge-base statistics (video count, chunk count, domain names) as
 * Markdown, followed by the current date.
 *
 * @param server MCP server to attach the tool to.
 */
export function registerStatsTool(server: McpServer) {
  server.tool(
    "stats",
    "Get statistics about the YouTube knowledge base.",
    {},
    async () => {
      const summary = await getStats();

      const videoTotal = summary.total_videos.toLocaleString();
      const chunkTotal = summary.total_chunks.toLocaleString();
      const domainList = summary.domains.join(", ");
      // Date portion (YYYY-MM-DD) of the current UTC timestamp.
      const today = new Date().toISOString().split("T")[0];

      const report = [
        "## YouTube Knowledge Base Statistics",
        "",
        `- **Total Videos**: ${videoTotal}`,
        `- **Total Chunks**: ${chunkTotal}`,
        `- **Domains**: ${domainList}`,
        "",
        `*Last updated: ${today}*`,
      ].join("\n");

      return {
        content: [{ type: "text", text: report }],
      };
    }
  );
}
```
### List Videos Tool (tools/list-videos.ts)
```typescript
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";
import { listVideos } from "../db/queries.js";
/**
 * Argument schema for the `list_videos` tool. The `.describe()` text of each
 * field is what MCP clients see as the parameter description.
 */
const ListVideosParamsSchema = z.object({
  // Optional domain filter; omitted means "all domains".
  domain: z
    .string()
    .optional()
    .describe("Filter by domain"),

  // Integer in [1, 50]; 20 when the caller does not specify it.
  limit: z
    .number()
    .int()
    .min(1)
    .max(50)
    .default(20)
    .describe("Maximum videos to return"),
});
/**
 * Registers the `list_videos` tool, which lists videos (optionally restricted
 * to one domain) as a Markdown bullet list with title, channel, ID and domain.
 *
 * @param server MCP server to attach the tool to.
 */
export function registerListVideosTool(server: McpServer) {
  server.tool(
    "list_videos",
    "List videos in the YouTube knowledge base.",
    ListVideosParamsSchema.shape,
    async (params) => {
      // Re-parse so the schema default for `limit` is applied.
      const args = ListVideosParamsSchema.parse(params);
      const { domain } = args;

      const found = await listVideos(domain, args.limit);

      if (found.length === 0) {
        const scope = domain ? ` in domain '${domain}'` : "";
        return {
          content: [{ type: "text", text: `No videos found${scope}.` }],
        };
      }

      const heading = `## Videos in Knowledge Base${domain ? ` (${domain})` : ""}`;
      const countLine = `*${found.length} videos*`;

      // Two-line bullet per video: title/channel, then ID and domain.
      const bullets: string[] = [];
      for (const video of found) {
        bullets.push(
          `- **${video.title}** by ${video.channel_name}\n - ID: \`${video.video_id}\` | Domain: ${video.domain}`
        );
      }

      return {
        content: [{
          type: "text",
          text: [heading, "", countLine, "", ...bullets].join("\n"),
        }],
      };
    }
  );
}
```
---
## Database Layer
### Supabase Client (db/client.ts)
```typescript
import { createClient } from "@supabase/supabase-js";
import { config } from "../utils/config.js";
// Connection settings come from the validated config module.
const { supabaseUrl, supabaseAnonKey } = config;

/** Shared Supabase client used by every query in the database layer. */
export const supabase = createClient(supabaseUrl, supabaseAnonKey);
```
### Database Queries (db/queries.ts)
```typescript
import { supabase } from "./client.js";
import type { SearchResult, DomainStats, Stats, Video } from "./types.js";
/**
 * Fields read from every row returned by the `search_chunks` and
 * `hybrid_search` RPCs.
 * NOTE(review): field types are inferred from how the rows are used here —
 * confirm against the SQL function definitions.
 */
interface ChunkRow {
  text: string;
  video_id: string;
  domain: string;
}

/** Row returned by `search_chunks`; carries the vector similarity. */
interface SemanticChunkRow extends ChunkRow {
  similarity: number;
}

/** Row returned by `hybrid_search`; carries the blended score. */
interface HybridChunkRow extends ChunkRow {
  combined_score: number;
}

/**
 * Joins chunk rows with their video metadata and maps them to `SearchResult`.
 *
 * The metadata lookup is best-effort: if it fails, or a video is missing,
 * results still come back with "Unknown" placeholders and a URL derived from
 * the video ID.
 *
 * @param chunks  Rows returned by a search RPC.
 * @param scoreOf Picks the relevance score out of a row.
 */
async function attachVideoMetadata<T extends ChunkRow>(
  chunks: T[],
  scoreOf: (chunk: T) => number
): Promise<SearchResult[]> {
  // Nothing to enrich: skip the extra round trip to the videos table.
  if (chunks.length === 0) return [];

  const videoIds = [...new Set(chunks.map((chunk) => chunk.video_id))];
  const { data: videos, error } = await supabase
    .from("videos")
    .select("video_id, title, channel_name, url, view_count")
    .in("video_id", videoIds);

  if (error) {
    // Do not fail the whole search over missing titles, but do not hide it either.
    console.warn(`Video metadata lookup failed: ${error.message}`);
  }

  const videoById = new Map((videos ?? []).map((v) => [v.video_id, v] as const));

  return chunks.map((chunk) => {
    const video = videoById.get(chunk.video_id);
    return {
      text: chunk.text,
      score: scoreOf(chunk),
      video_id: chunk.video_id,
      video_title: video?.title || "Unknown",
      video_url: video?.url || `https://youtube.com/watch?v=${chunk.video_id}`,
      channel_name: video?.channel_name || "Unknown",
      domain: chunk.domain,
      view_count: video?.view_count,
    };
  });
}

/**
 * Pure vector search over chunks via the `search_chunks` RPC.
 *
 * @param embedding Query embedding.
 * @param domain    Optional domain filter; empty/undefined searches all domains.
 * @param limit     Maximum number of chunks to return.
 * @returns Matching chunks enriched with video metadata; `score` is the similarity.
 * @throws The Supabase error if the RPC fails.
 */
export async function semanticSearch(
  embedding: number[],
  domain: string | undefined,
  limit: number
): Promise<SearchResult[]> {
  const { data, error } = await supabase.rpc("search_chunks", {
    query_embedding: embedding,
    match_domain: domain || null,
    match_count: limit,
  });
  if (error) throw error;

  const chunks: SemanticChunkRow[] = data ?? [];
  return attachVideoMetadata(chunks, (chunk) => chunk.similarity);
}

/**
 * Combined vector + keyword search via the `hybrid_search` RPC, weighted
 * 0.7 semantic / 0.3 keyword.
 *
 * @param embedding Query embedding.
 * @param queryText Raw query text used for the keyword part.
 * @param domain    Optional domain filter; empty/undefined searches all domains.
 * @param limit     Maximum number of chunks to return.
 * @returns Matching chunks enriched with video metadata; `score` is the combined score.
 * @throws The Supabase error if the RPC fails.
 */
export async function hybridSearch(
  embedding: number[],
  queryText: string,
  domain: string | undefined,
  limit: number
): Promise<SearchResult[]> {
  const { data, error } = await supabase.rpc("hybrid_search", {
    query_embedding: embedding,
    query_text: queryText,
    match_domain: domain || null,
    match_count: limit,
    semantic_weight: 0.7,
    keyword_weight: 0.3,
  });
  if (error) throw error;

  const chunks: HybridChunkRow[] = data ?? [];
  return attachVideoMetadata(chunks, (chunk) => chunk.combined_score);
}
/**
 * Fetches per-domain statistics from the `get_domain_stats` RPC.
 *
 * @returns The rows returned by the RPC, unmodified.
 * @throws The Supabase error if the RPC fails.
 */
export async function getDomainStats(): Promise<DomainStats[]> {
  const response = await supabase.rpc("get_domain_stats");
  if (response.error) {
    throw response.error;
  }
  return response.data;
}
/**
 * Fetches the aggregate knowledge-base statistics from the `get_stats` RPC.
 *
 * Accepts both response shapes PostgREST can produce for a function: an array
 * of rows (set-returning function) or a single object (scalar/composite
 * return). Only the first row is used.
 *
 * @returns The statistics row.
 * @throws The Supabase error if the RPC fails, or an `Error` when the RPC
 *         returns no row, so callers never receive `undefined`.
 */
export async function getStats(): Promise<Stats> {
  const { data, error } = await supabase.rpc("get_stats");
  if (error) throw error;

  const row: Stats | null | undefined = Array.isArray(data) ? data[0] : data;
  if (row == null) {
    throw new Error("get_stats returned no rows");
  }
  return row;
}
/**
 * Lists videos ordered by view count, most viewed first.
 *
 * @param domain Optional domain filter; empty/undefined lists all domains.
 * @param limit  Maximum number of videos to return.
 * @returns Up to `limit` videos.
 * @throws The Supabase error if the query fails.
 */
export async function listVideos(
  domain: string | undefined,
  limit: number
): Promise<Video[]> {
  let query = supabase
    .from("videos")
    .select("video_id, title, channel_name, domain, url");

  // Filters go before ordering/limit modifiers.
  if (domain) {
    query = query.eq("domain", domain);
  }

  const { data, error } = await query
    // Postgres sorts NULLs first on DESC; push videos with an unknown view
    // count to the end instead of the top of the list.
    .order("view_count", { ascending: false, nullsFirst: false })
    .limit(limit);

  if (error) throw error;
  return data;
}
```
---
## Embeddings Layer
### OpenAI Client (embeddings/openai.ts)
```typescript
import OpenAI from "openai";
import { config } from "../utils/config.js";
// Module-level OpenAI SDK client; the API key is read from the config module.
const openai = new OpenAI({
apiKey: config.openaiApiKey,
});
// Embedding model requested for every query.
const EMBEDDING_MODEL = "text-embedding-3-small";
// Vector length requested from the embeddings API.
// NOTE(review): presumably has to match the dimension of the stored chunk
// embeddings — confirm against the database schema.
const EMBEDDING_DIMENSIONS = 1536;
/**
 * Requests an embedding vector for `text` from the OpenAI embeddings API,
 * using the module's configured model and dimension count.
 *
 * @param text Text to embed.
 * @returns The embedding of the first (only) input.
 */
export async function getEmbedding(text: string): Promise<number[]> {
  const request = {
    model: EMBEDDING_MODEL,
    input: text,
    dimensions: EMBEDDING_DIMENSIONS,
  };
  const { data: items } = await openai.embeddings.create(request);
  // A single input string yields a single embedding entry.
  const firstItem = items[0];
  return firstItem.embedding;
}
```
---
## Configuration
### Environment Variables (.env.example)
```bash
# Supabase
# Both values are required; utils/config.ts validates them when it is loaded.
SUPABASE_URL=https://xxxxx.supabase.co
SUPABASE_ANON_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...
# OpenAI (for query embeddings)
# Required; the config schema only accepts values starting with "sk-".
OPENAI_API_KEY=sk-...
# Optional
# NODE_ENV falls back to "production" when unset.
NODE_ENV=development
# LOG_LEVEL accepts debug | info | warn | error and falls back to "info" when unset.
LOG_LEVEL=info
```
### Config Module (utils/config.ts)
```typescript
import { z } from "zod";
/**
 * Schema for the server's environment-derived configuration.
 * Parsing fails fast (throws a ZodError) when a required variable is missing
 * or malformed.
 */
const ConfigSchema = z.object({
  supabaseUrl: z.string().url(),
  supabaseAnonKey: z.string().min(1),
  openaiApiKey: z.string().startsWith("sk-"),
  // "test" must be accepted: test runners such as Vitest (the `test` script)
  // run with NODE_ENV=test, and rejecting it would make this module throw on
  // import in every test that touches config.
  nodeEnv: z.enum(["development", "production", "test"]).default("production"),
  logLevel: z.enum(["debug", "info", "warn", "error"]).default("info"),
});

/**
 * Validated configuration, read once from `process.env` when this module is
 * first imported. Unset optional variables take the schema defaults.
 */
export const config = ConfigSchema.parse({
  supabaseUrl: process.env.SUPABASE_URL,
  supabaseAnonKey: process.env.SUPABASE_ANON_KEY,
  openaiApiKey: process.env.OPENAI_API_KEY,
  nodeEnv: process.env.NODE_ENV,
  logLevel: process.env.LOG_LEVEL,
});
```
---
## Vercel Configuration
### vercel.json
```json
{
"functions": {
"app/api/mcp/route.ts": {
"maxDuration": 60
}
},
"rewrites": [
{
"source": "/mcp",
"destination": "/api/mcp"
}
]
}
```
### package.json
```json
{
"name": "@youtube-kb/server",
"version": "1.0.0",
"type": "module",
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "eslint .",
"test": "vitest"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.25.2",
"@supabase/supabase-js": "^2.45.0",
"mcp-handler": "^1.0.0",
"next": "^14.0.0",
"openai": "^4.67.0",
"zod": "^3.23.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"typescript": "^5.6.0",
"eslint": "^9.0.0",
"vitest": "^2.0.0"
}
}
```
---
## Local Development
### Running Locally
```bash
# Install dependencies
pnpm install
# Start dev server
# (runs the package's "dev" script, i.e. `next dev`; SUPABASE_URL,
# SUPABASE_ANON_KEY and OPENAI_API_KEY must be set, see .env.example)
pnpm dev
# Server runs at http://localhost:3000/api/mcp
```
### Testing with MCP Inspector
```bash
# Install inspector
# (optional: the npx command below can fetch the package on demand)
npm install -g @modelcontextprotocol/inspector
# Run inspector
npx @modelcontextprotocol/inspector
# In browser:
# 1. Select "Streamable HTTP" transport
# 2. Enter URL: http://localhost:3000/api/mcp
#    (the local dev server from "Running Locally" must be up)
# 3. Connect and test tools
```
### Testing Tools Directly
```bash
# Test search
# Streamable HTTP requires the client to accept both JSON and SSE responses;
# without the Accept header the server rejects the request.
curl -X POST http://localhost:3000/api/mcp \
  -H "Content-Type: application/json" \
  -H "Accept: application/json, text/event-stream" \
  -d '{"jsonrpc":"2.0","method":"tools/call","params":{"name":"search","arguments":{"query":"pricing strategy"}},"id":1}'
```
---
## Deployment
### Deploy to Vercel
```bash
# Install Vercel CLI
npm i -g vercel
# Link this directory to a Vercel project (needed before env vars can be added)
cd packages/server
vercel link
# Set environment variables BEFORE the first deploy: utils/config.ts validates
# them on load, so a deployment without them fails.
vercel env add SUPABASE_URL
vercel env add SUPABASE_ANON_KEY
vercel env add OPENAI_API_KEY
# Deploy (preview)
vercel
# Deploy to production
vercel --prod
```
### Verify Deployment
```bash
# Test the deployed endpoint with an MCP initialize request
# (a bare GET does not return server info on a Streamable HTTP endpoint)
curl -X POST https://youtube-kb.vercel.app/api/mcp \
  -H "Content-Type: application/json" \
  -H "Accept: application/json, text/event-stream" \
  -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"curl","version":"1.0.0"}}}'
# Should return MCP server info
```
---
## Error Handling
### Error Response Format
```typescript
// All errors follow this format
{
content: [{
type: "text",
text: "Error: [description of what went wrong]"
}],
isError: true
}
```
### Common Errors
| Error | Cause | Handling |
|-------|-------|----------|
| `Invalid query` | Empty or malformed query | Zod validation |
| `Embedding failed` | OpenAI API error | Retry with backoff |
| `Database error` | Supabase query failed | Log and return error |
| `Rate limited` | Too many requests | Return 429 (future) |
---
## Performance Optimization
### Caching Strategy (Future)
```typescript
// Edge caching for common queries
// Vercel Edge Config or Redis

/** How long a cached search result stays valid. */
const CACHE_TTL_MS = 300_000; // 5 min

// Sketch only: `any` stands in for the search result type.
const cache = new Map<string, { data: any; expiry: number }>();

/**
 * Returns search results for `(query, domain)`, served from an in-memory
 * cache when a fresh entry exists.
 *
 * @param query  Search query text.
 * @param domain Optional domain filter.
 * @returns The cached or freshly computed result of `performSearch`.
 */
async function cachedSearch(query: string, domain?: string) {
  // JSON-encode the pair so distinct inputs can never share a key
  // (plain "query:domain" concatenation collides when either contains ":").
  const key = JSON.stringify([query, domain ?? null]);
  const now = Date.now();

  const cached = cache.get(key);
  if (cached && cached.expiry > now) {
    return cached.data;
  }

  const data = await performSearch(query, domain);

  // Drop expired entries on write so the map does not grow without bound.
  const writeTime = Date.now();
  for (const [staleKey, entry] of cache) {
    if (entry.expiry <= writeTime) cache.delete(staleKey);
  }

  cache.set(key, { data, expiry: writeTime + CACHE_TTL_MS });
  return data;
}
```
### Connection Pooling
Supabase handles connection pooling automatically. For high traffic:
```typescript
// supabase-js talks to the project's HTTPS API, not to Postgres directly, so
// the client is always created with the regular project URL.
// The `*.pooler.supabase.com` connection string is only for direct Postgres
// clients (pg, Prisma, Drizzle, ...); it is not an API endpoint and must not be
// passed to createClient.
const supabase = createClient(
  process.env.SUPABASE_URL!,
  process.env.SUPABASE_ANON_KEY!
);
```