#!/usr/bin/env node
/**
* Gemini MCP Server
*
* A drop-in replacement for Codex MCP that uses Google Gemini 3 Pro Preview.
* Mirrors the exact interface: gemini (like codex) and gemini-reply (like codex-reply)
*/
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import {
CallToolRequestSchema,
ListToolsRequestSchema,
Tool,
} from "@modelcontextprotocol/sdk/types.js";
import { GoogleGenAI, Content } from "@google/genai";
// Configuration - Gemini 3 Pro Preview (latest)
const MODEL = process.env.GEMINI_MODEL || "gemini-3-pro-preview";
// Either variable works; GEMINI_API_KEY takes precedence when both are set.
const API_KEY = process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY;
if (!API_KEY) {
// Fail fast: the server cannot do anything without credentials.
console.error("Error: GEMINI_API_KEY or GOOGLE_API_KEY environment variable required");
process.exit(1);
}
// Initialize Gemini client
const ai = new GoogleGenAI({ apiKey: API_KEY });
// Session storage for multi-turn conversations
// Keyed by the conversation ID handed back from the `gemini` tool; entries
// are evicted by cleanupSessions() after one hour of inactivity.
interface ConversationSession {
history: Content[]; // full user/model turn history, replayed on each reply
createdAt: number; // epoch ms when the session was created
lastUsed: number; // epoch ms of the most recent turn; drives eviction
cwd?: string; // working directory captured at session start, reused on replies
}
const sessions = new Map<string, ConversationSession>();
// Video generation operation storage
// Tracks in-flight Veo operations so gemini-video-check can poll them later.
interface VideoOperation {
operationId: string;
prompt: string;
startedAt: number; // epoch ms; used to report elapsed time while polling
aspectRatio?: string;
}
const videoOperations = new Map<string, VideoOperation>();
// Most recently started operation; gemini-video-check falls back to this
// when the caller does not supply an operationId.
let lastVideoOperationId: string | null = null;
// Video generation model - Veo 3.1
const VIDEO_MODEL = "veo-3.1-generate-preview";
// Clean up old sessions (older than 1 hour)
function cleanupSessions() {
const oneHourAgo = Date.now() - 60 * 60 * 1000;
for (const [id, session] of sessions.entries()) {
if (session.lastUsed < oneHourAgo) {
sessions.delete(id);
}
}
}
// Build a unique conversation ID: "gemini-<epoch ms>-<random base36 suffix>".
// Uniqueness comes from the timestamp plus a short random tail; no
// cryptographic guarantees are needed for an in-memory session key.
function generateConversationId(): string {
  const suffix = Math.random().toString(36).substring(2, 9);
  return ["gemini", Date.now(), suffix].join("-");
}
// Assemble the system instruction from the tool-call configuration.
//
// Sections, in order: base instructions (caller-supplied or a built-in
// default), optional working-directory context, optional sandbox access
// note, and optional developer instructions. Sections are joined with "\n";
// each optional section embeds its own leading "\n" for a blank-line gap.
function buildSystemInstruction(config: {
  cwd?: string;
  sandbox?: string;
  baseInstructions?: string;
  developerInstructions?: string;
}): string {
  // Human-readable description for each recognized sandbox mode; unknown
  // modes are silently omitted.
  const sandboxDescriptions: Record<string, string> = {
    "read-only": "You are in read-only mode. You can analyze and review but cannot make changes.",
    "workspace-write": "You can read and write within the workspace directory.",
    "danger-full-access": "You have full access to read and write files.",
  };

  const sections: string[] = [
    config.baseInstructions ||
      `You are an expert software engineer assistant powered by Gemini 3 Pro Preview.
You help with code review, analysis, planning, and problem-solving.
Provide clear, concise, and actionable responses.
When reviewing code or plans, be thorough but practical.`,
  ];

  if (config.cwd) {
    sections.push(`\nWorking directory: ${config.cwd}`);
  }

  const sandboxNote = config.sandbox ? sandboxDescriptions[config.sandbox] : undefined;
  if (sandboxNote) {
    sections.push(`\nAccess level: ${sandboxNote}`);
  }

  if (config.developerInstructions) {
    sections.push(`\nDeveloper Instructions:\n${config.developerInstructions}`);
  }

  return sections.join("\n");
}
// Image generation models - Nano Banana (Gemini native image generation)
// gemini-2.5-flash-image = Nano Banana (fast, cheap ~$0.04/image)
// gemini-3-pro-image-preview = Nano Banana Pro (advanced, better text rendering)
const IMAGE_MODEL_FAST = "gemini-2.5-flash-image";
const IMAGE_MODEL_PRO = "gemini-3-pro-image-preview";

// Keywords that trigger Nano Banana Pro (text-heavy, precision work)
const PRO_KEYWORDS = [
  "nano banana pro", "nanobanana pro", "pro model",
  "infographic", "diagram", "chart", "graph",
  "text", "typography", "font", "lettering", "writing",
  "slide", "presentation", "deck",
  "logo", "brand", "poster", "flyer", "banner",
  "document", "page", "layout",
  "high quality", "high-quality", "hq", "4k",
  "detailed text", "readable", "legible"
];

// Auto-detect whether a prompt warrants the Pro model.
//
// Keywords are matched on word boundaries rather than as raw substrings:
// the previous `includes()` check fired on incidental fragments (e.g.
// "photograph" contains "graph", "context" contains "text"), silently
// routing ordinary prompts to the more expensive Pro model.
function shouldUsePro(prompt: string): boolean {
  const lower = prompt.toLowerCase();
  return PRO_KEYWORDS.some((keyword) => {
    // Escape regex metacharacters so each keyword is matched literally.
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    return new RegExp(`\\b${escaped}\\b`).test(lower);
  });
}
// Generate images using Nano Banana (via generateContent with IMAGE modality)
//
// Returns the base64-encoded images together with the prompt and the model
// that was actually selected (fast vs. Pro). Pro is chosen either by the
// explicit `usePro` flag or, when the flag is omitted, by keyword
// auto-detection on the prompt.
//
// NOTE(review): `options.outputDir` is accepted but never read here - the
// tool handler saves files itself. Confirm whether the option should be
// removed from the signature.
async function generateImage(
prompt: string,
options: {
numberOfImages?: number;
aspectRatio?: string;
outputDir?: string;
usePro?: boolean;
} = {}
): Promise<{ images: Array<{ base64: string; mimeType: string }>; prompt: string; model: string }> {
const numberOfImages = options.numberOfImages || 1;
const aspectRatio = options.aspectRatio || "1:1";
// Auto-detect Pro if prompt contains keywords, or use explicit usePro flag
// (`??` keeps an explicit `false` as an opt-out of auto-detection).
const usePro = options.usePro ?? shouldUsePro(prompt);
const model = usePro ? IMAGE_MODEL_PRO : IMAGE_MODEL_FAST;
const images: Array<{ base64: string; mimeType: string }> = [];
// Nano Banana uses generateContent with response_modalities: ['IMAGE']
// Generate each image separately (generateContent returns 1 image per call)
for (let i = 0; i < numberOfImages; i++) {
const response = await ai.models.generateContent({
model: model,
contents: prompt,
config: {
responseModalities: ["IMAGE"],
// Include aspect ratio in the generation config
// NOTE(review): nesting `generationConfig` inside `config` looks
// suspicious - the @google/genai SDK documents `imageConfig:
// { aspectRatio }` for image output; verify this key is not silently
// ignored for non-1:1 ratios.
...(aspectRatio !== "1:1" && {
generationConfig: { aspectRatio }
}),
},
});
// Extract image from response parts
// A single candidate may carry several parts; keep every inline image.
if (response.candidates && response.candidates[0]?.content?.parts) {
for (const part of response.candidates[0].content.parts) {
if (part.inlineData?.data) {
images.push({
base64: part.inlineData.data,
mimeType: part.inlineData.mimeType || "image/png",
});
}
}
}
}
return { images, prompt, model };
}
// Send one user turn to Gemini.
//
// `prompt` becomes the next user message of a chat seeded with `history`,
// optionally under a system instruction. Returns the model's text reply
// plus the history extended with the new user/model turn pair, so callers
// can persist it for multi-turn conversations.
async function callGemini(
  prompt: string,
  history: Content[] = [],
  systemInstruction?: string
): Promise<{ text: string; history: Content[] }> {
  // A fresh chat object is created per call; earlier turns are replayed
  // through the history argument.
  const chat = ai.chats.create({
    model: MODEL,
    config: systemInstruction ? { systemInstruction } : undefined,
    history,
  });

  const response = await chat.sendMessage({ message: prompt });
  const text = response.text || "";

  // Record this exchange as two Content entries appended to the history.
  const userTurn: Content = { role: "user", parts: [{ text: prompt }] };
  const modelTurn: Content = { role: "model", parts: [{ text }] };

  return { text, history: [...history, userTurn, modelTurn] };
}
// Define tools - mirrors Codex MCP interface
// Static catalog returned verbatim by the ListTools handler; dispatch in the
// CallTool handler is keyed on each tool's `name`.
const tools: Tool[] = [
// Tool: start a new multi-turn conversation (mirrors `codex`).
{
name: "gemini",
description: `Run a Gemini session. Similar to Codex but uses Google Gemini 3 Pro Preview.
Supports configuration parameters matching the Codex Config struct:
- prompt: The initial user prompt to start the conversation (required)
- cwd: Working directory context
- sandbox: Access policy ("read-only", "workspace-write", "danger-full-access")
- base-instructions: Override default system instructions
- developer-instructions: Additional developer context
- model: Optional override for model (default: ${MODEL})`,
inputSchema: {
type: "object" as const,
properties: {
prompt: {
type: "string",
description: "The initial user prompt to start the Gemini conversation",
},
cwd: {
type: "string",
description: "Working directory for context",
},
sandbox: {
type: "string",
enum: ["read-only", "workspace-write", "danger-full-access"],
description: "Access policy mode",
},
"base-instructions": {
type: "string",
description: "Override the default system instructions",
},
"developer-instructions": {
type: "string",
description: "Developer instructions for additional context",
},
model: {
type: "string",
description: `Model override (default: ${MODEL})`,
},
config: {
type: "object",
description: "Additional config settings (passthrough)",
additionalProperties: true,
},
},
required: ["prompt"],
},
},
// Tool: continue an existing conversation (mirrors `codex-reply`).
{
name: "gemini-reply",
description: `Continue a Gemini conversation by providing the conversation ID and prompt.
Use this to continue a multi-turn conversation started with the 'gemini' tool.`,
inputSchema: {
type: "object" as const,
properties: {
conversationId: {
type: "string",
description: "The conversation ID from a previous gemini call",
},
prompt: {
type: "string",
description: "The next user prompt to continue the conversation",
},
},
required: ["conversationId", "prompt"],
},
},
// Tool: synchronous image generation via Nano Banana models.
{
name: "gemini-image",
description: `Generate images using Nano Banana (Gemini's native image generation).
Two models available:
- Nano Banana (default): Fast, cheap (~$0.04/image), good for most use cases
- Nano Banana Pro: Advanced model with better text rendering, infographics, diagrams
Auto-detection: Says "nano banana pro" or mentions text/infographic/diagram/chart/logo/poster
in prompt → automatically uses Pro model.
Parameters:
- prompt: Text description of the image to generate (required)
- numberOfImages: How many images to generate (1-4, default: 1)
- aspectRatio: Image aspect ratio ("1:1", "3:4", "4:3", "9:16", "16:9", default: "1:1")
- usePro: Force Nano Banana Pro (auto-detected from prompt if not specified)
- outputPath: Optional path to save images`,
inputSchema: {
type: "object" as const,
properties: {
prompt: {
type: "string",
description: "Text description of the image to generate",
},
numberOfImages: {
type: "number",
description: "Number of images to generate (1-4)",
minimum: 1,
maximum: 4,
},
aspectRatio: {
type: "string",
enum: ["1:1", "3:4", "4:3", "9:16", "16:9"],
description: "Aspect ratio of generated images",
},
usePro: {
type: "boolean",
description: "Use Nano Banana Pro for higher quality (better text, infographics)",
default: false,
},
outputPath: {
type: "string",
description: "Optional directory path to save generated images",
},
},
required: ["prompt"],
},
},
// Tool: kick off an async Veo video generation (poll with gemini-video-check).
{
name: "gemini-video-generate",
description: `Generate a video using Veo 3.1 (Google's video generation model).
This starts an async video generation that takes 1-5 minutes. Returns an operation ID
that you can use with gemini-video-check to poll for completion.
Parameters:
- prompt: Text description of the video to generate (required)
- aspectRatio: Video aspect ratio ("16:9" or "9:16", default: "16:9")
- resolution: Video resolution ("720p", default: "720p")
- firstFrameBase64: Optional base64 PNG image to use as first frame (from gemini-image)
Workflow:
1. Call gemini-video-generate → returns operationId
2. Wait 30-60 seconds
3. Call gemini-video-check with operationId → returns status or video`,
inputSchema: {
type: "object" as const,
properties: {
prompt: {
type: "string",
description: "Text description of the video to generate",
},
aspectRatio: {
type: "string",
enum: ["16:9", "9:16"],
description: "Video aspect ratio (default: 16:9)",
},
resolution: {
type: "string",
enum: ["720p"],
description: "Video resolution (default: 720p)",
},
firstFrameBase64: {
type: "string",
description: "Optional base64 PNG image to use as first frame",
},
},
required: ["prompt"],
},
},
// Tool: poll a video operation; downloads the result when done.
{
name: "gemini-video-check",
description: `Check the status of a video generation operation.
If the video is still processing, returns the current status.
If the video is complete, returns the video data.
Parameters:
- operationId: The operation ID from gemini-video-generate (optional - uses last operation if not provided)
- outputPath: Optional path to save the video file when complete`,
inputSchema: {
type: "object" as const,
properties: {
operationId: {
type: "string",
description: "Operation ID from gemini-video-generate (uses last operation if not provided)",
},
outputPath: {
type: "string",
description: "Optional path to save the video file",
},
},
required: [],
},
},
];
// Create MCP server
const server = new Server(
{
name: "gemini-mcp",
version: "1.0.0",
},
{
capabilities: {
// Only the tools capability is advertised (no resources/prompts).
tools: {},
},
}
);
// Handle tool listing
// Returns the static catalog defined above.
server.setRequestHandler(ListToolsRequestSchema, async () => {
return { tools };
});
// Handle tool calls
//
// Dispatches the five tools (gemini, gemini-reply, gemini-image,
// gemini-video-generate, gemini-video-check). Any Gemini API failure is
// caught at the bottom and surfaced as an MCP error result instead of
// crashing the server.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  const { name, arguments: args } = request.params;

  // Cleanup old sessions periodically
  cleanupSessions();

  try {
    if (name === "gemini") {
      // Start a brand-new conversation; the ID for follow-up turns is
      // returned in _meta.conversationId.
      const {
        prompt,
        cwd,
        sandbox,
        "base-instructions": baseInstructions,
        "developer-instructions": developerInstructions,
        model,
      } = args as {
        prompt: string;
        cwd?: string;
        sandbox?: string;
        "base-instructions"?: string;
        "developer-instructions"?: string;
        model?: string;
      };
      // NOTE(review): `model` is accepted by the schema but currently
      // ignored - callGemini always uses MODEL. Wire it through or drop it
      // from the tool description.
      void model;

      // Build system instruction
      const systemInstruction = buildSystemInstruction({
        cwd,
        sandbox,
        baseInstructions,
        developerInstructions,
      });

      // Call Gemini
      const result = await callGemini(prompt, [], systemInstruction);

      // Create new session
      const conversationId = generateConversationId();
      sessions.set(conversationId, {
        history: result.history,
        createdAt: Date.now(),
        lastUsed: Date.now(),
        cwd,
      });

      return {
        content: [
          {
            type: "text",
            text: result.text,
          },
        ],
        // Include conversation ID in metadata for continuation
        _meta: {
          conversationId,
        },
      };
    } else if (name === "gemini-reply") {
      const { conversationId, prompt } = args as {
        conversationId: string;
        prompt: string;
      };

      // Get existing session (may have been evicted by cleanupSessions).
      const session = sessions.get(conversationId);
      if (!session) {
        return {
          content: [
            {
              type: "text",
              text: `Error: Conversation ${conversationId} not found. It may have expired or never existed.`,
            },
          ],
          isError: true,
        };
      }

      // Build system instruction (use stored context)
      const systemInstruction = buildSystemInstruction({
        cwd: session.cwd,
      });

      // Continue conversation
      const result = await callGemini(prompt, session.history, systemInstruction);

      // Update session
      session.history = result.history;
      session.lastUsed = Date.now();

      return {
        content: [
          {
            type: "text",
            text: result.text,
          },
        ],
        _meta: {
          conversationId,
        },
      };
    } else if (name === "gemini-image") {
      const { prompt, numberOfImages, aspectRatio, usePro, outputPath } = args as {
        prompt: string;
        numberOfImages?: number;
        aspectRatio?: string;
        usePro?: boolean;
        outputPath?: string;
      };

      // Clamp to the 1-4 range the schema advertises; clients are not
      // guaranteed to respect JSON-schema bounds.
      const requested = Number.isFinite(numberOfImages) ? Math.trunc(numberOfImages as number) : 1;
      const count = Math.min(Math.max(requested, 1), 4);

      // Generate images using Nano Banana.
      // Pass usePro through unchanged: when the caller omits it,
      // generateImage falls back to keyword auto-detection. (The previous
      // `usePro || false` forced the flag to false and disabled
      // auto-detection entirely.)
      const result = await generateImage(prompt, {
        numberOfImages: count,
        aspectRatio: aspectRatio || "1:1",
        usePro,
      });

      // If outputPath provided, save images to disk
      const savedPaths: string[] = [];
      if (outputPath && result.images.length > 0) {
        const fs = await import("fs");
        const path = await import("path");

        // Ensure directory exists
        if (!fs.existsSync(outputPath)) {
          fs.mkdirSync(outputPath, { recursive: true });
        }

        // Generate safe filename from prompt (first 50 chars, alnum only).
        const safePrompt = prompt.slice(0, 50).replace(/[^a-zA-Z0-9]/g, "_");
        for (let i = 0; i < result.images.length; i++) {
          const filename = `${safePrompt}_${i + 1}.png`;
          const fullPath = path.join(outputPath, filename);
          const buffer = Buffer.from(result.images[i].base64, "base64");
          fs.writeFileSync(fullPath, buffer);
          savedPaths.push(fullPath);
        }
      }

      // Return response with image data
      const content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> = [];

      // Add summary text
      content.push({
        type: "text",
        text: `Generated ${result.images.length} image(s) using ${result.model} for prompt: "${prompt}"${savedPaths.length > 0 ? `\n\nSaved to:\n${savedPaths.join("\n")}` : ""}`,
      });

      // Add images as base64
      for (const img of result.images) {
        content.push({
          type: "image",
          data: img.base64,
          mimeType: img.mimeType,
        });
      }

      return { content };
    } else if (name === "gemini-video-generate") {
      const { prompt, aspectRatio, resolution, firstFrameBase64 } = args as {
        prompt: string;
        aspectRatio?: string;
        resolution?: string;
        firstFrameBase64?: string;
      };

      // Build video generation config
      const config: Record<string, unknown> = {
        aspectRatio: aspectRatio || "16:9",
      };
      if (resolution) {
        config.resolution = resolution;
      }

      // Build request params
      const requestParams: Record<string, unknown> = {
        model: VIDEO_MODEL,
        prompt: prompt,
        config: config,
      };

      // Add first frame if provided (image-to-video mode).
      if (firstFrameBase64) {
        requestParams.image = {
          imageBytes: firstFrameBase64,
          mimeType: "image/png",
        };
      }

      // Start video generation (async long-running operation).
      const operation = await (ai.models as any).generateVideos(requestParams);

      // Store operation info so gemini-video-check can report elapsed time.
      const operationId = operation.name || `veo-${Date.now()}`;
      videoOperations.set(operationId, {
        operationId,
        prompt,
        startedAt: Date.now(),
        aspectRatio: aspectRatio || "16:9",
      });
      lastVideoOperationId = operationId;

      // Do a quick 10-second poll in case it completes fast (unlikely but
      // possible). This blocks the tool call for 10 s by design.
      await new Promise(resolve => setTimeout(resolve, 10000));

      try {
        const checkOperation = await (ai.operations as any).getVideosOperation({ operation });
        if (checkOperation.done) {
          // Completed quickly! Return the video
          const video = checkOperation.response?.generatedVideos?.[0];
          if (video?.video) {
            return {
              content: [
                {
                  type: "text",
                  text: `Video generation completed quickly!\n\nPrompt: "${prompt}"\nOperation ID: ${operationId}`,
                },
              ],
              _meta: { operationId, status: "complete" },
            };
          }
        }
      } catch {
        // Poll failed, that's fine - just return the operation ID
      }

      return {
        content: [
          {
            type: "text",
            text: `Video generation started!\n\nPrompt: "${prompt}"\nOperation ID: ${operationId}\nAspect Ratio: ${aspectRatio || "16:9"}\n\nUse gemini-video-check to poll for completion (typically takes 1-5 minutes).`,
          },
        ],
        _meta: { operationId, status: "processing" },
      };
    } else if (name === "gemini-video-check") {
      const { operationId: providedId, outputPath } = args as {
        operationId?: string;
        outputPath?: string;
      };

      // Use provided ID or fall back to last operation
      const operationId = providedId || lastVideoOperationId;
      if (!operationId) {
        return {
          content: [
            {
              type: "text",
              text: "Error: No operation ID provided and no recent video generation found.",
            },
          ],
          isError: true,
        };
      }

      // Get stored operation info (may be missing if the server restarted).
      const opInfo = videoOperations.get(operationId);
      const elapsedSeconds = opInfo ? Math.round((Date.now() - opInfo.startedAt) / 1000) : 0;

      try {
        // Check operation status
        const operation = await (ai.operations as any).getVideosOperation({
          operation: { name: operationId }
        });

        if (!operation.done) {
          return {
            content: [
              {
                type: "text",
                text: `Video still processing...\n\nOperation ID: ${operationId}\nElapsed: ${elapsedSeconds} seconds\n${opInfo ? `Prompt: "${opInfo.prompt}"` : ""}\n\nTry again in 30 seconds.`,
              },
            ],
            _meta: { operationId, status: "processing", elapsedSeconds },
          };
        }

        // Video is complete!
        const video = operation.response?.generatedVideos?.[0];
        if (!video?.video) {
          return {
            content: [
              {
                type: "text",
                text: `Video generation completed but no video was returned.\n\nOperation ID: ${operationId}`,
              },
            ],
            isError: true,
          };
        }

        // Download the video
        const videoData = await (ai.files as any).download({ file: video.video });

        // Save to file if outputPath provided
        let savedPath: string | null = null;
        if (outputPath) {
          const fs = await import("fs");
          const path = await import("path");

          // Ensure directory exists
          const dir = path.dirname(outputPath);
          if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true });
          }

          // Add .mp4 extension if not present
          const finalPath = outputPath.endsWith(".mp4") ? outputPath : `${outputPath}.mp4`;

          // Write video file
          // NOTE(review): Buffer.from(videoBytes) assumes raw bytes; if the
          // SDK returns base64 here the file will be corrupt - verify the
          // videoBytes encoding against the @google/genai docs.
          if (videoData.videoBytes) {
            fs.writeFileSync(finalPath, Buffer.from(videoData.videoBytes));
            savedPath = finalPath;
          } else if (video.video.videoBytes) {
            fs.writeFileSync(finalPath, Buffer.from(video.video.videoBytes));
            savedPath = finalPath;
          }
        }

        // Clean up stored operation
        videoOperations.delete(operationId);

        return {
          content: [
            {
              type: "text",
              text: `Video generation complete!\n\nOperation ID: ${operationId}\nTotal time: ${elapsedSeconds} seconds\n${opInfo ? `Prompt: "${opInfo.prompt}"` : ""}${savedPath ? `\n\nSaved to: ${savedPath}` : ""}`,
            },
          ],
          _meta: {
            operationId,
            status: "complete",
            savedPath,
            videoUri: video.video?.uri || video.video?.videoUri,
          },
        };
      } catch (error) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        return {
          content: [
            {
              type: "text",
              text: `Error checking video status: ${errorMessage}\n\nOperation ID: ${operationId}`,
            },
          ],
          isError: true,
        };
      }
    } else {
      return {
        content: [
          {
            type: "text",
            text: `Unknown tool: ${name}`,
          },
        ],
        isError: true,
      };
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    return {
      content: [
        {
          type: "text",
          text: `Gemini API error: ${errorMessage}`,
        },
      ],
      isError: true,
    };
  }
});
// Start server
// Bootstrap: bind the MCP server to a stdio transport. Readiness and fatal
// errors are logged on stderr, since stdout carries the MCP protocol stream.
async function main(): Promise<void> {
  await server.connect(new StdioServerTransport());
  console.error("Gemini MCP server running on stdio");
}

main().catch((error) => {
  console.error("Fatal error:", error);
  process.exit(1);
});