import OpenAI from 'openai'
import { config } from '../config/environment'
import logger from '../utils/logger'
import { MCPTool, MCPToolCall } from '../types/mcp'

export interface LLMResponse {
  content: string
  toolCalls?: MCPToolCall[]
  requiresTools: boolean
  streaming?: boolean
}

export interface ConversationMessage {
  role: 'system' | 'user' | 'assistant'
  content: string
  timestamp: number
}

export interface LLMStreamChunk {
  content?: string
  done: boolean
  toolCalls?: MCPToolCall[]
}
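
// Shape of a single tool execution result, as consumed by
// generateFollowUpResponse below. Inferred from usage in this file; the
// MCP layer may attach additional fields, hence the index signature.
export interface ToolExecutionResult {
  toolName: string
  success: boolean
  error?: string
  [key: string]: unknown
}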

export class LLMService {
  private openai: OpenAI | null = null
  private conversations: Map<string, ConversationMessage[]> = new Map()
  private readonly MAX_CONTEXT_MESSAGES = 20
  private readonly MAX_RETRIES = 3
  private readonly RETRY_DELAY_MS = 1000

  constructor() {
    // Prefer OpenRouter if configured, fall back to OpenAI
    if (config.llm.openrouterApiKey) {
      this.openai = new OpenAI({
        apiKey: config.llm.openrouterApiKey,
        baseURL: 'https://openrouter.ai/api/v1'
      })
      logger.info('LLM Service initialized with OpenRouter')
    } else if (config.llm.openaiApiKey) {
      this.openai = new OpenAI({
        apiKey: config.llm.openaiApiKey
      })
      logger.info('LLM Service initialized with OpenAI')
    } else {
      // Calls will fail fast in processMessage; surface the misconfiguration early
      logger.warn('No OpenRouter or OpenAI API key configured; LLM Service is disabled')
    }
  }
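
  /**
   * Sends a user message to the LLM with the per-session conversation
   * context and the available MCP tools, then records both the user message
   * and the assistant reply in the session history. Streaming currently
   * applies only when no tools are in play (see callLLMWithRetry).
   */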
  async processMessage(
    message: string,
    availableTools: MCPTool[],
    sessionId: string = 'default',
    streaming: boolean = false
  ): Promise<LLMResponse> {
    try {
      if (!this.openai) {
        throw new Error('No LLM provider configured')
      }

      // Get conversation context
      const context = this.getConversationContext(sessionId)

      // Convert MCP tools to the OpenAI tools format
      const tools = this.convertMCPToolsToOpenAIFormat(availableTools)

      // Prepare messages with the enhanced system prompt
      const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
        {
          role: 'system',
          content: this.getEnhancedSystemPrompt(availableTools)
        },
        // Stored roles are already valid API roles, so the history maps directly
        ...context.map(msg => ({
          role: msg.role,
          content: msg.content
        } as OpenAI.Chat.Completions.ChatCompletionMessageParam)),
        {
          role: 'user',
          content: message
        }
      ]

      // Add the user message to the context
      this.addToConversationContext(sessionId, {
        role: 'user',
        content: message,
        timestamp: Date.now()
      })

      const response = await this.callLLMWithRetry(messages, tools, streaming)

      // Add the assistant response to the context
      this.addToConversationContext(sessionId, {
        role: 'assistant',
        content: response.content,
        timestamp: Date.now()
      })

      return response
    } catch (error) {
      logger.error('Error processing LLM message:', error)
      throw error
    }
  }
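
  /**
   * Calls the chat completions API with up to MAX_RETRIES attempts and a
   * linearly increasing delay between tries (RETRY_DELAY_MS * attempt).
   */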
  private async callLLMWithRetry(
    messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
    tools: OpenAI.Chat.Completions.ChatCompletionTool[],
    streaming: boolean = false
  ): Promise<LLMResponse> {
    let lastError: Error | null = null

    for (let attempt = 1; attempt <= this.MAX_RETRIES; attempt++) {
      try {
        const requestParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = {
          model: this.getModel(),
          messages,
          temperature: 0.1,
          max_tokens: 2000,
        }

        // Add tools if any are available
        if (tools.length > 0) {
          requestParams.tools = tools
          requestParams.tool_choice = 'auto'
        }

        // Use streaming only for regular responses (not tool calls)
        if (streaming && !tools.length) {
          return await this.handleStreamingResponse(requestParams)
        }

        const response = await this.openai!.chat.completions.create(requestParams)
        const choice = response.choices[0]

        if (!choice?.message) {
          throw new Error('No response from LLM')
        }

        // Check for tool calls
        if (choice.message.tool_calls && choice.message.tool_calls.length > 0) {
          const toolCalls: MCPToolCall[] = choice.message.tool_calls.map(call => ({
            name: call.function.name,
            // Malformed JSON from the model throws here and is handled by the retry loop
            arguments: JSON.parse(call.function.arguments || '{}')
          }))

          return {
            content: choice.message.content || 'Executing tools...',
            toolCalls,
            requiresTools: true
          }
        }

        return {
          content: choice.message.content || "I'm not sure how to respond to that.",
          requiresTools: false
        }
      } catch (error) {
        lastError = error as Error
        logger.warn(`LLM call attempt ${attempt} failed:`, error)

        if (attempt < this.MAX_RETRIES) {
          await new Promise(resolve => setTimeout(resolve, this.RETRY_DELAY_MS * attempt))
        }
      }
    }

    throw lastError || new Error('All LLM retry attempts failed')
  }

  private async handleStreamingResponse(
    requestParams: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming
  ): Promise<LLMResponse> {
    // Note: this is a placeholder for a streaming implementation. A full
    // implementation would return an async generator of LLMStreamChunk;
    // for now we fall back to a regular (non-streaming) response.
    const response = await this.openai!.chat.completions.create(requestParams)
    const choice = response.choices[0]

    return {
      content: choice?.message?.content || "I'm not sure how to respond to that.",
      requiresTools: false,
      streaming: false
    }
  }
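
  // Builds the system prompt, embedding the currently available MCP tools
  // so the model knows exactly what it can call.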
  private getEnhancedSystemPrompt(availableTools: MCPTool[]): string {
    return `You are an advanced AI assistant specialized in controlling macOS systems through natural language commands. You have access to powerful tools that allow you to:

🖥️ **Screen Control:**
- Take screenshots to see what's currently displayed
- Analyze UI elements and layout

🖱️ **Mouse & Interaction:**
- Click on specific coordinates or UI elements
- Perform double-clicks, right-clicks
- Drag and drop operations
- Scroll in any direction

⌨️ **Keyboard Control:**
- Type any text or special characters
- Send keyboard shortcuts (cmd+c, cmd+v, etc.)
- Press special keys (enter, tab, esc, etc.)

🚀 **Application Management:**
- Launch and switch between applications
- Open specific applications by name

**Available Tools:** ${availableTools.map(t => `${t.name} - ${t.description}`).join(', ')}

**CRITICAL: You CAN and SHOULD use tools to perform actions on the user's Mac. Do NOT give conversational responses when the user asks you to do something - USE THE TOOLS!**

**Multi-Step Task Handling:**
For complex requests with multiple steps (like "Take a screenshot, then open Finder, navigate to Applications..."):
1. If the request starts with "Take a screenshot" or similar, ONLY call the screenshot tool first
2. For continuation requests, identify the NEXT SPECIFIC ACTION needed and execute it using tools
3. Always use tools when the user requests actions - never give explanatory responses about what they "could" do

**Tool Usage Priority:**
- When asked to "open" something → USE remote_macos_open_application
- When asked to "click" something → USE remote_macos_mouse_click
- When asked to "type" something → USE remote_macos_send_keys
- When asked to "take screenshot" → USE remote_macos_get_screen

**Best Practices:**
1. Always take a screenshot first if you need to see the current state
2. Be precise with coordinates when clicking
3. USE TOOLS to perform actions, don't just explain what to do
4. After using tools, acknowledge what you actually did
5. For multi-step tasks, execute the immediate next action using available tools

**Interaction Style:**
- EXECUTE actions using tools rather than explaining them
- Be direct about what you accomplished
- Use tools confidently when users request actions
- Only ask for clarification if the request is genuinely unclear

**Important:** When a user requests ANY action that can be performed with available tools, USE THE TOOLS immediately. Do not give instructions or explanations unless tools are not available or fail.

Remember: You're actively controlling the user's Mac. Use the tools to perform the requested actions!`
  }
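
  // MCP tool definitions already describe their inputs with JSON Schema,
  // which is the same format OpenAI expects for function parameters, so
  // the conversion is a direct field-for-field mapping.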
  private convertMCPToolsToOpenAIFormat(tools: MCPTool[]): OpenAI.Chat.Completions.ChatCompletionTool[] {
    return tools.map(tool => ({
      type: 'function' as const,
      function: {
        name: tool.name,
        description: tool.description,
        parameters: tool.inputSchema
      }
    }))
  }
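
  // Single source of truth for the model name; OpenRouter namespaces
  // OpenAI models under the `openai/` prefix.
  private getModel(): string {
    return config.llm.openrouterApiKey ? 'openai/gpt-4-turbo-preview' : 'gpt-4-turbo-preview'
  }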

  private getConversationContext(sessionId: string): ConversationMessage[] {
    return this.conversations.get(sessionId) || []
  }

  private addToConversationContext(sessionId: string, message: ConversationMessage): void {
    const context = this.conversations.get(sessionId) || []
    context.push(message)

    // Keep only the most recent messages
    if (context.length > this.MAX_CONTEXT_MESSAGES) {
      context.splice(0, context.length - this.MAX_CONTEXT_MESSAGES)
    }

    this.conversations.set(sessionId, context)
  }
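
  // Drops all stored history for a session, e.g. when the user starts a new chat.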
  clearConversationContext(sessionId: string): void {
    this.conversations.delete(sessionId)
  }
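
  /**
   * After tools have been executed elsewhere, asks the LLM to summarize the
   * results for the user and records that summary in the session history.
   * Falls back to a generic acknowledgement rather than throwing, since the
   * tools themselves have already run.
   */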
  async generateFollowUpResponse(
    originalMessage: string,
    toolResults: ToolExecutionResult[],
    availableTools: MCPTool[], // currently unused; kept for parity with processMessage
    sessionId: string = 'default'
  ): Promise<LLMResponse> {
    try {
      if (!this.openai) {
        throw new Error('No LLM provider configured')
      }

      // Create a detailed summary of what was accomplished
      const successfulTools = toolResults.filter(r => r.success)
      const failedTools = toolResults.filter(r => !r.success)

      let toolSummary = ''
      if (successfulTools.length > 0) {
        toolSummary += `✅ Successfully executed: ${successfulTools.map(r => r.toolName).join(', ')}\n`
      }
      if (failedTools.length > 0) {
        toolSummary += `❌ Failed to execute: ${failedTools.map(r => `${r.toolName} (${r.error})`).join(', ')}\n`
      }

      const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [
        {
          role: 'system',
          content: 'You are an AI assistant that just executed tools to help the user. Provide a helpful summary of what was accomplished and offer relevant next steps or ask if the user needs anything else. Be conversational and supportive.'
        },
        {
          role: 'user',
          content: `Original request: "${originalMessage}"

Tool execution results:
${toolSummary}
Please provide a helpful summary and suggest what the user might want to do next.`
        }
      ]

      const response = await this.openai.chat.completions.create({
        model: this.getModel(),
        messages,
        temperature: 0.3,
        max_tokens: 500
      })

      const choice = response.choices[0]
      const content = choice?.message?.content || 'Task completed successfully!'

      // Add the follow-up to the context
      this.addToConversationContext(sessionId, {
        role: 'assistant',
        content,
        timestamp: Date.now()
      })

      return {
        content,
        requiresTools: false
      }
    } catch (error) {
      logger.error('Error generating follow-up response:', error)
      return {
        content: 'Task completed.',
        requiresTools: false
      }
    }
  }
}
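
// Usage sketch (illustrative only: `mcpClient.listTools()` and
// `executeTools()` stand in for whatever MCP client and tool-execution
// loop the rest of the app provides):
//
//   const llm = new LLMService()
//   const tools = await mcpClient.listTools()
//   const reply = await llm.processMessage('Open Finder', tools, 'session-1')
//   if (reply.requiresTools && reply.toolCalls) {
//     const results = await executeTools(reply.toolCalls)
//     await llm.generateFollowUpResponse('Open Finder', results, tools, 'session-1')
//   }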