import { z } from 'zod';
import Groq from 'groq-sdk';
import { Logger } from '../utils/logger.js';
import { RateLimiter } from '../utils/rate-limiter.js';
import { CacheManager } from '../utils/cache-manager.js';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { createHash } from 'node:crypto';
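// Accepted arguments: the core Groq completion parameters plus a 'priority' routing hint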
const TextCompletionSchema = z.object({
prompt: z.string().min(1, 'Prompt is required'),
model: z.string().optional(),
max_tokens: z.number().min(1).max(8000).optional(),
temperature: z.number().min(0).max(2).optional(),
top_p: z.number().min(0).max(1).optional(),
stream: z.boolean().optional(),
json_mode: z.boolean().optional(),
system_prompt: z.string().optional(),
priority: z.enum(['speed', 'quality', 'cost']).optional()
});
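/**
 * Tool that generates text completions through the Groq Chat Completions API.
 * Adds model routing (explicit model > priority hint > prompt-complexity
 * heuristic), per-model rate limiting, and short-lived response caching.
 */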
export class TextCompletionTool {
public readonly name = 'groq_text_completion';
public readonly description = 'Generate text completions using Groq models with intelligent routing';
public readonly inputSchema = zodToJsonSchema(TextCompletionSchema, { $refStrategy: 'none', target: 'jsonSchema7' });
constructor(
private groq: Groq,
private logger: Logger,
private rateLimiter: RateLimiter,
private cache: CacheManager
) {}
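  /**
   * Validates the arguments, resolves the target model, and returns either a
   * cached result or a fresh completion. Throws if the Groq request fails.
   */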
async execute(args: z.infer<typeof TextCompletionSchema>): Promise<any> {
const validated = TextCompletionSchema.parse(args);
// Intelligent model selection
const selectedModel = this.selectOptimalModel(validated);
// Check cache first
const cacheKey = this.generateCacheKey(validated, selectedModel);
const cached = this.cache.get(cacheKey);
if (cached) {
this.logger.info('Returning cached result', { model: selectedModel });
return cached;
}
// Apply rate limiting for specific model
await this.rateLimiter.checkLimit(selectedModel);
const messages = [
...(validated.system_prompt ? [{ role: 'system' as const, content: validated.system_prompt }] : []),
{ role: 'user' as const, content: validated.prompt }
];
    const requestParams: any = {
      model: selectedModel,
      messages,
      // `??` instead of `||` so explicit zero values (e.g. temperature: 0) are respected
      max_tokens: validated.max_tokens ?? 1000,
      temperature: validated.temperature ?? 0.7,
      top_p: validated.top_p ?? 1,
      // Streaming responses are never consumed below, so always request a full completion
      stream: false
    };
if (validated.json_mode) {
requestParams.response_format = { type: 'json_object' };
}
try {
      this.logger.info('Making Groq API request', { model: selectedModel, max_tokens: requestParams.max_tokens });
const completion = await this.groq.chat.completions.create(requestParams);
const result = {
content: completion.choices[0]?.message?.content || '',
model: selectedModel,
usage: completion.usage,
finish_reason: completion.choices[0]?.finish_reason,
timestamp: new Date().toISOString()
};
// Cache successful results
this.cache.set(cacheKey, result, 300); // 5 minutes TTL
return result;
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      // Use 'this.name' so the log identifies the failing tool
      this.logger.error(`Tool ${this.name} failed`, { error: errorMessage });
      // Re-throw so callers see the failure instead of receiving undefined
      throw new Error(`Tool ${this.name} failed: ${errorMessage}`);
    }
}
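  /**
   * Resolves the model to use: an explicit `model` argument wins, then the
   * `priority` hint, and finally a complexity heuristic over the prompt text.
   */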
  private selectOptimalModel(args: z.infer<typeof TextCompletionSchema>): string {
    if (args.model) return args.model;
    // Intelligent model selection based on priority
    switch (args.priority) {
      case 'quality':
        return 'llama-3.1-70b-versatile';
      case 'speed':
      case 'cost':
        // The instant model is both the fastest and the cheapest option
        return 'llama-3.1-8b-instant';
      default: {
        // Analyze prompt complexity for auto-selection
        const complexity = this.analyzePromptComplexity(args.prompt);
        return complexity > 0.7 ? 'llama-3.1-70b-versatile' : 'llama-3.1-8b-instant';
      }
    }
  }
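  /**
   * Scores prompt complexity in [0, 1]: up to 1.0 for length (1.0 per 1000
   * characters, capped), +0.3 if an "analytical" keyword appears, and +0.1
   * per question mark, with the sum capped at 1. For example, a 500-character
   * prompt containing "analyze" and two questions scores 0.5 + 0.3 + 0.2 = 1.0
   * and is routed to the larger model.
   */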
private analyzePromptComplexity(prompt: string): number {
// Simple complexity analysis
const factors = {
length: Math.min(prompt.length / 1000, 1),
keywords: ['analyze', 'explain', 'complex', 'detailed', 'comprehensive'].some(kw =>
prompt.toLowerCase().includes(kw)
) ? 0.3 : 0,
questions: (prompt.match(/\?/g) || []).length * 0.1
};
return Math.min(factors.length + factors.keywords + factors.questions, 1);
}
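  /**
   * Builds a deterministic cache key from every parameter that affects the
   * completion, so only truly identical requests share a cached result.
   */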
  private generateCacheKey(args: z.infer<typeof TextCompletionSchema>, model: string): string {
    // Include all output-affecting parameters (defaults must match requestParams),
    // and hash instead of truncating base64 so long prompts with a shared prefix cannot collide
    const key = `${model}:${args.prompt}:${args.system_prompt ?? ''}:${args.temperature ?? 0.7}:${args.max_tokens ?? 1000}:${args.top_p ?? 1}:${args.json_mode ?? false}`;
    return createHash('sha256').update(key).digest('hex');
  }
}
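
/*
 * Example usage (a minimal sketch; assumes the Logger, RateLimiter, and
 * CacheManager utilities in ../utils can be constructed without arguments,
 * so adapt to their real signatures):
 *
 *   const tool = new TextCompletionTool(
 *     new Groq({ apiKey: process.env.GROQ_API_KEY }),
 *     new Logger(),
 *     new RateLimiter(),
 *     new CacheManager()
 *   );
 *
 *   const result = await tool.execute({
 *     prompt: 'Summarize the benefits of rate limiting in three bullet points.',
 *     priority: 'speed',
 *     max_tokens: 256
 *   });
 *   console.log(result.content); // completion text
 */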