generator.py
"""LLM generation wrapper for RAG responses.""" from typing import List, Optional from litellm import acompletion class LLMGenerator: """LLM generation client via LiteLLM.""" def __init__( self, api_base: Optional[str] = None, model: str = "ollama/llama3", temperature: float = 0.7, max_tokens: int = 1000 ): self.api_base = api_base self.model = model self.temperature = temperature self.max_tokens = max_tokens async def generate( self, question: str, context: List[str], system_prompt: Optional[str] = None ) -> str: """Generate response using RAG context. Args: question: User question context: Retrieved context chunks system_prompt: Optional system prompt Returns: Generated response """ # Build context string context_str = "\n\n".join([ f"[Source {i+1}]\n{ctx}" for i, ctx in enumerate(context) ]) # Default system prompt if not system_prompt: system_prompt = ( "You are a helpful assistant. Answer the user's question " "based on the provided context. If the context doesn't contain " "enough information, say so clearly." ) # Build messages messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"Context:\n{context_str}\n\nQuestion: {question}"} ] # Generate response kwargs = { "model": self.model, "messages": messages, "temperature": self.temperature, "max_tokens": self.max_tokens } if self.api_base: # When using LiteLLM proxy, we need to use the openai/ prefix # or set custom_llm_provider to force it to use the proxy kwargs["api_base"] = f"{self.api_base}/v1" kwargs["custom_llm_provider"] = "openai" response = await acompletion(**kwargs) return response.choices[0].message.content
