"""LLM generation wrapper for RAG responses."""
from typing import List, Optional
from litellm import acompletion
class LLMGenerator:
"""LLM generation client via LiteLLM."""
def __init__(
self,
api_base: Optional[str] = None,
model: str = "ollama/llama3",
temperature: float = 0.7,
max_tokens: int = 1000
):
self.api_base = api_base
self.model = model
self.temperature = temperature
self.max_tokens = max_tokens
async def generate(
self,
question: str,
context: List[str],
system_prompt: Optional[str] = None
) -> str:
"""Generate response using RAG context.
Args:
question: User question
context: Retrieved context chunks
system_prompt: Optional system prompt
Returns:
Generated response
"""
# Build context string
context_str = "\n\n".join([
f"[Source {i+1}]\n{ctx}"
for i, ctx in enumerate(context)
])
# Default system prompt
if not system_prompt:
system_prompt = (
"You are a helpful assistant. Answer the user's question "
"based on the provided context. If the context doesn't contain "
"enough information, say so clearly."
)
# Build messages
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": f"Context:\n{context_str}\n\nQuestion: {question}"}
]
# Generate response
kwargs = {
"model": self.model,
"messages": messages,
"temperature": self.temperature,
"max_tokens": self.max_tokens
}
if self.api_base:
# When using LiteLLM proxy, we need to use the openai/ prefix
# or set custom_llm_provider to force it to use the proxy
kwargs["api_base"] = f"{self.api_base}/v1"
kwargs["custom_llm_provider"] = "openai"
response = await acompletion(**kwargs)
return response.choices[0].message.content
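

# Minimal usage sketch (not part of the original module): the question and
# context chunk below are illustrative placeholders, and the default model
# assumes a local Ollama instance is reachable by LiteLLM.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        generator = LLMGenerator(model="ollama/llama3")
        answer = await generator.generate(
            question="What does this project do?",
            context=["This project exposes a personal RAG pipeline over MCP."],
        )
        print(answer)

    asyncio.run(_demo())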