"""LLM generation wrapper for RAG responses."""
from typing import List, Optional
from litellm import acompletion
class LLMGenerator:
"""LLM generation client via LiteLLM."""
def __init__(
self,
api_base: Optional[str] = None,
model: str = "ollama/llama3",
temperature: float = 0.7,
max_tokens: int = 1000
):
self.api_base = api_base
self.model = model
self.temperature = temperature
self.max_tokens = max_tokens
async def generate(
self,
question: str,
context: List[str],
system_prompt: Optional[str] = None
) -> str:
"""Generate response using RAG context.
Args:
question: User question
context: Retrieved context chunks
system_prompt: Optional system prompt
Returns:
Generated response
"""
# Build context string
context_str = "\n\n".join([
f"[Source {i+1}]\n{ctx}"
for i, ctx in enumerate(context)
])
# Default system prompt
if not system_prompt:
system_prompt = (
"You are a helpful assistant. Answer the user's question "
"based on the provided context. If the context doesn't contain "
"enough information, say so clearly."
)
# Build messages
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": f"Context:\n{context_str}\n\nQuestion: {question}"}
]
# Generate response
kwargs = {
"model": self.model,
"messages": messages,
"temperature": self.temperature,
"max_tokens": self.max_tokens
}
if self.api_base:
# When using LiteLLM proxy, we need to use the openai/ prefix
# or set custom_llm_provider to force it to use the proxy
kwargs["api_base"] = f"{self.api_base}/v1"
kwargs["custom_llm_provider"] = "openai"
response = await acompletion(**kwargs)
return response.choices[0].message.content
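

# Minimal usage sketch (not part of the original module): the question and
# context chunk below are illustrative placeholders, and the default model
# assumes a local Ollama instance is reachable by LiteLLM.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        generator = LLMGenerator(model="ollama/llama3")
        answer = await generator.generate(
            question="What does this project do?",
            context=["This project exposes a personal RAG pipeline over MCP."],
        )
        print(answer)

    asyncio.run(_demo())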