# generator.py
"""LLM generation wrapper for RAG responses."""
from typing import List, Optional
from litellm import acompletion
class LLMGenerator:
"""LLM generation client via LiteLLM."""
def __init__(
self,
api_base: Optional[str] = None,
model: str = "ollama/llama3",
temperature: float = 0.7,
max_tokens: int = 1000
):
self.api_base = api_base
self.model = model
self.temperature = temperature
self.max_tokens = max_tokens

    async def generate(
        self,
        question: str,
        context: List[str],
        system_prompt: Optional[str] = None
    ) -> str:
        """Generate response using RAG context.

        Args:
            question: User question
            context: Retrieved context chunks
            system_prompt: Optional system prompt

        Returns:
            Generated response
        """
        # Build context string with numbered source markers
        context_str = "\n\n".join([
            f"[Source {i+1}]\n{ctx}"
            for i, ctx in enumerate(context)
        ])

        # Default system prompt
        if not system_prompt:
            system_prompt = (
                "You are a helpful assistant. Answer the user's question "
                "based on the provided context. If the context doesn't contain "
                "enough information, say so clearly."
            )

        # Build messages
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Context:\n{context_str}\n\nQuestion: {question}"}
        ]

        # Generate response
        kwargs = {
            "model": self.model,
            "messages": messages,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens
        }
        if self.api_base:
            # When using a LiteLLM proxy, we need to use the "openai/" model prefix
            # or set custom_llm_provider to force requests through the proxy
            kwargs["api_base"] = f"{self.api_base}/v1"
            kwargs["custom_llm_provider"] = "openai"

        response = await acompletion(**kwargs)
        return response.choices[0].message.content
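

# --- Example usage (illustrative sketch, not part of the original module) ---
# Assumes LiteLLM can reach a local Ollama server serving "llama3" with its
# default settings; the question and context strings below are placeholders.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        generator = LLMGenerator(model="ollama/llama3", temperature=0.2)
        answer = await generator.generate(
            question="What does the retriever return?",
            context=[
                "The retriever returns the top-k chunks ranked by cosine similarity.",
                "Each chunk carries the source document ID in its metadata.",
            ],
        )
        print(answer)

    asyncio.run(_demo())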