# We provide all the information about MCP servers via our MCP API:
#   curl -X GET 'https://glama.ai/api/mcp/v1/servers/yj-liuzepeng/rag-mcp-server'
# If you have feedback or need assistance with the MCP directory API, please join our Discord server.
import json
import logging
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
from src.libs.llm.base_llm import BaseLLM
from src.libs.reranker.base_reranker import BaseReranker
# Module-level logger named after this module's dotted import path.
logger = logging.getLogger(__name__)
class LLMReranker(BaseReranker):
    """
    Reranker implementation that uses an LLM to reorder candidates.

    It reads a prompt template from a file, injects the query and candidates,
    and parses the LLM's response (a JSON list of indices) to determine the
    new order. On any failure it degrades gracefully to the original order.
    """

    # Max characters of each candidate shown to the LLM (token-budget heuristic).
    _SNIPPET_LEN = 300

    def __init__(self, llm: BaseLLM, prompt_path: str = "config/prompts/rerank.txt"):
        """
        Initialize the LLM Reranker.

        Args:
            llm: An instance of BaseLLM to perform the reranking.
            prompt_path: Path to the prompt template file.
        """
        self.llm = llm
        self.prompt_path = prompt_path
        self._prompt_template = self._load_prompt_template()

    def _load_prompt_template(self) -> str:
        """Load the prompt template from file, or return a default fallback."""
        try:
            return Path(self.prompt_path).read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as e:
            # Missing/unreadable file is expected in some deployments; warn and
            # fall back to a built-in template with the same placeholders.
            logger.warning(
                "Failed to load rerank prompt from %s: %s. Using default.",
                self.prompt_path, e,
            )
            return (
                "Rank the following passages based on relevance to the query: {query}.\n"
                "Passages:\n{candidates}\n"
                "Return a JSON list of indices of the top passages, e.g. [0, 2, 1]."
            )

    @staticmethod
    def _extract_text(cand: Any) -> str:
        """Best-effort extraction of display text from a candidate.

        Tries 'page_content' (LangChain-style), then a 'text' attribute, then a
        'text' dict key, then falls back to str(). Always returns a str,
        truncated to _SNIPPET_LEN chars with newlines flattened.
        """
        if hasattr(cand, "page_content"):
            text = cand.page_content
        elif hasattr(cand, "text"):
            text = cand.text
        elif isinstance(cand, dict) and "text" in cand:
            text = cand["text"]
        else:
            text = cand
        # Coerce to str so slicing/replace never raises on non-string payloads.
        return str(text)[:LLMReranker._SNIPPET_LEN].replace("\n", " ")

    def rerank(
        self,
        query: str,
        candidates: List[Any],
        top_k: Optional[int] = None,
        trace: Optional[Any] = None,
    ) -> List[Any]:
        """
        Rerank candidates using the LLM.

        Args:
            query: The search query.
            candidates: List of candidate objects.
            top_k: Number of results to return (None means all; 0 means none).
            trace: Trace context (unused here; kept for interface compatibility).

        Returns:
            Reordered list of candidates. Candidates the LLM omitted are
            appended in original order to preserve recall. On any LLM/parsing
            failure the original order is returned.
        """
        if not candidates:
            return []

        # Format candidates as "[i] snippet" lines for the prompt.
        candidates_str = "\n".join(
            f"[{i}] {self._extract_text(cand)}" for i, cand in enumerate(candidates)
        )

        # Construct the prompt; fall back to a plain layout if the template is broken.
        try:
            prompt = self._prompt_template.format(
                query=query, candidates=candidates_str
            )
        except KeyError as e:
            logger.error(
                "Prompt template format error: %s. Missing keys in template.", e
            )
            prompt = f"Query: {query}\nCandidates:\n{candidates_str}\nRank them as JSON list of indices."

        try:
            messages = [{"role": "user", "content": prompt}]
            response = self.llm.chat(messages)
            ranked_indices = self._parse_indices(response)

            if not ranked_indices:
                logger.warning(
                    "LLM returned no valid indices. Returning original order."
                )
                # NOTE: 'is not None' so top_k=0 correctly yields an empty list.
                return candidates[:top_k] if top_k is not None else candidates

            # Keep only in-range indices, dropping duplicates while preserving
            # the LLM's first-occurrence order.
            seen_indices = set()
            ordered_indices = []
            for i in ranked_indices:
                if 0 <= i < len(candidates) and i not in seen_indices:
                    seen_indices.add(i)
                    ordered_indices.append(i)

            ranked_results = [candidates[i] for i in ordered_indices]
            # Append candidates the LLM omitted, in original order (recall safety net).
            ranked_results.extend(
                candidates[i] for i in range(len(candidates)) if i not in seen_indices
            )

            return ranked_results[:top_k] if top_k is not None else ranked_results
        except Exception as e:
            # Best-effort component: any LLM/transport failure degrades to the
            # original ranking rather than propagating.
            logger.error("LLM reranking failed: %s", e)
            return candidates[:top_k] if top_k is not None else candidates

    def _parse_indices(self, response: str) -> List[int]:
        """Parse a JSON list of integer indices from the LLM response.

        Handles markdown code fences (``` / ```json) and ignores any non-int
        entries. Returns [] when nothing parseable is found.
        """
        try:
            # Strip markdown code fences if present.
            clean_response = response.strip()
            if clean_response.startswith("```"):
                clean_response = clean_response.split("```")[1]
                if clean_response.startswith("json"):
                    clean_response = clean_response[4:]
            # '*' (not '+') so an empty list '[]' is also matched.
            match = re.search(r"\[[\d,\s]*\]", clean_response)
            if match:
                parsed = json.loads(match.group(0))
                # Guard against non-int entries sneaking through json.loads.
                return [i for i in parsed if isinstance(i, int)]
        except Exception as e:
            logger.debug(
                "Failed to parse indices from response: %s... Error: %s",
                response[:100], e,
            )
        return []