Archive Agent

AiRerank.py•6.48 KiB

#  Copyright © 2025 Dr.-Ing. Paul Wilhelm <paul@wilhelm.dev>
#  This file is part of Archive Agent. See LICENSE for details.

from collections import Counter
from typing import List, Tuple

from pydantic import BaseModel, ConfigDict


# Structured-output schema for the reranking response.
# NOTE: Documented with comments rather than a docstring so the generated JSON schema stays unchanged.
class RerankSchema(BaseModel):
    # Chunk indices ordered by descending relevance; empty when the query is rejected.
    reranked_indices: List[int]
    # True when no chunk is relevant to the question.
    is_rejected: bool
    # Short reason for the rejection; empty string when not rejected.
    rejection_reason: str

    model_config = ConfigDict(extra='forbid')  # Ensures additionalProperties: false — DO NOT REMOVE THIS


class AiRerank:
    """
    Prompt construction and result sanitation for reranking retrieved chunks.
    """

    @staticmethod
    def get_prompt_rerank(question: str, indexed_chunks_json_text: str) -> str:
        """
        Build the reranking prompt.

        :param question: Question the chunks are ranked against.
        :param indexed_chunks_json_text: Chunk texts, already serialized as a JSON array.
        :returns: Prompt text (instructions, the chunk array, then the question in triple quotes).
        """
        return "\n".join([
            "Act as a reranking agent for a semantic retrieval system (Retrieval-Augmented Generation / RAG).",
            "Your task is to assess the semantic relevance of each chunk to the question.",
            "You are given a list of text chunks as a JSON array, where each array index corresponds to the chunk index.",
            "You must output a JSON object with the exact fields described below.",
            "Do not return any explanations, commentary, or additional fields. Output only the JSON.",
            "",
            "RESPONSE FIELDS:",
            "",
            "- `reranked_indices`:",
            " A list of integer indices.",
            " If `is_rejected` is false, this list MUST include ALL indices from 0 to (number of chunks - 1) exactly once,",
            " sorted by descending relevance (most relevant first).",
            " Do not omit any indices; include even low-relevance ones at the end. Omitting indices will cause errors.",
            " If `is_rejected` is true, this must be an empty list [].",
            "",
            "- `is_rejected`:",
            " A Boolean flag. Set to true ONLY if NONE of the chunks contain ANY relevant information to the question",
            " (e.g., all chunks are completely unrelated to the question's topic, or the list is empty).",
            " A chunk is relevant if it provides any information that directly helps answer the question or offers useful context.",
            " If at least one chunk has any degree of relevance (even partial),",
            " set to false and include ALL indices in reranked_indices, sorted by relevance.",
            "",
            "- `rejection_reason`:",
            " A short, factual reason for rejection.",
            " Include this ONLY if `is_rejected` is true. If `is_rejected` is false, set to empty string ''.",
            " Examples: 'All chunks are entirely unrelated to the question', 'Chunk list is empty',",
            " 'No relevant content in any chunk'.",
            "",
            "RERANKING RULES:",
            "- Consider only the provided chunk texts and the question.",
            "- Assess semantic relevance, not superficial similarity. Relevance means the chunk helps in answering the question.",
            "- If several chunks are equally relevant, preserve their original order.",
            "- IMPORTANT: Never return a partial list of indices when is_rejected is false. Always include all or none.",
            "",
            "EXAMPLE 1 (no relevant chunks):",
            "{\"reranked_indices\": [], \"is_rejected\": true, \"rejection_reason\": \"All chunks are unrelated to the question\"}",
            "",
            "EXAMPLE 2 (some relevant chunks):",
            "{\"reranked_indices\": [2, 0, 1], \"is_rejected\": false, \"rejection_reason\": \"\"}",
            "",
            "Chunks (JSON array):\n" + indexed_chunks_json_text,
            "",
            "Question:\n\"\"\"\n" + question + "\n\"\"\"",
        ])

    @staticmethod
    def validate_permutation(original: List[int], reranked: List[int]) -> Tuple[bool, List[int], List[int], List[int]]:
        """
        Validate that *reranked* is a permutation of *original*.

        :param original: Original index list.
        :param reranked: Proposed reranked index list.
        :returns: Tuple (is_valid, missing, extra, out_of_range) where:
            - is_valid: True iff sorted(original) == sorted(reranked)
            - missing: elements in original but not in reranked
            - extra: elements in reranked but not in original
            - out_of_range: elements in reranked not in the closed interval [min(original), max(original)]
        """
        original_set = set(original)
        reranked_set = set(reranked)

        missing = sorted(original_set - reranked_set)
        extra = sorted(reranked_set - original_set)

        if original:
            lo, hi = min(original), max(original)
            out_of_range = sorted(i for i in reranked if i < lo or i > hi)
        else:
            # No interval can be derived from an empty original.
            out_of_range = []

        # Compare as multisets, exactly as documented. A set-plus-length check would wrongly
        # accept e.g. original=[0, 0, 1] with reranked=[0, 1, 1].
        is_valid = sorted(original) == sorted(reranked)

        return is_valid, missing, extra, out_of_range

    @staticmethod
    def repair_permutation(original: List[int], reranked: List[int]) -> List[int]:
        """
        Attempt to repair a non-permutation rerank result into a valid permutation.

        Strategy
        --------
        1) Drop surplus duplicates while preserving first occurrence order.
        2) Remove any indices not present in *original* (this also covers out-of-range indices).
        3) Append all *missing* indices (those in *original* but missing from *reranked*)
           in the order they appear in *original*.

        This keeps the LLM's preference ordering as much as possible while ensuring
        a correct permutation of *original*.

        :param original: Original index list.
        :param reranked: Proposed reranked index list.
        :returns: A repaired list that is a permutation of *original*
                  (each value appears exactly as often as it does in *original*).
        """
        # How many more times each value may still be emitted.
        # Looking up an absent key on a Counter yields 0 without inserting it.
        remaining = Counter(original)

        # 1) + 2) Keep the proposed order, skipping unknown values and surplus repeats.
        repaired: List[int] = []
        for i in reranked:
            if remaining[i] > 0:
                remaining[i] -= 1
                repaired.append(i)

        # 3) Append whatever is still owed, in original order.
        for i in original:
            if remaining[i] > 0:
                remaining[i] -= 1
                repaired.append(i)

        return repaired

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/shredEngineer/Archive-Agent'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

AiRerank.py•6.48 KiB