utils.py
from typing import Any, Dict, List

from langchain_core.messages import AIMessage, AnyMessage, HumanMessage


def get_research_topic(messages: List[AnyMessage]) -> str:
    """Get the research topic from the messages."""
    # Check if the request has a history and combine the messages into a single string.
    if len(messages) == 1:
        research_topic = messages[-1].content
    else:
        research_topic = ""
        for message in messages:
            if isinstance(message, HumanMessage):
                research_topic += f"User: {message.content}\n"
            elif isinstance(message, AIMessage):
                research_topic += f"Assistant: {message.content}\n"
    return research_topic


def resolve_urls(urls_to_resolve: List[Any], id: int) -> Dict[str, str]:
    """
    Create a map of the Vertex AI Search URLs (very long) to a short URL with a
    unique id for each URL. Ensures each original URL gets a consistent
    shortened form while maintaining uniqueness.
    """
    prefix = "https://vertexaisearch.cloud.google.com/id/"
    urls = [site["web"]["uri"] for site in urls_to_resolve]

    # Map each unique URL to a short URL based on its first occurrence index.
    resolved_map = {}
    for idx, url in enumerate(urls):
        if url not in resolved_map:
            resolved_map[url] = f"{prefix}{id}-{idx}"

    return resolved_map


def insert_citation_markers(text, citations_list):
    """
    Inserts citation markers into a text string based on start and end indices.

    Args:
        text (str): The original text string.
        citations_list (list): A list of dictionaries, where each dictionary
                               contains 'start_index', 'end_index', and
                               'segments' (the markers to insert). Indices are
                               assumed to be for the original text.

    Returns:
        str: The text with citation markers inserted.
    """
    # Sort citations by end_index in descending order.
    # If end_index is the same, use start_index descending as a secondary sort.
    # This ensures that insertions near the end of the string don't shift the
    # indices of earlier parts of the string that still need to be processed.
    sorted_citations = sorted(
        citations_list, key=lambda c: (c["end_index"], c["start_index"]), reverse=True
    )

    modified_text = text
    for citation_info in sorted_citations:
        # These indices refer to positions in the *original* text, but since
        # we iterate from the end, they remain valid insertion points for the
        # parts of the string not yet processed.
        end_idx = citation_info["end_index"]
        marker_to_insert = ""
        for segment in citation_info["segments"]:
            marker_to_insert += f" [{segment['label']}]({segment['short_url']})"
        # Insert the citation marker at the original end_idx position.
        modified_text = (
            modified_text[:end_idx] + marker_to_insert + modified_text[end_idx:]
        )

    return modified_text


def get_citations(response, resolved_urls_map):
    """
    Extracts and formats citation information from a Gemini model's response.

    This function processes the grounding metadata provided in the response to
    construct a list of citation objects. Each citation object includes the
    start and end indices of the text segment it refers to, and a list of
    link data for the supporting web chunks.

    Args:
        response: The response object from LangChain's ChatGoogleGenerativeAI,
                  expected to have a structure including
                  response_metadata["grounding_metadata"].
        resolved_urls_map: A dictionary mapping original URLs to resolved
                           (shortened) URLs.

    Returns:
        list: A list of dictionaries, where each dictionary represents a
              citation and has the following keys:
              - "start_index" (int): The starting character index of the cited
                segment in the original text. Defaults to 0 if not specified.
              - "end_index" (int): The character index immediately after the
                end of the cited segment (exclusive).
              - "segments" (list[dict]): One dictionary per grounding chunk,
                with "label", "short_url", and "value" keys used to build a
                markdown link.
              Returns an empty list if no valid grounding supports are found,
              or if essential data is missing.
    """
    citations = []

    # Ensure the response and the necessary nested structures are present.
    if not response:
        return citations
    if (
        "grounding_metadata" not in response.response_metadata
        or not response.response_metadata["grounding_metadata"]
        or "grounding_supports" not in response.response_metadata["grounding_metadata"]
    ):
        return citations

    grounding_metadata = response.response_metadata["grounding_metadata"]

    for support in grounding_metadata["grounding_supports"]:
        citation = {}

        # Skip this support if segment information is missing.
        if "segment" not in support or not support["segment"]:
            continue

        start_index = (
            support["segment"]["start_index"]
            if support["segment"]["start_index"] is not None
            else 0
        )

        # end_index is required to form a valid segment; skip if missing.
        if support["segment"]["end_index"] is None:
            continue

        # end_index is treated as an exclusive bound, matching how
        # insert_citation_markers slices the text.
        citation["start_index"] = start_index
        citation["end_index"] = support["segment"]["end_index"]

        citation["segments"] = []
        if "grounding_chunk_indices" in support and support["grounding_chunk_indices"]:
            for ind in support["grounding_chunk_indices"]:
                try:
                    chunk = grounding_metadata["grounding_chunks"][ind]
                    resolved_url = resolved_urls_map.get(chunk["web"]["uri"], None)
                    citation["segments"].append(
                        {
                            # Drop the last dot-separated part of the title
                            # (e.g. "example.com" -> "example") to use as a
                            # short label.
                            "label": chunk["web"]["title"].split(".")[:-1][0],
                            "short_url": resolved_url,
                            "value": chunk["web"]["uri"],
                        }
                    )
                except (IndexError, KeyError, AttributeError):
                    # Handle cases where the chunk, web, uri, or title are
                    # missing or malformed. For simplicity, skip adding this
                    # particular segment link; a production system might log it.
                    pass

        citations.append(citation)

    return citations

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/alexcong/gemini-deepsearch-mcp'
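
The same lookup can be done from Python; the sketch below assumes only that the endpoint returns JSON and makes no assumptions about particular response fields.

import json
import urllib.request

# Fetch and pretty-print the directory entry for this server.
url = "https://glama.ai/api/mcp/v1/servers/alexcong/gemini-deepsearch-mcp"
with urllib.request.urlopen(url) as resp:
    print(json.dumps(json.load(resp), indent=2))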

If you have feedback or need assistance with the MCP directory API, please join our Discord server.