RAG MCP Server

ingest.py•2.37 kB

# ingest.py
"""Ingest the PDF files in ``data/`` into a persistent Chroma collection.

The script runs top to bottom when executed:

1. Load environment variables from a ``.env`` file.
2. Verify that the data directory exists and is not empty.
3. Create the Google Generative AI embeddings model and open the Chroma
   vector store persisted under ``chroma_db/``.
4. Load the documents, split them into overlapping chunks, and add the
   chunks to the vector store under freshly generated UUIDs.

Every failure path prints a diagnostic to stderr and exits with status 1.
"""
import os
import sys
from uuid import uuid4

from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings  # Changed from OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Import the .env file.
load_dotenv()

# Configuration.
# Both paths are plain relative paths, so they resolve against the current
# working directory: run this script from inside rag-mcp-app.
DATA_PATH = r"data"
CHROMA_PATH = r"chroma_db"

# Ensure the data directory exists and is populated. isdir() (rather than
# exists()) also rejects a regular file named "data", which would otherwise
# make os.listdir() raise NotADirectoryError.
if not os.path.isdir(DATA_PATH) or not os.listdir(DATA_PATH):
    print(
        f"Error: Data directory '{DATA_PATH}' not found or is empty.",
        file=sys.stderr,
    )
    print(
        "Please ensure you have copied your PDF files into the 'data' directory within 'rag-mcp-app'.",
        file=sys.stderr,
    )
    # sys.exit() is used instead of the site-provided exit() helper, which
    # is intended for the interactive shell and is not always available.
    sys.exit(1)

# Initiate the embeddings model using Google Generative AI.
# Ensure your GOOGLE_API_KEY is set in your .env file.
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Ensure the chroma_db directory exists.
os.makedirs(CHROMA_PATH, exist_ok=True)

# Initiate the vector store.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings_model,
    persist_directory=CHROMA_PATH,
)

# Load the PDF documents.
print(f"Loading documents from: {DATA_PATH}")
loader = PyPDFDirectoryLoader(DATA_PATH)
try:
    raw_documents = loader.load()
except Exception as e:  # top-level boundary: report any loader failure and stop
    print(f"Error loading documents: {e}", file=sys.stderr)
    sys.exit(1)
else:
    print(f"Loaded {len(raw_documents)} documents.")

# Split the documents: chunks of at most 300 characters (measured with len)
# that overlap by 100 characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)

# Create the chunks.
print("Splitting documents into chunks...")
chunks = text_splitter.split_documents(raw_documents)
print(f"Created {len(chunks)} chunks.")

# Nothing to store (for example, PDFs without extractable text). Stop here
# with a clear message instead of handing an empty batch to the vector store.
if not chunks:
    print(
        "Error: No text chunks were produced from the loaded documents; "
        "nothing to add to ChromaDB.",
        file=sys.stderr,
    )
    sys.exit(1)

# Create one unique ID per chunk.
# NOTE(review): the IDs are random on every run, so re-running ingestion
# against an existing chroma_db presumably stores the same chunks again
# under new IDs - confirm whether that is intended.
uuids = [str(uuid4()) for _ in chunks]

# Add the chunks to the vector store.
print(f"Adding {len(chunks)} chunks to ChromaDB at '{CHROMA_PATH}'...")
try:
    vector_store.add_documents(documents=chunks, ids=uuids)
except Exception as e:  # top-level boundary: report any storage failure and stop
    print(f"Error adding documents to ChromaDB: {e}", file=sys.stderr)
    sys.exit(1)
else:
    print("Successfully added documents to ChromaDB.")

print("Ingestion complete.")

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Alex-ChanHC/rag-mcp-app'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.