milvus-sdk-code-helper

md_2_embedding.py•3.07 KiB

import os
import sys
from pathlib import Path

import pandas as pd
from openai import OpenAI

# Add the parent directory to import the retry decorator
sys.path.append(str(Path(__file__).parent.parent.parent / "mcp_pymilvus_code_generate_helper"))
from retry_decorator import openai_retry

# Column order of the generated CSV files.
_COLUMNS = ["metadata", "content", "embedding", "file_name"]


@openai_retry
def _create_embedding_with_retry(client, content):
    """
    Create an embedding for a single piece of content.

    The call is wrapped by ``openai_retry`` (imported from ``retry_decorator``),
    which supplies the retry behaviour.

    Args:
        client: Client object exposing ``embeddings.create``
        content: Text to embed

    Returns:
        The embedding of the first item in the API response.
    """
    response = client.embeddings.create(model="text-embedding-3-small", input=content)
    return response.data[0].embedding


def _find_markdown_files(docs_dir_path):
    """Return the paths of all ``.md`` files under docs_dir_path, sorted."""
    found = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(docs_dir_path)
        for name in names
        if name.endswith(".md")
    ]
    # os.walk order depends on the filesystem; sort so the row order of the
    # output CSV is reproducible between runs.
    return sorted(found)


def generate_embeddings(docs_dir_path, save_file_name):
    """
    Generate embeddings for markdown files in the specified directory.

    Every ``.md`` file found recursively under ``docs_dir_path`` is read and
    embedded as a whole. Files that cannot be read, are empty, or whose
    embedding request fails are reported on stdout and skipped. After every
    10 successfully embedded documents the rows collected so far are written
    to ``embeddings_temp.csv`` in the current working directory.

    Args:
        docs_dir_path: Path to the directory containing markdown files
        save_file_name: Name of the output CSV file

    Returns:
        ``save_file_name``, after the CSV has been written.
    """
    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    markdown_files = _find_markdown_files(docs_dir_path)
    total = len(markdown_files)
    print(f"Found {total} Markdown files to process")

    # Rows are collected in a plain list and turned into a DataFrame only when
    # writing, instead of growing a DataFrame one row at a time.
    rows = []

    for file_index, markdown_path in enumerate(markdown_files):
        print(f"Processing file [{file_index + 1}/{total}]: {markdown_path}")
        file_name = os.path.basename(markdown_path)

        try:
            with open(markdown_path, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            print(f"Failed to read file: {e}")
            continue

        print(f"Content length: {len(content)} characters")

        if not content:
            # The embeddings endpoint rejects empty input, so do not spend
            # retries on a request that cannot succeed.
            print(f"Skipping empty file: {markdown_path}")
            continue

        try:
            print("Generating embedding...")
            embedding = _create_embedding_with_retry(client, content)
        except Exception as e:
            # Best effort: one failing document must not abort the whole run.
            print(f"Failed to generate embedding after all retries: {e}")
            continue

        print(f"Generation successful! Embedding dimension: {len(embedding)}")
        rows.append(
            {
                "metadata": "",
                "content": content,
                "embedding": embedding,
                "file_name": file_name,
            }
        )

        processed_count = len(rows)
        if processed_count % 10 == 0:
            # Periodic checkpoint so a crash late in the run keeps partial results.
            temp_path = "embeddings_temp.csv"
            pd.DataFrame(rows, columns=_COLUMNS).to_csv(temp_path, index=False)
            print(
                f"Saved temporary results to: {temp_path}, processed {processed_count} documents so far"
            )

    print("All documents process finished.")

    df = pd.DataFrame(rows, columns=_COLUMNS)
    df.to_csv(save_file_name, index=False)

    print(f"Successfully generated embeddings and saved to: {save_file_name}")
    print(f"Dataset size: {len(df)} rows x {len(df.columns)} columns")

    return save_file_name


if __name__ == "__main__":
    # Example usage
    docs_dir_path = "/Users/zilliz/Downloads/web-content-master/API_Reference/pymilvus/v2.5.x/MilvusClient"
    save_file_name = "MilvusClient.csv"
    generate_embeddings(docs_dir_path, save_file_name)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Shawnzheng011019/milvus-sdk-code-helper'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

md_2_embedding.py•3.07 KiB