
Obsidian Elite RAG MCP Server

find_datasources_prompt.py (5.66 kB)
"""Find data sources prompt implementation.""" import os from pathlib import Path from typing import List, Optional async def find_datasources(directory_path: str = ".") -> str: """Discover available data files and present them as load options.""" try: # Get the current working directory or specified path current_dir = Path(directory_path).resolve() # Find .csv and .json files csv_files = list(current_dir.glob("*.csv")) json_files = list(current_dir.glob("*.json")) # Also check common data subdirectories data_subdirs = ["data", "datasets", "files"] subdir_files = [] for subdir in data_subdirs: subdir_path = current_dir / subdir if subdir_path.exists() and subdir_path.is_dir(): subdir_csv = list(subdir_path.glob("*.csv")) subdir_json = list(subdir_path.glob("*.json")) if subdir_csv or subdir_json: subdir_files.append((subdir, subdir_csv + subdir_json)) # Build the prompt response prompt = f"""📁 **Data Source Discovery: {current_dir.name}** Looking for data files in: `{current_dir}` """ # Current directory files if csv_files or json_files: prompt += f"**📊 Data files found in current directory:**\n\n" all_current_files = sorted(csv_files + json_files, key=lambda x: x.name.lower()) for file_path in all_current_files: file_size = file_path.stat().st_size size_str = format_file_size(file_size) file_type = file_path.suffix.upper()[1:] # Remove the dot # Generate suggested dataset name (filename without extension) suggested_name = file_path.stem.lower().replace(" ", "_").replace("-", "_") prompt += f"• **{file_path.name}** ({file_type}, {size_str})\n" prompt += f" → `load_dataset('{file_path}', '{suggested_name}')`\n\n" # Subdirectory files if subdir_files: prompt += f"**📂 Data files found in subdirectories:**\n\n" for subdir_name, files in subdir_files: prompt += f"**{subdir_name}/ directory:**\n" sorted_files = sorted(files, key=lambda x: x.name.lower()) for file_path in sorted_files: file_size = file_path.stat().st_size size_str = format_file_size(file_size) file_type = file_path.suffix.upper()[1:] # Generate suggested dataset name suggested_name = file_path.stem.lower().replace(" ", "_").replace("-", "_") prompt += f" • **{file_path.name}** ({file_type}, {size_str})\n" prompt += f" → `load_dataset('{file_path}', '{suggested_name}')`\n" prompt += "\n" # No files found if not csv_files and not json_files and not subdir_files: prompt += f"""**❌ No data files found** No .csv or .json files were found in: • Current directory: `{current_dir}` • Common data subdirectories: {', '.join(data_subdirs)} **💡 Suggestions:** • Check if you're in the correct directory • Look for data files with different extensions • Create sample data files for testing • Download sample datasets from online sources **🔍 Manual file search:** You can also manually specify file paths: • `load_dataset('path/to/your/file.csv', 'my_dataset')` • `load_dataset('path/to/your/file.json', 'my_dataset')` """ else: # Add usage instructions total_files = len(csv_files) + len(json_files) + sum(len(files) for _, files in subdir_files) prompt += f"""**🚀 Ready to load data!** Found **{total_files} data file(s)** ready for analysis. **Next steps:** 1. Copy one of the `load_dataset()` commands above 2. Run it to load your data into memory 3. 
Start exploring with `dataset_first_look('dataset_name')` **💡 Pro tips:** • Choose descriptive dataset names for easier reference • Larger files may take longer to load • You can load multiple datasets simultaneously • Use `list_loaded_datasets()` to see what's currently loaded **🔧 Advanced loading options:** • Sample large datasets: `load_dataset('file.csv', 'name', sample_size=1000)` • Custom paths: `load_dataset('/full/path/to/file.csv', 'name')` """ return prompt except Exception as e: return f"""**❌ Error discovering data sources** Failed to scan directory: {str(e)} **💡 Troubleshooting:** • Check if the directory path exists and is accessible • Ensure you have read permissions for the directory • Try specifying a different directory path • Use absolute paths if relative paths aren't working **Manual alternative:** If automatic discovery isn't working, you can still load data manually: `load_dataset('your_file.csv', 'dataset_name')` """ def format_file_size(size_bytes: int) -> str: """Convert file size in bytes to human readable format.""" if size_bytes == 0: return "0 B" size_names = ["B", "KB", "MB", "GB"] i = 0 size = float(size_bytes) while size >= 1024.0 and i < len(size_names) - 1: size /= 1024.0 i += 1 if i == 0: return f"{int(size)} {size_names[i]}" else: return f"{size:.1f} {size_names[i]}"
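
For local testing outside an MCP host, the prompt function can be driven directly with `asyncio`. The snippet below is a minimal sketch, assuming the file above is importable as `find_datasources_prompt`; the directory argument and the `format_file_size` spot checks are illustrative only.

```python
# Minimal local test harness for the prompt above (not part of the server).
# Assumes find_datasources_prompt.py is on the import path.
import asyncio

from find_datasources_prompt import find_datasources, format_file_size


async def main() -> None:
    # Scan the current directory plus the data/, datasets/, and files/ subdirs
    print(await find_datasources("."))


if __name__ == "__main__":
    # format_file_size is a pure helper, so it can be spot-checked directly
    assert format_file_size(0) == "0 B"
    assert format_file_size(512) == "512 B"
    assert format_file_size(5_660) == "5.5 KB"

    asyncio.run(main())
```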


MCP directory API

We provide all the information about MCP servers via our MCP API.

```bash
curl -X GET 'https://glama.ai/api/mcp/v1/servers/aegntic/aegntic-MCP'
```
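
The same endpoint can be queried from Python with only the standard library. This is a minimal sketch; it assumes the endpoint returns a JSON document describing the server (the exact response schema is not shown here).

```python
# Minimal sketch: fetch one server's metadata from the Glama MCP API.
# Assumes the endpoint returns JSON; the response schema is not verified here.
import json
import urllib.request

URL = "https://glama.ai/api/mcp/v1/servers/aegntic/aegntic-MCP"

with urllib.request.urlopen(URL) as resp:
    server_info = json.load(resp)

print(json.dumps(server_info, indent=2))
```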

If you have feedback or need assistance with the MCP directory API, please join our Discord server.