Skip to main content
Glama
indexer.py (5.1 kB)
"""Indexer for Jekyll blog posts.""" from pathlib import Path from typing import List, Dict, Any from collections import defaultdict from jekyll_mcp.parser import PostParser class PostIndexer: """Index Jekyll blog posts for searching.""" def __init__(self, posts_dir: Path, drafts_dir: Path = None): """ Initialize the indexer. Args: posts_dir: Path to _posts directory drafts_dir: Optional path to _drafts directory """ self.posts_dir = Path(posts_dir) self.drafts_dir = Path(drafts_dir) if drafts_dir else None self.parser = PostParser() self.posts = [] self.categories = defaultdict(int) self.tags = defaultdict(int) def index_all(self): """Index all posts and drafts.""" self._index_directory(self.posts_dir, published=True) if self.drafts_dir and self.drafts_dir.exists(): self._index_directory(self.drafts_dir, published=False) self._build_category_tag_counts() def _index_directory(self, directory: Path, published: bool): """Index posts from a directory.""" for file_path in directory.iterdir(): if self.parser.is_valid_post(file_path): try: post_data = self.parser.parse_post(file_path) post_data['published'] = published self.posts.append(post_data) except Exception as e: print(f"Error parsing {file_path}: {e}") def _build_category_tag_counts(self): """Build category and tag counts.""" for post in self.posts: metadata = post['metadata'] # Handle categories (can be list or single string) categories = metadata.get('categories', []) if isinstance(categories, str): categories = [categories] for cat in categories: self.categories[cat] += 1 # Handle tags (can be list or single string) tags = metadata.get('tags', []) if isinstance(tags, str): tags = [tags] for tag in tags: self.tags[tag] += 1 def search_posts( self, query: str = None, category: str = None, tags: List[str] = None, limit: int = 10, published_only: bool = True ) -> List[Dict[str, Any]]: """ Search for posts. 
Args: query: Search term (searches in title, content, and metadata) category: Filter by category tags: Filter by tags limit: Maximum number of results published_only: Only return published posts Returns: List of matching posts with metadata """ results = [] for post in self.posts: # Skip drafts if published_only if published_only and not post['published']: continue # Filter by category if category: post_categories = post['metadata'].get('categories', []) if isinstance(post_categories, str): post_categories = [post_categories] if category not in post_categories: continue # Filter by tags if tags: post_tags = post['metadata'].get('tags', []) if isinstance(post_tags, str): post_tags = [post_tags] if not any(tag in post_tags for tag in tags): continue # Search query if query: query_lower = query.lower() searchable_text = ( post['metadata'].get('title', '').lower() + ' ' + post['content'].lower() + ' ' + post['metadata'].get('slug', '').lower() ) if query_lower not in searchable_text: continue results.append(self._format_result(post)) if len(results) >= limit: break return results def get_post_by_slug(self, slug: str) -> Dict[str, Any]: """Get a post by its slug.""" for post in self.posts: if post['metadata'].get('slug') == slug: return post return None def _format_result(self, post: Dict[str, Any]) -> Dict[str, Any]: """Format a post for return.""" metadata = post['metadata'] content = post['content'] # Create excerpt (first 200 chars) excerpt = content[:200] + '...' if len(content) > 200 else content return { 'title': metadata.get('title', 'Untitled'), 'slug': metadata.get('slug', ''), 'date': str(metadata.get('date', '')), 'categories': metadata.get('categories', []), 'tags': metadata.get('tags', []), 'file_path': post['file_path'], 'published': post['published'], 'excerpt': excerpt }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/jottinger/jekyll-mcp-server'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.