Writer MCP

text_processing.py•8.67 KiB

"""Text processing utilities for Writer MCP.""" import re from typing import List, Optional, Set from collections import Counter from ..utils.logger import get_logger logger = get_logger(__name__) def clean_text(text: str) -> str: """Clean and normalize text. Args: text: Text to clean Returns: Cleaned text """ if not isinstance(text, str): return "" # Remove extra whitespace text = re.sub(r'\s+', ' ', text) # Remove leading/trailing whitespace text = text.strip() return text def extract_keywords(text: str, min_length: int = 3, max_keywords: int = 20) -> List[str]: """Extract keywords from text. Args: text: Text to extract keywords from min_length: Minimum keyword length max_keywords: Maximum number of keywords to return Returns: List of extracted keywords """ if not isinstance(text, str) or not text.strip(): return [] # Convert to lowercase text = text.lower() # Remove punctuation and split into words words = re.findall(r'\b[a-zA-Z]+\b', text) # Filter by length and remove common stop words stop_words = { 'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'must', 'can', 'this', 'that', 'these', 'those', 'i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'us', 'them', 'my', 'your', 'his', 'her', 'its', 'our', 'their' } filtered_words = [ word for word in words if len(word) >= min_length and word not in stop_words ] # Count word frequency word_counts = Counter(filtered_words) # Return most common words keywords = [word for word, _ in word_counts.most_common(max_keywords)] return keywords def generate_summary(text: str, max_sentences: int = 3) -> str: """Generate a simple extractive summary of text. 
Args: text: Text to summarize max_sentences: Maximum number of sentences in summary Returns: Generated summary """ if not isinstance(text, str) or not text.strip(): return "" # Split into sentences sentences = re.split(r'[.!?]+', text) sentences = [s.strip() for s in sentences if s.strip()] if len(sentences) <= max_sentences: return text # Simple scoring based on sentence length and position scored_sentences = [] for i, sentence in enumerate(sentences): # Score based on length (prefer medium-length sentences) length_score = min(len(sentence.split()) / 20.0, 1.0) # Score based on position (prefer earlier sentences) position_score = 1.0 - (i / len(sentences)) total_score = length_score * 0.7 + position_score * 0.3 scored_sentences.append((sentence, total_score)) # Sort by score and take top sentences scored_sentences.sort(key=lambda x: x[1], reverse=True) top_sentences = [s[0] for s in scored_sentences[:max_sentences]] # Maintain original order summary_sentences = [] for sentence in sentences: if sentence in top_sentences: summary_sentences.append(sentence) if len(summary_sentences) >= max_sentences: break return '. '.join(summary_sentences) + '.' def calculate_text_similarity(text1: str, text2: str) -> float: """Calculate similarity between two texts using Jaccard similarity. 
Args: text1: First text text2: Second text Returns: Similarity score between 0.0 and 1.0 """ if not isinstance(text1, str) or not isinstance(text2, str): return 0.0 # Extract keywords from both texts keywords1 = set(extract_keywords(text1)) keywords2 = set(extract_keywords(text2)) if not keywords1 and not keywords2: return 1.0 if text1.strip() == text2.strip() else 0.0 if not keywords1 or not keywords2: return 0.0 # Calculate Jaccard similarity intersection = len(keywords1.intersection(keywords2)) union = len(keywords1.union(keywords2)) return intersection / union if union > 0 else 0.0 def extract_character_mentions(text: str, character_names: List[str]) -> List[str]: """Extract mentions of character names from text. Args: text: Text to search character_names: List of character names to look for Returns: List of mentioned character names """ if not isinstance(text, str) or not character_names: return [] mentioned = [] text_lower = text.lower() for name in character_names: if isinstance(name, str) and name.strip(): # Create regex pattern for whole word matching pattern = r'\b' + re.escape(name.lower()) + r'\b' if re.search(pattern, text_lower): mentioned.append(name) return mentioned def tokenize_text(text: str) -> List[str]: """Tokenize text into words. Args: text: Text to tokenize Returns: List of tokens """ if not isinstance(text, str): return [] # Extract words (letters only) tokens = re.findall(r'\b[a-zA-Z]+\b', text.lower()) return tokens def calculate_readability_score(text: str) -> float: """Calculate a simple readability score for text. 
Args: text: Text to analyze Returns: Readability score (higher = more readable) """ if not isinstance(text, str) or not text.strip(): return 0.0 # Split into sentences and words sentences = re.split(r'[.!?]+', text) sentences = [s.strip() for s in sentences if s.strip()] words = tokenize_text(text) if not sentences or not words: return 0.0 # Calculate metrics avg_sentence_length = len(words) / len(sentences) avg_word_length = sum(len(word) for word in words) / len(words) # Simple readability score (lower sentence length and word length = higher score) # Normalize to 0-1 range sentence_score = max(0, 1 - (avg_sentence_length - 10) / 20) word_score = max(0, 1 - (avg_word_length - 4) / 6) return (sentence_score + word_score) / 2 def find_text_patterns(text: str, patterns: List[str]) -> List[tuple[str, List[str]]]: """Find specific patterns in text. Args: text: Text to search patterns: List of regex patterns to find Returns: List of (pattern, matches) tuples """ if not isinstance(text, str) or not patterns: return [] results = [] for pattern in patterns: try: matches = re.findall(pattern, text, re.IGNORECASE) if matches: results.append((pattern, matches)) except re.error as e: logger.warning(f"Invalid regex pattern '{pattern}': {e}") return results def normalize_whitespace(text: str) -> str: """Normalize whitespace in text. Args: text: Text to normalize Returns: Text with normalized whitespace """ if not isinstance(text, str): return "" # Replace multiple whitespace characters with single space text = re.sub(r'\s+', ' ', text) # Remove leading/trailing whitespace text = text.strip() return text def truncate_text(text: str, max_length: int, suffix: str = "...") -> str: """Truncate text to maximum length. 
Args: text: Text to truncate max_length: Maximum length suffix: Suffix to add if truncated Returns: Truncated text """ if not isinstance(text, str): return "" if len(text) <= max_length: return text # Try to truncate at word boundary truncated = text[:max_length - len(suffix)] last_space = truncated.rfind(' ') if last_space > max_length * 0.7: # If we can find a reasonable word boundary truncated = truncated[:last_space] return truncated + suffix def extract_quoted_text(text: str) -> List[str]: """Extract quoted text from a string. Args: text: Text to search Returns: List of quoted strings """ if not isinstance(text, str): return [] # Find text in double quotes double_quoted = re.findall(r'"([^"]+)"', text) # Find text in single quotes single_quoted = re.findall(r"'([^']+)'", text) return double_quoted + single_quoted

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/huangjien/writer-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

text_processing.py•8.67 KiB