Skip to main content
Glama

Scrapy MCP Server

by ThreeFish-AI
analyze_structure.py2.64 kB
#!/usr/bin/env python3 """Analyze the structure issues in current markdown output.""" def analyze_markdown_structure(): """Analyze the current markdown structure to identify issues.""" with open("langchain_blog_simple.md", "r", encoding="utf-8") as f: content = f.read() lines = content.split("\n") print("🔍 Analyzing current markdown structure:") print("=" * 60) # Identify problematic lines (very long lines that should be split) long_lines = [] for i, line in enumerate(lines): if len(line) > 200 and line.strip(): long_lines.append((i + 1, len(line), line[:100] + "...")) print(f"📏 Long lines (>200 chars): {len(long_lines)}") for line_num, length, preview in long_lines[:10]: print(f" Line {line_num:3d} ({length:4d} chars): {preview}") # Look for sentences that should be separate paragraphs print(f"\n🔤 Checking for sentence boundaries:") sentence_issues = [] for i, line in enumerate(lines): if line.strip(): # Look for multiple sentences in one line sentence_endings = line.count(".") + line.count("!") + line.count("?") if sentence_endings > 1 and len(line) > 100: sentence_issues.append((i + 1, sentence_endings, line[:100] + "...")) print(f"📝 Lines with multiple sentences: {len(sentence_issues)}") for line_num, count, preview in sentence_issues[:10]: print(f" Line {line_num:3d} ({count} sentences): {preview}") # Check for missing paragraph breaks around key patterns print(f"\n🎯 Checking for missing paragraph breaks:") patterns_to_check = [ ("Headers/Titles", r"[A-Z][a-z]+ [A-Z][a-z]+[A-Z]"), ("Questions", r"\?[A-Z]"), ("Topic transitions", r"\. [A-Z][a-z]+ (is|are|can|will|should)"), ] import re for pattern_name, pattern in patterns_to_check: matches = [] for i, line in enumerate(lines): if re.search(pattern, line): matches.append((i + 1, line[:80] + "...")) print(f" {pattern_name}: {len(matches)} potential issues") for line_num, preview in matches[:5]: print(f" Line {line_num:3d}: {preview}") print(f"\n📊 Summary:") print(f" Total lines: {len(lines)}") print(f" Empty lines: {len([l for l in lines if not l.strip()])}") print(f" Content lines: {len([l for l in lines if l.strip()])}") print(f" Long lines (>200): {len(long_lines)}") print(f" Multi-sentence lines: {len(sentence_issues)}") if __name__ == "__main__": analyze_markdown_structure()

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ThreeFish-AI/scrapy-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server