We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/entanglr/mediawiki-api-mcp'
If you have feedback or need assistance with the MCP directory API, please join our Discord server
"""MediaWiki page parsing handlers for MCP server."""
import logging
from collections.abc import Sequence
from typing import Any
import mcp.types as types
from ..client import MediaWikiClient
logger = logging.getLogger(__name__)
async def handle_parse_page(
client: MediaWikiClient,
arguments: dict[str, Any]
) -> Sequence[types.TextContent]:
"""Handle wiki_page_parse tool calls with comprehensive Parse API support."""
# Extract arguments with defaults
title = arguments.get("title")
pageid = arguments.get("pageid")
oldid = arguments.get("oldid")
text = arguments.get("text")
revid = arguments.get("revid")
summary = arguments.get("summary")
page = arguments.get("page")
redirects = arguments.get("redirects", False)
prop = arguments.get("prop")
wrapoutputclass = arguments.get("wrapoutputclass")
usearticle = arguments.get("usearticle", False)
parsoid = arguments.get("parsoid", False)
pst = arguments.get("pst", False)
onlypst = arguments.get("onlypst", False)
section = arguments.get("section")
sectiontitle = arguments.get("sectiontitle")
disablelimitreport = arguments.get("disablelimitreport", False)
disableeditsection = arguments.get("disableeditsection", False)
disablestylededuplication = arguments.get("disablestylededuplication", False)
showstrategykeys = arguments.get("showstrategykeys", False)
preview = arguments.get("preview", False)
sectionpreview = arguments.get("sectionpreview", False)
disabletoc = arguments.get("disabletoc", False)
useskin = arguments.get("useskin")
contentformat = arguments.get("contentformat")
contentmodel = arguments.get("contentmodel")
mobileformat = arguments.get("mobileformat", False)
templatesandboxprefix = arguments.get("templatesandboxprefix")
templatesandboxtitle = arguments.get("templatesandboxtitle")
templatesandboxtext = arguments.get("templatesandboxtext")
templatesandboxcontentmodel = arguments.get("templatesandboxcontentmodel")
templatesandboxcontentformat = arguments.get("templatesandboxcontentformat")
# Validate that at least one content source is provided
content_sources = [title, pageid, oldid, text, page, summary]
if not any(content_sources):
return [types.TextContent(
type="text",
text="Error: Must provide one of: title, pageid, oldid, text, page, or summary"
)]
# Special handling for summary-only parsing
if summary and not any([title, pageid, oldid, text, page]):
# Summary-only parsing requires empty prop parameter according to API docs
if prop is None:
prop = []
logger.debug(f"Summary-only parsing requested: {summary}")
# Convert prop to list if it's a string
if prop and isinstance(prop, str):
prop = [p.strip() for p in prop.split("|") if p.strip()]
# Convert templatesandboxprefix to list if it's a string
if templatesandboxprefix and isinstance(templatesandboxprefix, str):
templatesandboxprefix = [p.strip() for p in templatesandboxprefix.split("|") if p.strip()]
# Convert title parameter to page parameter for consistency with MediaWiki API
# The Parse API primarily uses 'page' parameter, not 'title'
if title and not page:
page = title
title = None
try:
# Track if this is a summary-only parsing request for fallback handling
is_summary_only = summary and not any([title, pageid, oldid, text, page])
result = await client.parse_page(
title=title,
pageid=pageid,
oldid=oldid,
text=text,
revid=revid,
summary=summary,
page=page,
redirects=redirects,
prop=prop,
wrapoutputclass=wrapoutputclass,
usearticle=usearticle,
parsoid=parsoid,
pst=pst,
onlypst=onlypst,
section=section,
sectiontitle=sectiontitle,
disablelimitreport=disablelimitreport,
disableeditsection=disableeditsection,
disablestylededuplication=disablestylededuplication,
showstrategykeys=showstrategykeys,
preview=preview,
sectionpreview=sectionpreview,
disabletoc=disabletoc,
useskin=useskin,
contentformat=contentformat,
contentmodel=contentmodel,
mobileformat=mobileformat,
templatesandboxprefix=templatesandboxprefix,
templatesandboxtitle=templatesandboxtitle,
templatesandboxtext=templatesandboxtext,
templatesandboxcontentmodel=templatesandboxcontentmodel,
templatesandboxcontentformat=templatesandboxcontentformat
)
# If this was a summary-only parsing request and we got minimal content,
# try a fallback approach by parsing the summary as regular text
if is_summary_only and "parse" in result:
parse_data = result["parse"]
if "text" in parse_data:
text_content = parse_data["text"]
if isinstance(text_content, dict) and "*" in text_content:
text_content = text_content["*"]
# Check for minimal content (empty or very minimal parser output)
if _is_minimal_content(text_content):
logger.warning(f"Summary parsing returned minimal content, attempting fallback for: {summary}")
# Try parsing the summary as regular text instead
try:
fallback_result = await client.parse_page(
text=summary,
contentmodel="wikitext",
prop=["text", "categories", "links", "templates", "parsewarnings"],
disablelimitreport=disablelimitreport,
disableeditsection=disableeditsection
)
if "parse" in fallback_result:
fallback_parse = fallback_result["parse"]
if "text" in fallback_parse:
fallback_text = fallback_parse["text"]
if isinstance(fallback_text, dict) and "*" in fallback_text:
fallback_text = fallback_text["*"]
# If fallback has better content, use it with a note
if not _is_minimal_content(fallback_text):
logger.info("Fallback summary parsing succeeded, using text parsing approach")
# Update the result with fallback data but preserve original structure
parse_data["text"] = fallback_parse["text"]
# Add other useful properties from fallback if available
for prop_name in ["categories", "links", "templates", "parsewarnings"]:
if prop_name in fallback_parse:
parse_data[prop_name] = fallback_parse[prop_name]
# Add a note about the fallback
if "parsewarnings" not in parse_data:
parse_data["parsewarnings"] = []
parse_data["parsewarnings"].append("Note: Used text parsing fallback due to summary parsing issue")
except Exception as fallback_error:
logger.warning(f"Fallback summary parsing also failed: {fallback_error}")
# Continue with original result
# Handle API errors
if "error" in result:
error_info = result["error"]
if isinstance(error_info, dict):
error_code = error_info.get("code", "unknown")
error_message = error_info.get("info", "Unknown error")
return [types.TextContent(
type="text",
text=f"MediaWiki API Error ({error_code}): {error_message}"
)]
else:
# Handle case where error is a string
return [types.TextContent(
type="text",
text=f"MediaWiki API Error: {error_info}"
)]
# Handle warnings
warning_text = None
if "warnings" in result:
warnings = result["warnings"]
warning_messages = []
for key, warning in warnings.items():
if isinstance(warning, dict) and "*" in warning:
warning_messages.append(f"{key}: {warning['*']}")
else:
warning_messages.append(f"{key}: {warning}")
if warning_messages:
warning_text = "API Warnings:\n" + "\n".join(warning_messages)
if "parse" not in result:
# Enhanced error reporting for debugging
response_keys = list(result.keys())
# Create detailed error message based on response content
error_details = []
error_details.append(f"Response keys: {response_keys}")
# Check for common error patterns and provide specific guidance
if "query" in result:
error_details.append("Note: Received 'query' response - this indicates the wrong API endpoint was used")
error_details.append("The Parse API should return a 'parse' key, not 'query'")
# Check for missing pages in query response
if isinstance(result.get("query", {}).get("pages", {}), dict):
pages = result["query"]["pages"]
missing_pages = [page for page in pages.values() if page.get("missing", False)]
if missing_pages:
error_details.append(f"Note: {len(missing_pages)} page(s) marked as missing in query response")
elif "missing" in result:
error_details.append("Note: Page is marked as missing in the response")
elif "badtitle" in str(result).lower():
error_details.append("Note: Response suggests the page title may be invalid")
elif any(key in result for key in ["nosuchsection", "invalidsection"]):
error_details.append("Note: The specified section does not exist or is invalid")
elif "invalidparammix" in str(result).lower():
error_details.append("Note: Invalid parameter combination detected")
error_details.append("Check that conflicting parameters (page/title/text/oldid) are not used together")
# Add parameter information for debugging
used_params = []
for param in ["title", "pageid", "oldid", "text", "page", "summary"]:
value = arguments.get(param)
if value:
used_params.append(f"{param}={repr(value)}")
if used_params:
error_details.append(f"Used parameters: {', '.join(used_params)}")
# Add full response for debugging in case of unknown errors
if len(str(result)) < 500: # Only include full response if it's not too long
error_details.append(f"Full response: {result}")
error_message = "Error: Unexpected response format from Parse API.\n" + "\n".join(error_details)
return [types.TextContent(
type="text",
text=error_message
)]
return await _format_parse_result(result, prop, warning_text)
except Exception as e:
return [types.TextContent(
type="text",
text=f"Error parsing content: {str(e)}"
)]
async def _format_parse_result(
result: dict[str, Any],
requested_prop: list[str] | None,
warning_text: str | None = None
) -> Sequence[types.TextContent]:
"""Format the parse result into a readable response."""
parse_data = result["parse"]
# Build the response header
title = parse_data.get("title", "Unknown")
pageid = parse_data.get("pageid", "Unknown")
revid = parse_data.get("revid", "Unknown")
response_lines = [
f"Parse Results for: {title}",
f"Page ID: {pageid}",
f"Revision ID: {revid}",
""
]
# Show warnings if any
if warning_text:
response_lines.append(warning_text)
response_lines.append("")
# Show which properties were requested
if requested_prop:
response_lines.append(f"Requested properties: {', '.join(requested_prop)}")
response_lines.append("")
# Format each available property
formatted_sections = []
# Main content (text)
if "text" in parse_data:
text_content = parse_data["text"]
if isinstance(text_content, dict) and "*" in text_content:
text_content = text_content["*"]
# Check for minimal content issue mentioned in bug report
if text_content and len(text_content.strip()) > 0:
# Check if it's just an empty div (common issue) or if fallback was used
is_minimal = _is_minimal_content(text_content)
if is_minimal:
# This looks like minimal content - add warning
formatted_sections.append(_format_section("Parsed HTML",
f"WARNING: Content appears minimal. This may indicate a summary parsing issue.\n\n{text_content}"))
else:
formatted_sections.append(_format_section("Parsed HTML", text_content))
else:
formatted_sections.append(_format_section("Parsed HTML",
"WARNING: No HTML content returned. The page may be empty or there may be a parsing issue."))
else:
# Check if this was an existing page request but no text was returned
if any(parse_data.get(key) for key in ["pageid", "title"]) and parse_data.get("pageid", 0) > 0:
formatted_sections.append(_format_section("Parsed HTML",
"WARNING: No text content in parse result for existing page. This may indicate the page is empty or a parsing error occurred."))
# Wikitext
if "wikitext" in parse_data:
wikitext_content = parse_data["wikitext"]
if isinstance(wikitext_content, dict) and "*" in wikitext_content:
wikitext_content = wikitext_content["*"]
formatted_sections.append(_format_section("Wikitext", wikitext_content))
# Categories
if "categories" in parse_data:
categories = parse_data["categories"]
if categories:
cat_list = [cat.get("*", cat.get("category", str(cat))) for cat in categories]
formatted_sections.append(_format_section("Categories", "\n".join(cat_list)))
# Links
if "links" in parse_data:
links = parse_data["links"]
if links:
link_list = [link.get("*", link.get("title", str(link))) for link in links]
formatted_sections.append(_format_section("Internal Links", "\n".join(link_list)))
# Templates
if "templates" in parse_data:
templates = parse_data["templates"]
if templates:
template_list = [tmpl.get("*", tmpl.get("title", str(tmpl))) for tmpl in templates]
formatted_sections.append(_format_section("Templates", "\n".join(template_list)))
# Images
if "images" in parse_data:
images = parse_data["images"]
if images:
image_list = [img if isinstance(img, str) else str(img) for img in images]
formatted_sections.append(_format_section("Images", "\n".join(image_list)))
# External links
if "externallinks" in parse_data:
external_links = parse_data["externallinks"]
if external_links:
link_list = [link if isinstance(link, str) else str(link) for link in external_links]
formatted_sections.append(_format_section("External Links", "\n".join(link_list)))
# Sections
if "sections" in parse_data:
sections = parse_data["sections"]
if sections:
section_list = []
for section in sections:
level = section.get("level", "")
line = section.get("line", "")
section_list.append(f"Level {level}: {line}")
formatted_sections.append(_format_section("Sections", "\n".join(section_list)))
# Language links
if "langlinks" in parse_data:
langlinks = parse_data["langlinks"]
if langlinks:
lang_list = []
for link in langlinks:
lang = link.get("lang", "")
title = link.get("*", link.get("title", ""))
lang_list.append(f"{lang}: {title}")
formatted_sections.append(_format_section("Language Links", "\n".join(lang_list)))
# Interwiki links
if "iwlinks" in parse_data:
iwlinks = parse_data["iwlinks"]
if iwlinks:
iw_list = []
for link in iwlinks:
prefix = link.get("prefix", "")
title = link.get("*", link.get("title", ""))
iw_list.append(f"{prefix}: {title}")
formatted_sections.append(_format_section("Interwiki Links", "\n".join(iw_list)))
# Properties
if "properties" in parse_data:
properties = parse_data["properties"]
if properties:
prop_list = []
for prop in properties:
name = prop.get("name", "")
value = prop.get("*", prop.get("value", ""))
prop_list.append(f"{name}: {value}")
formatted_sections.append(_format_section("Properties", "\n".join(prop_list)))
# Parse warnings
if "parsewarnings" in parse_data:
warnings = parse_data["parsewarnings"]
if warnings:
warning_list = [warning if isinstance(warning, str) else str(warning) for warning in warnings]
formatted_sections.append(_format_section("Parse Warnings", "\n".join(warning_list)))
# Display title
if "displaytitle" in parse_data:
display_title = parse_data["displaytitle"]
formatted_sections.append(_format_section("Display Title", display_title))
# Combine all sections
if formatted_sections:
response_lines.extend(formatted_sections)
else:
response_lines.append("No content available in the parsed output.")
return [types.TextContent(
type="text",
text="\n".join(response_lines)
)]
def _is_minimal_content(content: str) -> bool:
"""
Check if content appears to be minimal/empty, indicating a parsing issue.
This detects cases where the MediaWiki API returns nearly empty content,
which often indicates that the summary parsing didn't work properly.
"""
if not content or len(content.strip()) == 0:
return True
# Check for common minimal content patterns
content_lower = content.lower().strip()
# Empty parser wrapper divs with minimal content
if ('<div class="mw-' in content_lower and
content_lower.count('<') <= 3 and
len(content.strip()) < 200):
return True
# Just whitespace or basic HTML structure
if content_lower in ['<p></p>', '<div></div>', ''] or content_lower.isspace():
return True
# Check for content that's basically just a parser wrapper with no actual content
# Remove common wrapper patterns and see if there's actual content left
import re
# Remove parser wrapper divs
cleaned = re.sub(r'<div[^>]*class="[^"]*mw-[^"]*"[^>]*>', '', content)
cleaned = re.sub(r'</div>', '', cleaned)
# Remove empty paragraphs and whitespace
cleaned = re.sub(r'<p[^>]*></p>', '', cleaned)
cleaned = re.sub(r'\s+', ' ', cleaned).strip()
# If after removing wrappers there's very little content, it's likely minimal
return len(cleaned) < 10
def _format_section(title: str, content: str) -> str:
"""Format a section with a title and content."""
if not content:
return f"## {title}\n(No content)\n"
# Truncate very long content for readability
if len(content) > 5000:
content = content[:5000] + "\n... (content truncated)"
return f"## {title}\n{content}\n"