XMind AI MCP

universal_xmind_converter.py•29.7 KiB

#!/usr/bin/env python3
"""
Universal XMind Converter

A Python tool that converts multiple file formats (TXT, HTML, Word, Excel, Markdown) to XMind mind map format.
Supports automatic format detection and extensible parser architecture.

Version: 2.0
"""

import json
import re
import os
from datetime import datetime
from pathlib import Path
import mimetypes
from xmind_writer import create_xmind_file

# 依赖改为懒加载，避免在模块导入时引入重量库
def _lazy_import_docx():
    try:
        import importlib
        return importlib.import_module('docx')
    except Exception as e:
        raise ImportError("需要安装python-docx: pip install python-docx") from e

def _lazy_import_openpyxl():
    try:
        import importlib
        return importlib.import_module('openpyxl')
    except Exception as e:
        raise ImportError("需要安装openpyxl: pip install openpyxl") from e

def _lazy_import_beautifulsoup():
    try:
        from bs4 import BeautifulSoup
        return BeautifulSoup
    except Exception as e:
        raise ImportError("需要安装BeautifulSoup4: pip install beautifulsoup4") from e


def escape_xml_text(text):
    """Replace the five XML special characters in *text* with entities.

    Args:
        text: the string to escape; any falsy value yields ``""``.

    Returns:
        The escaped string.
    """
    if not text:
        return ""
    # A single translate pass never re-escapes the '&' of an entity it just
    # produced, which matches replacing '&' first in a chain of replaces.
    entity_for = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&apos;",
    }
    return text.translate(str.maketrans(entity_for))


def generate_id():
    """Return a fresh random identifier: a UUID4 as 32 hex digits, no dashes."""
    from uuid import uuid4

    return uuid4().hex


def create_json_structure(title, children):
    """Build the sheet dictionary that wraps a topic tree.

    Args:
        title: title of the root (central) topic.
        children: list of topic dicts attached directly to the root topic.

    Returns:
        A dict with the keys ``id``, ``class``, ``rootTopic``, ``title``,
        ``extensions`` and ``theme``; every ``id`` comes from ``generate_id()``.
    """

    def branch_properties(font_size, line_width):
        # mainTopic and subTopic share every property except these two.
        return {
            "fo:font-family": "Droid Serif",
            "fo:font-size": font_size,
            "fo:font-weight": "400",
            "fo:font-style": "normal",
            "fo:color": "inherited",
            "fo:text-transform": "manual",
            "fo:text-decoration": "none",
            "fo:text-align": "left",
            "svg:fill": "inherited",
            "fill-pattern": "none",
            "line-width": line_width,
            "line-color": "inherited",
            "line-pattern": "inherited",
            "border-line-color": "inherited",
            "border-line-width": "0pt",
            "border-line-pattern": "inherited",
            "shape-class": "org.xmind.topicShape.roundedRect",
            "line-class": "org.xmind.branchConnection.roundedElbow",
            "arrow-end-class": "inherited"
        }

    root_topic = {
        "id": generate_id(),
        "class": "topic",
        "title": title,
        "structureClass": "org.xmind.ui.logic.right",
        "children": {"attached": children}
    }

    extensions = [{
        "provider": "org.xmind.ui.skeleton.structure.style",
        "content": {"centralTopic": "org.xmind.ui.logic.right"}
    }]

    map_style = {
        "id": generate_id(),
        "properties": {
            "svg:fill": "#ffffff",
            "multi-line-colors": "#F9423A #F6A04D #F3D321 #00BC7B #486AFF #4D49BE",
            "color-list": "#000229 #1F2766 #52CC83 #4D86DB #99142F #245570",
            "line-tapered": "none"
        }
    }

    central_style = {
        "id": generate_id(),
        "properties": {
            "fo:font-family": "Droid Serif",
            "fo:font-size": "30pt",
            "fo:font-weight": "400",
            "fo:font-style": "normal",
            "fo:color": "inherited",
            "fo:text-transform": "manual",
            "fo:text-decoration": "none",
            "fo:text-align": "center",
            "svg:fill": "#000229",
            "fill-pattern": "solid",
            "line-width": "3pt",
            "line-color": "#000229",
            "line-pattern": "solid",
            "border-line-color": "inherited",
            "border-line-width": "3pt",
            "border-line-pattern": "inherited",
            "shape-class": "org.xmind.topicShape.roundedRect",
            "line-class": "org.xmind.branchConnection.roundedElbow",
            "arrow-end-class": "org.xmind.arrowShape.none",
            "alignment-by-level": "inactived"
        }
    }

    main_style = {
        "id": generate_id(),
        "properties": branch_properties("18pt", "inherited")
    }

    sub_style = {
        "id": generate_id(),
        "properties": branch_properties("14pt", "2pt")
    }

    theme = {
        "map": map_style,
        "centralTopic": central_style,
        "mainTopic": main_style,
        "subTopic": sub_style,
        "skeletonThemeId": "c1fbada1b45ba2e3bfc3b8b57b",
        "colorThemeId": "Rainbow-#000229-MULTI_LINE_COLORS"
    }

    return {
        "id": generate_id(),
        "class": "sheet",
        "rootTopic": root_topic,
        "title": "画布 1",
        "extensions": extensions,
        "theme": theme
    }


def create_topic(title, children=None):
    """Build a topic node dict with a fresh id.

    Args:
        title: text of the topic.
        children: optional list of child topic dicts; when empty or ``None``
            the node carries no ``children`` key.

    Returns:
        A dict with ``id`` and ``title``, plus ``children.attached`` when
        children were supplied.
    """
    node = {"id": generate_id(), "title": title}
    if not children:
        return node

    node["children"] = {"attached": children}
    return node


# ==============================================
# 文件格式解析器基类
# ==============================================

class BaseParser:
    """Common base of the format parsers; remembers the input file path."""

    def __init__(self, file_path):
        # Path of the document to convert; subclasses read it in parse().
        self.file_path = file_path

    def parse(self):
        """Parse the file and return the sheet structure.

        Raises:
            NotImplementedError: always; subclasses must override this method.
        """
        raise NotImplementedError("子类必须实现parse方法")

    def extract_title(self, content):
        """Return the default title: the file name without its final suffix.

        ``content`` is not used by this base implementation.
        """
        source = Path(self.file_path)
        return source.stem


# ==============================================
# Markdown解析器
# ==============================================

class MarkdownParser(BaseParser):
    """Parser that converts Markdown ATX headings into a topic tree.

    The first level-1 heading becomes the map title; every other heading
    (levels 1-6) becomes a topic nested under the closest preceding heading
    of a shallower level.
    """

    # ATX heading: one to six '#', a single space, then the heading text.
    _HEADING_RE = re.compile(r'^(#{1,6}) (.*)$')

    def _iter_headings(self, lines):
        """Yield ``(level, text)`` for each heading outside fenced code blocks."""
        in_fence = False
        for raw_line in lines:
            line = raw_line.strip()
            # Lines such as "# comment" inside ``` / ~~~ fences are code,
            # not headings, so track fence state and skip their content.
            if line.startswith(('```', '~~~')):
                in_fence = not in_fence
                continue
            if in_fence:
                continue
            match = self._HEADING_RE.match(line)
            if match:
                yield len(match.group(1)), match.group(2).strip()

    def parse(self):
        """Parse the Markdown file and return the sheet structure.

        Returns:
            The dict produced by ``create_json_structure``.
        """
        # utf-8-sig drops a leading BOM, which would otherwise hide a heading
        # on the first line; files without a BOM decode exactly as utf-8.
        with open(self.file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        lines = content.split('\n')
        title = self.extract_title_from_content(lines)

        topics_by_level = {}  # heading level -> most recent topic on the current path
        root_children = []
        title_extracted = False

        for level, title_text in self._iter_headings(lines):
            # The first level-1 heading is the document title, not a node.
            if level == 1 and not title_extracted:
                title_extracted = True
                continue

            topic = create_topic(title_text)

            # A new heading closes every branch at its own level or deeper;
            # forgetting them prevents later headings from attaching to a
            # topic that belongs to an earlier branch.
            for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
                del topics_by_level[stale_level]
            topics_by_level[level] = topic

            # Attach to the nearest shallower heading still open, else to the root.
            parent_topic = None
            for parent_level in range(level - 1, 0, -1):
                if parent_level in topics_by_level:
                    parent_topic = topics_by_level[parent_level]
                    break

            if parent_topic is None:
                root_children.append(topic)
            else:
                parent_topic.setdefault("children", {"attached": []})
                parent_topic["children"]["attached"].append(topic)

        return create_json_structure(title, root_children)

    def extract_title_from_content(self, lines):
        """Return the text of the first level-1 heading, or the file stem.

        Uses the same heading detection as ``parse`` so that the heading
        skipped there is always the one returned here.
        """
        for level, text in self._iter_headings(lines):
            if level == 1:
                return text
        return Path(self.file_path).stem

    def parse_markdown_to_json(self, markdown_file):
        """Legacy alias of ``parse``; ``markdown_file`` is ignored."""
        return self.parse()


# ==============================================
# 文本大纲解析器
# ==============================================

class TextOutlineParser(BaseParser):
    """Parser for plain-text outlines whose hierarchy is expressed by indentation."""

    # One tab is treated as this many columns, i.e. exactly one level.
    _TAB_WIDTH = 4
    # Leading bullet markers (and spaces around them) removed from item text.
    _BULLET_CHARS = '-*• '

    def parse(self):
        """Parse the outline file and return the sheet structure.

        The first line equal to the extracted title is skipped; every other
        non-empty line becomes a topic whose depth follows its indentation.

        Returns:
            The dict produced by ``create_json_structure``.
        """
        # utf-8-sig drops a leading BOM so it cannot end up inside the title.
        with open(self.file_path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()

        title = self.extract_title(lines)

        topics_by_level = {}  # level -> most recent topic on the current path
        root_children = []
        title_found = False

        for line in lines:
            line = line.rstrip('\n\r')
            if not line.strip():
                continue

            stripped_line = line.lstrip()
            # Skip the (first) line that supplied the title.
            if not title_found and stripped_line.strip() == title.strip():
                title_found = True
                continue

            # Measure indentation in columns so that a tab counts as a full
            # level instead of a single character.
            leading = line[:len(line) - len(stripped_line)]
            indent_width = len(leading.expandtabs(self._TAB_WIDTH))
            level = self.indent_to_level(indent_width)

            # Only strip bullet markers on the left; the right side may
            # legitimately end in '-' or '*'.
            title_text = stripped_line.lstrip(self._BULLET_CHARS).rstrip()
            if not title_text:
                continue

            topic = create_topic(title_text)

            # Close every branch at this level or deeper so later items
            # cannot attach to a topic from an earlier branch.
            for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
                del topics_by_level[stale_level]
            topics_by_level[level] = topic

            # Attach to the nearest shallower item still open, else to the root.
            parent_topic = None
            for parent_level in range(level - 1, -1, -1):
                if parent_level in topics_by_level:
                    parent_topic = topics_by_level[parent_level]
                    break

            if parent_topic is None:
                root_children.append(topic)
            else:
                parent_topic.setdefault("children", {"attached": []})
                parent_topic["children"]["attached"].append(topic)

        return create_json_structure(title, root_children)

    def indent_to_level(self, indent):
        """Convert an indentation width in columns to a level (4 columns each)."""
        return indent // 4

    def extract_title(self, lines):
        """Return the first non-empty line that is not a bullet item.

        Falls back to the file name without suffix when no such line exists.
        """
        for line in lines:
            stripped = line.strip()
            if stripped and not stripped.startswith(('-', '*', '•')):
                return stripped
        return Path(self.file_path).stem


# ==============================================
# HTML解析器
# ==============================================

class HtmlParser(BaseParser):
    """Parser that builds a topic tree from HTML headings, or lists as a fallback."""

    _HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    _LIST_TAGS = ['ul', 'ol']

    def __init__(self, file_path):
        super().__init__(file_path)

    def parse(self):
        """Parse the HTML file and return the sheet structure.

        Headings h1-h6 are read in document order and nested by level. When
        the document yields no heading topics, ``<ul>``/``<ol>`` lists are
        used instead.

        Returns:
            The dict produced by ``create_json_structure``.

        Raises:
            ImportError: if BeautifulSoup is not available.
        """
        with open(self.file_path, 'r', encoding='utf-8') as f:
            content = f.read()

        BeautifulSoup = _lazy_import_beautifulsoup()
        soup = BeautifulSoup(content, 'html.parser')
        title = self.extract_title(soup)

        root_children = []
        topics_by_level = {}  # level (h1=0) -> most recent topic on the current path

        # A single find_all over all heading tags returns them in document
        # order; iterating tag by tag would attach every h2 to the last h1.
        for heading in soup.find_all(self._HEADING_TAGS):
            title_text = heading.get_text().strip()
            if not title_text:
                continue

            level = int(heading.name[1]) - 1  # h1=0, h2=1, ...
            topic = create_topic(title_text)

            # Close every branch at this level or deeper.
            for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
                del topics_by_level[stale_level]
            topics_by_level[level] = topic

            # Attach to the nearest shallower heading; a heading without any
            # ancestor goes to the root instead of being dropped.
            parent_topic = None
            for parent_level in range(level - 1, -1, -1):
                if parent_level in topics_by_level:
                    parent_topic = topics_by_level[parent_level]
                    break

            if parent_topic is None:
                root_children.append(topic)
            else:
                parent_topic.setdefault("children", {"attached": []})
                parent_topic["children"]["attached"].append(topic)

        # No headings found: fall back to list structure.
        if not root_children:
            root_children = self.parse_lists(soup)

        return create_json_structure(title, root_children)

    def extract_title(self, soup):
        """Return the first non-empty of ``<title>`` text, first ``<h1>`` text, file stem."""
        for tag_name in ('title', 'h1'):
            tag = soup.find(tag_name)
            if tag is not None:
                text = tag.get_text().strip()
                if text:
                    return text
        return Path(self.file_path).stem

    def parse_lists(self, soup):
        """Convert top-level ``<ul>``/``<ol>`` lists into a list of topic dicts."""
        list_tags = self._LIST_TAGS

        def own_text(item):
            # Text that belongs to this <li> itself: strings whose nearest
            # <li> ancestor is this item, which excludes nested-list items.
            return ''.join(
                s for s in item.strings if s.find_parent('li') is item
            ).strip()

        def parse_list_items(items):
            topics = []
            for item in items:
                # Collect children from every list nested directly in this item.
                sub_topics = []
                for sublist in item.find_all(list_tags):
                    if sublist.find_parent('li') is not item:
                        continue
                    sub_items = sublist.find_all('li', recursive=False)
                    sub_topics.extend(parse_list_items(sub_items))

                text = own_text(item)
                if text:
                    topic = create_topic(text)
                    if sub_topics:
                        topic["children"] = {"attached": sub_topics}
                    topics.append(topic)
                else:
                    # An item with no text of its own: keep its children by
                    # promoting them to this level.
                    topics.extend(sub_topics)
            return topics

        root_topics = []
        for list_tag in soup.find_all(list_tags):
            # Only lists that are not nested inside another list.
            if list_tag.find_parent(list_tags) is None:
                items = list_tag.find_all('li', recursive=False)
                if items:
                    root_topics.extend(parse_list_items(items))

        return root_topics


# ==============================================
# Word文档解析器
# ==============================================

class WordParser(BaseParser):
    """Parser that builds a topic tree from heading-styled Word paragraphs."""

    # Deepest level returned by get_heading_level (0-based).
    _MAX_LEVEL = 5
    # Lower-cased fragments that mark a paragraph style as a heading style.
    _HEADING_MARKERS = ('标题', 'heading', 'head', 'title')

    def __init__(self, file_path):
        super().__init__(file_path)

    def parse(self):
        """Parse the Word document and return the sheet structure.

        Paragraphs whose style is recognised by ``get_heading_level`` become
        topics; all other paragraphs are ignored.

        Returns:
            The dict produced by ``create_json_structure``.

        Raises:
            ImportError: if python-docx is not available.
        """
        docx = _lazy_import_docx()
        doc = docx.Document(self.file_path)
        title = self.extract_title(doc)

        root_children = []
        topics_by_level = {}  # level -> most recent topic on the current path

        for paragraph in doc.paragraphs:
            text = paragraph.text.strip()
            if not text:
                continue

            level = self.get_heading_level(paragraph)
            if level is None:
                continue

            topic = create_topic(text)

            # Close every branch at this level or deeper.
            for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
                del topics_by_level[stale_level]
            topics_by_level[level] = topic

            # Attach to the nearest shallower heading; a heading without any
            # ancestor goes to the root instead of being dropped.
            parent_topic = None
            for parent_level in range(level - 1, -1, -1):
                if parent_level in topics_by_level:
                    parent_topic = topics_by_level[parent_level]
                    break

            if parent_topic is None:
                root_children.append(topic)
            else:
                parent_topic.setdefault("children", {"attached": []})
                parent_topic["children"]["attached"].append(topic)

        return create_json_structure(title, root_children)

    def extract_title(self, doc):
        """Return the document title.

        Order of preference: the core-properties title, the first non-empty
        paragraph whose style name contains ``Heading``, the file stem.
        """
        if doc.core_properties.title:
            return doc.core_properties.title

        for paragraph in doc.paragraphs:
            if paragraph.style and 'Heading' in (paragraph.style.name or ''):
                text = paragraph.text.strip()
                if text:
                    return text

        return Path(self.file_path).stem

    def get_heading_level(self, paragraph):
        """Return the 0-based heading level of *paragraph*, or ``None``.

        A style counts as a heading style when its name contains one of the
        heading markers (case-insensitive). The level is the first number in
        the style name minus one ("Heading 2" -> 1, "标题 3" -> 2), clamped to
        ``0.._MAX_LEVEL``; heading styles without a number are level 0.
        """
        if not paragraph.style:
            return None

        style_name = paragraph.style.name or ''
        lowered = style_name.lower()
        if not any(marker in lowered for marker in self._HEADING_MARKERS):
            return None

        number = re.search(r'\d+', style_name)
        if number is None:
            return 0

        level = int(number.group(0)) - 1
        # Clamp both ends: "Heading 0" must not produce a negative level.
        return max(0, min(level, self._MAX_LEVEL))


# ==============================================
# Excel解析器
# ==============================================

class ExcelParser(BaseParser):
    """Parser that builds a topic tree from the rows of the active worksheet."""

    # Leading bullet markers (and spaces around them) removed from cell text.
    _BULLET_CHARS = '-*•→ '

    def __init__(self, file_path):
        super().__init__(file_path)

    @staticmethod
    def _is_blank(value):
        """Return True for an empty cell (``None`` or whitespace-only text)."""
        return value is None or (isinstance(value, str) and not value.strip())

    def parse(self):
        """Parse the workbook and return the sheet structure.

        Only the active worksheet is read. Each row with a non-empty first
        cell is passed to ``parse_row`` and becomes a topic at the returned
        level.

        Returns:
            The dict produced by ``create_json_structure``.

        Raises:
            ImportError: if openpyxl is not available.
        """
        openpyxl = _lazy_import_openpyxl()
        wb = openpyxl.load_workbook(self.file_path)

        ws = wb.active
        title = self.extract_title(ws)

        root_children = []
        current_topics = {}  # level -> most recent topic on the current path

        for row in ws.iter_rows(min_row=1, values_only=True):
            # Test for emptiness explicitly: a plain truthiness check would
            # also skip rows whose level cell holds the number 0.
            if not row or self._is_blank(row[0]):
                continue

            level, text = self.parse_row(row)
            if not text:
                continue

            topic = create_topic(text)

            # Close every branch at this level or deeper.
            for stale_level in [lvl for lvl in current_topics if lvl >= level]:
                del current_topics[stale_level]
            current_topics[level] = topic

            # Attach to the nearest shallower row; a row without any ancestor
            # goes to the root instead of being dropped.
            parent_topic = None
            for parent_level in range(level - 1, -1, -1):
                if parent_level in current_topics:
                    parent_topic = current_topics[parent_level]
                    break

            if parent_topic is None:
                root_children.append(topic)
            else:
                parent_topic.setdefault("children", {"attached": []})
                parent_topic["children"]["attached"].append(topic)

        return create_json_structure(title, root_children)

    def extract_title(self, worksheet):
        """Return the map title for *worksheet*.

        The worksheet name is used unless it is empty or the default
        ``Sheet1``; in that case the first truthy cell of row 1 is used, and
        the file stem if that row has none.
        """
        title = worksheet.title

        if not title or title == 'Sheet1':
            title = None
            for row in worksheet.iter_rows(min_row=1, max_row=1, values_only=True):
                first_value = next((cell for cell in row if cell), None)
                if first_value is not None:
                    title = str(first_value)
                    break

        return title or Path(self.file_path).stem

    def parse_row(self, row):
        """Return ``(level, text)`` for one worksheet row.

        Strategy 1 (two or more columns): the first column is the level and
        the second the text. A non-numeric first column with a filled second
        column yields level 0 and the second column's text.
        Strategy 2 (otherwise): the first column holds the text; its leading
        spaces give the level (two per level) and leading bullets are removed.
        """
        if not row:
            return 0, ""

        first = row[0]
        second = row[1] if len(row) >= 2 else None
        second_text = "" if second is None else str(second).strip()

        if len(row) >= 2:
            # Accept real numbers (bool excluded) and digit-only text as levels.
            is_number = isinstance(first, (int, float)) and not isinstance(first, bool)
            is_digit_text = isinstance(first, str) and first.strip().isdigit()
            if is_number or is_digit_text:
                try:
                    level = max(0, int(first))
                except (ValueError, OverflowError):
                    # NaN / infinity: not a usable level, try strategy 2.
                    level = None
                if level is not None:
                    return level, second_text
            elif second_text:
                return 0, second_text

        # Strategy 2: indentation and bullet markers inside the first column.
        raw = "" if first is None else str(first)
        stripped = raw.lstrip()
        level = (len(raw) - len(stripped)) // 2  # two spaces per level
        return level, stripped.lstrip(self._BULLET_CHARS).rstrip()


# ==============================================
# 解析器工厂
# ==============================================

class ParserFactory:
    """Factory that maps a file to the parser class able to read it."""

    # Extension (lower-case, with dot) -> parser class.
    # NOTE(review): '.xls' is routed to ExcelParser, which loads files through
    # openpyxl; openpyxl is expected to reject the legacy .xls format, so this
    # entry probably never succeeds — confirm and consider removing it.
    PARSERS = {
        '.md': MarkdownParser,
        '.markdown': MarkdownParser,
        '.txt': TextOutlineParser,
        '.text': TextOutlineParser,
        '.html': HtmlParser,
        '.htm': HtmlParser,
        '.docx': WordParser,
        '.xlsx': ExcelParser,
        '.xls': ExcelParser,
    }

    # MIME type -> extension, used by detect_format.
    _MIME_TO_EXT = {
        'text/markdown': '.md',
        'text/plain': '.txt',
        'text/html': '.html',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
    }

    @classmethod
    def detect_file_type(cls, file_path):
        """Return the extension that identifies the file's type.

        A file with an extension is identified by it (lower-cased). A file
        without one is sniffed from its first 1000 characters; unreadable or
        non-UTF-8 files are reported as ``'.txt'``.
        """
        ext = Path(file_path).suffix.lower()
        if ext:
            return ext

        try:
            # utf-8-sig drops a leading BOM that would defeat the prefix checks.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                content = f.read(1000)
        except (OSError, ValueError):
            # ValueError covers UnicodeDecodeError (binary / non-UTF-8 data).
            return '.txt'

        # HTML: doctype and tag names are case-insensitive.
        head = content.lstrip().lower()
        if head.startswith(('<!doctype', '<html')):
            return '.html'

        # Markdown: a real ATX heading ('#'..'######' followed by a space) in
        # the first 10 lines; a shebang or '#hashtag' does not qualify.
        first_lines = content.split('\n')[:10]
        if any(re.match(r'#{1,6} ', line.strip()) for line in first_lines):
            return '.md'

        # OPML (no parser is registered for it; get_parser will reject it).
        if '<opml' in content.lower():
            return '.opml'

        # Everything else is treated as a plain-text outline.
        return '.txt'

    @classmethod
    def get_parser(cls, file_path):
        """Return a parser instance for *file_path*.

        Raises:
            ValueError: if the detected type has no registered parser.
        """
        file_ext = cls.detect_file_type(file_path)

        parser_class = cls.PARSERS.get(file_ext)
        if parser_class is None:
            raise ValueError(f"不支持的文件格式: {file_ext}")

        return parser_class(file_path)

    @classmethod
    def get_supported_formats(cls):
        """Return the list of registered extensions."""
        return list(cls.PARSERS)

    @classmethod
    def detect_format(cls, file_path):
        """Return the detected format as an extension string.

        Order: a registered extension, then the MIME type guessed from the
        file name, then the same detection ``get_parser`` uses (so the two
        agree for files without an extension).
        """
        file_ext = Path(file_path).suffix.lower()
        if file_ext in cls.PARSERS:
            return file_ext

        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type:
            return cls._MIME_TO_EXT.get(mime_type, file_ext)

        return cls.detect_file_type(file_path)


# ==============================================
# XMind文件生成器（复用原有代码）
# ==============================================

# 写入器功能已迁移至 xmind_writer.py；保持对 create_xmind_file 的导入使用


# ==============================================
# 兼容性函数（从markdown_to_xmind_converter.py合并）
# ==============================================

def parse_markdown_to_json(markdown_file):
    """Legacy helper: parse *markdown_file* with ``MarkdownParser`` and return the result."""
    return MarkdownParser(markdown_file).parse()

def main():
    """Minimal command-line entry point: convert ``sys.argv[1]`` to an XMind file.

    The result is written to ``output/<input base name>.xmind``.

    NOTE: a later ``def main()`` in this file rebinds the name, so this
    version is only the one in effect until that definition is executed.

    Returns:
        int: 0 on success, 1 when the argument is missing, the input file
        does not exist, or the conversion fails.
    """
    import sys

    # Require the input path.
    if len(sys.argv) <= 1:
        print("[ERROR] 错误: 请提供输入文件路径")
        print("用法: python universal_xmind_converter.py <input_file>")
        print("支持的格式: .md, .txt, .html, .docx, .xlsx")
        return 1

    input_file = sys.argv[1]

    if not os.path.exists(input_file):
        print(f"[ERROR] 错误: 文件 '{input_file}' 不存在")
        return 1

    try:
        # get_parser is a classmethod; no factory instance is needed.
        parser = ParserFactory.get_parser(input_file)

        print(f"正在解析文件: {input_file}")
        json_structure = parser.parse()

        base_name = os.path.splitext(os.path.basename(input_file))[0]
        output_dir = "output"

        # exist_ok avoids the check-then-create race on the output directory.
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, f"{base_name}.xmind")

        print(f"正在创建XMind文件: {output_file}")
        create_xmind_file(json_structure, output_file)

        print("[SUCCESS] 转换完成！")
        print(f"输出文件: {output_file}")

    except Exception as e:
        # Top-level boundary: report any failure and signal it via the status.
        print(f"[ERROR] 转换失败: {str(e)}")
        return 1

    # Explicit success status, symmetrical with the error paths above.
    return 0


# NOTE: no entry-point guard here on purpose. The file ends with its own
# `if __name__ == '__main__':` guard after the final definition of main();
# a second guard at this position made every command-line run execute the
# conversion twice (once per main() definition).


# ==============================================
# 主函数
# ==============================================

def main():
    """Command-line entry point.

    Usage: ``python universal_xmind_converter.py <input> [output]``.
    Without arguments, or with ``-h`` / ``--help`` / ``help``, the usage text
    is printed. When no output path is given the result is written to
    ``output/<input base name>.xmind``.

    Returns:
        int: 0 after printing help or a successful conversion, 1 when the
        input file does not exist, a dependency is missing, or the
        conversion fails.
    """
    import sys

    # Help is shown both on request and when no input file was given.
    if len(sys.argv) < 2 or sys.argv[1] in ['-h', '--help', 'help']:
        print("=" * 60)
        print("Universal XMind Converter - 多格式思维导图转换器")
        print("=" * 60)
        print("\n支持的文件格式:")
        print("  • Markdown     (.md)     - 标题层级转换")
        print("  • 文本大纲     (.txt)    - 缩进格式大纲")
        print("  • HTML网页     (.html)   - 标题和列表结构")
        print("  • Word文档     (.docx)   - 标题样式转换")
        print("  • Excel表格    (.xlsx)   - 多列层级结构")
        print("\n使用方法:")
        print("  python universal_xmind_converter.py <输入文件> [输出文件]")
        print("\n示例:")
        print("  python universal_xmind_converter.py document.md")
        print("  python universal_xmind_converter.py outline.txt mymap.xmind")
        print("  python universal_xmind_converter.py data.xlsx")
        print("\n自动识别:")
        print("  无扩展名文件会自动检测格式")
        print("\n依赖安装:")
        print("  pip install beautifulsoup4 python-docx openpyxl")
        print("=" * 60)
        return 0

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    if not os.path.exists(input_file):
        print(f"[ERROR] 错误: 文件 '{input_file}' 不存在")
        return 1

    file_format = ParserFactory.detect_format(input_file)
    print(f"检测到文件格式: {file_format}")

    try:
        parser = ParserFactory.get_parser(input_file)
        print(f"使用解析器: {parser.__class__.__name__}")

        print(f"正在解析文件: {input_file}")
        json_structure = parser.parse()

        # Honour the output path given on the command line; only derive the
        # default location when none was supplied.
        if output_file is None:
            base_name = os.path.splitext(os.path.basename(input_file))[0]
            output_file = os.path.join("output", f"{base_name}.xmind")

        # Make sure the target directory exists (nothing to do for a bare
        # file name in the current directory).
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        print(f"正在创建XMind文件: {output_file}")
        create_xmind_file(json_structure, output_file)

        print("[SUCCESS] 转换完成！")
        print(f"📁 输出文件: {output_file}")

    except ImportError as e:
        print(f"[ERROR] 缺少依赖包: {e}")
        print("请安装相应的依赖包:")
        print("  pip install beautifulsoup4  # HTML解析")
        print("  pip install python-docx   # Word文档解析")
        print("  pip install openpyxl      # Excel解析")
        return 1

    except Exception as e:
        # Top-level boundary: report the failure with a traceback.
        print(f"[ERROR] 转换失败: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0


if __name__ == '__main__':
    # Hand main()'s status code to the shell so failures are visible to
    # callers and scripts; a None result counts as success (exit status 0).
    raise SystemExit(main())

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Master-Frank/XmindMcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

universal_xmind_converter.py•29.7 KiB

#!/usr/bin/env python3
"""
Universal XMind Converter

A Python tool that converts multiple file formats (TXT, HTML, Word, Excel, Markdown) to XMind mind map format.
Supports automatic format detection and extensible parser architecture.

Version: 2.0
"""

import json
import re
import os
from datetime import datetime
from pathlib import Path
import mimetypes
from xmind_writer import create_xmind_file

# 依赖改为懒加载，避免在模块导入时引入重量库
def _lazy_import_docx():
    try:
        import importlib
        return importlib.import_module('docx')
    except Exception as e:
        raise ImportError("需要安装python-docx: pip install python-docx") from e

def _lazy_import_openpyxl():
    try:
        import importlib
        return importlib.import_module('openpyxl')
    except Exception as e:
        raise ImportError("需要安装openpyxl: pip install openpyxl") from e

def _lazy_import_beautifulsoup():
    try:
        from bs4 import BeautifulSoup
        return BeautifulSoup
    except Exception as e:
        raise ImportError("需要安装BeautifulSoup4: pip install beautifulsoup4") from e


def escape_xml_text(text):
    """Replace the five XML special characters in *text* with entities.

    Args:
        text: the string to escape; any falsy value yields ``""``.

    Returns:
        The escaped string.
    """
    if not text:
        return ""
    # A single translate pass never re-escapes the '&' of an entity it just
    # produced, which matches replacing '&' first in a chain of replaces.
    entity_for = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "\"": "&quot;",
        "'": "&apos;",
    }
    return text.translate(str.maketrans(entity_for))


def generate_id():
    """Return a fresh random identifier: a UUID4 as 32 hex digits, no dashes."""
    from uuid import uuid4

    return uuid4().hex


def create_json_structure(title, children):
    """Build the dictionary describing one sheet: root topic plus theme.

    Args:
        title: Title given to the root topic.
        children: List of topic dictionaries attached directly to the root.

    Returns:
        A dict holding the sheet id, the root topic, the sheet title, the
        structure extension and the theme. Every ``id`` field is produced by
        a separate ``generate_id()`` call.
    """
    sheet_id = generate_id()

    root_topic = {
        "id": generate_id(),
        "class": "topic",
        "title": title,
        "structureClass": "org.xmind.ui.logic.right",
        "children": {"attached": children},
    }

    structure_extension = {
        "provider": "org.xmind.ui.skeleton.structure.style",
        "content": {"centralTopic": "org.xmind.ui.logic.right"},
    }

    map_style = {
        "id": generate_id(),
        "properties": {
            "svg:fill": "#ffffff",
            "multi-line-colors": "#F9423A #F6A04D #F3D321 #00BC7B #486AFF #4D49BE",
            "color-list": "#000229 #1F2766 #52CC83 #4D86DB #99142F #245570",
            "line-tapered": "none",
        },
    }

    central_style = {
        "id": generate_id(),
        "properties": {
            "fo:font-family": "Droid Serif",
            "fo:font-size": "30pt",
            "fo:font-weight": "400",
            "fo:font-style": "normal",
            "fo:color": "inherited",
            "fo:text-transform": "manual",
            "fo:text-decoration": "none",
            "fo:text-align": "center",
            "svg:fill": "#000229",
            "fill-pattern": "solid",
            "line-width": "3pt",
            "line-color": "#000229",
            "line-pattern": "solid",
            "border-line-color": "inherited",
            "border-line-width": "3pt",
            "border-line-pattern": "inherited",
            "shape-class": "org.xmind.topicShape.roundedRect",
            "line-class": "org.xmind.branchConnection.roundedElbow",
            "arrow-end-class": "org.xmind.arrowShape.none",
            "alignment-by-level": "inactived",
        },
    }

    def branch_style(font_size, line_width):
        # The mainTopic and subTopic styles are identical except for the font
        # size and the line width, so both come from this one template.
        return {
            "id": generate_id(),
            "properties": {
                "fo:font-family": "Droid Serif",
                "fo:font-size": font_size,
                "fo:font-weight": "400",
                "fo:font-style": "normal",
                "fo:color": "inherited",
                "fo:text-transform": "manual",
                "fo:text-decoration": "none",
                "fo:text-align": "left",
                "svg:fill": "inherited",
                "fill-pattern": "none",
                "line-width": line_width,
                "line-color": "inherited",
                "line-pattern": "inherited",
                "border-line-color": "inherited",
                "border-line-width": "0pt",
                "border-line-pattern": "inherited",
                "shape-class": "org.xmind.topicShape.roundedRect",
                "line-class": "org.xmind.branchConnection.roundedElbow",
                "arrow-end-class": "inherited",
            },
        }

    main_style = branch_style("18pt", "inherited")
    sub_style = branch_style("14pt", "2pt")

    return {
        "id": sheet_id,
        "class": "sheet",
        "rootTopic": root_topic,
        "title": "画布 1",
        "extensions": [structure_extension],
        "theme": {
            "map": map_style,
            "centralTopic": central_style,
            "mainTopic": main_style,
            "subTopic": sub_style,
            "skeletonThemeId": "c1fbada1b45ba2e3bfc3b8b57b",
            "colorThemeId": "Rainbow-#000229-MULTI_LINE_COLORS",
        },
    }


def create_topic(title, children=None):
    """Return a topic dictionary with a fresh id and the given title.

    Args:
        title: Text of the topic.
        children: Optional list of child topic dictionaries. When it is empty
            or ``None`` the result carries no ``"children"`` key at all.
    """
    node = {"id": generate_id(), "title": title}
    if not children:
        return node

    node["children"] = {"attached": children}
    return node


# ==============================================
# 文件格式解析器基类
# ==============================================

class BaseParser:
    """Common base of all format parsers; remembers the input file path."""

    def __init__(self, file_path):
        # Path of the file this parser instance works on.
        self.file_path = file_path

    def parse(self):
        """Parse the file and return the JSON structure.

        Raises:
            NotImplementedError: Always; concrete parsers must override this.
        """
        raise NotImplementedError("子类必须实现parse方法")

    def extract_title(self, content):
        """Return the default title: the file name without its last suffix.

        ``content`` is not used by this base implementation.
        """
        source_path = Path(self.file_path)
        return source_path.stem


# ==============================================
# Markdown解析器
# ==============================================

class MarkdownParser(BaseParser):
    """Parser that turns the ATX headings of a Markdown file into a topic tree.

    The first level-1 heading becomes the title of the root topic; every other
    heading (levels 1-6) becomes a node placed under the nearest preceding
    heading of a shallower level, or under the root when there is none.
    Lines inside fenced code blocks are never treated as headings.
    """

    # ATX heading: one to six '#', then whitespace, then the heading text.
    _HEADING_RE = re.compile(r'^(#{1,6})[ \t]+(.*)$')
    # Fence marker (three or more backticks or tildes) plus the rest of the line.
    _FENCE_RE = re.compile(r'^(`{3,}|~{3,})(.*)$')

    def parse(self):
        """Read the file and return the sheet structure for its headings.

        Returns:
            The dictionary produced by ``create_json_structure``.
        """
        # 'utf-8-sig' drops a leading byte-order mark, which would otherwise
        # hide a heading on the very first line.
        with open(self.file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        lines = content.split('\n')
        title = self.extract_title_from_content(lines)

        topics_by_level = {}  # heading level -> most recent open topic
        root_children = []
        title_extracted = False

        for level, title_text in self._iter_headings(lines):
            # A heading closes every open section at its own level or deeper,
            # so later headings cannot attach to a node of an earlier section.
            for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
                del topics_by_level[stale_level]

            # The first level-1 heading is the document title, not a node.
            if level == 1 and not title_extracted:
                title_extracted = True
                continue

            topic = create_topic(title_text)
            if topics_by_level:
                # Only shallower levels are left: the deepest is the parent.
                parent_topic = topics_by_level[max(topics_by_level)]
                parent_topic.setdefault("children", {"attached": []})
                parent_topic["children"]["attached"].append(topic)
            else:
                root_children.append(topic)
            topics_by_level[level] = topic

        return create_json_structure(title, root_children)

    def _iter_headings(self, lines):
        """Yield ``(level, text)`` for each ATX heading outside fenced code."""
        open_fence = None  # (marker character, marker length) while in a fence
        for raw_line in lines:
            line = raw_line.strip()
            fence = self._FENCE_RE.match(line)

            if open_fence is not None:
                # A fence is closed by a line made only of the same marker
                # character, at least as long as the opening marker.
                if (fence
                        and fence.group(1)[0] == open_fence[0]
                        and len(fence.group(1)) >= open_fence[1]
                        and not fence.group(2)):
                    open_fence = None
                continue

            if fence:
                marker, info = fence.group(1), fence.group(2)
                # A backtick fence cannot have backticks in its info string;
                # such a line is inline code, not the start of a block.
                if marker[0] == '~' or '`' not in info:
                    open_fence = (marker[0], len(marker))
                    continue

            heading = self._HEADING_RE.match(line)
            if heading:
                yield len(heading.group(1)), heading.group(2).strip()

    def extract_title_from_content(self, lines):
        """Return the text of the first level-1 heading in *lines*.

        Uses the same heading detection as ``parse`` so that the heading
        skipped there is exactly the one returned here. Falls back to the
        file name without its suffix when there is no level-1 heading.
        """
        for level, text in self._iter_headings(lines):
            if level == 1:
                return text
        return Path(self.file_path).stem

    def parse_markdown_to_json(self, markdown_file):
        """Backward-compatible alias of ``parse``; *markdown_file* is ignored."""
        return self.parse()


# ==============================================
# 文本大纲解析器
# ==============================================

class TextOutlineParser(BaseParser):
    """Parser for plain-text outlines whose hierarchy is given by indentation.

    Four columns of leading whitespace make one level; a tab advances to the
    next multiple of four columns. Leading/trailing ``-``, ``*``, ``•`` and
    spaces are stripped from each item.
    """

    def parse(self):
        """Read the outline and return the sheet structure.

        Returns:
            The dictionary produced by ``create_json_structure``.
        """
        # 'utf-8-sig' drops a leading byte-order mark so it cannot end up in
        # the title or the first item.
        with open(self.file_path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()

        title = self.extract_title(lines)

        topics_by_level = {}  # level -> most recent open topic
        root_children = []
        title_found = False

        for line in lines:
            line = line.rstrip('\n\r')
            if not line.strip():
                continue

            stripped_line = line.lstrip()

            # The first line equal to the title is the title line itself.
            if not title_found and stripped_line.strip() == title.strip():
                title_found = True
                continue

            # Measure the indent in columns, so that a tab counts as a full
            # level instead of as a single character.
            leading = line[:len(line) - len(stripped_line)]
            level = self.indent_to_level(len(leading.expandtabs(4)))
            title_text = stripped_line.strip('-*• ')

            if not title_text:
                continue

            topic = create_topic(title_text)
            self._attach_topic(topic, level, topics_by_level, root_children)

        return create_json_structure(title, root_children)

    @staticmethod
    def _attach_topic(topic, level, topics_by_level, root_children):
        """Hang *topic* under its nearest shallower ancestor, else the root."""
        # A new item ends every open branch at its own depth or deeper, so a
        # later, deeper item cannot attach to a node of an earlier branch.
        for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
            del topics_by_level[stale_level]

        if topics_by_level:
            # Only shallower levels are left: the deepest one is the parent.
            parent_topic = topics_by_level[max(topics_by_level)]
            parent_topic.setdefault("children", {"attached": []})
            parent_topic["children"]["attached"].append(topic)
        else:
            root_children.append(topic)
        topics_by_level[level] = topic

    def indent_to_level(self, indent):
        """Convert an indent width in columns into a level (4 columns each)."""
        return indent // 4

    def extract_title(self, lines):
        """Return the first non-empty line that does not start with a bullet.

        Falls back to the file name without its suffix when every non-empty
        line starts with ``-``, ``*`` or ``•``.
        """
        for line in lines:
            stripped = line.strip()
            if stripped and not stripped.startswith(('-', '*', '•')):
                return stripped
        return Path(self.file_path).stem


# ==============================================
# HTML解析器
# ==============================================

class HtmlParser(BaseParser):
    """Parser that builds the topic tree from HTML headings or lists.

    Headings ``h1``-``h6`` are read in document order and nested by level.
    When the document has no non-empty heading, top-level ``ul``/``ol`` lists
    are used instead.
    """

    HEADING_TAGS = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6')
    LIST_TAGS = ('ul', 'ol')

    def parse(self):
        """Read the HTML file and return the sheet structure.

        Returns:
            The dictionary produced by ``create_json_structure``.

        Raises:
            ImportError: If BeautifulSoup cannot be imported.
        """
        with open(self.file_path, 'r', encoding='utf-8-sig') as f:
            content = f.read()

        BeautifulSoup = _lazy_import_beautifulsoup()
        soup = BeautifulSoup(content, 'html.parser')
        title = self.extract_title(soup)

        root_children = []
        topics_by_level = {}  # level (h1 = 0) -> most recent open topic

        # A single find_all over all heading names returns the headings in
        # document order; visiting them tag by tag would put every h2 under
        # the last h1 of the page.
        for heading in soup.find_all(list(self.HEADING_TAGS)):
            title_text = heading.get_text().strip()
            if not title_text:
                continue

            level = int(heading.name[1]) - 1  # h1=0, h2=1, etc.
            topic = create_topic(title_text)
            self._attach_topic(topic, level, topics_by_level, root_children)

        # No headings found: fall back to the list structure.
        if not root_children:
            root_children = self.parse_lists(soup)

        return create_json_structure(title, root_children)

    @staticmethod
    def _attach_topic(topic, level, topics_by_level, root_children):
        """Hang *topic* under its nearest shallower ancestor, else the root."""
        # A new heading ends every open section at its own depth or deeper.
        for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
            del topics_by_level[stale_level]

        if topics_by_level:
            # Only shallower levels are left: the deepest one is the parent.
            parent_topic = topics_by_level[max(topics_by_level)]
            parent_topic.setdefault("children", {"attached": []})
            parent_topic["children"]["attached"].append(topic)
        else:
            # Headings without any shallower heading (e.g. an h2 on a page
            # with no h1) are kept at the top level instead of being dropped.
            root_children.append(topic)
        topics_by_level[level] = topic

    def extract_title(self, soup):
        """Return the first non-empty text of ``<title>``, then ``<h1>``.

        Falls back to the file name without its suffix.
        """
        for tag_name in ('title', 'h1'):
            tag = soup.find(tag_name)
            if tag is not None:
                text = tag.get_text().strip()
                if text:
                    return text

        return Path(self.file_path).stem

    def parse_lists(self, soup):
        """Return topics for every top-level ``ul``/``ol`` list in *soup*.

        Each ``li`` becomes a topic titled with the item's own text (text of
        nested lists excluded); the items of its nested lists become its
        children.
        """
        list_tags = list(self.LIST_TAGS)

        def own_text(item):
            # Keep the strings that reach `item` without passing through a
            # nested list on the way up.
            parts = []
            for string in item.strings:
                for ancestor in string.parents:
                    if ancestor is item:
                        parts.append(str(string))
                        break
                    if ancestor.name in self.LIST_TAGS:
                        break
            return ''.join(parts).strip()

        def parse_list_items(items):
            topics = []
            for item in items:
                # Lists whose nearest enclosing <li> is this item.
                nested_lists = [lst for lst in item.find_all(list_tags)
                                if lst.find_parent('li') is item]
                sub_topics = []
                for nested in nested_lists:
                    sub_items = nested.find_all('li', recursive=False)
                    sub_topics.extend(parse_list_items(sub_items))

                text = own_text(item)
                if text:
                    topics.append(create_topic(text, sub_topics))
                else:
                    # An item with no text of its own: keep its children by
                    # lifting them to this level.
                    topics.extend(sub_topics)
            return topics

        root_topics = []
        for list_tag in soup.find_all(list_tags):
            # Only lists that are not nested inside another list.
            if not list_tag.find_parent(list_tags):
                items = list_tag.find_all('li', recursive=False)
                if items:
                    root_topics.extend(parse_list_items(items))

        return root_topics


# ==============================================
# Word文档解析器
# ==============================================

class WordParser(BaseParser):
    """Parser that builds the topic tree from the heading styles of a .docx."""

    # Style names such as "Heading 2", "heading 2" or "标题 2".
    _NUMBERED_HEADING_RE = re.compile(r'^\s*(?:heading|标题)\s*(\d+)\s*$', re.IGNORECASE)
    # Deepest level that is ever returned (levels are 0-based).
    MAX_LEVEL = 5

    def parse(self):
        """Read the document and return the sheet structure.

        Returns:
            The dictionary produced by ``create_json_structure``.

        Raises:
            ImportError: If python-docx cannot be imported.
        """
        docx = _lazy_import_docx()
        doc = docx.Document(self.file_path)
        title = self.extract_title(doc)

        root_children = []
        topics_by_level = {}  # level -> most recent open topic

        for paragraph in doc.paragraphs:
            text = paragraph.text.strip()
            if not text:
                continue

            # Only paragraphs whose style marks them as headings become topics.
            level = self.get_heading_level(paragraph)
            if level is None:
                continue

            topic = create_topic(text)
            self._attach_topic(topic, level, topics_by_level, root_children)

        return create_json_structure(title, root_children)

    @staticmethod
    def _attach_topic(topic, level, topics_by_level, root_children):
        """Hang *topic* under its nearest shallower ancestor, else the root."""
        # A new heading ends every open section at its own depth or deeper.
        for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
            del topics_by_level[stale_level]

        if topics_by_level:
            # Only shallower levels are left: the deepest one is the parent.
            parent_topic = topics_by_level[max(topics_by_level)]
            parent_topic.setdefault("children", {"attached": []})
            parent_topic["children"]["attached"].append(topic)
        else:
            # A heading with no shallower heading before it is kept at the
            # top level instead of being dropped.
            root_children.append(topic)
        topics_by_level[level] = topic

    def extract_title(self, doc):
        """Return the document title.

        Order of preference: the ``title`` core property, the text of the
        first non-empty paragraph whose style name contains ``Heading``, the
        file name without its suffix.
        """
        if doc.core_properties.title:
            return doc.core_properties.title

        for paragraph in doc.paragraphs:
            if paragraph.style and 'Heading' in (paragraph.style.name or ''):
                text = paragraph.text.strip()
                if text:
                    return text

        return Path(self.file_path).stem

    def get_heading_level(self, paragraph):
        """Return the 0-based heading level of *paragraph*, or ``None``.

        A style name made of ``Heading``/``标题`` plus a number N maps to
        level N-1, clamped to ``0..MAX_LEVEL``. Any other style name that
        contains one of the keywords below maps to that keyword's position in
        the list.
        """
        if not paragraph.style:
            return None

        style_name = paragraph.style.name or ''

        numbered = self._NUMBERED_HEADING_RE.match(style_name)
        if numbered:
            level = int(numbered.group(1)) - 1
            return max(0, min(level, self.MAX_LEVEL))

        # NOTE(review): for un-numbered style names the level is simply the
        # index of the first matching keyword; this ordering is a heuristic
        # kept unchanged - confirm it matches the styles actually in use.
        lowered = style_name.lower()
        heading_patterns = ['标题', 'Heading', 'head', 'title']
        for i, pattern in enumerate(heading_patterns):
            if pattern.lower() in lowered:
                return i

        return None


# ==============================================
# Excel解析器
# ==============================================

class ExcelParser(BaseParser):
    """Parser that builds the topic tree from the active worksheet.

    Two row layouts are understood (see ``parse_row``): a numeric level in the
    first column with the text in the second, or indented text in the first
    column.
    """

    def parse(self):
        """Read the workbook and return the sheet structure.

        Returns:
            The dictionary produced by ``create_json_structure``.

        Raises:
            ImportError: If openpyxl cannot be imported.
        """
        openpyxl = _lazy_import_openpyxl()
        wb = openpyxl.load_workbook(self.file_path)

        ws = wb.active
        title = self.extract_title(ws)

        root_children = []
        current_topics = {}  # level -> most recent open topic

        for row in ws.iter_rows(min_row=1, values_only=True):
            # Skip rows whose first cell is empty. A numeric 0 is a valid
            # level and must not be mistaken for an empty cell.
            if not row or row[0] is None or row[0] == "":
                continue

            level, text = self.parse_row(row)
            if not text:
                continue

            topic = create_topic(text)
            self._attach_topic(topic, level, current_topics, root_children)

        return create_json_structure(title, root_children)

    @staticmethod
    def _attach_topic(topic, level, topics_by_level, root_children):
        """Hang *topic* under its nearest shallower ancestor, else the root."""
        # A new row ends every open branch at its own depth or deeper.
        for stale_level in [lvl for lvl in topics_by_level if lvl >= level]:
            del topics_by_level[stale_level]

        if topics_by_level:
            # Only shallower levels are left: the deepest one is the parent.
            parent_topic = topics_by_level[max(topics_by_level)]
            parent_topic.setdefault("children", {"attached": []})
            parent_topic["children"]["attached"].append(topic)
        else:
            # Works for sheets whose levels start at 1 as well as at 0.
            root_children.append(topic)
        topics_by_level[level] = topic

    def extract_title(self, worksheet):
        """Return the worksheet name, or a fallback when it is the default.

        When the name is empty or ``Sheet1`` the first non-empty cell of the
        first row is used; the last resort is the file name without suffix.
        """
        title = worksheet.title

        if not title or title == 'Sheet1':
            for row in worksheet.iter_rows(min_row=1, max_row=1, values_only=True):
                for cell in row:
                    if cell:
                        title = str(cell)
                        break

        return title or Path(self.file_path).stem

    def parse_row(self, row):
        """Return ``(level, text)`` for one worksheet row.

        Strategy 1 (rows with at least two cells): a numeric first cell is the
        level (negative values become 0) and the second cell is the text; a
        non-numeric first cell with a non-empty second cell gives level 0 and
        the second cell's text.

        Strategy 2 (everything else): the first cell is the text; two leading
        whitespace characters make one level and ``-*•→`` bullets are stripped
        from indented text.
        """
        first = row[0]

        if len(row) >= 2:
            second_text = str(row[1]).strip() if row[1] else ""
            if isinstance(first, (int, float)):
                try:
                    level = max(int(first), 0)
                except (ValueError, OverflowError):
                    # NaN or infinity cannot be a level.
                    level = 0
                return level, second_text
            if second_text:
                return 0, second_text

        text = str(first) if first else ""
        level = 0

        stripped = text.lstrip()
        if stripped != text:
            indent = len(text) - len(stripped)
            level = indent // 2  # two leading characters per level
            text = stripped.strip('-*•→')

        return level, text.strip()


# ==============================================
# 解析器工厂
# ==============================================

class ParserFactory:
    """Factory that picks the parser class matching a file."""

    # File extension (lowercase, with dot) -> parser class.
    PARSERS = {
        '.md': MarkdownParser,
        '.markdown': MarkdownParser,
        '.txt': TextOutlineParser,
        '.text': TextOutlineParser,
        '.html': HtmlParser,
        '.htm': HtmlParser,
        '.docx': WordParser,
        '.xlsx': ExcelParser,
        '.xls': ExcelParser,
    }

    @classmethod
    def detect_file_type(cls, file_path):
        """Return the file type as an extension string such as ``'.md'``.

        A file with an extension is identified by it (lower-cased). Otherwise
        the first 1000 characters are inspected; a file that cannot be read as
        UTF-8 text is reported as ``'.txt'``.
        """
        ext = Path(file_path).suffix.lower()

        # An existing extension always wins.
        if ext:
            return ext

        try:
            # 'utf-8-sig' drops a leading byte-order mark before sniffing.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                content = f.read(1000)
        except (OSError, ValueError):
            # Unreadable or not UTF-8 (UnicodeDecodeError is a ValueError).
            return '.txt'

        # HTML: the doctype and the root tag are case-insensitive.
        if content.strip().lower().startswith(('<!doctype', '<html')):
            return '.html'

        # Markdown: a '#' line among the first ten lines.
        if any(line.strip().startswith('#') for line in content.split('\n')[:10]):
            return '.md'

        # OPML
        if '<opml' in content.lower():
            return '.opml'

        # Everything else, indented outlines included, is plain text.
        return '.txt'

    @classmethod
    def get_parser(cls, file_path):
        """Return a parser instance for *file_path*.

        Raises:
            ValueError: If the detected type has no entry in ``PARSERS``.
        """
        file_ext = cls.detect_file_type(file_path)

        if file_ext not in cls.PARSERS:
            raise ValueError(f"不支持的文件格式: {file_ext}")

        parser_class = cls.PARSERS[file_ext]
        return parser_class(file_path)

    @classmethod
    def get_supported_formats(cls):
        """Return the list of extensions that have a parser."""
        return list(cls.PARSERS.keys())

    @classmethod
    def detect_format(cls, file_path):
        """Return the format of *file_path* from its name alone.

        A known extension is returned as is. Otherwise the MIME type guessed
        from the name is mapped to an extension; when that fails too the
        file's own (possibly empty) extension is returned.
        """
        file_ext = Path(file_path).suffix.lower()
        if file_ext in cls.PARSERS:
            return file_ext

        mime_type, _ = mimetypes.guess_type(file_path)
        if mime_type:
            mime_to_ext = {
                'text/markdown': '.md',
                'text/plain': '.txt',
                'text/html': '.html',
                'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
                'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': '.xlsx',
            }
            return mime_to_ext.get(mime_type, file_ext)

        return file_ext


# ==============================================
# XMind文件生成器（复用原有代码）
# ==============================================

# 写入器功能已迁移至 xmind_writer.py；保持对 create_xmind_file 的导入使用


# ==============================================
# 兼容性函数（从markdown_to_xmind_converter.py合并）
# ==============================================

def parse_markdown_to_json(markdown_file):
    """Backward-compatible helper: parse *markdown_file* with ``MarkdownParser``."""
    return MarkdownParser(markdown_file).parse()

def main():
    """Legacy command-line entry point: convert ``sys.argv[1]`` to XMind.

    The result is always written to ``output/<input name without suffix>.xmind``.

    NOTE: a second ``main`` defined further down in this module rebinds the
    name, so once the module is fully loaded ``main`` no longer refers to this
    function.

    Returns:
        0 on success; 1 when no input file was given, the input file does not
        exist or the conversion failed.
    """
    import sys

    if len(sys.argv) <= 1:
        print("[ERROR] 错误: 请提供输入文件路径")
        print("用法: python universal_xmind_converter.py <input_file>")
        print("支持的格式: .md, .txt, .html, .docx, .xlsx")
        return 1

    input_file = sys.argv[1]

    if not os.path.exists(input_file):
        print(f"[ERROR] 错误: 文件 '{input_file}' 不存在")
        return 1

    try:
        # get_parser is a classmethod; no factory instance is needed.
        parser = ParserFactory.get_parser(input_file)

        print(f"正在解析文件: {input_file}")
        json_structure = parser.parse()

        base_name = os.path.splitext(os.path.basename(input_file))[0]
        output_dir = "output"

        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, f"{base_name}.xmind")

        print(f"正在创建XMind文件: {output_file}")
        create_xmind_file(json_structure, output_file)

        print("[SUCCESS] 转换完成！")
        print(f"输出文件: {output_file}")

    except Exception as e:
        print(f"[ERROR] 转换失败: {str(e)}")
        return 1

    return 0


# NOTE: the script entry point is the ``if __name__ == '__main__'`` guard at
# the very end of this file. A second guard used to sit here; it ran the
# main() defined above before the rest of the module was loaded, and the
# final guard then ran main() again, so every script run converted twice.


# ==============================================
# 主函数
# ==============================================

def main():
    """Command-line entry point.

    Usage: ``python universal_xmind_converter.py <input file> [output file]``.
    Without an output file the result goes to
    ``output/<input name without suffix>.xmind``.

    Returns:
        0 after printing the help text or after a successful conversion;
        1 when the input file does not exist, a dependency is missing or the
        conversion failed.
    """
    import sys

    # No argument or an explicit help request: print the usage text.
    if len(sys.argv) < 2 or sys.argv[1] in ['-h', '--help', 'help']:
        help_lines = (
            "=" * 60,
            "Universal XMind Converter - 多格式思维导图转换器",
            "=" * 60,
            "\n支持的文件格式:",
            "  • Markdown     (.md)     - 标题层级转换",
            "  • 文本大纲     (.txt)    - 缩进格式大纲",
            "  • HTML网页     (.html)   - 标题和列表结构",
            "  • Word文档     (.docx)   - 标题样式转换",
            "  • Excel表格    (.xlsx)   - 多列层级结构",
            "\n使用方法:",
            "  python universal_xmind_converter.py <输入文件> [输出文件]",
            "\n示例:",
            "  python universal_xmind_converter.py document.md",
            "  python universal_xmind_converter.py outline.txt mymap.xmind",
            "  python universal_xmind_converter.py data.xlsx",
            "\n自动识别:",
            "  无扩展名文件会自动检测格式",
            "\n依赖安装:",
            "  pip install beautifulsoup4 python-docx openpyxl",
            "=" * 60,
        )
        for help_line in help_lines:
            print(help_line)
        return 0

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) > 2 else None

    if not os.path.exists(input_file):
        print(f"[ERROR] 错误: 文件 '{input_file}' 不存在")
        return 1

    file_format = ParserFactory.detect_format(input_file)
    print(f"检测到文件格式: {file_format}")

    try:
        parser = ParserFactory.get_parser(input_file)
        print(f"使用解析器: {parser.__class__.__name__}")

        print(f"正在解析文件: {input_file}")
        json_structure = parser.parse()

        # Honour the output path given on the command line; only fall back
        # to output/<name>.xmind when none was given.
        if output_file is None:
            base_name = os.path.splitext(os.path.basename(input_file))[0]
            output_file = os.path.join("output", f"{base_name}.xmind")

        # Make sure the target directory exists (nothing to do for a bare
        # file name in the current directory).
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        print(f"正在创建XMind文件: {output_file}")
        create_xmind_file(json_structure, output_file)

        print("[SUCCESS] 转换完成！")
        print(f"📁 输出文件: {output_file}")

    except ImportError as e:
        print(f"[ERROR] 缺少依赖包: {e}")
        print("请安装相应的依赖包:")
        print("  pip install beautifulsoup4  # HTML解析")
        print("  pip install python-docx   # Word文档解析")
        print("  pip install openpyxl      # Excel解析")
        return 1

    except Exception as e:
        print(f"[ERROR] 转换失败: {e}")
        import traceback
        traceback.print_exc()
        return 1

    return 0


if __name__ == '__main__':
    # Hand main()'s return code to the shell so that a failed conversion gives
    # a non-zero exit status; a None result still exits with status 0.
    raise SystemExit(main())