ShowDoc MCP Server

showdoc_mcp
file_operations

content_analyzer.py•6.75 KiB

"""
File content analysis utilities.

Provides line counting, file-size analysis, full-text search and word
statistics for a single file.
"""

import re
from pathlib import Path
from typing import Dict, Any, List, Optional, Pattern
from collections import Counter

# Built-in exceptions are used (no custom exception hierarchy here).
from .content_processor import read_file_safe

# Prefixes that mark a (stripped) line as a comment. This is a heuristic:
#   "#"   Python-style single-line comment
#   "//"  JavaScript/Kotlin/Dart-style single-line comment
#   "/*"  opening line of a block comment
#   "*"   continuation or closing line inside a block comment
_COMMENT_PREFIXES = ("#", "//", "/*", "*")


def count_lines(
    file_path: str | Path,
    encoding: Optional[str] = None,
) -> Dict[str, int]:
    """
    Count total, code, blank and comment lines in a file.

    Comment detection is prefix-based only (see ``_COMMENT_PREFIXES``); it
    does not track multi-line strings or block-comment state, so a code line
    that merely starts with ``*`` or ``#`` is classified as a comment.

    Args:
        file_path: Path of the file to analyse.
        encoding: Encoding forwarded to ``read_file_safe``.

    Returns:
        A dict with:
        - total_lines: number of lines
        - code_lines: lines that are neither blank nor comments
        - blank_lines: lines that are empty or whitespace-only
        - comment_lines: lines starting with a comment prefix
        - max_line_length: length of the longest line (0 for an empty file)

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"文件不存在: {path}")

    lines = read_file_safe(path, encoding=encoding).splitlines()

    blank_lines = 0
    comment_lines = 0
    code_lines = 0
    for line in lines:
        stripped = line.strip()
        if not stripped:
            blank_lines += 1
        elif stripped.startswith(_COMMENT_PREFIXES):
            comment_lines += 1
        else:
            code_lines += 1

    return {
        "total_lines": len(lines),
        "code_lines": code_lines,
        "blank_lines": blank_lines,
        "comment_lines": comment_lines,
        # Length is measured on the unstripped line; default covers no lines.
        "max_line_length": max((len(line) for line in lines), default=0),
    }


def analyze_file_size(
    file_path: str | Path,
) -> Dict[str, Any]:
    """
    Report the size of a file in several units.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A dict with:
        - size_bytes: size in bytes (from ``Path.stat().st_size``)
        - size_kb: size in KB (1024 bytes), rounded to 2 decimals
        - size_mb: size in MB, rounded to 2 decimals
        - size_gb: size in GB, rounded to 2 decimals
        - human_readable: size formatted in the largest unit whose value
          is at least 1 (e.g. ``"1.50 KB"``), or ``"<n> B"`` below 1 KB

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"文件不存在: {path}")

    size_bytes = path.stat().st_size
    size_kb = size_bytes / 1024
    size_mb = size_kb / 1024
    size_gb = size_mb / 1024

    # Pick the largest unit for which the value is >= 1.
    if size_gb >= 1:
        human_readable = f"{size_gb:.2f} GB"
    elif size_mb >= 1:
        human_readable = f"{size_mb:.2f} MB"
    elif size_kb >= 1:
        human_readable = f"{size_kb:.2f} KB"
    else:
        human_readable = f"{size_bytes} B"

    return {
        "size_bytes": size_bytes,
        "size_kb": round(size_kb, 2),
        "size_mb": round(size_mb, 2),
        "size_gb": round(size_gb, 2),
        "human_readable": human_readable,
    }


def search_text(
    file_path: str | Path,
    search_pattern: str | Pattern,
    regex: bool = False,
    case_sensitive: bool = False,
    encoding: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Search a file line by line and return every match.

    Args:
        file_path: Path of the file to search.
        search_pattern: Text to look for, or an already compiled pattern.
            A compiled pattern is used as-is; ``regex`` and
            ``case_sensitive`` do not apply to it.
        regex: When ``search_pattern`` is a string, treat it as a regular
            expression instead of literal text.
        case_sensitive: When ``search_pattern`` is a string, match case
            exactly; otherwise ``re.IGNORECASE`` is applied.
        encoding: Encoding forwarded to ``read_file_safe``.

    Returns:
        A list of dicts, one per match, in file order, each with:
        - line_number: 1-based line number
        - column: 1-based column of the match start
        - match_text: the matched text
        - line_content: the full line containing the match
        - start: 0-based start offset of the match within the line
        - end: 0-based end offset (exclusive) within the line

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"文件不存在: {path}")

    content = read_file_safe(path, encoding=encoding)

    if isinstance(search_pattern, re.Pattern):
        # Already compiled: never pass it through re.escape(), which only
        # accepts str/bytes and would raise TypeError.
        pattern = search_pattern
    else:
        flags = 0 if case_sensitive else re.IGNORECASE
        source = search_pattern if regex else re.escape(search_pattern)
        pattern = re.compile(source, flags)

    return [
        {
            "line_number": line_num,
            "column": match.start() + 1,
            "match_text": match.group(),
            "line_content": line,
            "start": match.start(),
            "end": match.end(),
        }
        for line_num, line in enumerate(content.splitlines(), 1)
        for match in pattern.finditer(line)
    ]


def count_words(
    file_path: str | Path,
    encoding: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Compute word and character statistics for a file.

    Words are runs of ``\\w`` characters found in the lower-cased content,
    so counting is case-insensitive.

    Args:
        file_path: Path of the file to analyse.
        encoding: Encoding forwarded to ``read_file_safe``.

    Returns:
        A dict with:
        - total_words: number of words
        - unique_words: number of distinct words
        - total_chars: number of characters in the content
        - total_chars_no_spaces: characters excluding spaces, newlines
          and tabs
        - word_frequency: the 10 most common words mapped to their counts

    Raises:
        FileNotFoundError: If the file does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"文件不存在: {path}")

    content = read_file_safe(path, encoding=encoding)

    # Only " ", "\n" and "\t" are excluded from the no-spaces count.
    without_spaces = content.replace(" ", "").replace("\n", "").replace("\t", "")

    words = re.findall(r'\b\w+\b', content.lower())
    word_freq = Counter(words)

    return {
        "total_words": len(words),
        "unique_words": len(word_freq),
        "total_chars": len(content),
        "total_chars_no_spaces": len(without_spaces),
        "word_frequency": dict(word_freq.most_common(10)),
    }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/yfcyfc123234/showdoc_mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

content_analyzer.py•6.75 KiB