Skip to main content
Glama

MCP Atlassian

by ArconixForge
jira.py12.6 kB
"""Jira-specific text preprocessing module.""" import logging import re from typing import Any from .base import BasePreprocessor logger = logging.getLogger("mcp-atlassian") class JiraPreprocessor(BasePreprocessor): """Handles text preprocessing for Jira content.""" def __init__(self, base_url: str = "", **kwargs: Any) -> None: """ Initialize the Jira text preprocessor. Args: base_url: Base URL for Jira API **kwargs: Additional arguments for the base class """ super().__init__(base_url=base_url, **kwargs) def clean_jira_text(self, text: str) -> str: """ Clean Jira text content by: 1. Processing user mentions and links 2. Converting Jira markup to markdown 3. Converting HTML/wiki markup to markdown """ if not text: return "" # Process user mentions mention_pattern = r"\[~accountid:(.*?)\]" text = self._process_mentions(text, mention_pattern) # Process Jira smart links text = self._process_smart_links(text) # First convert any Jira markup to Markdown text = self.jira_to_markdown(text) # Then convert any remaining HTML to markdown text = self._convert_html_to_markdown(text) return text.strip() def _process_mentions(self, text: str, pattern: str) -> str: """ Process user mentions in text. Args: text: The text containing mentions pattern: Regular expression pattern to match mentions Returns: Text with mentions replaced with display names """ mentions = re.findall(pattern, text) for account_id in mentions: try: # Note: This is a placeholder - actual user fetching should be injected display_name = f"User:{account_id}" text = text.replace(f"[~accountid:{account_id}]", display_name) except Exception as e: logger.error(f"Error processing mention for {account_id}: {str(e)}") return text def _process_smart_links(self, text: str) -> str: """Process Jira/Confluence smart links.""" # Pattern matches: [text|url|smart-link] link_pattern = r"\[(.*?)\|(.*?)\|smart-link\]" matches = re.finditer(link_pattern, text) for match in matches: full_match = match.group(0) link_text = match.group(1) link_url = match.group(2) # Extract issue key if it's a Jira issue link issue_key_match = re.search(r"browse/([A-Z]+-\d+)", link_url) # Check if it's a Confluence wiki link confluence_match = re.search( r"wiki/spaces/.+?/pages/\d+/(.+?)(?:\?|$)", link_url ) if issue_key_match: issue_key = issue_key_match.group(1) clean_url = f"{self.base_url}/browse/{issue_key}" text = text.replace(full_match, f"[{issue_key}]({clean_url})") elif confluence_match: url_title = confluence_match.group(1) readable_title = url_title.replace("+", " ") readable_title = re.sub(r"^[A-Z]+-\d+\s+", "", readable_title) text = text.replace(full_match, f"[{readable_title}]({link_url})") else: clean_url = link_url.split("?")[0] text = text.replace(full_match, f"[{link_text}]({clean_url})") return text def jira_to_markdown(self, input_text: str) -> str: """ Convert Jira markup to Markdown format. Args: input_text: Text in Jira markup format Returns: Text in Markdown format """ if not input_text: return "" # Block quotes output = re.sub(r"^bq\.(.*?)$", r"> \1\n", input_text, flags=re.MULTILINE) # Text formatting (bold, italic) output = re.sub( r"([*_])(.*?)\1", lambda match: ("**" if match.group(1) == "*" else "*") + match.group(2) + ("**" if match.group(1) == "*" else "*"), output, ) # Multi-level numbered list output = re.sub( r"^((?:#|-|\+|\*)+) (.*)$", lambda match: self._convert_jira_list_to_markdown(match), output, flags=re.MULTILINE, ) # Headers output = re.sub( r"^h([0-6])\.(.*)$", lambda match: "#" * int(match.group(1)) + match.group(2), output, flags=re.MULTILINE, ) # Inline code output = re.sub(r"\{\{([^}]+)\}\}", r"`\1`", output) # Citation output = re.sub(r"\?\?((?:.[^?]|[^?].)+)\?\?", r"<cite>\1</cite>", output) # Inserted text output = re.sub(r"\+([^+]*)\+", r"<ins>\1</ins>", output) # Superscript output = re.sub(r"\^([^^]*)\^", r"<sup>\1</sup>", output) # Subscript output = re.sub(r"~([^~]*)~", r"<sub>\1</sub>", output) # Strikethrough output = re.sub(r"-([^-]*)-", r"-\1-", output) # Code blocks with optional language specification output = re.sub( r"\{code(?::([a-z]+))?\}([\s\S]*?)\{code\}", r"```\1\n\2\n```", output, flags=re.MULTILINE, ) # No format output = re.sub(r"\{noformat\}([\s\S]*?)\{noformat\}", r"```\n\1\n```", output) # Quote blocks output = re.sub( r"\{quote\}([\s\S]*)\{quote\}", lambda match: "\n".join( [f"> {line}" for line in match.group(1).split("\n")] ), output, flags=re.MULTILINE, ) # Images with alt text output = re.sub( r"!([^|\n\s]+)\|([^\n!]*)alt=([^\n!\,]+?)(,([^\n!]*))?!", r"![\3](\1)", output, ) # Images with other parameters (ignore them) output = re.sub(r"!([^|\n\s]+)\|([^\n!]*)!", r"![](\1)", output) # Images without parameters output = re.sub(r"!([^\n\s!]+)!", r"![](\1)", output) # Links output = re.sub(r"\[([^|]+)\|(.+?)\]", r"[\1](\2)", output) output = re.sub(r"\[(.+?)\]([^\(]+)", r"<\1>\2", output) # Colored text output = re.sub( r"\{color:([^}]+)\}([\s\S]*?)\{color\}", r"<span style=\"color:\1\">\2</span>", output, flags=re.MULTILINE, ) # Convert Jira table headers (||) to markdown table format lines = output.split("\n") i = 0 while i < len(lines): line = lines[i] if "||" in line: # Replace Jira table headers lines[i] = lines[i].replace("||", "|") # Add a separator line for markdown tables header_cells = lines[i].count("|") - 1 if header_cells > 0: separator_line = "|" + "---|" * header_cells lines.insert(i + 1, separator_line) i += 1 # Skip the newly inserted line in next iteration i += 1 # Rejoin the lines output = "\n".join(lines) return output def markdown_to_jira(self, input_text: str) -> str: """ Convert Markdown syntax to Jira markup syntax. Args: input_text: Text in Markdown format Returns: Text in Jira markup format """ if not input_text: return "" # Save code blocks to prevent recursive processing code_blocks = [] inline_codes = [] # Extract code blocks def save_code_block(match: re.Match) -> str: """ Process and save a code block. Args: match: Regex match object containing the code block Returns: Jira-formatted code block """ syntax = match.group(1) or "" content = match.group(2) code = "{code" if syntax: code += ":" + syntax code += "}" + content + "{code}" code_blocks.append(code) return str(code) # Ensure we return a string # Extract inline code def save_inline_code(match: re.Match) -> str: """ Process and save inline code. Args: match: Regex match object containing the inline code Returns: Jira-formatted inline code """ content = match.group(1) code = "{{" + content + "}}" inline_codes.append(code) return str(code) # Ensure we return a string # Save code sections temporarily output = re.sub(r"```(\w*)\n([\s\S]+?)```", save_code_block, input_text) output = re.sub(r"`([^`]+)`", save_inline_code, output) # Headers with = or - underlines output = re.sub( r"^(.*?)\n([=-])+$", lambda match: f"h{1 if match.group(2)[0] == '=' else 2}. {match.group(1)}", output, flags=re.MULTILINE, ) # Headers with # prefix output = re.sub( r"^([#]+)(.*?)$", lambda match: f"h{len(match.group(1))}." + match.group(2), output, flags=re.MULTILINE, ) # Bold and italic output = re.sub( r"([*_]+)(.*?)\1", lambda match: ("_" if len(match.group(1)) == 1 else "*") + match.group(2) + ("_" if len(match.group(1)) == 1 else "*"), output, ) # Multi-level bulleted list output = re.sub( r"^(\s*)- (.*)$", lambda match: ( "* " + match.group(2) if not match.group(1) else " " * (len(match.group(1)) // 2) + "* " + match.group(2) ), output, flags=re.MULTILINE, ) # Multi-level numbered list output = re.sub( r"^(\s+)1\. (.*)$", lambda match: "#" * (int(len(match.group(1)) / 4) + 2) + " " + match.group(2), output, flags=re.MULTILINE, ) # HTML formatting tags to Jira markup tag_map = {"cite": "??", "del": "-", "ins": "+", "sup": "^", "sub": "~"} for tag, replacement in tag_map.items(): output = re.sub( rf"<{tag}>(.*?)<\/{tag}>", rf"{replacement}\1{replacement}", output ) # Colored text output = re.sub( r"<span style=\"color:(#[^\"]+)\">([\s\S]*?)</span>", r"{color:\1}\2{color}", output, flags=re.MULTILINE, ) # Strikethrough output = re.sub(r"~~(.*?)~~", r"-\1-", output) # Images without alt text output = re.sub(r"!\[\]\(([^)\n\s]+)\)", r"!\1!", output) # Images with alt text output = re.sub(r"!\[([^\]\n]+)\]\(([^)\n\s]+)\)", r"!\2|alt=\1!", output) # Links output = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"[\1|\2]", output) output = re.sub(r"<([^>]+)>", r"[\1]", output) # Convert markdown tables to Jira table format lines = output.split("\n") i = 0 while i < len(lines): if i < len(lines) - 1 and re.match(r"\|[-\s|]+\|", lines[i + 1]): # Convert header row to Jira format lines[i] = lines[i].replace("|", "||") # Remove the separator line lines.pop(i + 1) i += 1 # Rejoin the lines output = "\n".join(lines) return output def _convert_jira_list_to_markdown(self, match: re.Match) -> str: """ Helper method to convert Jira lists to Markdown format. Args: match: Regex match object containing the Jira list markup Returns: Markdown-formatted list item """ jira_bullets = match.group(1) content = match.group(2) # Calculate indentation level based on number of symbols indent_level = len(jira_bullets) - 1 indent = " " * (indent_level * 2) # Determine the marker based on the last character last_char = jira_bullets[-1] prefix = "1." if last_char == "#" else "-" return f"{indent}{prefix} {content}"

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/ArconixForge/mcp-atlassian'

If you have feedback or need assistance with the MCP directory API, please join our Discord server