Jotdown

JotDown
src

formatter.rs•13.3 KiB

use serde_json::{json, Value}; use regex::Regex; /// Maximum size of a block in the Notion API const MAX_BLOCK_SIZE: usize = 2000; /// Split content into chunks to respect Notion API limits /// /// # Arguments /// /// * `text` - Text to be split /// * `max_length` - Maximum length of each chunk (default: 2000 characters) /// /// # Returns /// /// * `Vec<String>` - List of text chunks pub fn split_content(text: &str, max_length: usize) -> Vec<String> { if text.len() <= max_length { return vec![text.to_string()]; } // Try to split by headers let header_regex = Regex::new(r"(?m)^(#{1,3}\s.+)$").unwrap(); let headers: Vec<_> = header_regex.find_iter(text).collect(); if headers.is_empty() { // No headers, use simple method return simple_split(text, max_length); } let mut parts = Vec::new(); let mut last_pos = 0; let mut current_chunk = String::new(); // Process headers as splitting points for (i, header_match) in headers.iter().enumerate() { // Get content from last point to current header if i > 0 { let header_content = &text[last_pos..header_match.start()]; // If adding this header section would exceed max size, // start a new chunk if current_chunk.len() + header_content.len() > max_length { parts.push(current_chunk.clone()); current_chunk = header_content.to_string(); } else { current_chunk.push_str(header_content); } } // First header or after a split if current_chunk.is_empty() { current_chunk = text[header_match.start()..].to_string(); // If still too large, we'll need to split it later } last_pos = header_match.start(); } // Add final chunk if last_pos < text.len() { let final_content = &text[last_pos..]; if current_chunk.len() + final_content.len() > max_length { parts.push(current_chunk.clone()); parts.push(final_content.to_string()); } else { current_chunk.push_str(final_content); parts.push(current_chunk.clone()); } } else if !current_chunk.is_empty() { parts.push(current_chunk.clone()); } // If any chunk is still too large, split it further let mut result = 
Vec::new(); for chunk in parts { if chunk.len() > max_length { result.extend(simple_split(&chunk, max_length)); } else { result.push(chunk); } } result } /// Fallback method to split text without headers /// /// # Arguments /// /// * `text` - Text to be split /// * `max_length` - Maximum length of each chunk /// /// # Returns /// /// * `Vec<String>` - List of text chunks fn simple_split(text: &str, max_length: usize) -> Vec<String> { // For pure text without natural splitting points (like headers), // we need to ensure we're breaking the content into chunks that respect the max_length let mut chunks = Vec::new(); let mut current_chunk = String::new(); let mut in_code_block = false; let mut code_block_content = String::new(); // If text is just a repeating character (like in tests) with no structure, // split it into equal chunks first if text.chars().all(|c| c == text.chars().next().unwrap()) && text.len() > max_length { let mut remaining = text.to_string(); while !remaining.is_empty() { let split_point = std::cmp::min(max_length, remaining.len()); chunks.push(remaining[..split_point].to_string()); remaining = remaining[split_point..].to_string(); } return chunks; } for line in text.split('\n') { // Check for code block markers if line.trim().starts_with("```") { in_code_block = !in_code_block; // If we're starting a code block if in_code_block { code_block_content = format!("{}{}", line, "\n"); continue; } else { // We're ending a code block, add it as a whole code_block_content.push_str(line); if current_chunk.len() + code_block_content.len() > max_length { // If adding the whole block exceeds the limit, // finalize the current chunk and start a new one if !current_chunk.is_empty() { chunks.push(current_chunk.clone()); } chunks.push(code_block_content.clone()); current_chunk = String::new(); } else { current_chunk.push_str(&code_block_content); } code_block_content = String::new(); continue; } } // If we're inside a code block, collect the content if 
in_code_block { code_block_content.push_str(line); code_block_content.push('\n'); continue; } // For regular lines let line_with_newline = format!("{}{}", line, "\n"); if current_chunk.len() + line_with_newline.len() > max_length { if !current_chunk.is_empty() { chunks.push(current_chunk.clone()); } current_chunk = line_with_newline; } else { current_chunk.push_str(&line_with_newline); } } // Add any remaining content if !code_block_content.is_empty() { if current_chunk.len() + code_block_content.len() > max_length { if !current_chunk.is_empty() { chunks.push(current_chunk.clone()); } chunks.push(code_block_content.clone()); } else { current_chunk.push_str(&code_block_content); } } if !current_chunk.is_empty() { chunks.push(current_chunk.clone()); } // Check if any chunk still exceeds max_length (this could happen with very long lines) // and split it further if needed let mut final_chunks = Vec::new(); for chunk in chunks { if chunk.len() <= max_length { final_chunks.push(chunk); } else { let mut remaining = chunk; while !remaining.is_empty() { let split_point = std::cmp::min(max_length, remaining.len()); final_chunks.push(remaining[..split_point].to_string()); remaining = remaining[split_point..].to_string(); } } } final_chunks } /// Convert Markdown text into Notion blocks /// /// # Arguments /// /// * `text` - Markdown text to be converted /// /// # Returns /// /// * `Vec<Value>` - List of Notion blocks pub fn format_for_notion(text: &str) -> Vec<Value> { let lines: Vec<&str> = text.split('\n').collect(); let mut blocks = Vec::new(); let mut current_code_block: Option<Value> = None; let mut i = 0; while i < lines.len() { let line = lines[i].trim_end(); i += 1; // Detect start of code block (```language) if let Some(code_lang) = line.strip_prefix("```") { if current_code_block.is_none() { // Start a new code block current_code_block = Some(json!({ "type": "code", "code": { "rich_text": [], "language": get_valid_notion_language(code_lang.trim()) } })); continue; 
} } // Detect end of code block if line.trim() == "```" && current_code_block.is_some() { // Close the current code block blocks.push(current_code_block.take().unwrap()); continue; } // Add lines to current code block if let Some(ref mut code_block) = current_code_block { let line_with_newline = format!("{}{}", line, "\n"); code_block["code"]["rich_text"].as_array_mut().unwrap().push(json!({ "type": "text", "text": { "content": line_with_newline } })); continue; } // Ignore empty lines outside code blocks if line.trim().is_empty() { // Add a paragraph with a newline for spacing blocks.push(json!({ "type": "paragraph", "paragraph": { "rich_text": [] } })); continue; } // Headers if line.starts_with("# ") { blocks.push(json!({ "type": "heading_1", "heading_1": { "rich_text": [{ "text": { "content": &line[2..] } }] } })); } else if line.starts_with("## ") { blocks.push(json!({ "type": "heading_2", "heading_2": { "rich_text": [{ "text": { "content": &line[3..] } }] } })); } else if line.starts_with("### ") { blocks.push(json!({ "type": "heading_3", "heading_3": { "rich_text": [{ "text": { "content": &line[4..] 
} }] } })); } // Bulleted list else if line.starts_with("- ") || line.starts_with("* ") { let content = &line[2..]; blocks.push(json!({ "type": "bulleted_list_item", "bulleted_list_item": { "rich_text": [{ "text": { "content": content } }] } })); } // Numbered list else if Regex::new(r"^\d+\.\s").unwrap().is_match(line) { let content = Regex::new(r"^\d+\.\s").unwrap().replace(line, ""); blocks.push(json!({ "type": "numbered_list_item", "numbered_list_item": { "rich_text": [{ "text": { "content": content } }] } })); } // Regular paragraphs else { blocks.push(json!({ "type": "paragraph", "paragraph": { "rich_text": [{ "text": { "content": line } }] } })); } } // Close any remaining code block if let Some(code_block) = current_code_block { blocks.push(code_block); } blocks } fn get_valid_notion_language(language: &str) -> &str { // List of languages supported by the Notion API let valid_languages = [ "abap", "agda", "arduino", "assembly", "bash", "basic", "c", "c#", "c++", "clojure", "coffeescript", "css", "dart", "diff", "docker", "elixir", "elm", "erlang", "f#", "flow", "fortran", "go", "graphql", "groovy", "haskell", "html", "java", "javascript", "json", "julia", "kotlin", "latex", "less", "lisp", "lua", "makefile", "markdown", "matlab", "mermaid", "nix", "objective-c", "ocaml", "pascal", "perl", "php", "python", "r", "ruby", "rust", "scala", "scheme", "scss", "shell", "sql", "swift", "typescript", "vb.net", "verilog", "vhdl", "xml", "yaml" ]; // Normalize the language name let normalized = language.trim().to_lowercase(); if valid_languages.contains(&normalized.as_str()) { for &valid in &valid_languages { if valid == normalized { return valid; } } } if normalized.is_empty() { return "plain text"; } else { // Try to find a close match match normalized.as_str() { "js" | "jsx" => "javascript", "py" => "python", "ts" | "tsx" => "typescript", "sh" | "zsh" => "shell", "md" => "markdown", _ => "plain text" } } } #[cfg(test)] mod tests { use super::*; #[test] fn 
test_split_content_small_text() { let text = "This is a small text."; let chunks = split_content(text, 100); assert_eq!(chunks.len(), 1); assert_eq!(chunks[0], text); } #[test] fn test_split_content_large_text_no_headers() { let text = "A".repeat(3000); // Large text without headers let chunks = split_content(&text, 1000); assert!(chunks.len() > 1); for chunk in chunks { assert!(chunk.len() <= 1000); } } #[test] fn test_split_content_with_headers() { let text = format!( "# Title 1\n{}\n\n## Title 2\n{}\n\n### Title 3\n{}", "A".repeat(900), "B".repeat(900), "C".repeat(900) ); let chunks = split_content(&text, 1000); assert!(chunks.len() > 1); for chunk in chunks { assert!(chunk.len() <= 1000); } } #[test] fn test_format_for_notion_heading() { let text = "# Main Title"; let blocks = format_for_notion(text); assert_eq!(blocks.len(), 1); assert_eq!(blocks[0]["type"], "heading_1"); assert_eq!(blocks[0]["heading_1"]["rich_text"][0]["text"]["content"], "Main Title"); } #[test] fn test_format_for_notion_paragraph() { let text = "This is a normal paragraph."; let blocks = format_for_notion(text); assert_eq!(blocks.len(), 1); assert_eq!(blocks[0]["type"], "paragraph"); assert_eq!(blocks[0]["paragraph"]["rich_text"][0]["text"]["content"], text); } }

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Harry-027/JotDown'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

formatter.rs•13.3 KiB