"""
Semantic Atom Parser - Breaks text into atomic semantic units.
Inspired by semantic linebreaks: each atom represents one unit of thought
that corresponds to how LLMs process information via embeddings.
"""
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple

from loguru import logger
@dataclass
class SemanticUnit:
    """A single atomic unit of meaning extracted from text.

    Represents one "thought" (clause, phrase, or structured relationship).
    When a subject/predicate/object triple could be extracted, those fields
    are populated; otherwise only `text` carries the content.
    """
    text: str
    unit_type: str  # one of: 'clause', 'phrase', 'concept', 'relationship'
    subject: Optional[str] = None
    predicate: Optional[str] = None
    # NOTE: shadows the `object` builtin, but the name is part of the
    # public field interface so it is kept.
    object: Optional[str] = None
    confidence: float = 0.7
    # Indices of related units within the parsed sequence. Uses
    # default_factory so each instance gets its own list (a plain `= None`
    # default mistyped the field and relied entirely on __post_init__).
    related_units: List[int] = field(default_factory=list)

    def __post_init__(self):
        # Kept for backward compatibility: callers may still pass
        # related_units=None explicitly and expect a fresh list.
        if self.related_units is None:
            self.related_units = []
class SemanticAtomParser:
    """
    Parses text into atomic semantic units aligned with LLM embedding boundaries.

    Each unit represents one "thought" - a clause or phrase that forms a
    coherent semantic chunk for vector embedding.
    """

    def __init__(self, min_clause_length: int = 10):
        """
        Args:
            min_clause_length: clauses of this many characters or fewer are
                discarded as fragments. The default of 10 preserves the
                previous hard-coded cutoff.
        """
        self.min_clause_length = min_clause_length
        # Raw patterns for semantic clause boundaries (kept public, unchanged).
        self.clause_markers = [
            r',\s+(?:and|but|or|yet|so|for|nor)\s+',  # Coordinating conjunctions
            r';\s+',  # Semicolons
            r'\.\s+',  # Sentence boundaries
            r',\s+(?:which|that|who|where|when)\s+',  # Relative clauses
            r'\s+(?:because|since|although|while|if|unless|until)\s+',  # Subordinating
        ]
        # Compiled once instead of on every clause. The marker is wrapped in
        # a capture group so re.split keeps it as its own part; the same
        # compiled pattern doubles as the "is this part a marker?" test.
        self._clause_splitters = [re.compile(f'({p})') for p in self.clause_markers]
        # Raw (pattern, predicate) pairs for extracting relationships
        # (kept public, unchanged).
        self.relation_patterns = [
            (r'(.+?)\s+(?:is|are|was|were)\s+(.+)', 'is_a'),
            (r'(.+?)\s+(?:has|have|had)\s+(.+)', 'has'),
            (r'(.+?)\s+(?:uses|use|used)\s+(.+)', 'uses'),
            (r'(.+?)\s+(?:applies to|applied to)\s+(.+)', 'applies_to'),
            (r'(.+?)\s+(?:extends|extended)\s+(.+)', 'extends'),
            (r'(.+?)\s+(?:proposes|propose|proposed)\s+(.+)', 'proposes'),
            (r'(.+?)\s+(?:shows|show|showed)\s+(.+)', 'shows'),
            (r'(.+?)\s+(?:requires|require|required)\s+(.+)', 'requires'),
        ]
        # Pre-compiled, case-insensitive forms used by _extract_relationship.
        self._relation_matchers = [
            (re.compile(p, re.IGNORECASE), predicate)
            for p, predicate in self.relation_patterns
        ]

    def parse_text(self, text: str) -> List["SemanticUnit"]:
        """
        Parse text into atomic semantic units.

        Args:
            text: Input text to parse
        Returns:
            List of SemanticUnit objects (structured 'relationship' units
            where a subject/predicate/object triple was found, plain
            'clause' units otherwise), with related_units cross-links set.
        """
        units: List["SemanticUnit"] = []
        for sentence in self._split_sentences(text):
            for clause in self._split_clauses(sentence):
                # Prefer a structured subject/predicate/object unit.
                unit = self._extract_relationship(clause)
                if unit is None:
                    # No relation pattern matched: keep as an unstructured
                    # clause with slightly lower confidence.
                    unit = SemanticUnit(
                        text=clause.strip(),
                        unit_type='clause',
                        confidence=0.6,
                    )
                units.append(unit)
        self._link_related_units(units)
        logger.debug(f"Parsed {len(units)} semantic units from text")
        return units

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences on terminal punctuation."""
        # Lookbehind keeps the punctuation attached to its sentence.
        # Simple heuristic; could be enhanced with spaCy/nltk.
        sentences = re.split(r'(?<=[.!?])\s+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _split_clauses(self, sentence: str) -> List[str]:
        """Split a sentence into clauses at semantic boundaries.

        Boundary markers themselves (conjunctions, punctuation) are
        discarded; clauses of min_clause_length characters or fewer are
        filtered out as fragments.
        """
        clauses = [sentence]
        for splitter in self._clause_splitters:
            new_clauses = []
            for clause in clauses:
                # Split but keep the marker as its own part (capture group).
                parts = splitter.split(clause)
                current = ""
                for part in parts:
                    if splitter.match(part):
                        # Hit a boundary marker: close off the current chunk.
                        if current:
                            new_clauses.append(current.strip())
                        current = ""
                    else:
                        current += part
                if current:
                    new_clauses.append(current.strip())
            clauses = new_clauses if new_clauses else clauses
        return [c for c in clauses if len(c) > self.min_clause_length]

    def _extract_relationship(self, clause: str) -> Optional["SemanticUnit"]:
        """
        Try to extract a structured subject-predicate-object from a clause.

        Returns a 'relationship' SemanticUnit if one of the relation
        patterns matches, None otherwise.
        """
        clause_clean = clause.strip().rstrip('.,;')
        for matcher, predicate in self._relation_matchers:
            match = matcher.match(clause_clean)
            if match:
                return SemanticUnit(
                    text=clause_clean,
                    unit_type='relationship',
                    subject=match.group(1).strip(),
                    predicate=predicate,
                    object=match.group(2).strip(),
                    confidence=0.8,
                )
        return None

    def _link_related_units(self, units: List["SemanticUnit"]):
        """
        Identify and link semantically related units in place.

        Uses simple heuristics:
        - Sequential units (discourse coherence): each unit links back to
          its predecessor.
        - Units sharing the same extracted subject.
        """
        for i, unit in enumerate(units):
            # Link to previous unit (discourse flow).
            if i > 0:
                unit.related_units.append(i - 1)
            # Link units with the same subject.
            if unit.subject:
                for j, other in enumerate(units):
                    # Skip self-links and duplicates (the previous unit may
                    # already be linked via the discourse edge above).
                    if (i != j
                            and other.subject == unit.subject
                            and j not in unit.related_units):
                        unit.related_units.append(j)

    def units_to_atoms(
        self,
        units: List["SemanticUnit"],
        source_id: str,
        source_type: str = "arxiv"
    ) -> List[Dict]:
        """
        Convert semantic units to atom format for storage.

        Args:
            units: List of SemanticUnit objects
            source_id: Source identifier (e.g., arxiv ID)
            source_type: Type of source
        Returns:
            List of atom dictionaries ready for storage
        """
        atoms = []
        for i, unit in enumerate(units):
            # Metadata shared by both atom shapes.
            metadata = {
                'source_id': source_id,
                'source_type': source_type,
                'unit_index': i,
                'unit_type': unit.unit_type,
                'related_units': unit.related_units,
            }
            if unit.subject and unit.predicate and unit.object:
                # Structured relationship atom; keeps the full clause text.
                metadata['full_text'] = unit.text
                atom = {
                    'subject': 'pltm_knowledge',
                    'predicate': unit.predicate,
                    'object': f"{unit.subject} → {unit.object}",
                    'confidence': unit.confidence,
                    'metadata': metadata,
                }
            else:
                # Unstructured semantic unit atom.
                atom = {
                    'subject': 'pltm_knowledge',
                    'predicate': 'learned_from_semantic_unit',
                    'object': unit.text,
                    'confidence': unit.confidence,
                    'metadata': metadata,
                }
            atoms.append(atom)
        logger.info(f"Converted {len(units)} semantic units to {len(atoms)} atoms")
        return atoms
# Example usage: parse a short abstract and show the extracted units/atoms.
if __name__ == "__main__":
    parser = SemanticAtomParser()

    sample_text = """
    The theory of pattern formation in reaction-diffusion systems is extended
    to the case of a directed network. We propose a new methodology that uses
    cellular automata to model complex systems. This approach shows promising
    results for understanding emergent behavior.
    """

    units = parser.parse_text(sample_text)

    print(f"\nParsed {len(units)} semantic units:\n")
    for number, unit in enumerate(units, start=1):
        print(f"{number}. [{unit.unit_type}] {unit.text}")
        if unit.subject:
            print(f" → {unit.subject} --{unit.predicate}--> {unit.object}")
        if unit.related_units:
            print(f" Related to units: {unit.related_units}")
        print()

    atoms = parser.units_to_atoms(units, "test_paper")
    print(f"\nConverted to {len(atoms)} atoms")