Skip to main content
Glama
Replicant-Partners

Congo River Compositional Intelligence

triple_decomposer.py8.6 kB
#!/usr/bin/env python3
"""
Triple Decomposition Service
Decomposes complex concepts into RDF-style subject-predicate-object triples

Implements Stanley Fish's 3-word sentence principle for semantic analysis.
Uses spaCy for NLP parsing and linguistic structure extraction.
"""

import json
import sys
from dataclasses import asdict, dataclass
from itertools import combinations
from typing import Any, Dict, List, Optional, Tuple

try:
    import spacy
except ImportError:
    # Defer the failure to TripleDecomposer.__init__ so that importing this
    # module (e.g. just to use Triple) does not require spaCy, and so the CLI
    # can report a readable message instead of a traceback.
    spacy = None


@dataclass
class Triple:
    """RDF-style triple representation"""
    subject: str
    predicate: str
    object: str
    confidence: float = 1.0
    source: str = "triple_decomposer"

    def to_dict(self) -> Dict[str, Any]:
        """Return the triple as a plain dict (suitable for JSON output)."""
        return asdict(self)


class TripleDecomposer:
    """
    Decomposes natural language into semantic triples.

    Uses multiple strategies:
    1. Syntactic parsing (spaCy dependency trees)
    2. Entity-relation extraction
    3. Property attribution
    4. Taxonomic relationships (is_a, has_property, etc.)
    """

    def __init__(self):
        """Initialize with spaCy model.

        Exits the process with status 1 (after printing install instructions
        to stderr) when spaCy or the 'en_core_web_sm' model is unavailable.
        """
        if spacy is None:
            print("Error: spaCy is not installed.", file=sys.stderr)
            print("Install with: pip install spacy", file=sys.stderr)
            sys.exit(1)
        try:
            self.nlp = spacy.load("en_core_web_sm")
        except OSError:
            print("Error: spaCy model 'en_core_web_sm' not found.", file=sys.stderr)
            print("Install with: python -m spacy download en_core_web_sm", file=sys.stderr)
            sys.exit(1)

    def decompose(self, text: str, context: Optional[str] = None) -> List[Triple]:
        """
        Main decomposition method.

        Args:
            text: Natural language text to decompose
            context: Optional context/domain for disambiguation
                (accepted for interface compatibility; not used by any
                extraction strategy in this module)

        Returns:
            List of Triple objects, de-duplicated on
            (subject, predicate, object) with first occurrence kept.
            Empty or whitespace-only text yields an empty list.
        """
        # Nothing to parse: skip the NLP pipeline entirely.
        if not text or not text.strip():
            return []

        doc = self.nlp(text)

        triples: List[Triple] = []
        # Strategy 1: Extract from dependency parse
        triples.extend(self._extract_from_dependencies(doc))
        # Strategy 2: Extract entity relationships
        triples.extend(self._extract_entity_relations(doc))
        # Strategy 3: Extract copula relationships (X is Y)
        triples.extend(self._extract_copula_relations(doc))
        # Strategy 4: Extract property attributions
        triples.extend(self._extract_properties(doc))

        return self._deduplicate(triples)

    @staticmethod
    def _deduplicate(triples: List[Triple]) -> List[Triple]:
        """Drop repeated (subject, predicate, object) triples, keeping order.

        Confidence and source are ignored for the comparison, so the first
        strategy that produced a given triple wins.
        """
        seen = set()
        unique_triples = []
        for triple in triples:
            key = (triple.subject, triple.predicate, triple.object)
            if key not in seen:
                seen.add(key)
                unique_triples.append(triple)
        return unique_triples

    def _extract_from_dependencies(self, doc) -> List[Triple]:
        """Extract triples from spaCy dependency parse.

        For every subject token governed by a verb, emits one triple per
        direct object / attribute / object predicate of that verb
        (confidence 0.9), and one per prepositional object with the
        predicate "<verb lemma>_<preposition lemma>" (confidence 0.85).
        """
        triples = []

        for token in doc:
            # Subject-Verb-Object patterns: only subjects attached to a verb.
            if token.dep_ not in ("nsubj", "nsubjpass") or token.head.pos_ != "VERB":
                continue

            verb = token.head
            subject = self._get_compound_phrase(token)
            predicate = verb.lemma_

            for child in verb.children:
                if child.dep_ in ("dobj", "attr", "oprd"):
                    triples.append(Triple(
                        subject=subject,
                        predicate=predicate,
                        object=self._get_compound_phrase(child),
                        confidence=0.9
                    ))
                elif child.dep_ == "prep":
                    # Prepositional phrases: fold the preposition into the predicate.
                    predicate_full = f"{predicate}_{child.lemma_}"
                    for pobj in child.children:
                        if pobj.dep_ == "pobj":
                            triples.append(Triple(
                                subject=subject,
                                predicate=predicate_full,
                                object=self._get_compound_phrase(pobj),
                                confidence=0.85
                            ))

        return triples

    def _extract_entity_relations(self, doc) -> List[Triple]:
        """Extract relationships between named entities.

        Every ordered pair of entities (earlier entity as subject) gets one
        triple with confidence 0.8.
        """
        triples = []

        for ent1, ent2 in combinations(list(doc.ents), 2):
            # Find connecting verb or relation
            relation = self._find_relation_between(ent1, ent2, doc)
            if relation:
                triples.append(Triple(
                    subject=ent1.text,
                    predicate=relation,
                    object=ent2.text,
                    confidence=0.8
                ))

        return triples

    def _extract_copula_relations(self, doc) -> List[Triple]:
        """Extract 'X is Y' type relationships.

        Emits (subject, "is_a", complement) with confidence 0.95 for each
        auxiliary form of "be" that has both a subject and an attribute or
        adjectival complement among its children.
        """
        triples = []

        for token in doc:
            if token.lemma_ != "be" or token.pos_ != "AUX":
                continue

            subject = None
            obj = None
            for child in token.children:
                if child.dep_ in ("nsubj", "nsubjpass"):
                    subject = self._get_compound_phrase(child)
                elif child.dep_ in ("attr", "acomp"):
                    # NOTE(review): adjectival complements ("X is big") are
                    # also labelled "is_a" here; kept for compatibility.
                    obj = self._get_compound_phrase(child)

            if subject and obj:
                triples.append(Triple(
                    subject=subject,
                    predicate="is_a",
                    object=obj,
                    confidence=0.95
                ))

        return triples

    def _extract_properties(self, doc) -> List[Triple]:
        """Extract property attributions (X has property Y).

        Emits "has_property" (confidence 0.85) for adjectives whose head is
        a noun or proper noun, and "owned_by" (confidence 0.9) for
        possessive modifiers.
        """
        triples = []

        for token in doc:
            # Adjective modifiers
            if token.pos_ == "ADJ" and token.head.pos_ in ("NOUN", "PROPN"):
                triples.append(Triple(
                    subject=self._get_compound_phrase(token.head),
                    predicate="has_property",
                    object=token.text,
                    confidence=0.85
                ))

            # Possessive relationships
            if token.dep_ == "poss":
                triples.append(Triple(
                    subject=self._get_compound_phrase(token.head),
                    predicate="owned_by",
                    object=token.text,
                    confidence=0.9
                ))

        return triples

    def _get_compound_phrase(self, token) -> str:
        """Get the full compound phrase for a token.

        Joins the text of the token's direct "compound" children, in the
        order the children are yielded, followed by the token's own text.
        """
        parts = [child.text for child in token.children if child.dep_ == "compound"]
        parts.append(token.text)
        return " ".join(parts)

    def _find_relation_between(self, ent1, ent2, doc) -> Optional[str]:
        """Find the relation/verb connecting two entities.

        Simple heuristic: the lemma of the first verb inside the token span
        that covers both entities, or "relates_to" when that span contains
        no verb.
        """
        start = min(ent1.start, ent2.start)
        end = max(ent1.end, ent2.end)

        return next(
            (token.lemma_ for token in doc[start:end] if token.pos_ == "VERB"),
            "relates_to",  # Default relation
        )


def _parse_cli_args(args: List[str]) -> Tuple[str, Optional[str]]:
    """Turn the CLI arguments (without the program name) into (text, context).

    Raises:
        ValueError: if --json is given without a payload, the payload is not
            valid JSON, is not a JSON object, or lacks a non-empty "text" /
            "concept" string.
    """
    if args[0] != "--json":
        # Plain text input: every argument is part of the text.
        return " ".join(args), None

    if len(args) < 2:
        raise ValueError("--json requires a JSON payload argument")

    try:
        payload = json.loads(args[1])
    except json.JSONDecodeError as err:
        raise ValueError(f"invalid JSON payload: {err}") from err

    if not isinstance(payload, dict):
        raise ValueError("JSON payload must be an object")

    text = payload.get("text") or payload.get("concept")
    if not isinstance(text, str) or not text:
        raise ValueError("JSON payload must contain a non-empty 'text' or 'concept' string")

    return text, payload.get("context")


def main():
    """CLI interface for triple decomposition.

    Prints the result as JSON on stdout. On bad usage or invalid input,
    prints a message to stderr and exits with status 1 without loading the
    spaCy model.
    """
    if len(sys.argv) < 2:
        print("Usage: python triple_decomposer.py <text>", file=sys.stderr)
        print("   OR: python triple_decomposer.py --json '{\"text\": \"...\", \"context\": \"...\"}'", file=sys.stderr)
        sys.exit(1)

    # Parse input before the (slow) model load so input errors fail fast.
    try:
        text, context = _parse_cli_args(sys.argv[1:])
    except ValueError as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)

    # Decompose
    decomposer = TripleDecomposer()
    triples = decomposer.decompose(text, context)

    # Output as JSON
    result = {
        "success": True,
        "input": text,
        "context": context,
        "triples": [t.to_dict() for t in triples],
        "count": len(triples)
    }

    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/Replicant-Partners/Congo'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.