# graphsync_post_store.py (1.8 kB)
#!/usr/bin/env python3
import json, sys
from datetime import datetime
import spacy
from neo4j import GraphDatabase
# Connection settings are read from ../config/memory.config.json, resolved
# relative to sys.path[0].
# NOTE(review): sys.path[0] is the invoking script's directory only when this
# file is run as a script; if it is imported, the path resolves relative to
# the importer instead -- confirm that is intended.
with open(
    f"{sys.path[0]}/../config/memory.config.json", "r", encoding="utf-8"
) as _config_file:
    # The context manager closes the handle right after parsing instead of
    # leaving it open until garbage collection.
    CONFIG = json.load(_config_file)

# Knowledge-graph connection parameters taken from the "kg" config section.
KG_URI = CONFIG["kg"]["uri"]
KG_USER = CONFIG["kg"]["user"]
KG_PASS = CONFIG["kg"]["pass"]

# Shared spaCy pipeline and Neo4j driver used by the functions below.
nlp = spacy.load("en_core_web_sm")
driver = GraphDatabase.driver(KG_URI, auth=(KG_USER, KG_PASS))
# Relation label -> label to write on the reverse edge (object -> subject).
# A relation that maps to itself (e.g. "related_to") is symmetric.
INVERSE_REL = dict(
    employs="employed_by",
    depends_on="supports",
    improves="improved_by",
    related_to="related_to",
)
def extract_triples(text):
    """Extract naive co-occurrence triples from *text*.

    Runs the module-level spaCy pipeline (``nlp``) over *text* and links every
    ordered pair of distinct named entities with the generic ``"related_to"``
    relation, so each unordered pair appears in both directions.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of ``(subject, "related_to", object)`` tuples, ordered by the
        first appearance of the subject and then of the object. The list is
        empty when fewer than two distinct entity strings are found.
    """
    doc = nlp(text)
    # An entity mentioned several times must count once: comparing positions
    # instead of values would emit self-loops ("X related_to X") and repeated
    # triples. dict.fromkeys de-duplicates while keeping first-seen order.
    entities = list(dict.fromkeys(ent.text for ent in doc.ents))
    return [
        (subj, "related_to", obj)
        for subj in entities
        for obj in entities
        if subj != obj
    ]
def store_triple(tx, subj, rel, obj, source_id=None):
    """MERGE one directed ``subj -[rel]-> obj`` edge into the graph.

    Both endpoints are merged as ``Entity`` nodes keyed by ``name``; the edge
    is a ``RELATION`` relationship whose ``type`` property carries *rel*.
    ``added`` and ``source_id`` are written only when the relationship is
    first created (``ON CREATE SET``), so re-running with the same triple
    leaves an existing edge's metadata untouched.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        subj: Name of the source entity.
        rel: Relation label stored in the relationship's ``type`` property.
        obj: Name of the target entity.
        source_id: Optional identifier of the originating content; stored as
            ``r.source_id`` (may be ``None``).
    """
    # Local import keeps this block self-contained; the file's top-level
    # import only brings in ``datetime``.
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the stored string keeps the same naive ISO
    # format as timestamps already written to the graph.
    added_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    query = (
        "\n"
        "MERGE (a:Entity {name: $subj})\n"
        "MERGE (b:Entity {name: $obj})\n"
        "MERGE (a)-[r:RELATION {type: $rel}]->(b)\n"
        "ON CREATE SET r.added=$ts, r.source_id=$sid\n"
    )
    tx.run(query, subj=subj, rel=rel, obj=obj, ts=added_at, sid=source_id)
def sync_to_kg(content, source_id=None):
    """Extract triples from *content* and persist them, with inverses, to the graph.

    Every triple returned by ``extract_triples`` is written via
    ``store_triple``; when ``INVERSE_REL`` has an entry for the relation, the
    reverse edge (object -> subject, inverse label) is written as well.

    All edges are written inside a single managed write transaction, so a
    failure part-way through does not leave the graph with only some of the
    edges (or an edge without its inverse).

    Args:
        content: Raw text to extract entities from.
        source_id: Optional identifier of the originating content, forwarded
            to ``store_triple``.
    """
    # Expand each triple with its inverse and drop repeated directed edges.
    # A dict is used as an insertion-ordered set so writes keep extraction
    # order. Duplicates are common: "related_to" is its own inverse and the
    # extractor may already emit both directions.
    edges = {}
    for subj, rel, obj in extract_triples(content):
        edges[(subj, rel, obj)] = None
        inverse = INVERSE_REL.get(rel)
        if inverse:
            edges[(obj, inverse, subj)] = None

    if not edges:
        # Nothing to write; avoid opening a session at all.
        return

    def _write_edges(tx):
        # May be re-invoked by the driver on transient errors; store_triple
        # only issues MERGE statements, so a retry is safe.
        for subj, rel, obj in edges:
            store_triple(tx, subj, rel, obj, source_id)

    with driver.session() as session:
        session.execute_write(_write_edges)
if __name__ == "__main__":
    # CLI entry point: graphsync_post_store.py '<content>' [source_id]
    try:
        if len(sys.argv) < 2:
            print("Usage: graphsync_post_store.py '<content>' [source_id]")
            sys.exit(1)
        content = sys.argv[1]
        # The source id is optional; None is passed through when it is absent.
        sid = sys.argv[2] if len(sys.argv) > 2 else None
        sync_to_kg(content, sid)
        print("GraphSync: triples stored (symmetry enforced)")
    finally:
        # Always release the driver's connections, including on the usage
        # error path (sys.exit raises SystemExit) and when the sync fails.
        driver.close()