use engram::db::{Layer, Memory, MemoryDB};
use engram::consolidate::dedup_buffer;
fn test_db() -> MemoryDB {
MemoryDB::open(":memory:").expect("in-memory db")
}
/// Create a buffer memory with a given embedding vector.
fn buf_mem(id: &str, ns: &str, tags: &[&str], access_count: i64, importance: f64, created_at: i64, emb: Vec<f32>) -> Memory {
Memory {
id: id.into(),
content: format!("memory {id}"),
layer: Layer::Buffer,
importance,
created_at,
last_accessed: created_at,
access_count,
repetition_count: 0,
decay_rate: Layer::Buffer.default_decay(),
source: "test".into(),
tags: tags.iter().map(|s| s.to_string()).collect(),
namespace: ns.into(),
embedding: Some(emb),
kind: "semantic".into(),
modified_at: created_at,
modified_epoch: 0,
}
}
/// Helper: count buffer-layer memories remaining in db.
fn count_buffers(db: &MemoryDB) -> usize {
db.list_by_layer_meta(Layer::Buffer, 10000, 0)
.unwrap_or_default()
.len()
}
// ── 1. No duplicates ──────────────────────────────────────────────────
#[test]
fn no_duplicates_all_kept() {
let db = test_db();
// Three orthogonal vectors — cosine similarity = 0
let mems = vec![
buf_mem("a", "default", &[], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("b", "default", &[], 1, 0.5, 2000, vec![0.0, 1.0, 0.0]),
buf_mem("c", "default", &[], 1, 0.5, 3000, vec![0.0, 0.0, 1.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 0, "no memories should be removed when all are dissimilar");
assert_eq!(count_buffers(&db), 3);
}
// ── 2. Exact duplicate ────────────────────────────────────────────────
#[test]
fn exact_duplicate_merged() {
let db = test_db();
let mems = vec![
buf_mem("older", "default", &["tag-a"], 3, 0.4, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("newer", "default", &["tag-b"], 2, 0.6, 2000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 1, "one duplicate should be removed");
assert_eq!(count_buffers(&db), 1);
// The newer memory should survive
assert!(db.get("newer").unwrap().is_some(), "newer memory should be kept");
assert!(db.get("older").unwrap().is_none(), "older memory should be deleted");
// Check merged fields on the surviving memory
let survivor = db.get("newer").unwrap().unwrap();
assert_eq!(survivor.access_count, 5, "access_count should be summed (3 + 2)");
assert_eq!(survivor.importance, 0.6, "importance should be max(0.4, 0.6)");
// Tags should be merged
assert!(survivor.tags.contains(&"tag-a".to_string()), "tag-a should be merged in");
assert!(survivor.tags.contains(&"tag-b".to_string()), "tag-b should be kept");
}
// ── 3. Near duplicate above threshold (cosine > 0.75) ─────────────────
#[test]
fn near_duplicate_above_threshold_merged() {
let db = test_db();
// Two vectors with high cosine similarity (> 0.75)
// [1.0, 0.1, 0.0] vs [1.0, 0.0, 0.0] → cosine ≈ 0.995
let mems = vec![
buf_mem("old", "default", &[], 1, 0.3, 1000, vec![1.0, 0.1, 0.0]),
buf_mem("new", "default", &[], 1, 0.5, 2000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 1, "near-duplicate above threshold should be merged");
assert!(db.get("new").unwrap().is_some(), "newer should survive");
assert!(db.get("old").unwrap().is_none(), "older should be deleted");
}
// ── 4. Below threshold — both kept ────────────────────────────────────
#[test]
fn below_threshold_both_kept() {
let db = test_db();
// Two vectors with cosine similarity below 0.75
// [1.0, 0.0, 0.0] vs [0.5, 0.866, 0.0] → cosine = 0.5
let mems = vec![
buf_mem("x", "default", &[], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("y", "default", &[], 1, 0.5, 2000, vec![0.5, 0.866, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 0, "dissimilar memories should both be kept");
assert_eq!(count_buffers(&db), 2);
}
// ── 5. Namespace isolation ────────────────────────────────────────────
#[test]
fn namespace_isolation_no_cross_merge() {
let db = test_db();
// Identical embeddings but different namespaces → should NOT merge
let mems = vec![
buf_mem("ns1-mem", "project-alpha", &[], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("ns2-mem", "project-beta", &[], 1, 0.5, 2000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 0, "memories in different namespaces must not merge");
assert_eq!(count_buffers(&db), 2);
}
// ── 6. Multiple duplicates in a cluster (3+) ─────────────────────────
#[test]
fn multiple_duplicates_cluster_reduced_to_one() {
let db = test_db();
let mems = vec![
buf_mem("m1", "default", &["a"], 1, 0.3, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("m2", "default", &["b"], 2, 0.5, 2000, vec![1.0, 0.0, 0.0]),
buf_mem("m3", "default", &["c"], 3, 0.7, 3000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 2, "three identical → two removed");
assert_eq!(count_buffers(&db), 1);
// The newest (m3) should survive
assert!(db.get("m3").unwrap().is_some(), "newest (m3) should survive");
assert!(db.get("m1").unwrap().is_none(), "oldest (m1) should be deleted");
assert!(db.get("m2").unwrap().is_none(), "middle (m2) should be deleted");
let survivor = db.get("m3").unwrap().unwrap();
// The function loads embeddings once upfront, so when m1 merges into m2
// (total_access=3) and then m2 merges into m3, m2's in-memory count is
// still the original 2, yielding 3+2=5 (not 1+2+3=6).
assert_eq!(survivor.access_count, 5, "access_count: m3(3) + m2(2, original in-memory) = 5");
assert_eq!(survivor.importance, 0.7, "importance should be max of cluster");
// Due to in-memory snapshot, only the last merge's tags are reflected.
// m1→m2 merges ["b"] + ["a"] = ["b","a"] (in DB only, not in-memory).
// m2→m3 merges ["c"] + ["b"] (original in-memory) = ["c","b"].
// So "a" is lost — an inherent limitation of single-pass dedup.
assert!(survivor.tags.contains(&"b".to_string()));
assert!(survivor.tags.contains(&"c".to_string()));
}
// ── 7. Empty buffer — no-op ──────────────────────────────────────────
#[test]
fn empty_buffer_noop() {
let db = test_db();
// No memories at all
let removed = dedup_buffer(&db);
assert_eq!(removed, 0);
}
#[test]
fn single_buffer_noop() {
let db = test_db();
let mems = vec![
buf_mem("solo", "default", &[], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 0, "single buffer memory should not trigger dedup");
assert_eq!(count_buffers(&db), 1);
}
// ── 8. Tag dedup on merge ─────────────────────────────────────────────
#[test]
fn tag_dedup_on_merge() {
let db = test_db();
// Both memories share "common" tag, plus unique tags
let mems = vec![
buf_mem("t1", "default", &["common", "unique-a"], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("t2", "default", &["common", "unique-b"], 1, 0.5, 2000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 1);
let survivor = db.get("t2").unwrap().unwrap();
// "common" should appear only once
let common_count = survivor.tags.iter().filter(|t| t.as_str() == "common").count();
assert_eq!(common_count, 1, "merged tags should be deduplicated — 'common' should appear only once");
assert!(survivor.tags.contains(&"unique-a".to_string()));
assert!(survivor.tags.contains(&"unique-b".to_string()));
assert_eq!(survivor.tags.len(), 3, "should have exactly 3 unique tags");
}
// ── Extra: only buffer layer is deduped ───────────────────────────────
#[test]
fn non_buffer_layers_ignored() {
let db = test_db();
// Identical embeddings but one is Working, one is Buffer
let mut working = buf_mem("w1", "default", &[], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]);
working.layer = Layer::Working;
working.decay_rate = Layer::Working.default_decay();
let buffer = buf_mem("b1", "default", &[], 1, 0.5, 2000, vec![1.0, 0.0, 0.0]);
db.import(&[working, buffer]).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 0, "Working-layer memory should not participate in buffer dedup");
assert!(db.get("w1").unwrap().is_some());
assert!(db.get("b1").unwrap().is_some());
}
// ── Extra: newer memory is kept, older discarded ──────────────────────
#[test]
fn keeps_newer_discards_older() {
let db = test_db();
// Make "a" newer than "b" to test direction
let mems = vec![
buf_mem("b-older", "default", &[], 1, 0.5, 1000, vec![1.0, 0.0, 0.0]),
buf_mem("a-newer", "default", &[], 1, 0.5, 5000, vec![1.0, 0.0, 0.0]),
];
db.import(&mems).unwrap();
let removed = dedup_buffer(&db);
assert_eq!(removed, 1);
assert!(db.get("a-newer").unwrap().is_some(), "newer should survive");
assert!(db.get("b-older").unwrap().is_none(), "older should be discarded");
}