use docdexd::repo_manager::repo_fingerprint_sha256;
use serde_json::Value;
use std::error::Error;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
use tempfile::TempDir;
/// Resolve the `docdexd` binary under test.
///
/// Also sets process-wide env vars that child processes inherit: CLI stays in
/// local mode and the web server stays off for every spawned invocation.
fn docdex_bin() -> PathBuf {
    // Children spawned via Command inherit these defaults.
    std::env::set_var("DOCDEX_WEB_ENABLED", "0");
    std::env::set_var("DOCDEX_CLI_LOCAL", "1");
    let bin = assert_cmd::cargo::cargo_bin!("docdexd");
    bin.to_path_buf()
}
/// Render `path` as a forward-slash string, canonicalizing when the path
/// exists; non-canonicalizable paths are rendered as-is.
fn normalize_path(path: &Path) -> String {
    let resolved = match path.canonicalize() {
        Ok(real) => real,
        // Path may not exist (yet); fall back to the raw path.
        Err(_) => path.to_path_buf(),
    };
    resolved.to_string_lossy().replace('\\', "/")
}
/// Run `docdexd` with the given CLI `args`, returning captured stdout.
///
/// Memory and symbol-extraction features are pinned off so test runs are
/// deterministic. A non-zero exit becomes an error carrying status + stderr.
fn run_docdex<I, S>(args: I) -> Result<Vec<u8>, Box<dyn Error>>
where
    I: IntoIterator<Item = S>,
    S: AsRef<std::ffi::OsStr>,
{
    let output = Command::new(docdex_bin())
        .env("DOCDEX_WEB_ENABLED", "0")
        .env("DOCDEX_ENABLE_MEMORY", "0")
        .env_remove("DOCDEX_ENABLE_SYMBOL_EXTRACTION")
        .args(args)
        .output()?;
    if output.status.success() {
        Ok(output.stdout)
    } else {
        let stderr = String::from_utf8_lossy(&output.stderr);
        Err(format!("docdexd exited with {}: {}", output.status, stderr).into())
    }
}
/// Build a fixture repo at `repo_root`: an empty `.git` marker directory plus
/// one markdown file containing `shared_term` and the caller's `token`.
fn write_repo(repo_root: &Path, filename: &str, token: &str) -> Result<(), Box<dyn Error>> {
    // Creating `.git` with create_dir_all also creates repo_root itself.
    fs::create_dir_all(repo_root.join(".git"))?;
    let body = format!(
        r#"
# Fixture
shared_term
{token}
"#
    );
    fs::write(repo_root.join(filename), body)?;
    Ok(())
}
/// Build a fixture "repo" at `repo_root` WITHOUT a `.git` directory: just one
/// markdown file containing `shared_term` and the caller's `token`.
fn write_repo_no_git(repo_root: &Path, filename: &str, token: &str) -> Result<(), Box<dyn Error>> {
    fs::create_dir_all(repo_root)?;
    let contents = format!(
        r#"
# Fixture
shared_term
{token}
"#
    );
    fs::write(repo_root.join(filename), contents)?;
    Ok(())
}
fn hits_from_query(stdout: &[u8]) -> Result<Vec<Value>, Box<dyn Error>> {
let payload: Value = serde_json::from_slice(stdout)?;
let hits = payload
.get("hits")
.and_then(|value| value.as_array())
.ok_or("hits array missing")?;
Ok(hits.to_vec())
}
/// Find the last line of stderr that looks like a JSON object and parse it.
///
/// The CLI may interleave log lines with the structured error payload, so we
/// scan from the end for the first line opening with `{`.
fn parse_error(stderr: &[u8]) -> Result<Value, Box<dyn Error>> {
    let text = String::from_utf8_lossy(stderr);
    for line in text.lines().rev() {
        if line.trim_start().starts_with('{') {
            return Ok(serde_json::from_str(line.trim())?);
        }
    }
    Err("expected JSON error line in stderr".into())
}
/// Look up the registry entry whose `canonical_path` equals `canonical_path`,
/// returning its `(fingerprint, state_key)` pair.
///
/// # Errors
/// Fails when the registry file under `state_root/repos/repo_registry.json`
/// is missing or unparseable, when no entry matches, or when a matching entry
/// lacks a `state_key` field.
fn registry_entry_for_path(
    state_root: &Path,
    canonical_path: &str,
) -> Result<(String, String), Box<dyn Error>> {
    let registry_path = state_root.join("repos").join("repo_registry.json");
    // BUG FIX: these two borrows were mojibake-corrupted (`®istry_path` /
    // `®istry_raw`, where `&reg` had collapsed into U+00AE). Restored the
    // intended `&registry_path` / `&registry_raw` references.
    let registry_raw = fs::read_to_string(&registry_path)?;
    let registry_json: Value = serde_json::from_str(&registry_raw)?;
    let repos = registry_json
        .get("repos")
        .and_then(|value| value.as_object())
        .ok_or("registry missing repos object")?;
    for (fingerprint, entry) in repos {
        let canon = entry
            .get("canonical_path")
            .and_then(|v| v.as_str())
            .unwrap_or_default();
        if canon == canonical_path {
            let state_key = entry
                .get("state_key")
                .and_then(|v| v.as_str())
                .ok_or("registry entry missing state_key")?;
            return Ok((fingerprint.clone(), state_key.to_string()));
        }
    }
    Err(format!("no registry entry found for canonical_path={canonical_path}").into())
}
/// Indexing two repos into one shared `--state-dir` must yield one scoped
/// state directory per repo, and queries must never mix hits across repos.
#[test]
fn absolute_state_dir_is_repo_scoped_and_prevents_cross_repo_mixing() -> Result<(), Box<dyn Error>>
{
    let state_root = TempDir::new()?;
    let state_root = state_root.path().canonicalize()?;
    let repo_a = TempDir::new()?;
    let repo_b = TempDir::new()?;
    write_repo(repo_a.path(), "a-only.md", "repo_a_token")?;
    write_repo(repo_b.path(), "b-only.md", "repo_b_token")?;
    let repo_a_str = repo_a.path().to_string_lossy().to_string();
    let repo_b_str = repo_b.path().to_string_lossy().to_string();
    let state_root_str = state_root.to_string_lossy().to_string();

    // Index both repos into the same shared base state dir.
    for repo in [repo_a_str.as_str(), repo_b_str.as_str()] {
        run_docdex(["index", "--repo", repo, "--state-dir", &state_root_str])?;
    }

    // Every repo should have received its own state subdirectory.
    let repos_dir = state_root.join("repos");
    let mut repo_dirs: Vec<PathBuf> = fs::read_dir(&repos_dir)?
        .flatten()
        .filter(|entry| entry.file_type().map(|t| t.is_dir()).unwrap_or(false))
        .map(|entry| entry.path())
        .collect();
    repo_dirs.sort();
    assert_eq!(
        repo_dirs.len(),
        2,
        "expected exactly 2 repo state dirs under shared base state dir"
    );
    for dir in &repo_dirs {
        assert!(
            dir.join("index").exists(),
            "expected {dir} to contain index subdir",
            dir = dir.display()
        );
    }

    // Query a shared term against each repo.
    let query = |repo: &str| {
        run_docdex([
            "query",
            "--repo",
            repo,
            "--state-dir",
            &state_root_str,
            "--query",
            "shared_term",
            "--limit",
            "10",
        ])
    };
    // True when every hit's path ends with the given filename suffix.
    let all_hits_end_with = |hits: &[Value], suffix: &str| {
        hits.iter().all(|hit| {
            hit.get("path")
                .and_then(|value| value.as_str())
                .unwrap_or_default()
                .ends_with(suffix)
        })
    };

    let hits_a = hits_from_query(&query(repo_a_str.as_str())?)?;
    assert!(
        all_hits_end_with(&hits_a, "a-only.md"),
        "repo A query must not return docs from repo B"
    );
    let hits_b = hits_from_query(&query(repo_b_str.as_str())?)?;
    assert!(
        all_hits_end_with(&hits_b, "b-only.md"),
        "repo B query must not return docs from repo A"
    );
    Ok(())
}
/// Renaming a repo folder and reindexing must create a NEW state key while
/// leaving the original state key and its registry entry intact.
#[test]
fn moved_repo_reuses_existing_state_key_under_shared_state_dir() -> Result<(), Box<dyn Error>> {
    // List the per-repo state directories under `repos_dir` (skips entries
    // whose file type cannot be read, matching the prior filter_map behavior).
    fn list_state_dirs(repos_dir: &Path) -> Result<Vec<PathBuf>, Box<dyn Error>> {
        Ok(fs::read_dir(repos_dir)?
            .flatten()
            .filter(|entry| entry.file_type().map(|t| t.is_dir()).unwrap_or(false))
            .map(|entry| entry.path())
            .collect())
    }

    let state_root = TempDir::new()?;
    let state_root = state_root.path().canonicalize()?;
    let workspace = TempDir::new()?;
    let repo_a = workspace.path().join("repo-a");
    let repo_b = workspace.path().join("repo-moved");
    write_repo(&repo_a, "doc.md", "move_token")?;
    let state_root_str = state_root.to_string_lossy().to_string();
    let repo_a_str = repo_a.to_string_lossy().to_string();
    let repo_b_str = repo_b.to_string_lossy().to_string();
    run_docdex([
        "index",
        "--repo",
        repo_a_str.as_str(),
        "--state-dir",
        &state_root_str,
    ])?;

    let repos_dir = state_root.join("repos");
    let mut repo_dirs = list_state_dirs(&repos_dir)?;
    repo_dirs.sort();
    assert_eq!(
        repo_dirs.len(),
        1,
        "expected one repo state dir after first index"
    );
    let canon_a = normalize_path(&repo_a);
    let (fp_a, state_key) = registry_entry_for_path(&state_root, &canon_a)?;

    // Simulate a repo move: identical contents under a new folder name.
    fs::rename(&repo_a, &repo_b)?;
    let moved_out = Command::new(docdex_bin())
        .env("DOCDEX_WEB_ENABLED", "0")
        .env("DOCDEX_ENABLE_MEMORY", "0")
        .env_remove("DOCDEX_ENABLE_SYMBOL_EXTRACTION")
        .args([
            "index",
            "--repo",
            repo_b_str.as_str(),
            "--state-dir",
            &state_root_str,
        ])
        .output()?;
    assert!(
        moved_out.status.success(),
        "expected moved repo to reindex successfully"
    );

    let repo_dirs_after = list_state_dirs(&repos_dir)?;
    assert_eq!(
        repo_dirs_after.len(),
        2,
        "expected repo move with new folder name to create a new state key"
    );
    let repo_dirs_after_names: Vec<String> = repo_dirs_after
        .iter()
        .filter_map(|path| {
            path.file_name()
                .and_then(|s| s.to_str())
                .map(|v| v.to_string())
        })
        .collect();
    assert!(
        repo_dirs_after_names
            .iter()
            .any(|value| value == &state_key),
        "expected original state key to remain"
    );

    let registry_path = state_root.join("repos").join("repo_registry.json");
    // BUG FIX: these borrows were mojibake-corrupted (`®istry_path` /
    // `®istry_raw`); restored `&registry_path` / `&registry_raw`.
    let registry_raw = fs::read_to_string(&registry_path)?;
    let registry_json: Value = serde_json::from_str(&registry_raw)?;
    let repos = registry_json
        .get("repos")
        .and_then(|value| value.as_object())
        .ok_or("registry missing repos object")?;
    let entry = repos
        .values()
        .find(|value| {
            value
                .get("canonical_path")
                .and_then(|v| v.as_str())
                .unwrap_or_default()
                == normalize_path(&repo_b)
        })
        .ok_or("registry entry missing for moved repo")?;
    let state_key_after = entry
        .get("state_key")
        .and_then(|v| v.as_str())
        .unwrap_or_default()
        .to_string();
    assert_ne!(
        state_key_after, state_key,
        "expected new state key after renaming repo folder"
    );
    let canonical = entry
        .get("canonical_path")
        .and_then(|v| v.as_str())
        .unwrap_or_default()
        .to_string();
    // Consistency: `normalize_path` computes exactly the canonicalize +
    // forward-slash transform that was previously inlined here.
    let expected = normalize_path(&repo_b);
    assert_eq!(
        canonical, expected,
        "expected registry canonical path to update after move"
    );

    // The original fingerprint's entry must still point at the old path.
    let entry = repos
        .get(fp_a.as_str())
        .ok_or("expected registry entry for original repo")?;
    assert_eq!(
        entry
            .get("canonical_path")
            .and_then(|v| v.as_str())
            .unwrap_or_default(),
        canon_a
    );
    Ok(())
}
/// Attempting to reassociate a fingerprint with a DIFFERENT repo must fail
/// closed with `repo_state_mismatch` and leave the registry untouched.
#[test]
fn reassociate_fails_closed_when_fingerprint_mismatches() -> Result<(), Box<dyn Error>> {
    let state_root = TempDir::new()?;
    let state_root = state_root.path().canonicalize()?;
    let workspace = TempDir::new()?;
    let repo_a = workspace.path().join("repo-a");
    let repo_b = workspace.path().join("repo-b");
    write_repo(&repo_a, "a.md", "repo_a_token")?;
    write_repo(&repo_b, "b.md", "repo_b_token")?;
    let state_root_str = state_root.to_string_lossy().to_string();
    run_docdex([
        "index",
        "--repo",
        repo_a.to_string_lossy().as_ref(),
        "--state-dir",
        &state_root_str,
    ])?;
    let canon_a = normalize_path(&repo_a);
    let (fp_a, _state_key_a) = registry_entry_for_path(&state_root, &canon_a)?;

    // Try to claim repo-a's fingerprint for repo-b; this must be rejected.
    let output = Command::new(docdex_bin())
        .env("DOCDEX_WEB_ENABLED", "0")
        .env("DOCDEX_ENABLE_MEMORY", "0")
        .env_remove("DOCDEX_ENABLE_SYMBOL_EXTRACTION")
        .args([
            "repo",
            "reassociate",
            "--repo",
            repo_b.to_string_lossy().as_ref(),
            "--state-dir",
            &state_root_str,
            "--fingerprint",
            fp_a.as_str(),
        ])
        .output()?;
    assert!(
        !output.status.success(),
        "expected reassociate to fail when fingerprint does not match"
    );
    let payload = parse_error(&output.stderr)?;
    assert_eq!(
        payload
            .get("error")
            .and_then(|e| e.get("code"))
            .and_then(|v| v.as_str()),
        Some("repo_state_mismatch")
    );
    let attempted = payload
        .get("error")
        .and_then(|e| e.get("details"))
        .and_then(|d| d.get("attemptedFingerprint"))
        .and_then(|v| v.as_str())
        .ok_or("expected details.attemptedFingerprint")?;
    assert_ne!(
        attempted,
        fp_a.as_str(),
        "attemptedFingerprint should reflect the new repo, not the target registry fingerprint"
    );

    // Registry should remain unchanged: no reassociation to repo-b.
    let registry_path = state_root.join("repos").join("repo_registry.json");
    // BUG FIX: these borrows were mojibake-corrupted (`®istry_path` /
    // `®istry_raw`); restored `&registry_path` / `&registry_raw`.
    let registry_raw = fs::read_to_string(&registry_path)?;
    let registry_json: Value = serde_json::from_str(&registry_raw)?;
    let repos = registry_json
        .get("repos")
        .and_then(|value| value.as_object())
        .ok_or("registry missing repos object")?;
    let entry = repos
        .get(&fp_a)
        .ok_or("expected registry entry for repo-a fingerprint")?;
    assert_eq!(
        entry
            .get("canonical_path")
            .and_then(|v| v.as_str())
            .unwrap_or_default(),
        canon_a.as_str(),
        "failed reassociate must not modify canonical_path"
    );
    let canon_b = normalize_path(&repo_b);
    assert!(
        !repos.values().any(|v| v
            .get("canonical_path")
            .and_then(|p| p.as_str())
            .unwrap_or_default()
            == canon_b),
        "failed reassociate must not create a new registry entry"
    );
    Ok(())
}
/// Indexing a repo that has no `.git` directory must still create/extend a
/// `.gitignore` containing entries for `.docdex` and `.docdex-state`.
#[test]
fn index_adds_docdex_to_gitignore_without_git_dir() -> Result<(), Box<dyn Error>> {
    let state_root = TempDir::new()?;
    let state_root = state_root.path().canonicalize()?;
    let workspace = TempDir::new()?;
    let repo = workspace.path().join("repo-no-git");
    write_repo_no_git(&repo, "doc.md", "repo_token")?;
    let state_root_str = state_root.to_string_lossy().to_string();
    run_docdex([
        "index",
        "--repo",
        repo.to_string_lossy().as_ref(),
        "--state-dir",
        &state_root_str,
    ])?;
    let contents = fs::read_to_string(repo.join(".gitignore"))?;
    // Accept any of the common spellings of an ignore entry for `name`:
    // bare, trailing slash, leading slash, or both.
    let has_entry = |name: &str| {
        let trailing = format!("{name}/");
        let anchored = format!("/{name}");
        let anchored_trailing = format!("/{name}/");
        contents.lines().any(|line| {
            let trimmed = line.trim();
            trimmed == name
                || trimmed == trailing
                || trimmed == anchored
                || trimmed == anchored_trailing
        })
    };
    assert!(has_entry(".docdex"), "expected .docdex entry in .gitignore");
    assert!(
        has_entry(".docdex-state"),
        "expected .docdex-state entry in .gitignore"
    );
    Ok(())
}
/// Replacing `.git` (as a fresh clone would) must keep the deterministic
/// name-based fingerprint, so the existing registry entry is reused.
#[test]
fn reclone_keeps_same_fingerprint_for_repo_name() -> Result<(), Box<dyn Error>> {
    let state_root = TempDir::new()?;
    let state_root = state_root.path().canonicalize()?;
    let workspace = TempDir::new()?;
    let repo = workspace.path().join("repo");
    write_repo(&repo, "a.md", "repo_a_token")?;
    let state_root_str = state_root.to_string_lossy().to_string();
    run_docdex([
        "index",
        "--repo",
        repo.to_string_lossy().as_ref(),
        "--state-dir",
        &state_root_str,
    ])?;
    let fingerprint_old = repo_fingerprint_sha256(&repo)?;

    // Simulate a reclone: swap out `.git` and add a new file.
    let git_dir = repo.join(".git");
    let old_git_dir = repo.join(".git_old");
    fs::rename(&git_dir, &old_git_dir)?;
    fs::create_dir_all(&git_dir)?;
    fs::write(repo.join("b.md"), "repo_b_token")?;
    let fingerprint_new = repo_fingerprint_sha256(&repo)?;
    assert_eq!(
        fingerprint_old, fingerprint_new,
        "expected reclone to keep deterministic repo name fingerprint"
    );

    let output = Command::new(docdex_bin())
        .env("DOCDEX_WEB_ENABLED", "0")
        .env("DOCDEX_ENABLE_MEMORY", "0")
        .env_remove("DOCDEX_ENABLE_SYMBOL_EXTRACTION")
        .args([
            "index",
            "--repo",
            repo.to_string_lossy().as_ref(),
            "--state-dir",
            &state_root_str,
        ])
        .output()?;
    assert!(
        output.status.success(),
        "expected reindex to succeed after reclone; stderr: {}",
        String::from_utf8_lossy(&output.stderr)
    );

    let registry_path = state_root.join("repos").join("repo_registry.json");
    // BUG FIX: these borrows were mojibake-corrupted (`®istry_path` /
    // `®istry_raw`); restored `&registry_path` / `&registry_raw`.
    let registry_raw = fs::read_to_string(&registry_path)?;
    let registry_json: Value = serde_json::from_str(&registry_raw)?;
    let repos = registry_json
        .get("repos")
        .and_then(|value| value.as_object())
        .ok_or("registry missing repos object")?;
    let entry = repos
        .get(&fingerprint_new)
        .ok_or("expected registry entry for repo fingerprint")?;
    assert_eq!(
        entry
            .get("canonical_path")
            .and_then(|v| v.as_str())
            .unwrap_or_default(),
        normalize_path(&repo)
    );
    Ok(())
}
/// Querying repo-a while pointing `--state-dir` at repo-b's scoped index dir
/// must fail closed (`missing_index`) rather than serve repo-b's documents.
#[test]
fn never_cross_associates_repo_requests_via_other_repo_scoped_state_dir(
) -> Result<(), Box<dyn Error>> {
    let state_root = TempDir::new()?;
    let state_root = state_root.path().canonicalize()?;
    let workspace = TempDir::new()?;
    let repo_a = workspace.path().join("repo-a");
    let repo_b = workspace.path().join("repo-b");
    write_repo(&repo_a, "a-only.md", "repo_a_token")?;
    write_repo(&repo_b, "b-only.md", "repo_b_token")?;
    let state_root_str = state_root.to_string_lossy().to_string();

    // Index only repo-b, then locate its scoped state key.
    run_docdex([
        "index",
        "--repo",
        repo_b.to_string_lossy().as_ref(),
        "--state-dir",
        &state_root_str,
    ])?;
    let canon_b = normalize_path(&repo_b);
    let (_fp_b, state_key_b) = registry_entry_for_path(&state_root, &canon_b)?;
    let scoped_b_index = {
        let scoped = state_root.join("repos").join(&state_key_b).join("index");
        scoped.to_string_lossy().to_string()
    };

    // Querying repo-a with repo-b's scoped state dir must not return repo-b hits.
    // Repo-a is unindexed, so the only safe outcome is a missing_index error.
    let output = Command::new(docdex_bin())
        .env("DOCDEX_WEB_ENABLED", "0")
        .env("DOCDEX_ENABLE_MEMORY", "0")
        .env_remove("DOCDEX_ENABLE_SYMBOL_EXTRACTION")
        .args([
            "query",
            "--repo",
            repo_a.to_string_lossy().as_ref(),
            "--state-dir",
            scoped_b_index.as_str(),
            "--query",
            "shared_term",
            "--limit",
            "10",
        ])
        .output()?;
    assert!(
        !output.status.success(),
        "expected repo-a query to fail closed instead of using repo-b state"
    );
    let payload = parse_error(&output.stderr)?;
    let code = payload
        .get("error")
        .and_then(|e| e.get("code"))
        .and_then(|v| v.as_str());
    assert_eq!(
        code,
        Some("missing_index"),
        "repo-a must not be served from repo-b state; expected missing_index"
    );
    Ok(())
}