package hooks

import (
	"fmt"
	"os"
	"strings"

	"github.com/sgx-labs/statelessagent/internal/cli"
	"github.com/sgx-labs/statelessagent/internal/config"
	"github.com/sgx-labs/statelessagent/internal/memory"
	"github.com/sgx-labs/statelessagent/internal/store"
)

const (
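	// minPromptChars is the minimum prompt length; shorter prompts are
	// skipped as too short to carry searchable intent.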
minPromptChars = 20
maxResults = 3 // data shows expected notes often land at #3; sweep confirmed no precision loss
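	// maxSnippetChars bounds the snippet text attached to a single candidate.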
maxSnippetChars = 400
maxDistance = 16.3 // L2 distance; relaxed from 16.0→16.2→16.3 — matches within this range are relevant; off-topic > 16.8
minComposite = 0.70 // composite threshold; distance gate handles negative discrimination
minSemanticFloor = 0.25 // absolute floor: if semantic score < this, skip regardless of boost
maxTokenBudget = 800 // tightened from 1000; less context waste
minTitleOverlap = 0.10 // bidirectional overlap threshold for title matching
highTierOverlap = 0.199 // effective 0.20 with floating point margin (e.g., 3/5*3/9 = 0.19999...)
// maxPerNoteTokens caps any single note's contribution to the token budget.
// Prevents a large note from consuming the entire budget and crowding out
// other relevant results. At 400 tokens (~1600 chars), even a 10K-char
// note will leave room for 1-2 more results within the 800 token budget.
maxPerNoteTokens = 400
)

// Recency-aware weights: when the query has recency intent, scoring shifts weight heavily toward recency.
const (
recencyRelWeight = 0.1
recencyRecWeight = 0.7
recencyConfWeight = 0.2
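	// These three weights sum to 1.0, making recency the dominant signal.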
recencyMinComposite = 0.45 // lower threshold since semantic score may be weak
	recencyMaxResults = 3 // recency queries ("what did I work on") keep the full result count; the ambiguity cap below does not reduce them
)
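
// priorityTypes marks content types treated as high-value during candidate
// selection; the overlap gap cap below refers to these as "priority-type"
// candidates.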
var priorityTypes = map[string]bool{
"handoff": true,
"decision": true,
"research": true,
"hub": true,
}

// noisyPathPrefixes returns the user-configured noise path prefixes.
// Defaults to empty — no paths are filtered unless configured via
// [vault] noise_paths in config.toml or SAME_NOISE_PATHS env var.
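//
// Example config.toml entry (path values are illustrative):
//
//	[vault]
//	noise_paths = ["Templates/", "Archive/"]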
func noisyPathPrefixes() []string {
return config.NoisePaths()
}
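
// scored is a single search candidate annotated with the ranking signals
// (composite, semantic, distance, title overlap) consumed by the trimming
// passes in runContextSurfacing, plus metadata used for display.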
type scored struct {
path string
title string
contentType string
confidence float64
snippet string
composite float64
semantic float64
distance float64
titleOverlap float64
contentBoosted bool // true when titleOverlap was set by Mode 5 content boost
tokens int // estimated tokens (set after selection)
matchTerms []string // terms from prompt that matched this note
}

// runContextSurfacing embeds the user's prompt, searches the vault,
// and injects relevant context.
func runContextSurfacing(db *store.DB, input *HookInput) hookRunResult {
prompt := input.Prompt
if len(prompt) < minPromptChars {
logDecision(db, input.SessionID, prompt, "", -1, "skip_short", nil)
return hookSkipped("short prompt")
}
// Skip slash commands
if strings.HasPrefix(strings.TrimSpace(prompt), "/") {
logDecision(db, input.SessionID, prompt, "", -1, "skip_slash", nil)
return hookSkipped("slash command")
}
// Skip conversational/social prompts that don't need vault context
if isConversational(prompt) {
logDecision(db, input.SessionID, prompt, "socializing", -1, "skip_conversational", nil)
return hookSkipped("conversational prompt")
}
// Check display mode from config, with env var override
displayMode := config.DisplayMode() // "full", "compact", or "quiet"
if os.Getenv("SAME_QUIET") == "1" || os.Getenv("SAME_QUIET") == "true" {
displayMode = "quiet"
} else if os.Getenv("SAME_COMPACT") == "1" || os.Getenv("SAME_COMPACT") == "true" {
displayMode = "compact"
}
quietMode := displayMode == "quiet"
compactMode := displayMode == "compact"
isRecency := memory.HasRecencyIntent(prompt)
// Set prompt early for term extraction (used by low-signal gate and search)
keyTermsPrompt = prompt
// Skip low-signal prompts: if term extraction finds no specific terms
// (acronyms, quoted phrases, hyphenated) and at most 1 broad term
// (5+ char non-stop words), the prompt lacks enough domain signal
// for a meaningful vault search. Recency queries bypass this gate
// since temporal intent ("what did I work on") is the signal.
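	// Example: "fix the bug" has no acronyms, quoted phrases, or hyphenated
	// terms and no broad 5+ character terms, so it is skipped; a recency
	// query like "what did I work on yesterday" proceeds via the bypass.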
if !isRecency && hasLowSignal() {
logDecision(db, input.SessionID, prompt, "", -1, "skip_lowsignal", nil)
return hookSkipped("low-signal prompt")
}
// --- Decision matrix: mode × topic change ---
mode := detectMode(prompt)
if !isRecency {
switch mode {
case ModeExecuting:
logDecision(db, input.SessionID, prompt, mode.String(), -1, "skip_executing", nil)
return hookSkipped("executing mode")
case ModeSocializing:
logDecision(db, input.SessionID, prompt, mode.String(), -1, "skip_socializing", nil)
return hookSkipped("socializing mode")
default:
// Exploring, Deepening, Reflecting: check topic change
if input.SessionID != "" && !isTopicChange(db, input.SessionID) {
jScore := topicChangeScore(db, input.SessionID)
logDecision(db, input.SessionID, prompt, mode.String(), jScore, "skip_sametopic", nil)
return hookSkipped("same topic") // Context already in conversation window
}
}
}
// Clean up stale session state (runs opportunistically, ~0ms for small tables)
_ = db.SessionStateCleanup(86400) // 24 hours
// Get total vault note count for display
totalVault, _ := db.NoteCount()
// Embed the prompt — keyword fallback if provider unavailable
var candidates []scored
embedProvider, err := newEmbedProvider()
if err != nil {
		// No embedding provider; fall back to keyword search
fmt.Fprintf(os.Stderr, "same: no embedding provider, using keyword search\n")
writeVerboseLog(fmt.Sprintf("Embed provider error: %v — keyword fallback\n", err))
candidates = keywordFallbackSearch(db)
} else {
// Check for embedding model/dimension mismatch before searching
if mismatchErr := db.CheckEmbeddingMeta(embedProvider.Name(), embedProvider.Model(), embedProvider.Dimensions()); mismatchErr != nil {
fmt.Fprintf(os.Stderr, "same: embedding model changed — run 'same reindex --force' to rebuild\n")
result := hookError(mismatchErr.Error())
result.Output = &HookOutput{
HookSpecificOutput: &HookSpecific{
HookEventName: "UserPromptSubmit",
AdditionalContext: fmt.Sprintf(`<same-diagnostic>
%s
Suggested actions for the user:
- Run "same reindex --force" to rebuild the index with the current embedding model
</same-diagnostic>`, mismatchErr),
},
}
return result
}
queryVec, embedErr := embedProvider.GetQueryEmbedding(prompt)
if embedErr != nil {
// Classify the error for better debugging
errMsg := embedErr.Error()
switch {
case strings.Contains(errMsg, "connection_refused"):
fmt.Fprintf(os.Stderr, "same: Ollama not running, falling back to keyword search\n")
case strings.Contains(errMsg, "permission_denied"):
fmt.Fprintf(os.Stderr, "same: Ollama blocked by sandbox policy, falling back to keyword search\n")
case strings.Contains(errMsg, "timeout"):
fmt.Fprintf(os.Stderr, "same: Ollama timeout (model loading?), falling back to keyword search\n")
default:
fmt.Fprintf(os.Stderr, "same: embedding failed, falling back to keyword search\n")
}
writeVerboseLog(fmt.Sprintf("Embedding failed: %v — keyword fallback\n", embedErr))
candidates = keywordFallbackSearch(db)
} else if isRecency {
candidates = recencyHybridSearch(db, queryVec)
} else {
candidates = standardSearch(db, queryVec)
}
}
// If no candidates found, show empty state (unless quiet)
if len(candidates) == 0 {
if !quietMode {
cli.SurfacingEmpty(totalVault)
}
return hookEmpty("no relevant notes")
}
// Inject pinned notes: always surface them regardless of search results.
// They're prepended so they survive the effectiveMax cap.
{
pinnedRecords, _ := db.GetPinnedNotes()
for _, rec := range pinnedRecords {
// Skip if already in candidates
alreadyPresent := false
for _, c := range candidates {
if c.path == rec.Path {
alreadyPresent = true
break
}
}
if alreadyPresent {
continue
}
// SECURITY: never auto-surface _PRIVATE/ content
if shouldSkipPath(rec.Path) {
continue
}
// Prepend with high score so pinned notes survive trimming
pinned := scored{
path: rec.Path,
title: rec.Title,
contentType: rec.ContentType,
confidence: rec.Confidence,
snippet: rec.Text,
composite: 1.0,
titleOverlap: 1.0, // prevent overlap-based trimming
}
candidates = append([]scored{pinned}, candidates...)
}
}
// Extract match terms from prompt for display
promptTerms := extractDisplayTerms(prompt)
effectiveMax := maxResults
if isRecency {
effectiveMax = recencyMaxResults
}
// When the best candidate lacks strong genuine title overlap, the
// query is ambiguous and extra results are more likely to be noise.
// Reduce to 2 to improve precision without losing coverage on the
// best match. Content-boosted overlap (from Mode 5 rescue) is
// artificial and treated as weak for this ambiguity check.
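	// Example: with highTierOverlap at 0.199, a best overlap of 0.12 counts
	// as ambiguous and the cap drops from 3 results to 2.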
{
topOverlap := float64(0)
if len(candidates) > 0 {
topOverlap = candidates[0].titleOverlap
if candidates[0].contentBoosted {
topOverlap = 0
}
}
if !isRecency && topOverlap < highTierOverlap && effectiveMax > 2 {
effectiveMax = 2
}
}
if len(candidates) > effectiveMax {
candidates = candidates[:effectiveMax]
}
// Zero-overlap trim: when the top result has weak but positive title
// relevance, trailing zero-overlap results are likely noise from
// vector search (semantically similar but not about the specific topic).
// Content-boosted overlap is treated as zero for this check.
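	// Example: overlaps [0.12, 0.05, 0.0, 0.08] keep only the first two;
	// truncating at the first zero also drops everything behind it.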
if len(candidates) > 1 {
leaderOverlap := candidates[0].titleOverlap
if candidates[0].contentBoosted {
leaderOverlap = 0
}
if leaderOverlap > 0 && leaderOverlap < highTierOverlap {
for i := 1; i < len(candidates); i++ {
if candidates[i].titleOverlap <= 0 {
candidates = candidates[:i]
break
}
}
}
}
// Overlap gap cap: trim results that are significantly weaker than the
// best match. Uses a relative threshold (< 65% of best overlap) to
// trim weak matches that would dilute precision. Skipped when the top
// result's overlap came from Mode 5 content boost (not a genuine
// title match) — applying the gap cap would incorrectly trim
// priority-type candidates (hub, decision, handoff) that the content
// result was meant to complement.
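	// Example: a best overlap of 0.40 sets the threshold at 0.26, trimming
	// anything below it. The 0.10 floor is defensive only: with the
	// highTierOverlap guard above, the relative threshold is already >= ~0.13.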
if len(candidates) > 1 && candidates[0].titleOverlap >= highTierOverlap && !candidates[0].contentBoosted {
relThreshold := candidates[0].titleOverlap * 0.65
if relThreshold < 0.10 {
relThreshold = 0.10
}
for i := 1; i < len(candidates); i++ {
if candidates[i].titleOverlap < relThreshold {
candidates = candidates[:i]
break
}
}
}
// Build context string, capped at token budget.
// Continue past oversized candidates — a large note that doesn't fit
// shouldn't prevent smaller, high-relevance notes behind it from being included.
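	// Example: entries of 500, 400, and 250 tokens against the 800 budget
	// include the first, skip the second (it would push the total to 900),
	// and include the third at 750 total.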
var parts []string
var included []scored
var excluded []scored
totalTokens := 0
for i := range candidates {
// Cap per-note tokens to prevent a single large note from starving
// the budget. If the snippet alone exceeds the per-note limit,
// truncate it so other results get a fair share of the budget.
snippet := candidates[i].snippet
if snippet != "" {
maxSnipChars := maxPerNoteTokens * 4 // ~4 chars per token
if len(snippet) > maxSnipChars {
snippet = smartTruncate(snippet, maxSnipChars)
}
}
var entry string
if snippet != "" {
entry = fmt.Sprintf("**%s** (%s, score: %.3f)\n%s\n%s",
candidates[i].title, candidates[i].contentType, candidates[i].composite,
candidates[i].path, snippet)
} else {
entry = fmt.Sprintf("**%s** (%s, score: %.3f)\n%s",
candidates[i].title, candidates[i].contentType, candidates[i].composite,
candidates[i].path)
}
entryTokens := memory.EstimateTokens(entry)
// Find which prompt terms appear in this note's title/snippet
candidates[i].matchTerms = findMatchingTerms(promptTerms, candidates[i].title, candidates[i].snippet)
if totalTokens+entryTokens > maxTokenBudget {
// Skip this note but keep scanning — smaller notes may still fit
excluded = append(excluded, candidates[i])
continue
}
candidates[i].tokens = entryTokens
parts = append(parts, entry)
included = append(included, candidates[i])
totalTokens += entryTokens
}
if len(parts) == 0 {
if !quietMode {
cli.SurfacingEmpty(totalVault)
}
return hookEmpty("token budget excluded all candidates")
}
// Display surfacing feedback to stderr
if !quietMode {
if compactMode {
// Compact mode (one-liner)
cli.SurfacingCompact(len(included), len(candidates))
} else {
// Full box (default)
var displayNotes []cli.SurfacedNote
for _, s := range included {
displayNotes = append(displayNotes, cli.SurfacedNote{
Title: s.title,
Tokens: s.tokens,
Included: true,
HighConf: s.semantic >= 0.7, // high confidence threshold
MatchTerms: s.matchTerms,
})
}
for _, s := range excluded {
displayNotes = append(displayNotes, cli.SurfacedNote{
Title: s.title,
Tokens: 0,
Included: false,
HighConf: false,
MatchTerms: s.matchTerms,
})
}
cli.SurfacingVerbose(displayNotes, totalVault)
}
}
// Collect injected paths for usage tracking
var injectedPaths []string
for _, s := range included {
injectedPaths = append(injectedPaths, s.path)
}
contextText := strings.Join(parts, "\n---\n")
// Sanitize: strip XML-like closing tags that could break the wrapper
// and allow indirect prompt injection via crafted note content.
contextText = sanitizeContextTags(contextText)
// Log the injection for budget tracking
if input.SessionID != "" {
memory.LogInjection(db, input.SessionID, "context_surfacing", injectedPaths, contextText)
}
// Store current topic terms so the next prompt can detect topic changes
storeTopicTerms(db, input.SessionID)
// Log the inject decision with styled verbose output
logDecision(db, input.SessionID, prompt, mode.String(), -1, "inject", injectedPaths)
var titles []string
for _, s := range included {
titles = append(titles, s.title)
}
verboseDecision("inject", mode.String(), -1, prompt, titles, totalTokens)
out := &HookOutput{
HookSpecificOutput: &HookSpecific{
HookEventName: "UserPromptSubmit",
AdditionalContext: fmt.Sprintf(
"\n<vault-context>\nRelevant vault notes for this prompt:\n\n%s\n</vault-context>\n",
contextText,
),
},
}
return hookInjected(out, len(injectedPaths), totalTokens, injectedPaths, "")
}