basic-memory

Overview Schema Related Servers Score Discussions

link_resolver.py•10.8 KiB

"""Service for resolving markdown links to permalinks.""" from typing import Optional, Tuple from loguru import logger from basic_memory.models import Entity from basic_memory.repository.entity_repository import EntityRepository from basic_memory.schemas.search import SearchQuery, SearchItemType from basic_memory.services.search_service import SearchService class LinkResolver: """Service for resolving markdown links to permalinks. Uses a combination of exact matching and search-based resolution: 1. Try exact permalink match (fastest) 2. Try exact title match 3. Try exact file path match 4. Try file path with .md extension (for folder/title patterns) 5. Fall back to search for fuzzy matching """ def __init__(self, entity_repository: EntityRepository, search_service: SearchService): """Initialize with repositories.""" self.entity_repository = entity_repository self.search_service = search_service async def resolve_link( self, link_text: str, use_search: bool = True, strict: bool = False, source_path: Optional[str] = None, ) -> Optional[Entity]: """Resolve a markdown link to a permalink. Args: link_text: The link text to resolve use_search: Whether to use search-based fuzzy matching as fallback strict: If True, only exact matches are allowed (no fuzzy search fallback) source_path: Optional path of the source file containing the link. Used to prefer notes closer to the source (context-aware resolution). """ logger.trace(f"Resolving link: {link_text} (source: {source_path})") # Clean link text and extract any alias clean_text, alias = self._normalize_link_text(link_text) # --- Path Resolution --- # Note: All paths in Basic Memory are stored as POSIX strings (forward slashes) # for cross-platform compatibility. See entity_repository.py which normalizes # paths using Path().as_posix(). This allows consistent path operations here. 
# --- Relative Path Resolution --- # Trigger: source_path is provided AND link contains "/" # Why: Resolve paths like [[nested/deep-note]] relative to source folder first # Outcome: [[nested/deep-note]] from testing/link-test.md → testing/nested/deep-note.md if source_path and "/" in clean_text: source_folder = source_path.rsplit("/", 1)[0] if "/" in source_path else "" if source_folder: # Construct relative path from source folder relative_path = f"{source_folder}/{clean_text}" # Try with .md extension if not relative_path.endswith(".md"): relative_path_md = f"{relative_path}.md" entity = await self.entity_repository.get_by_file_path(relative_path_md) if entity: return entity # Try as-is (already has extension or is a permalink) entity = await self.entity_repository.get_by_file_path(relative_path) if entity: return entity # When source_path is provided, use context-aware resolution: # Check both permalink and title matches, prefer closest to source. # Example: [[testing]] from folder/note.md prefers folder/testing.md # over a root testing.md with permalink "testing". if source_path: # Gather all potential matches candidates: list[Entity] = [] # Check permalink match permalink_entity = await self.entity_repository.get_by_permalink(clean_text) if permalink_entity: candidates.append(permalink_entity) # Check title matches title_entities = await self.entity_repository.get_by_title(clean_text) for entity in title_entities: # Avoid duplicates (permalink match might also be in title matches) if entity.id not in [c.id for c in candidates]: candidates.append(entity) if candidates: if len(candidates) == 1: return candidates[0] else: # Multiple candidates - pick closest to source return self._find_closest_entity(candidates, source_path) # Standard resolution (no source context): permalink first, then title # 1. 
Try exact permalink match first (most efficient) entity = await self.entity_repository.get_by_permalink(clean_text) if entity: logger.debug(f"Found exact permalink match: {entity.permalink}") return entity # 2. Try exact title match found = await self.entity_repository.get_by_title(clean_text) if found: # Return first match (shortest path) if no source context entity = found[0] logger.debug(f"Found title match: {entity.title}") return entity # 3. Try file path found_path = await self.entity_repository.get_by_file_path(clean_text) if found_path: logger.debug(f"Found entity with path: {found_path.file_path}") return found_path # 4. Try file path with .md extension if not already present if not clean_text.endswith(".md") and "/" in clean_text: file_path_with_md = f"{clean_text}.md" found_path_md = await self.entity_repository.get_by_file_path(file_path_with_md) if found_path_md: logger.debug(f"Found entity with path (with .md): {found_path_md.file_path}") return found_path_md # In strict mode, don't try fuzzy search - return None if no exact match found if strict: return None # 5. Fall back to search for fuzzy matching (only if not in strict mode) if use_search and "*" not in clean_text: results = await self.search_service.search( query=SearchQuery(text=clean_text, entity_types=[SearchItemType.ENTITY]), ) if results: # Look for best match best_match = min(results, key=lambda x: x.score) # pyright: ignore logger.trace( f"Selected best match from {len(results)} results: {best_match.permalink}" ) if best_match.permalink: return await self.entity_repository.get_by_permalink(best_match.permalink) # if we couldn't find anything then return None return None def _normalize_link_text(self, link_text: str) -> Tuple[str, Optional[str]]: """Normalize link text and extract alias if present. 
Args: link_text: Raw link text from markdown Returns: Tuple of (normalized_text, alias or None) """ # Strip whitespace text = link_text.strip() # Remove enclosing brackets if present if text.startswith("[[") and text.endswith("]]"): text = text[2:-2] # Handle wiki link aliases (format: [[actual|alias]]) alias = None if "|" in text: text, alias = text.split("|", 1) text = text.strip() alias = alias.strip() else: # Strip whitespace from text even if no alias text = text.strip() return text, alias def _find_closest_entity(self, entities: list[Entity], source_path: str) -> Entity: """Find the entity closest to the source file path. Context-aware resolution: prefer notes in the same folder or closer in hierarchy. Proximity Scoring Algorithm: - Priority 0: Same folder as source (best match) - Priority 1-N: Ancestor folders (N = levels up from source) - Priority 100+N: Descendant folders (N = levels down, deprioritized) - Priority 1000: Completely unrelated paths (least preferred) - Ties are broken by shortest absolute path (consistent behavior) Args: entities: List of entities with the same title source_path: Path of the file containing the link Returns: The entity closest to the source path """ # Extract source folder (everything before the last /) source_folder = source_path.rsplit("/", 1)[0] if "/" in source_path else "" def path_proximity(entity: Entity) -> Tuple[int, int]: """Return (proximity_score, path_length) for sorting. Lower is better for both values. 
""" entity_path = entity.file_path entity_folder = entity_path.rsplit("/", 1)[0] if "/" in entity_path else "" # Trigger: entity is in the same folder as source # Why: same-folder notes are most contextually relevant # Outcome: priority = 0 (best), ties broken by shortest path if entity_folder == source_folder: return (0, len(entity_path)) # Trigger: entity is in an ancestor folder of source # e.g., source is "a/b/c/file.md", entity is "a/b/note.md" -> ancestor # Why: ancestors are contextually relevant (shared parent context) # Outcome: priority = levels_up (1, 2, 3...), closer ancestors preferred if source_folder.startswith(entity_folder + "/") if entity_folder else source_folder: # Count how many levels up if entity_folder: levels_up = source_folder.count("/") - entity_folder.count("/") else: # Root level levels_up = source_folder.count("/") + 1 return (levels_up, len(entity_path)) # Trigger: entity is in a descendant folder of source # e.g., source is "a/file.md", entity is "a/b/c/note.md" -> descendant # Why: descendants are less contextually relevant than ancestors # Outcome: priority = 100 + levels_down, significantly deprioritized if entity_folder.startswith(source_folder + "/") if source_folder else entity_folder: if source_folder: levels_down = entity_folder.count("/") - source_folder.count("/") else: # Source is at root levels_down = entity_folder.count("/") + 1 return (100 + levels_down, len(entity_path)) # Trigger: entity is in a completely unrelated path # Why: no folder relationship means minimal contextual relevance # Outcome: priority = 1000, only selected if no related paths exist return (1000, len(entity_path)) # Sort by proximity (lower is better), then by path length (shorter is better) return min(entities, key=path_proximity)

Loading blob content...

Latest Blog Posts

Redis vs ioredis vs valkey-glide
By punkpeye on January 26, 2026.
benchmark
Redis
valkey
Quickstart: Publish an MCP Server to the MCP Registry
By punkpeye on January 24, 2026.
mcp
official reference mirror
Official MCP Registry Server.json Requirements
By punkpeye on January 24, 2026.
mcp
official reference mirror

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/basicmachines-co/basic-memory'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.

link_resolver.py•10.8 KiB