Skip to main content
Glama

basic-memory

entity_service.py (33.9 kB)
"""Service for managing entities in the database."""

import asyncio
from pathlib import Path
from typing import List, Optional, Sequence, Tuple, Union

import frontmatter
import yaml
from loguru import logger
from sqlalchemy.exc import IntegrityError

from basic_memory.config import ProjectConfig, BasicMemoryConfig
from basic_memory.file_utils import (
    has_frontmatter,
    parse_frontmatter,
    remove_frontmatter,
    dump_frontmatter,
)
from basic_memory.markdown import EntityMarkdown
from basic_memory.markdown.entity_parser import EntityParser
from basic_memory.markdown.utils import entity_model_from_markdown, schema_to_markdown
from basic_memory.models import Entity as EntityModel
from basic_memory.models import Observation, Relation
from basic_memory.models.knowledge import Entity
from basic_memory.repository import ObservationRepository, RelationRepository
from basic_memory.repository.entity_repository import EntityRepository
from basic_memory.schemas import Entity as EntitySchema
from basic_memory.schemas.base import Permalink
from basic_memory.services import BaseService, FileService
from basic_memory.services.exceptions import EntityCreationError, EntityNotFoundError
from basic_memory.services.link_resolver import LinkResolver
from basic_memory.utils import generate_permalink


class EntityService(BaseService[EntityModel]):
    """Service for managing entities in the database.

    Coordinates the markdown file on disk (via ``file_service`` and
    ``entity_parser``) with the entity, observation and relation rows in the
    database (via the three repositories).
    """

    def __init__(
        self,
        entity_parser: EntityParser,
        entity_repository: EntityRepository,
        observation_repository: ObservationRepository,
        relation_repository: RelationRepository,
        file_service: FileService,
        link_resolver: LinkResolver,
        app_config: Optional[BasicMemoryConfig] = None,
    ):
        """Store the collaborators used by the service.

        Args:
            entity_parser: Parses markdown files into EntityMarkdown objects.
            entity_repository: Repository passed to the BaseService constructor.
            observation_repository: Repository for observation rows.
            relation_repository: Repository for relation rows.
            file_service: Reads, writes and deletes entity files.
            link_resolver: Resolves identifiers and link targets to entities.
            app_config: Optional app configuration; when None, permalinks are
                treated as enabled.
        """
        super().__init__(entity_repository)
        self.observation_repository = observation_repository
        self.relation_repository = relation_repository
        self.entity_parser = entity_parser
        self.file_service = file_service
        self.link_resolver = link_resolver
        self.app_config = app_config

    def _permalinks_disabled(self) -> bool:
        """Return True only when a config is present and it disables permalinks."""
        return bool(self.app_config and self.app_config.disable_permalinks)

    def _parse_content_frontmatter(self, schema: EntitySchema) -> Optional[EntityMarkdown]:
        """Apply frontmatter overrides embedded in ``schema.content``.

        Side effect: when the content frontmatter has a ``type`` key,
        ``schema.entity_type`` is overwritten with that value.

        Args:
            schema: The entity schema whose content may start with frontmatter.

        Returns:
            A minimal EntityMarkdown carrying the user-specified permalink (for use
            with ``resolve_permalink``) when the content frontmatter has a
            ``permalink`` key, otherwise None.
        """
        if not (schema.content and has_frontmatter(schema.content)):
            return None

        content_frontmatter = parse_frontmatter(schema.content)

        # If content has entity_type/type, use it to override the schema entity_type
        if "type" in content_frontmatter:
            schema.entity_type = content_frontmatter["type"]

        if "permalink" not in content_frontmatter:
            return None

        # Create a minimal EntityMarkdown object for permalink resolution
        from basic_memory.markdown.schemas import EntityFrontmatter

        frontmatter_metadata = {
            "title": schema.title,
            "type": schema.entity_type,
            "permalink": content_frontmatter["permalink"],
        }
        return EntityMarkdown(
            frontmatter=EntityFrontmatter(metadata=frontmatter_metadata),
            content="",  # content not needed for permalink resolution
            observations=[],
            relations=[],
        )

    async def detect_file_path_conflicts(self, file_path: str) -> List[Entity]:
        """Detect potential file path conflicts for a given file path.

        This checks for entities with similar file paths that might cause conflicts:
        - Case sensitivity differences (Finance/file.md vs finance/file.md)
        - Character encoding differences
        - Hyphen vs space differences
        - Unicode normalization differences

        The comparison itself is delegated to
        ``basic_memory.utils.detect_potential_file_conflicts``.

        Args:
            file_path: The file path to check for conflicts

        Returns:
            List of entities that might conflict with the given file path
        """
        from basic_memory.utils import detect_potential_file_conflicts

        # Get all existing file paths
        all_entities = await self.repository.find_all()
        existing_paths = [entity.file_path for entity in all_entities]

        # A set makes the membership test below O(1) per entity
        conflicting_paths = set(detect_potential_file_conflicts(file_path, existing_paths))

        # Find the entities corresponding to conflicting paths
        return [entity for entity in all_entities if entity.file_path in conflicting_paths]

    async def resolve_permalink(
        self, file_path: Permalink | Path, markdown: Optional[EntityMarkdown] = None
    ) -> str:
        """Get or generate unique permalink for an entity.

        Priority (in the order the code applies it):
        1. If markdown has a permalink that is unused, or used by this same file,
           use it as is
        2. If an entity already exists for this file path, keep its current permalink
        3. Otherwise start from the markdown permalink (or one generated from the
           file path) and append ``-1``, ``-2``, ... until it is unused

        Potential file path conflicts are detected first and logged as a warning;
        they do not change the result.

        Args:
            file_path: Path of the entity file.
            markdown: Optional parsed markdown whose frontmatter may carry a permalink.

        Returns:
            The permalink to use for the entity.
        """
        file_path_str = Path(file_path).as_posix()

        # Check for potential file path conflicts before resolving permalink
        conflicts = await self.detect_file_path_conflicts(file_path_str)
        if conflicts:
            logger.warning(
                f"Detected potential file path conflicts for '{file_path_str}': "
                f"{[entity.file_path for entity in conflicts]}"
            )

        # If markdown has explicit permalink, try to validate it
        if markdown and markdown.frontmatter.permalink:
            desired_permalink = markdown.frontmatter.permalink
            existing = await self.repository.get_by_permalink(desired_permalink)

            # If no conflict or it's our own file, use as is
            if not existing or existing.file_path == file_path_str:
                return desired_permalink

        # For existing files, try to find current permalink
        existing = await self.repository.get_by_file_path(file_path_str)
        if existing:
            return existing.permalink

        # New file - generate permalink
        if markdown and markdown.frontmatter.permalink:
            desired_permalink = markdown.frontmatter.permalink
        else:
            desired_permalink = generate_permalink(file_path_str)

        # Make unique if needed
        permalink = desired_permalink
        suffix = 1
        while await self.repository.get_by_permalink(permalink):
            permalink = f"{desired_permalink}-{suffix}"
            suffix += 1
            logger.debug(f"creating unique permalink: {permalink}")

        return permalink

    async def create_or_update_entity(self, schema: EntitySchema) -> Tuple[EntityModel, bool]:
        """Create new entity or update existing one.

        Args:
            schema: The entity data to persist.

        Returns:
            (entity, is_new) where is_new is True if a new entity was created
        """
        logger.debug(
            f"Creating or updating entity: {schema.file_path}, permalink: {schema.permalink}"
        )

        # Try to find existing entity using strict resolution (no fuzzy search)
        # This prevents incorrectly matching similar file paths like "Node A.md" and "Node C.md"
        existing = await self.link_resolver.resolve_link(schema.file_path, strict=True)
        if not existing and schema.permalink:
            existing = await self.link_resolver.resolve_link(schema.permalink, strict=True)

        if existing:
            logger.debug(f"Found existing entity: {existing.file_path}")
            return await self.update_entity(existing, schema), False

        # Create new entity
        return await self.create_entity(schema), True

    async def create_entity(self, schema: EntitySchema) -> EntityModel:
        """Create a new entity and write to filesystem.

        Args:
            schema: The entity data to persist.

        Returns:
            The entity as returned by the repository after the checksum is stored.

        Raises:
            EntityCreationError: If a file already exists at the schema's file path.
        """
        logger.debug(f"Creating entity: {schema.title}")

        # Get file path and ensure it's a Path object
        file_path = Path(schema.file_path)

        if await self.file_service.exists(file_path):
            raise EntityCreationError(
                f"file for entity {schema.folder}/{schema.title} already exists: {file_path}"
            )

        # Parse content frontmatter to check for user-specified permalink and entity_type
        content_markdown = self._parse_content_frontmatter(schema)

        # Get unique permalink (prioritizing content frontmatter) unless disabled
        if self._permalinks_disabled():
            # Use empty string as sentinel to indicate permalinks are disabled
            # The permalink property will return None when it sees empty string
            schema._permalink = ""
        else:
            # Generate and set permalink
            schema._permalink = await self.resolve_permalink(file_path, content_markdown)

        post = await schema_to_markdown(schema)

        # write file
        final_content = dump_frontmatter(post)
        checksum = await self.file_service.write_file(file_path, final_content)

        # parse entity from file
        entity_markdown = await self.entity_parser.parse_file(file_path)

        # create entity
        created = await self.create_entity_from_markdown(file_path, entity_markdown)

        # add relations
        entity = await self.update_entity_relations(created.file_path, entity_markdown)

        # Set final checksum to mark complete
        return await self.repository.update(entity.id, {"checksum": checksum})

    async def update_entity(self, entity: EntityModel, schema: EntitySchema) -> EntityModel:
        """Update an entity's content and metadata.

        The new metadata from ``schema`` is merged over the frontmatter metadata
        already in the file, the file is rewritten, and the database rows are
        refreshed from the rewritten file.

        Args:
            entity: The existing entity to update.
            schema: The new entity data.

        Returns:
            The entity as returned by the repository after the checksum is stored.
        """
        logger.debug(
            f"Updating entity with permalink: {entity.permalink} content-type: {schema.content_type}"
        )

        # Convert file path string to Path
        file_path = Path(entity.file_path)

        # Read existing frontmatter from the file if it exists
        existing_markdown = await self.entity_parser.parse_file(file_path)

        # Parse content frontmatter to check for user-specified permalink and entity_type
        content_markdown = self._parse_content_frontmatter(schema)

        # Check if we need to update the permalink based on content frontmatter (unless
        # disabled). A missing app_config means "enabled", matching create_entity.
        new_permalink = entity.permalink  # Default to existing
        if not self._permalinks_disabled():
            if content_markdown and content_markdown.frontmatter.permalink:
                # Resolve permalink with the new content frontmatter
                resolved_permalink = await self.resolve_permalink(file_path, content_markdown)
                if resolved_permalink != entity.permalink:
                    new_permalink = resolved_permalink
                    # Update the schema to use the new permalink
                    schema._permalink = new_permalink

        # Create post with new content from schema
        post = await schema_to_markdown(schema)

        # Merge new metadata with existing metadata
        existing_markdown.frontmatter.metadata.update(post.metadata)

        # Ensure the permalink in the metadata is the resolved one
        if new_permalink != entity.permalink:
            existing_markdown.frontmatter.metadata["permalink"] = new_permalink

        # Create a new post with merged metadata
        merged_post = frontmatter.Post(post.content, **existing_markdown.frontmatter.metadata)

        # write file
        final_content = dump_frontmatter(merged_post)
        checksum = await self.file_service.write_file(file_path, final_content)

        # parse entity from file
        entity_markdown = await self.entity_parser.parse_file(file_path)

        # update entity in db
        entity = await self.update_entity_and_observations(file_path, entity_markdown)

        # add relations
        await self.update_entity_relations(file_path.as_posix(), entity_markdown)

        # Set final checksum to match file
        entity = await self.repository.update(entity.id, {"checksum": checksum})

        return entity

    async def delete_entity(self, permalink_or_id: str | int) -> bool:
        """Delete entity and its file.

        Args:
            permalink_or_id: A permalink (str) or a database id (int).

        Returns:
            The result of the repository delete, or True when the entity was not
            found (treated as already deleted).

        Raises:
            ValueError: If an id lookup does not return exactly one entity.
        """
        logger.debug(f"Deleting entity: {permalink_or_id}")

        try:
            # Get entity first for file deletion
            if isinstance(permalink_or_id, str):
                entity = await self.get_by_permalink(permalink_or_id)
            else:
                entities = await self.get_entities_by_id([permalink_or_id])
                if len(entities) != 1:  # pragma: no cover
                    logger.error(
                        "Entity lookup error", entity_id=permalink_or_id, found_count=len(entities)
                    )
                    raise ValueError(
                        f"Expected 1 entity with ID {permalink_or_id}, got {len(entities)}"
                    )
                entity = entities[0]

            # Delete file first
            await self.file_service.delete_entity_file(entity)

            # Delete from DB (this will cascade to observations/relations)
            return await self.repository.delete(entity.id)

        except EntityNotFoundError:
            logger.info(f"Entity not found: {permalink_or_id}")
            return True  # Already deleted

    async def get_by_permalink(self, permalink: str) -> EntityModel:
        """Get entity by permalink.

        Raises:
            EntityNotFoundError: If no entity has the given permalink.
        """
        logger.debug(f"Getting entity by permalink: {permalink}")
        db_entity = await self.repository.get_by_permalink(permalink)
        if not db_entity:
            raise EntityNotFoundError(f"Entity not found: {permalink}")
        return db_entity

    async def get_entities_by_id(self, ids: List[int]) -> Sequence[EntityModel]:
        """Get the entities with the given database ids."""
        logger.debug(f"Getting entities: {ids}")
        return await self.repository.find_by_ids(ids)

    async def get_entities_by_permalinks(self, permalinks: List[str]) -> Sequence[EntityModel]:
        """Get the entities with the given permalinks."""
        logger.debug(f"Getting entities permalinks: {permalinks}")
        return await self.repository.find_by_permalinks(permalinks)

    async def delete_entity_by_file_path(self, file_path: Union[str, Path]) -> None:
        """Delete entity by file path."""
        await self.repository.delete_by_file_path(str(file_path))

    async def create_entity_from_markdown(
        self, file_path: Path, markdown: EntityMarkdown
    ) -> EntityModel:
        """Create entity and observations only.

        Creates the entity with null checksum to indicate sync not complete.
        Relations will be added in second pass.

        Uses UPSERT approach to handle permalink/file_path conflicts cleanly.

        Raises:
            EntityCreationError: If the repository upsert fails for any reason.
        """
        logger.debug(f"Creating entity: {markdown.frontmatter.title} file_path: {file_path}")
        model = entity_model_from_markdown(file_path, markdown)

        # Mark as incomplete because we still need to add relations
        model.checksum = None

        # Use UPSERT to handle conflicts cleanly
        try:
            return await self.repository.upsert_entity(model)
        except Exception as e:
            logger.error(f"Failed to upsert entity for {file_path}: {e}")
            raise EntityCreationError(f"Failed to create entity: {str(e)}") from e

    async def update_entity_and_observations(
        self, file_path: Path, markdown: EntityMarkdown
    ) -> EntityModel:
        """Update entity fields and observations.

        Updates everything except relations and sets null checksum to indicate
        sync not complete.

        Raises:
            EntityNotFoundError: If no entity exists for ``file_path``.
        """
        logger.debug(f"Updating entity and observations: {file_path}")

        db_entity = await self.repository.get_by_file_path(file_path.as_posix())
        if not db_entity:
            # Fail with the service's own error rather than an AttributeError on None
            raise EntityNotFoundError(f"Entity not found: {file_path.as_posix()}")

        # Clear observations for entity
        await self.observation_repository.delete_by_fields(entity_id=db_entity.id)

        # add new observations
        observations = [
            Observation(
                entity_id=db_entity.id,
                content=obs.content,
                category=obs.category,
                context=obs.context,
                tags=obs.tags,
            )
            for obs in markdown.observations
        ]
        await self.observation_repository.add_all(observations)

        # update values from markdown
        db_entity = entity_model_from_markdown(file_path, markdown, db_entity)

        # checksum value is None == not finished with sync
        db_entity.checksum = None

        # update entity
        return await self.repository.update(
            db_entity.id,
            db_entity,
        )

    async def update_entity_relations(
        self,
        path: str,
        markdown: EntityMarkdown,
    ) -> EntityModel:
        """Replace the outgoing relations of the entity at ``path``.

        Existing outgoing relations are deleted, then one relation is added per
        entry in ``markdown.relations``. Targets that cannot be resolved (or whose
        lookup failed) are stored as forward links: ``to_id`` is None and
        ``to_name`` is the raw target text.

        Returns:
            The entity re-read from the repository by file path.

        Raises:
            EntityNotFoundError: If no entity exists for ``path``.
        """
        logger.debug(f"Updating relations for entity: {path}")

        db_entity = await self.repository.get_by_file_path(path)
        if not db_entity:
            # Fail with the service's own error rather than an AttributeError on None
            raise EntityNotFoundError(f"Entity not found: {path}")

        # Clear existing relations first
        await self.relation_repository.delete_outgoing_relations_from_entity(db_entity.id)

        # Batch resolve all relation targets in parallel
        if markdown.relations:
            # Create tasks for all relation lookups
            lookup_tasks = [
                self.link_resolver.resolve_link(rel.target) for rel in markdown.relations
            ]

            # Execute all lookups in parallel
            resolved_entities = await asyncio.gather(*lookup_tasks, return_exceptions=True)

            # Process results and create relation records
            for rel, resolved in zip(markdown.relations, resolved_entities):
                target_entity: Optional[Entity] = None
                if isinstance(resolved, BaseException):
                    # BaseException, not Exception: gather can also hand back
                    # CancelledError, which must not be mistaken for an entity.
                    logger.warning(
                        f"Failed to resolve relation target '{rel.target}': {resolved!r}"
                    )
                else:
                    # Type narrowing: resolved is Optional[Entity] here
                    target_entity = resolved  # type: ignore

                # if the target is found, store the id
                target_id = target_entity.id if target_entity else None
                # if the target is found, store the title, otherwise add the target for a "forward link"
                target_name = target_entity.title if target_entity else rel.target

                # Create the relation
                relation = Relation(
                    from_id=db_entity.id,
                    to_id=target_id,
                    to_name=target_name,
                    relation_type=rel.type,
                    context=rel.context,
                )
                try:
                    await self.relation_repository.add(relation)
                except IntegrityError:
                    # Unique constraint violation - relation already exists
                    logger.debug(
                        f"Skipping duplicate relation {rel.type} from {db_entity.permalink} target: {rel.target}"
                    )
                    continue

        return await self.repository.get_by_file_path(path)

    async def edit_entity(
        self,
        identifier: str,
        operation: str,
        content: str,
        section: Optional[str] = None,
        find_text: Optional[str] = None,
        expected_replacements: int = 1,
    ) -> EntityModel:
        """Edit an existing entity's content using various operations.

        Args:
            identifier: Entity identifier (permalink, title, etc.)
            operation: The editing operation (append, prepend, find_replace, replace_section)
            content: The content to add or use for replacement
            section: For replace_section operation - the markdown header
            find_text: For find_replace operation - the text to find and replace
            expected_replacements: For find_replace operation - expected number of replacements (default: 1)

        Returns:
            The updated entity model

        Raises:
            EntityNotFoundError: If the entity cannot be found
            ValueError: If required parameters are missing for the operation or replacement count doesn't match expected
        """
        logger.debug(f"Editing entity: {identifier}, operation: {operation}")

        # Find the entity using the link resolver with strict mode for destructive operations
        entity = await self.link_resolver.resolve_link(identifier, strict=True)
        if not entity:
            raise EntityNotFoundError(f"Entity not found: {identifier}")

        # Read the current file content
        file_path = Path(entity.file_path)
        current_content, _ = await self.file_service.read_file(file_path)

        # Apply the edit operation
        new_content = self.apply_edit_operation(
            current_content, operation, content, section, find_text, expected_replacements
        )

        # Write the updated content back to the file
        checksum = await self.file_service.write_file(file_path, new_content)

        # Parse the updated file to get new observations/relations
        entity_markdown = await self.entity_parser.parse_file(file_path)

        # Update entity and its relationships
        entity = await self.update_entity_and_observations(file_path, entity_markdown)
        await self.update_entity_relations(file_path.as_posix(), entity_markdown)

        # Set final checksum to match file
        entity = await self.repository.update(entity.id, {"checksum": checksum})

        return entity

    def apply_edit_operation(
        self,
        current_content: str,
        operation: str,
        content: str,
        section: Optional[str] = None,
        find_text: Optional[str] = None,
        expected_replacements: int = 1,
    ) -> str:
        """Apply the specified edit operation to the current content.

        Args:
            current_content: The text to edit.
            operation: One of "append", "prepend", "find_replace", "replace_section".
            content: The text to add or substitute.
            section: Header to replace under (replace_section only).
            find_text: Text to search for (find_replace only).
            expected_replacements: Required number of occurrences of ``find_text``.

        Returns:
            The edited text.

        Raises:
            ValueError: For an unsupported operation, a missing/blank ``section`` or
                ``find_text``, or an occurrence count that differs from
                ``expected_replacements``.
        """
        if operation == "append":
            # Ensure proper spacing
            if current_content and not current_content.endswith("\n"):
                return current_content + "\n" + content
            return current_content + content  # pragma: no cover

        elif operation == "prepend":
            # Handle frontmatter-aware prepending
            return self._prepend_after_frontmatter(current_content, content)

        elif operation == "find_replace":
            if not find_text:
                raise ValueError("find_text is required for find_replace operation")
            if not find_text.strip():
                raise ValueError("find_text cannot be empty or whitespace only")

            # Count actual occurrences
            actual_count = current_content.count(find_text)

            # Validate count matches expected
            if actual_count != expected_replacements:
                if actual_count == 0:
                    raise ValueError(f"Text to replace not found: '{find_text}'")
                else:
                    raise ValueError(
                        f"Expected {expected_replacements} occurrences of '{find_text}', "
                        f"but found {actual_count}"
                    )

            return current_content.replace(find_text, content)

        elif operation == "replace_section":
            if not section:
                raise ValueError("section is required for replace_section operation")
            if not section.strip():
                raise ValueError("section cannot be empty or whitespace only")
            return self.replace_section_content(current_content, section, content)

        else:
            raise ValueError(f"Unsupported operation: {operation}")

    def replace_section_content(
        self, current_content: str, section_header: str, new_content: str
    ) -> str:
        """Replace content under a specific markdown section header.

        This method uses a simple, safe approach: when replacing a section, it only
        replaces the immediate content under that header until it encounters the next
        line starting with "#" (a header of ANY level).

        This means:
        - Replacing "# Header" replaces content until "## Subsection" (preserves subsections)
        - Replacing "## Section" replaces content until "### Subsection" (preserves subsections)
        - More predictable and safer than trying to consume entire hierarchies

        If the header is not present, the header and new content are appended to the
        end of the document instead.

        Args:
            current_content: The current markdown content
            section_header: The section header to find and replace (e.g., "## Section Name");
                "## " is prepended when it does not start with "#"
            new_content: The new content to replace the section with

        Returns:
            The updated content with the section replaced

        Raises:
            ValueError: If multiple sections with the same header are found
        """
        # Normalize the section header (ensure it starts with #)
        if not section_header.startswith("#"):
            section_header = "## " + section_header

        # First pass: find matching sections to check for duplicates
        lines = current_content.split("\n")
        wanted = section_header.strip()
        matching_sections = [i for i, line in enumerate(lines) if line.strip() == wanted]

        # Handle multiple sections error
        if len(matching_sections) > 1:
            raise ValueError(
                f"Multiple sections found with header '{section_header}'. "
                f"Section replacement requires unique headers."
            )

        # If no section found, append it
        if not matching_sections:
            logger.info(f"Section '{section_header}' not found, appending to end of document")
            separator = "\n\n" if current_content and not current_content.endswith("\n\n") else ""
            return current_content + separator + section_header + "\n" + new_content

        # Replace the single matching section: keep everything up to and including the
        # header, insert the new content, then resume at the next header (if any).
        section_line_idx = matching_sections[0]
        resume_idx = len(lines)
        for idx in range(section_line_idx + 1, len(lines)):
            if lines[idx].startswith("#"):
                resume_idx = idx
                break

        result_lines = lines[: section_line_idx + 1] + [new_content] + lines[resume_idx:]
        return "\n".join(result_lines)

    def _prepend_after_frontmatter(self, current_content: str, content: str) -> str:
        """Prepend content after frontmatter, preserving frontmatter structure.

        When the current content has frontmatter, the new content is placed at the
        start of the body and the frontmatter is re-serialized with ``yaml.dump``.
        Without frontmatter (or if parsing fails) the content is simply put in front.
        """
        # Check if file has frontmatter
        if has_frontmatter(current_content):
            try:
                # Parse and separate frontmatter from body
                frontmatter_data = parse_frontmatter(current_content)
                body_content = remove_frontmatter(current_content)

                # Prepend content to the body
                if content and not content.endswith("\n"):
                    new_body = content + "\n" + body_content
                else:
                    new_body = content + body_content

                # Reconstruct file with frontmatter + prepended body
                yaml_fm = yaml.dump(frontmatter_data, sort_keys=False, allow_unicode=True)
                return f"---\n{yaml_fm}---\n\n{new_body.strip()}"

            except Exception as e:  # pragma: no cover
                logger.warning(
                    f"Failed to parse frontmatter during prepend: {e}"
                )  # pragma: no cover
                # Fall back to simple prepend if frontmatter parsing fails  # pragma: no cover

        # No frontmatter or parsing failed - do simple prepend  # pragma: no cover
        if content and not content.endswith("\n"):  # pragma: no cover
            return content + "\n" + current_content  # pragma: no cover
        return content + current_content  # pragma: no cover

    async def move_entity(
        self,
        identifier: str,
        destination_path: str,
        project_config: ProjectConfig,
        app_config: BasicMemoryConfig,
    ) -> EntityModel:
        """Move entity to new location with database consistency.

        The file is renamed on disk first; if any later step fails, the rename is
        reverted on a best-effort basis before the error is re-raised.

        Args:
            identifier: Entity identifier (title, permalink, or memory:// URL)
            destination_path: New path relative to project root
            project_config: Project configuration for file operations
            app_config: App configuration for permalink update settings

        Returns:
            The updated entity as returned by the repository

        Raises:
            EntityNotFoundError: If the entity cannot be found
            ValueError: If move operation fails due to validation or filesystem errors
        """
        logger.debug(f"Moving entity: {identifier} to {destination_path}")

        # 1. Resolve identifier to entity with strict mode for destructive operations
        entity = await self.link_resolver.resolve_link(identifier, strict=True)
        if not entity:
            raise EntityNotFoundError(f"Entity not found: {identifier}")

        current_path = entity.file_path
        old_permalink = entity.permalink

        # 2. Validate destination path format first
        if not destination_path or destination_path.startswith("/") or not destination_path.strip():
            raise ValueError(f"Invalid destination path: {destination_path}")

        # The destination must stay inside the project root: joining an absolute path
        # replaces the root entirely, and ".." segments can climb out of it.
        destination_parts = Path(destination_path)
        if destination_parts.is_absolute() or ".." in destination_parts.parts:
            raise ValueError(f"Invalid destination path: {destination_path}")

        # 3. Validate paths
        source_file = project_config.home / current_path
        destination_file = project_config.home / destination_path

        # Validate source exists
        if not source_file.exists():
            raise ValueError(f"Source file not found: {current_path}")

        # Check if destination already exists
        if destination_file.exists():
            raise ValueError(f"Destination already exists: {destination_path}")

        try:
            # 4. Create destination directory if needed
            destination_file.parent.mkdir(parents=True, exist_ok=True)

            # 5. Move physical file
            source_file.rename(destination_file)
            logger.info(f"Moved file: {current_path} -> {destination_path}")

            # 6. Prepare database updates
            updates = {"file_path": destination_path}

            # 7. Update permalink if configured or if entity has null permalink (unless disabled)
            if not app_config.disable_permalinks and (
                app_config.update_permalinks_on_move or old_permalink is None
            ):
                # Generate new permalink from destination path
                new_permalink = await self.resolve_permalink(destination_path)

                # Update frontmatter with new permalink
                await self.file_service.update_frontmatter(
                    destination_path, {"permalink": new_permalink}
                )

                updates["permalink"] = new_permalink
                if old_permalink is None:
                    logger.info(
                        f"Generated permalink for entity with null permalink: {new_permalink}"
                    )
                else:
                    logger.info(f"Updated permalink: {old_permalink} -> {new_permalink}")

            # 8. Recalculate checksum
            new_checksum = await self.file_service.compute_checksum(destination_path)
            updates["checksum"] = new_checksum

            # 9. Update database
            updated_entity = await self.repository.update(entity.id, updates)
            if not updated_entity:
                raise ValueError(f"Failed to update entity in database: {entity.id}")

            return updated_entity

        except Exception as e:
            # Rollback: try to restore original file location if move succeeded
            if destination_file.exists() and not source_file.exists():
                try:
                    destination_file.rename(source_file)
                    logger.info(f"Rolled back file move: {destination_path} -> {current_path}")
                except Exception as rollback_error:  # pragma: no cover
                    logger.error(f"Failed to rollback file move: {rollback_error}")

            # Re-raise the original error with context
            raise ValueError(f"Move failed: {str(e)}") from e

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/basicmachines-co/basic-memory'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.