"""Config files analyzer for JSON, YAML, TOML, and INI files."""
import json
import logging
import re
from pathlib import Path
from typing import Optional

from .base import BaseAnalyzer
from .models import ImportInfo, EntryPointInfo
class ConfigAnalyzer(BaseAnalyzer):
    """Analyzer for configuration files (.json, .yaml, .yml, .toml, .ini).

    Reports file-path references found in config files as ``ImportInfo``
    records and project-level markers (package manifests, compose files,
    ...) as ``EntryPointInfo`` records. Registry package names (npm, cargo,
    pip) are deliberately not reported as imports.
    """

    @classmethod
    def get_extensions(cls) -> list[str]:
        """Return the file extensions handled by this analyzer."""
        return [".json", ".yaml", ".yml", ".toml", ".ini"]

    @classmethod
    def get_language_name(cls) -> str:
        """Return the language name reported for these files."""
        return "Config"

    @classmethod
    def get_priority(cls) -> int:
        """Return this analyzer's priority value (10)."""
        return 10

    def should_analyze(self, file_path: str) -> bool:
        """Return False for config files that should be skipped.

        Skipped files:
        - well-known lock files (package-lock.json, pnpm-lock.yaml, ...)
        - any file whose name ends with ``.lock``
        - minified files (name contains ``.min.``)

        The comparison is case-insensitive.
        """
        filename = Path(file_path).name.lower()

        # Names are lowercase because ``filename`` has been lowercased.
        if filename in ('package-lock.json', 'yarn.lock', 'pnpm-lock.yaml',
                        'poetry.lock', 'cargo.lock'):
            return False

        # Any other auto-generated dependency lock.
        if filename.endswith('.lock'):
            return False

        # Minified files.
        return '.min.' not in filename

    def extract_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract file path references from a config file.

        Runs the format-specific extractor selected by the file extension:
        - JSON: tsconfig.json paths, package.json scripts with file refs
        - YAML: Compose volumes, env_file, dockerfile paths
        - TOML: pyproject.toml config_file, Cargo.toml path dependencies
        - INI: values that look like file paths
        and then the generic relative-path scan, which applies to every
        config type.

        Package names (npm, cargo, pip) are not extracted: they are registry
        names, not file imports.

        NOTE(review): a path found by both a format-specific extractor and
        the generic scan is reported twice (with different import types).
        """
        imports: list[ImportInfo] = []
        filename = Path(file_path).name.lower()

        if filename.endswith('.json'):
            imports.extend(self._extract_json_imports(file_path, content))
        elif filename.endswith(('.yaml', '.yml')):
            imports.extend(self._extract_yaml_imports(file_path, content))
        elif filename.endswith('.toml'):
            imports.extend(self._extract_toml_imports(file_path, content))
        elif filename.endswith('.ini'):
            imports.extend(self._extract_ini_imports(file_path, content))

        # Generic file path pattern extraction (all config types).
        imports.extend(self._extract_path_patterns(file_path, content))
        return imports

    def _extract_json_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from tsconfig.json / package.json.

        Returns an empty list for any other JSON file, for content that is
        not valid JSON, and for documents whose top-level value is not an
        object.
        """
        filename = Path(file_path).name.lower()
        if filename not in ('tsconfig.json', 'package.json'):
            # Nothing format-specific to extract; skip the parse entirely.
            return []

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return []

        # Valid JSON may be null, a number, a string or an array; key lookups
        # below are only meaningful on an object.
        if not isinstance(data, dict):
            return []

        if filename == 'tsconfig.json':
            return self._extract_tsconfig_imports(file_path, content, data)
        return self._extract_package_json_imports(file_path, content, data)

    def _extract_tsconfig_imports(self, file_path: str, content: str, data: dict) -> list[ImportInfo]:
        """Collect extends / files / include / paths references from a parsed tsconfig."""
        imports: list[ImportInfo] = []

        def add(target: str, import_type: str) -> None:
            imports.append(ImportInfo(
                source_file=file_path,
                target_module=target,
                line=self._find_line(content, target),
                import_type=import_type
            ))

        # "extends": "./base.json"
        extends = data.get('extends')
        if isinstance(extends, str):
            add(extends, "extends")

        # "files": ["src/index.ts", ...]
        files = data.get('files')
        if isinstance(files, list):
            for file_ref in files:
                if isinstance(file_ref, str):
                    add(file_ref, "file_reference")

        # "include": ["src/**/*"] -- patterns starting with a wildcard are skipped.
        include = data.get('include')
        if isinstance(include, list):
            for pattern in include:
                if isinstance(pattern, str) and not pattern.startswith('*'):
                    add(pattern, "include_pattern")

        # "compilerOptions": {"paths": {"@/*": ["./src/*"]}}
        compiler_options = data.get('compilerOptions')
        if isinstance(compiler_options, dict):
            paths = compiler_options.get('paths')
            if isinstance(paths, dict):
                for path_list in paths.values():
                    if isinstance(path_list, list):
                        for path in path_list:
                            if isinstance(path, str):
                                add(path, "path_mapping")

        return imports

    def _extract_package_json_imports(self, file_path: str, content: str, data: dict) -> list[ImportInfo]:
        """Collect local file references from package.json scripts.

        Dependencies are intentionally ignored; only script commands are
        scanned for tokens ending in a JS/TS/JSON extension.
        """
        imports: list[ImportInfo] = []
        scripts = data.get('scripts')
        if not isinstance(scripts, dict):
            return imports

        # A reference starts at a path character that is not preceded by
        # another path character, so leading "./", "../", "/" and "." are kept
        # ("node ../lib/x.js" yields "../lib/x.js", not "lib/x.js").
        script_file_pattern = r'(?<![\w/.])[\w/.][\w/.-]*\.(?:js|ts|mjs|cjs|json|jsx|tsx)\b'

        for script_cmd in scripts.values():
            if not isinstance(script_cmd, str):
                continue
            for ref in re.findall(script_file_pattern, script_cmd):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=ref,
                    line=self._find_line(content, ref),
                    import_type="script_file"
                ))
        return imports

    def _extract_yaml_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from YAML config files.

        Only Compose files are inspected (see ``_is_compose_file``). The
        content is scanned with regular expressions, not parsed as YAML.
        Results are ordered by kind: env files, dockerfiles, volume mounts.
        """
        imports: list[ImportInfo] = []
        filename = Path(file_path).name.lower()
        if not self._is_compose_file(filename):
            return imports

        patterns = (
            # env_file: .env.production or env_file: .env
            (r'env_file:\s*["\']?(\.env[^\s"\']*)["\']?', 0, "env_file"),
            # dockerfile: ./Dockerfile.prod
            (r'dockerfile:\s*["\']?([^\s"\']+Dockerfile[^\s"\']*)["\']?', re.IGNORECASE, "dockerfile"),
            # volumes: - ./data:/app/data
            (r'[-\s]+["\']?(\.{1,2}/[^:\s"\']+):[^\s"\']+["\']?', 0, "volume_mount"),
        )
        for pattern, flags, import_type in patterns:
            for match in re.finditer(pattern, content, flags):
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=match.group(1),
                    # Use the captured path's position: the pattern prefix
                    # (e.g. "[-\s]+") can begin on an earlier line.
                    line=self._line_at(content, match.start(1)),
                    import_type=import_type
                ))
        return imports

    def _extract_toml_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from TOML config files.

        - pyproject.toml: ``config_file = "..."`` values. ``[project.scripts]``
          entries reference modules, not files, so they are skipped.
        - Cargo.toml: ``path = "..."`` values (local crates).

        The content is scanned with regular expressions, not parsed as TOML.
        """
        imports: list[ImportInfo] = []
        filename = Path(file_path).name.lower()

        # The look-behind stops longer keys such as "xpath" or
        # "my_config_file" from matching.
        if filename == 'pyproject.toml':
            pattern = r'(?<![\w-])config_file\s*=\s*["\']([^"\']+)["\']'
            import_type = "config_file"
        elif filename == 'cargo.toml':
            # my_crate = { path = "../my_crate" }
            pattern = r'(?<![\w-])path\s*=\s*["\']([^"\']+)["\']'
            import_type = "path_dependency"
        else:
            return imports

        for match in re.finditer(pattern, content):
            imports.append(ImportInfo(
                source_file=file_path,
                target_module=match.group(1),
                line=self._line_at(content, match.start(1)),
                import_type=import_type
            ))
        return imports

    def _extract_ini_imports(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract imports from INI config files.

        Reports values of the form ``key = /path/to/file.ext`` or
        ``key = ./relative/path.ext`` (optionally quoted): the value must
        start with ``.`` or ``/`` and end with an extension.
        """
        imports: list[ImportInfo] = []
        ini_path_pattern = r'^\s*[\w_-]+\s*=\s*(["\']?)([./][^\s"\']+\.[a-zA-Z0-9]+)\1'
        for match in re.finditer(ini_path_pattern, content, re.MULTILINE):
            imports.append(ImportInfo(
                source_file=file_path,
                target_module=match.group(2),
                # The leading "^\s*" may start on a preceding blank line, so
                # the line is taken from the value itself.
                line=self._line_at(content, match.start(2)),
                import_type="config_value"
            ))
        return imports

    def _extract_path_patterns(self, file_path: str, content: str) -> list[ImportInfo]:
        """Extract generic relative file paths from config content.

        Conservative patterns; both require a leading ``./`` or ``../`` and a
        trailing extension:
        - quoted paths: "./config.json", '../utils/helper.ts'
        - unquoted, whitespace-delimited paths: "- ./file.ext", "key: ./file.ext"

        All quoted matches are returned first, then all unquoted matches.
        """
        imports: list[ImportInfo] = []

        # Quotes are excluded from path segments so that adjacent quoted
        # items such as ['./a.js','./b.js'] are not merged into one path.
        quoted_path_pattern = r'["\'](\.{1,2}/(?:[^/"\'\s]+/)*[^/"\'\s]+\.[a-zA-Z0-9]+)["\']'

        # Look-arounds (instead of consuming the surrounding whitespace) let
        # two paths separated by a single space both match.
        unquoted_path_pattern = (
            r'(?<!\S)(\./[^\s:]+\.[a-zA-Z0-9]+'
            r'|\.\./(?:[^/\s]+/)*[^/\s]+\.[a-zA-Z0-9]+)(?!\S)'
        )

        for pattern in (quoted_path_pattern, unquoted_path_pattern):
            for match in re.finditer(pattern, content):
                path = match.group(1)
                # Defensive: never report something that looks like a URL.
                if '://' in path:
                    continue
                imports.append(ImportInfo(
                    source_file=file_path,
                    target_module=path,
                    line=self._line_at(content, match.start(1)),
                    import_type="path_reference"
                ))
        return imports

    def find_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in config files.

        Entry points:
        - package.json: project marker, main, bin scripts
        - tsconfig.json: project marker
        - pyproject.toml: project marker, [project.scripts]
        - Cargo.toml: project marker, [[bin]] targets
        - Compose files: project marker, services section

        Detection is best-effort: if a format-specific finder raises, the
        error is logged at debug level and an empty list is returned.
        """
        entry_points: list[EntryPointInfo] = []
        filename = Path(file_path).name.lower()

        if filename.endswith('.json'):
            finder = self._find_json_entry_points
        elif filename.endswith('.toml'):
            finder = self._find_toml_entry_points
        elif filename.endswith(('.yaml', '.yml')):
            finder = self._find_yaml_entry_points
        else:
            return entry_points

        try:
            entry_points.extend(finder(file_path, content))
        except Exception:
            # Don't fail the analysis on a malformed config, but leave a trace.
            logging.getLogger(__name__).debug(
                "Entry point detection failed for %s", file_path, exc_info=True
            )
        return entry_points

    def _find_json_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in package.json / tsconfig.json.

        Returns an empty list for other JSON files, invalid JSON, and
        documents whose top-level value is not an object.
        """
        entry_points: list[EntryPointInfo] = []
        filename = Path(file_path).name.lower()
        if filename not in ('package.json', 'tsconfig.json'):
            return entry_points

        try:
            data = json.loads(content)
        except json.JSONDecodeError:
            return entry_points
        if not isinstance(data, dict):
            return entry_points

        if filename == 'tsconfig.json':
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="typescript_project",
                line=1,
                framework="TypeScript"
            ))
            return entry_points

        # package.json: mark as npm/node project.
        entry_points.append(EntryPointInfo(
            file=file_path,
            type="project_config",
            name="npm_project",
            line=1,
            framework="npm"
        ))

        # "main" entry point; ignored unless it is a string.
        main = data.get('main')
        if isinstance(main, str):
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="main_entry",
                name=main,
                line=self._find_line(content, main),
                framework="npm"
            ))

        # "bin" is either {"command": "path", ...} or a single "path".
        bin_field = data.get('bin')
        if isinstance(bin_field, dict):
            for bin_name in bin_field:
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="bin_script",
                    name=bin_name,
                    line=self._find_line(content, bin_name),
                    framework="npm"
                ))
        elif isinstance(bin_field, str):
            # With the string form npm names the command after the package
            # (without its scope); fall back to the path if there is no name.
            package_name = data.get('name')
            if isinstance(package_name, str) and package_name:
                command = package_name.rsplit('/', 1)[-1]
            else:
                command = bin_field
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="bin_script",
                name=command,
                line=self._find_line(content, bin_field),
                framework="npm"
            ))

        return entry_points

    def _find_toml_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in pyproject.toml / Cargo.toml.

        The content is scanned with regular expressions, not parsed as TOML.
        """
        entry_points: list[EntryPointInfo] = []
        filename = Path(file_path).name.lower()

        if filename == 'pyproject.toml':
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="python_project",
                line=1,
                framework="Python"
            ))

            # First [project.scripts] header, if any.
            scripts_match = re.search(r'\[project\.scripts\]', content)
            if scripts_match:
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="scripts_section",
                    name="project_scripts",
                    line=self._line_at(content, scripts_match.start()),
                    framework="Python"
                ))

        elif filename == 'cargo.toml':
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="project_config",
                name="rust_project",
                line=1,
                framework="Rust"
            ))

            # One entry per [[bin]] table header.
            for match in re.finditer(r'\[\[bin\]\]', content):
                entry_points.append(EntryPointInfo(
                    file=file_path,
                    type="bin_target",
                    name="bin",
                    line=self._line_at(content, match.start()),
                    framework="Rust"
                ))

        return entry_points

    def _find_yaml_entry_points(self, file_path: str, content: str) -> list[EntryPointInfo]:
        """Find entry points in Compose files (other YAML files yield nothing)."""
        entry_points: list[EntryPointInfo] = []
        filename = Path(file_path).name.lower()
        if not self._is_compose_file(filename):
            return entry_points

        entry_points.append(EntryPointInfo(
            file=file_path,
            type="project_config",
            name="docker_compose_project",
            line=1,
            framework="Docker"
        ))

        # Top-level "services:" key (must start at column 0).
        for match in re.finditer(r'^services:\s*$', content, re.MULTILINE):
            entry_points.append(EntryPointInfo(
                file=file_path,
                type="services_section",
                name="services",
                line=self._line_at(content, match.start()),
                framework="Docker"
            ))
        return entry_points

    def classify_file(self, file_path: str, content: str) -> str:
        """Return "config": every config file goes to the config cluster."""
        return "config"

    @staticmethod
    def _is_compose_file(filename: str) -> bool:
        """Return True if a lowercased YAML file name looks like a Compose file.

        Matches names containing ``docker-compose`` as well as the
        ``compose.yaml`` / ``compose.yml`` naming (including variants such as
        ``compose.override.yaml``).
        """
        return 'docker-compose' in filename or filename.startswith('compose.')

    @staticmethod
    def _line_at(content: str, index: int) -> int:
        """Return the 1-based line number of character offset ``index``."""
        return content.count('\n', 0, index) + 1

    def _find_line(self, content: str, search_str: str) -> int:
        """Return the 1-based line of the first occurrence of ``search_str``.

        Returns 0 when ``search_str`` does not occur in ``content`` or is not
        a string.
        """
        if not isinstance(search_str, str):
            return 0
        index = content.find(search_str)
        if index < 0:
            return 0
        return self._line_at(content, index)