# license_scanner.py (17.1 kB)
"""
License Scanner MCP Server
This MCP server provides tools to scan project dependencies and generate
license reports in markdown format.
"""
import json
import os
import re
import subprocess
import sys
from collections import Counter
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from urllib.parse import quote

import requests
import yaml
from fastmcp import FastMCP
from packageurl import PackageURL
# Initialize FastMCP server; the functions decorated with @mcp.tool() further
# down in this file are registered on this instance.
mcp = FastMCP("License Scanner")
# Cache for license information to avoid repeated API calls.
# Keys are the "<registry>:<package>:<version or 'latest'>" strings built by the
# get_*_package_license helpers; values are the license-info dicts they return.
# Only successful lookups are stored, and entries are never evicted.
license_cache: Dict[str, Dict[str, Any]] = {}
def get_package_manager_files(project_path: Path) -> List[Path]:
    """Identify package manager files in the project.

    Only the top level of ``project_path`` is inspected; sub-directories are
    not searched.

    Args:
        project_path: Directory to look in.

    Returns:
        Paths of the recognised manifest / lock files that exist as regular
        files, in the order they are listed in the table below. Empty when
        nothing matches or ``project_path`` does not exist.
    """
    # Recognised file name -> ecosystem. The ecosystem is informational only;
    # callers decide how to parse a file from its name.
    package_managers = {
        "package.json": "npm",
        "yarn.lock": "yarn",
        "pnpm-lock.yaml": "pnpm",
        "requirements.txt": "pip",
        "pyproject.toml": "pip",
        "Pipfile": "pip",
        "Pipfile.lock": "pip",
        "poetry.lock": "poetry",
        "Cargo.toml": "cargo",
        "Cargo.lock": "cargo",
        "composer.json": "composer",
        "composer.lock": "composer",
        "Gemfile": "bundler",
        "Gemfile.lock": "bundler",
        "go.mod": "go",
        "go.sum": "go",
        "pom.xml": "maven",
        "build.gradle": "gradle",
        "gradle.lockfile": "gradle",
    }
    candidates = (project_path / file_name for file_name in package_managers)
    # is_file() rather than exists(): a directory that happens to be named like
    # a manifest cannot be parsed and must not be reported as one.
    return [candidate for candidate in candidates if candidate.is_file()]
def parse_package_json(package_file: Path) -> List[Dict[str, str]]:
    """Parse package.json file and extract dependencies.

    Args:
        package_file: Path to a ``package.json`` file.

    Returns:
        One dict per declared dependency with the keys ``name``, ``version``
        (the range string as written), ``manager`` (always ``'npm'``) and
        ``section`` (the package.json key it was declared under). Returns an
        empty list when the file cannot be read or is not a JSON object.
    """
    dep_sections = ('dependencies', 'devDependencies', 'peerDependencies', 'optionalDependencies')
    try:
        with open(package_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Best-effort: an unreadable manifest must not abort the whole scan.
        # Diagnostics go to stderr because stdout carries the MCP protocol
        # stream when the server runs over stdio.
        print(f"Error parsing package.json: {e}", file=sys.stderr)
        return []
    if not isinstance(data, dict):
        print("Error parsing package.json: top-level value is not an object", file=sys.stderr)
        return []

    dependencies = []
    for section in dep_sections:
        entries = data.get(section)
        # Skip a malformed section instead of discarding every other section.
        if not isinstance(entries, dict):
            continue
        dependencies.extend(
            {
                'name': package_name,
                'version': version if isinstance(version, str) else str(version),
                'manager': 'npm',
                'section': section,
            }
            for package_name, version in entries.items()
        )
    return dependencies
# Splits a requirement string into the distribution name, optional "[extras]"
# and whatever follows (version specifiers or an "@ url" reference).
_REQUIREMENT_RE = re.compile(
    r"^(?P<name>[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)"
    r"\s*(?:\[[^\]]*\])?"
    r"\s*(?P<rest>.*)$"
)
# Start of an inline comment ("# ...") or a per-requirement option ("--hash=...").
_REQUIREMENT_TRAILER_RE = re.compile(r"(?:^|\s)#|\s--")


def _parse_requirement_spec(requirement: str) -> Optional[Dict[str, str]]:
    """Turn one requirement string into a dependency record.

    Shared by the requirements.txt and pyproject.toml parsers so both report
    names and versions the same way.

    Args:
        requirement: A single requirement, e.g. ``"requests[socks]>=2.31 ; os_name == 'nt'"``.

    Returns:
        ``{'name', 'version', 'manager': 'pip'}`` or ``None`` when the text is
        blank, a comment, an option line (``-r``, ``-e``, ``--hash``) or
        otherwise not a named requirement (bare URLs, local paths).
        ``version`` is the bare version for a single ``==`` pin, the specifier
        text with whitespace removed for anything else, and ``"unknown"`` when
        no specifier is given or the requirement is a direct URL reference.
    """
    text = _REQUIREMENT_TRAILER_RE.split(requirement, maxsplit=1)[0]
    # Drop the environment marker and a trailing line-continuation backslash.
    text = text.split(';', 1)[0].strip().rstrip('\\').strip()
    if not text:
        return None
    match = _REQUIREMENT_RE.match(text)
    if match is None:
        return None
    rest = match.group('rest')
    # What follows the name must be a specifier or "@ url"; anything else means
    # the line was a URL/path (e.g. "git+https://...") rather than a name.
    if rest and rest[0] not in '<>=!~(@':
        return None

    if rest.startswith('@'):
        version = "unknown"
    else:
        spec = ''.join(rest.strip('()').split())
        if not spec:
            version = "unknown"
        elif spec.startswith('==') and not spec.startswith('===') and ',' not in spec:
            version = spec[2:]
        else:
            version = spec
    return {'name': match.group('name'), 'version': version, 'manager': 'pip'}


def parse_requirements_txt(requirements_file: Path) -> List[Dict[str, str]]:
    """Parse requirements.txt file and extract dependencies.

    Comments, blank lines, option lines and URL/path requirements are skipped;
    extras, environment markers and inline comments are stripped from the
    reported name and version.

    Args:
        requirements_file: Path to a pip requirements file.

    Returns:
        Dependency records (``name``, ``version``, ``manager``) in file order,
        or an empty list when the file cannot be read.
    """
    try:
        # utf-8-sig also accepts plain UTF-8 and keeps a BOM out of the first name.
        with open(requirements_file, 'r', encoding='utf-8-sig') as f:
            parsed = (_parse_requirement_spec(line) for line in f)
            return [dep for dep in parsed if dep is not None]
    except Exception as e:
        # stderr: stdout carries the MCP protocol stream under stdio transport.
        print(f"Error parsing requirements.txt: {e}", file=sys.stderr)
        return []


def parse_pyproject_toml(pyproject_file: Path) -> List[Dict[str, str]]:
    """Parse pyproject.toml file and extract dependencies.

    Only the ``[project].dependencies`` array is read.

    Args:
        pyproject_file: Path to a ``pyproject.toml`` file.

    Returns:
        Dependency records (``name``, ``version``, ``manager``) in declaration
        order, or an empty list when the file cannot be read or parsed, no TOML
        parser is available, or the array is absent.
    """
    try:
        try:
            import tomllib as toml_parser  # standard library on Python 3.11+
        except ImportError:
            import toml as toml_parser  # third-party fallback for older interpreters
        with open(pyproject_file, 'r', encoding='utf-8') as f:
            data = toml_parser.loads(f.read())
        project = data.get('project')
        declared = project.get('dependencies') if isinstance(project, dict) else None
        if not isinstance(declared, list):
            return []
        parsed = (_parse_requirement_spec(dep) for dep in declared if isinstance(dep, str))
        return [dep for dep in parsed if dep is not None]
    except Exception as e:
        # stderr: stdout carries the MCP protocol stream under stdio transport.
        print(f"Error parsing pyproject.toml: {e}", file=sys.stderr)
        return []
def parse_cargo_toml(cargo_file: Path) -> List[Dict[str, str]]:
    """Parse Cargo.toml file and extract dependencies.

    Only the ``[dependencies]`` table is read.

    Args:
        cargo_file: Path to a ``Cargo.toml`` file.

    Returns:
        One dict per dependency with ``name``, ``version`` (the requirement
        string, or ``"unknown"`` for path/git/workspace dependencies without
        one) and ``manager`` (always ``'cargo'``). Returns an empty list when
        the file cannot be read or parsed or no TOML parser is available.
    """
    try:
        try:
            import tomllib as toml_parser  # standard library on Python 3.11+
        except ImportError:
            import toml as toml_parser  # third-party fallback for older interpreters
        with open(cargo_file, 'r', encoding='utf-8') as f:
            data = toml_parser.loads(f.read())

        declared = data.get('dependencies')
        if not isinstance(declared, dict):
            return []

        dependencies = []
        for alias, spec in declared.items():
            crate_name = alias
            version = "unknown"
            if isinstance(spec, str):
                version = spec
            elif isinstance(spec, dict):
                declared_version = spec.get('version')
                if isinstance(declared_version, str):
                    version = declared_version
                # A renamed dependency (`alias = { package = "real-name", ... }`)
                # must be looked up under the real crate name, not the alias.
                real_name = spec.get('package')
                if isinstance(real_name, str) and real_name:
                    crate_name = real_name
            dependencies.append({
                'name': crate_name,
                'version': version,
                'manager': 'cargo',
            })
        return dependencies
    except Exception as e:
        # stderr: stdout carries the MCP protocol stream under stdio transport.
        print(f"Error parsing Cargo.toml: {e}", file=sys.stderr)
        return []
def _npm_text_field(value: Any, key: str) -> str:
    """Return ``value[key]`` for object-shaped metadata, ``value`` for strings, else ''."""
    if isinstance(value, dict):
        value = value.get(key)
    return value if isinstance(value, str) else ''


def _npm_license_label(version_info: Dict[str, Any]) -> str:
    """Collapse npm's ``license`` / legacy ``licenses`` metadata into one string."""
    raw = version_info.get('license') or version_info.get('licenses')
    entries = raw if isinstance(raw, list) else [raw]
    labels = []
    for entry in entries:
        # Legacy manifests use {"type": "MIT", "url": "..."} objects.
        if isinstance(entry, dict):
            entry = entry.get('type') or entry.get('name')
        if isinstance(entry, str) and entry.strip():
            labels.append(entry.strip())
    return ' OR '.join(labels) if labels else 'Unknown'


def _npm_select_version_info(data: Dict[str, Any], version: Optional[str]) -> Dict[str, Any]:
    """Pick the metadata for an exactly pinned version, else the ``latest`` dist-tag."""
    versions = data.get('versions') or {}
    if version and version in versions:
        return versions[version]
    latest_version = (data.get('dist-tags') or {}).get('latest')
    if latest_version and latest_version in versions:
        return versions[latest_version]
    # Fall back to the top-level package document.
    return data


def get_npm_package_license(package_name: str, version: Optional[str] = None) -> Dict[str, Any]:
    """Get license information for an npm package.

    Args:
        package_name: Registry name, including the ``@scope/`` prefix if any.
        version: Version or range from the manifest. Metadata for that version
            is used only when it names a published version exactly; ranges such
            as ``^1.2.0`` fall back to the ``latest`` dist-tag.

    Returns:
        Dict with the string keys ``license``, ``homepage``, ``repository``,
        ``author`` and ``description``. On any lookup failure ``license`` is
        ``'Unknown'`` and the other fields are empty; failures are not cached.
    """
    cache_key = f"npm:{package_name}:{version or 'latest'}"
    if cache_key in license_cache:
        return license_cache[cache_key]
    try:
        # Keep '@' literal but encode the '/' of scoped names, so the name
        # cannot add extra path segments to the request URL.
        url = f"https://registry.npmjs.org/{quote(package_name, safe='@')}"
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        version_info = _npm_select_version_info(data, version)
        license_info = {
            'license': _npm_license_label(version_info),
            'homepage': _npm_text_field(version_info.get('homepage'), 'url'),
            'repository': _npm_text_field(version_info.get('repository'), 'url'),
            'author': _npm_text_field(version_info.get('author'), 'name'),
            'description': _npm_text_field(version_info.get('description'), 'text'),
        }
        license_cache[cache_key] = license_info
        return license_info
    except Exception as e:
        # stderr: stdout carries the MCP protocol stream under stdio transport.
        print(f"Error fetching license for {package_name}: {e}", file=sys.stderr)
        return {
            'license': 'Unknown',
            'homepage': '',
            'repository': '',
            'author': '',
            'description': ''
        }
def _pypi_license_label(info: Dict[str, Any]) -> str:
    """Derive a short license label from a PyPI ``info`` mapping.

    Preference order: ``license_expression``, a short single-line ``license``
    field, ``License ::`` trove classifiers, then the first line of a long
    ``license`` text. Returns ``'Unknown'`` when none of them is present.
    """
    expression = info.get('license_expression')
    if isinstance(expression, str) and expression.strip():
        return expression.strip()

    declared = info.get('license')
    declared = declared.strip() if isinstance(declared, str) else ''
    # Many projects paste the whole license text into this field; only accept
    # it directly when it looks like a name rather than a document.
    if declared and '\n' not in declared and len(declared) <= 100:
        return declared

    classifiers = info.get('classifiers') or []
    from_classifiers = [
        classifier.rsplit('::', 1)[-1].strip()
        for classifier in classifiers
        if isinstance(classifier, str) and classifier.startswith('License ::')
    ]
    if from_classifiers:
        return ', '.join(from_classifiers)

    if declared:
        return declared.splitlines()[0].strip()
    return 'Unknown'


def _pypi_project_url(project_urls: Dict[str, Any], *labels: str) -> str:
    """Return the first ``project_urls`` entry whose label matches, ignoring case."""
    lowered = {str(label).lower(): url for label, url in project_urls.items() if isinstance(url, str)}
    for label in labels:
        if lowered.get(label):
            return lowered[label]
    return ''


def get_pypi_package_license(package_name: str, version: Optional[str] = None) -> Dict[str, Any]:
    """Get license information for a PyPI package.

    Args:
        package_name: Distribution name as written in the manifest.
        version: Version or specifier from the manifest. Only used for the
            cache key; metadata always comes from the latest release.

    Returns:
        Dict with the string keys ``license``, ``homepage``, ``repository``,
        ``author`` and ``description``. On any lookup failure ``license`` is
        ``'Unknown'`` and the other fields are empty; failures are not cached.
    """
    cache_key = f"pypi:{package_name}:{version or 'latest'}"
    if cache_key in license_cache:
        return license_cache[cache_key]
    try:
        # Always use the package's latest info. Quote the name so it cannot add
        # extra path segments to the request URL.
        url = f"https://pypi.org/pypi/{quote(package_name, safe='')}/json"
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        info = data.get('info') or {}
        project_urls = info.get('project_urls') or {}
        license_info = {
            'license': _pypi_license_label(info),
            'homepage': info.get('home_page') or _pypi_project_url(project_urls, 'homepage', 'home'),
            'repository': _pypi_project_url(project_urls, 'source', 'repository', 'source code', 'code'),
            'author': info.get('author') or '',
            'description': info.get('summary') or '',
        }
        license_cache[cache_key] = license_info
        return license_info
    except Exception as e:
        # stderr: stdout carries the MCP protocol stream under stdio transport.
        print(f"Error fetching license for {package_name}: {e}", file=sys.stderr)
        return {
            'license': 'Unknown',
            'homepage': '',
            'repository': '',
            'author': '',
            'description': ''
        }
def _crate_license_label(data: Dict[str, Any], crate: Dict[str, Any], version: Optional[str]) -> str:
    """Find the license string for a crate in a crates.io API response.

    The license is looked up on the per-version entries of the top-level
    ``versions`` array: first the requested version if it matches exactly, then
    the crate's max stable / max / newest version, then the first version entry
    that carries one. ``crate['license']`` is kept as a last resort.
    """
    releases = [entry for entry in (data.get('versions') or []) if isinstance(entry, dict)]
    by_number = {entry.get('num'): entry for entry in releases}
    preferred = (
        version,
        crate.get('max_stable_version'),
        crate.get('max_version'),
        crate.get('newest_version'),
    )
    for number in preferred:
        release = by_number.get(number) if number else None
        if release and release.get('license'):
            return release['license']
    for release in releases:
        if release.get('license'):
            return release['license']
    return crate.get('license') or 'Unknown'


def get_crates_package_license(package_name: str, version: Optional[str] = None) -> Dict[str, Any]:
    """Get license information for a Rust crate.

    Args:
        package_name: Crate name as published on crates.io.
        version: Version requirement from the manifest. Metadata for that
            version is used only when it matches a published version exactly.

    Returns:
        Dict with the string keys ``license``, ``homepage``, ``repository``,
        ``author`` and ``description``. On any lookup failure ``license`` is
        ``'Unknown'`` and the other fields are empty; failures are not cached.
    """
    cache_key = f"crates:{package_name}:{version or 'latest'}"
    if cache_key in license_cache:
        return license_cache[cache_key]
    try:
        # Use crates.io API. Quote the name so it cannot add extra path segments.
        url = f"https://crates.io/api/v1/crates/{quote(package_name, safe='')}"
        # NOTE(review): crates.io's data-access policy asks API clients to send
        # an identifying User-Agent - adjust this string to the project's contact.
        headers = {'User-Agent': 'license-scanner-mcp'}
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        data = response.json()
        crate = data.get('crate') or {}
        authors = crate.get('authors') or []
        license_info = {
            'license': _crate_license_label(data, crate, version),
            'homepage': crate.get('homepage') or '',
            'repository': crate.get('repository') or '',
            'author': authors[0] if authors else '',
            'description': crate.get('description') or '',
        }
        license_cache[cache_key] = license_info
        return license_info
    except Exception as e:
        # stderr: stdout carries the MCP protocol stream under stdio transport.
        print(f"Error fetching license for {package_name}: {e}", file=sys.stderr)
        return {
            'license': 'Unknown',
            'homepage': '',
            'repository': '',
            'author': '',
            'description': ''
        }
def scan_project_dependencies(project_path: str) -> Dict[str, Any]:
    """Collect a project's declared dependencies and annotate them with license data.

    Args:
        project_path: Directory containing the manifests to scan.

    Returns:
        ``{"error": ...}`` when the path does not exist or holds no recognised
        package manager file. Otherwise a dict with ``project_path``,
        ``package_files``, ``dependencies`` (each parsed record merged with the
        result of its registry lookup) and ``total_dependencies``.
    """
    root = Path(project_path)
    if not root.exists():
        return {"error": f"Project path does not exist: {root}"}

    manifests = get_package_manager_files(root)
    if not manifests:
        return {"error": "No package manager files found in project"}

    # Manifest file name -> parser. Detected files without a parser are skipped.
    parsers = {
        "package.json": parse_package_json,
        "requirements.txt": parse_requirements_txt,
        "pyproject.toml": parse_pyproject_toml,
        "Cargo.toml": parse_cargo_toml,
    }
    collected = []
    for manifest in manifests:
        parse = parsers.get(manifest.name)
        if parse is not None:
            collected.extend(parse(manifest))

    # Manager tag -> registry lookup used to resolve license metadata.
    lookups = {
        'npm': get_npm_package_license,
        'pip': get_pypi_package_license,
        'cargo': get_crates_package_license,
    }
    for entry in collected:
        lookup = lookups.get(entry['manager'])
        if lookup is None:
            details = {
                'license': 'Unknown',
                'homepage': '',
                'repository': '',
                'author': '',
                'description': ''
            }
        else:
            details = lookup(entry['name'], entry['version'])
        entry.update(details)

    return {
        "project_path": str(root),
        "package_files": [str(manifest) for manifest in manifests],
        "dependencies": collected,
        "total_dependencies": len(collected)
    }
def _markdown_cell(value: Any) -> str:
    """Render ``value`` so it cannot break a markdown table row."""
    text = '' if value is None else str(value)
    # Newlines would end the row and a bare '|' would start a new column.
    return ' '.join(text.split()).replace('|', '\\|')


def _report_license_label(value: Any) -> str:
    """Reduce whatever a registry returned as ``license`` to a hashable string."""
    if isinstance(value, dict):
        value = value.get('type') or value.get('name')
    elif isinstance(value, (list, tuple)):
        value = ' OR '.join(_report_license_label(item) for item in value)
    if value is None or value == '':
        return 'Unknown'
    return str(value)


def generate_license_report_markdown(scan_result: Dict[str, Any]) -> str:
    """Generate a markdown report from scan results.

    Args:
        scan_result: Either ``{"error": ...}`` or a dict with the keys
            ``project_path``, ``total_dependencies``, ``package_files`` and
            ``dependencies`` (records with ``name``, ``version``, ``manager``
            and optionally ``license``, ``author``, ``homepage``).

    Returns:
        Markdown text: a header, one table per package manager (rows sorted by
        package name, case-insensitively) and a license summary ordered by
        descending count. For an error result, a short error document.
    """
    if "error" in scan_result:
        return f"# License Scan Error\n\n{scan_result['error']}"

    dependencies = scan_result['dependencies']
    parts = [
        "# License Report\n\n",
        f"**Project:** {scan_result['project_path']}\n\n",
        f"**Total Dependencies:** {scan_result['total_dependencies']}\n\n",
        f"**Package Files Found:** {', '.join(scan_result['package_files'])}\n\n",
    ]

    # Group dependencies by manager, keeping first-seen order of the managers.
    by_manager: Dict[str, List[Dict[str, Any]]] = {}
    for dep in dependencies:
        by_manager.setdefault(dep['manager'], []).append(dep)

    for manager, manager_deps in by_manager.items():
        parts.append(f"## {manager.upper()} Dependencies\n\n")
        parts.append(f"Found {len(manager_deps)} dependencies\n\n")
        parts.append("| Package | Version | License | Author | Homepage |\n")
        parts.append("|---------|---------|---------|--------|----------|\n")
        for dep in sorted(manager_deps, key=lambda d: str(d['name']).lower()):
            homepage = dep.get('homepage') or ''
            # Only show values that look like web links.
            if not str(homepage).startswith('http'):
                homepage = ''
            row = (
                dep['name'],
                dep['version'],
                _report_license_label(dep.get('license')),
                dep.get('author'),
                homepage,
            )
            parts.append("| " + " | ".join(_markdown_cell(cell) for cell in row) + " |\n")
        parts.append("\n")

    # most_common() sorts by count descending and keeps first-seen order on ties.
    license_counts = Counter(_report_license_label(dep.get('license')) for dep in dependencies)
    parts.append("## License Summary\n\n")
    parts.append("| License | Count |\n")
    parts.append("|---------|-------|\n")
    for license_name, count in license_counts.most_common():
        parts.append(f"| {_markdown_cell(license_name)} | {count} |\n")
    return ''.join(parts)
@mcp.tool()
def scan_dependencies(project_path: str) -> str:
    """
    Scan a project for dependencies and return license information.
    Args:
    project_path: Path to the project directory to scan
    Returns:
    JSON string containing dependency and license information
    """
    # The docstring above is kept verbatim: it is the text the function
    # carries as its tool description. Any failure is reported to the caller
    # as a JSON error object rather than raised.
    try:
        payload = scan_project_dependencies(project_path)
        serialized = json.dumps(payload, indent=2)
    except Exception as exc:
        serialized = json.dumps({"error": str(exc)}, indent=2)
    return serialized
@mcp.tool()
def generate_license_report(project_path: str, output_file: Optional[str] = None) -> str:
    """
    Generate a markdown license report for a project.
    Args:
    project_path: Path to the project directory to scan
    output_file: Optional path to save the markdown report (if not provided, returns the content)
    Returns:
    Markdown content of the license report
    """
    # The docstring above is kept verbatim: it is the text the function
    # carries as its tool description. `output_file` is annotated Optional so
    # the declared type matches its None default.
    try:
        scan_result = scan_project_dependencies(project_path)
        markdown_content = generate_license_report_markdown(scan_result)
        if not output_file:
            return markdown_content
        # Expand "~" and create missing parent directories so a path such as
        # "~/reports/licenses.md" works instead of failing on the write.
        output_path = Path(output_file).expanduser()
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(markdown_content, encoding='utf-8')
        return f"License report saved to: {output_file}"
    except Exception as e:
        # Report the failure as text; raising would surface as a tool error.
        return f"Error generating license report: {str(e)}"
@mcp.tool()
def list_package_managers(project_path: str) -> str:
    """
    List all package manager files found in a project.
    Args:
    project_path: Path to the project directory to scan
    Returns:
    JSON string containing list of package manager files
    """
    # The docstring above is kept verbatim: it is the text the function
    # carries as its tool description.
    try:
        root = Path(project_path)
        if not root.exists():
            # This one error payload is emitted compact (no indent).
            return json.dumps({"error": f"Project path does not exist: {root}"})
        found = [str(manifest) for manifest in get_package_manager_files(root)]
        summary = {
            "project_path": str(root),
            "package_files": found,
            "total_files": len(found)
        }
        return json.dumps(summary, indent=2)
    except Exception as exc:
        return json.dumps({"error": str(exc)}, indent=2)
# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # Run the MCP server
    mcp.run()