# SQLite3 Reporter Query Examples
The SQLite3 reporter generates a comprehensive database with package manifest and dependency data that can be queried using standard SQL. Here are some useful example queries:
## Basic Queries
### List all package manifests
```sql
SELECT manifest_id, ecosystem, display_path, created_at
FROM report_package_manifests
ORDER BY created_at;
```
### List all packages with their basic information
```sql
SELECT name, version, ecosystem, is_direct, is_malware, is_suspicious
FROM report_packages
ORDER BY name;
```
### Find all direct dependencies
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_direct = 1
ORDER BY p.name;
```
## Security Analysis Queries
### Find all vulnerabilities
```sql
SELECT p.name, p.version, v.vulnerability_id, v.title, v.severity, v.cvss_score
FROM report_packages p
JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
ORDER BY v.severity DESC, v.cvss_score DESC;
```
### Find critical and high severity vulnerabilities
```sql
SELECT p.name, p.version, v.vulnerability_id, v.title, v.severity, m.display_path
FROM report_packages p
JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE v.severity IN ('CRITICAL', 'HIGH')
ORDER BY v.severity DESC;
```
### Count vulnerabilities by severity
```sql
SELECT v.severity, COUNT(*) as vulnerability_count
FROM report_vulnerabilities v
GROUP BY v.severity
ORDER BY vulnerability_count DESC;
```
### Find packages with most vulnerabilities
```sql
SELECT p.name, p.version, p.ecosystem, COUNT(v.id) as vuln_count
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
GROUP BY p.id, p.name, p.version, p.ecosystem
HAVING vuln_count > 0
ORDER BY vuln_count DESC;
```
### Find vulnerability aliases
```sql
SELECT p.name, p.version, v.vulnerability_id, v.aliases
FROM report_packages p
JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
WHERE v.aliases IS NOT NULL AND v.aliases != '[]';
```
## License Analysis Queries
### Find all licenses
```sql
SELECT p.name, p.version, l.license_id, l.name
FROM report_packages p
JOIN report_licenses l ON p.id = l.report_package_licenses
ORDER BY l.license_id, p.name;
```
### Count packages by license
```sql
SELECT l.license_id, COUNT(DISTINCT p.id) as package_count
FROM report_licenses l
JOIN report_packages p ON l.report_package_licenses = p.id
GROUP BY l.license_id
ORDER BY package_count DESC;
```
### Find packages with specific licenses
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_licenses l ON p.id = l.report_package_licenses
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE l.license_id IN ('MIT', 'Apache-2.0', 'GPL-3.0')
ORDER BY l.license_id, p.name;
```
### License compliance overview by manifest
```sql
SELECT
m.display_path,
COUNT(DISTINCT p.id) as total_packages,
COUNT(DISTINCT l.license_id) as unique_licenses,
GROUP_CONCAT(DISTINCT l.license_id) as all_licenses
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
LEFT JOIN report_licenses l ON p.id = l.report_package_licenses
GROUP BY m.id, m.display_path
ORDER BY unique_licenses DESC;
```
### Find packages without license information
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
LEFT JOIN report_licenses l ON p.id = l.report_package_licenses
WHERE l.id IS NULL;
```
### Find potentially problematic licenses (copyleft, GPL, etc.)
```sql
SELECT p.name, p.version, l.license_id, m.display_path
FROM report_packages p
JOIN report_licenses l ON p.id = l.report_package_licenses
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE l.license_id LIKE '%GPL%'
OR l.license_id LIKE '%AGPL%'
OR l.license_id LIKE '%LGPL%'
OR l.license_id LIKE '%Copyleft%'
ORDER BY l.license_id, p.name;
```
### Find packages flagged as malware
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_malware = 1;
```
### Find suspicious packages
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_suspicious = 1;
```
### Get malware analysis details
```sql
SELECT p.name, p.version, ma.analysis_id, ma.is_malware, ma.confidence
FROM report_packages p
JOIN report_malwares ma ON p.id = ma.report_package_malware_analysis
WHERE ma.is_malware = 1;
```
## Dependency Analysis Queries
### Find packages with dependencies
```sql
SELECT p.name, p.version, COUNT(d.id) as dependency_count
FROM report_packages p
LEFT JOIN report_dependencies d ON p.id = d.report_package_dependencies
GROUP BY p.id, p.name, p.version
HAVING dependency_count > 0
ORDER BY dependency_count DESC;
```
### Show dependency relationships
```sql
SELECT
parent.name as parent_package,
parent.version as parent_version,
d.dependency_name,
d.dependency_version,
d.dependency_ecosystem
FROM report_packages parent
JOIN report_dependencies d ON parent.id = d.report_package_dependencies
ORDER BY parent.name, d.dependency_name;
```
## Ecosystem Analysis Queries
### Count packages by ecosystem
```sql
SELECT ecosystem, COUNT(*) as package_count
FROM report_packages
GROUP BY ecosystem
ORDER BY package_count DESC;
```
### Find npm packages only
```sql
SELECT name, version, is_direct
FROM report_packages
WHERE ecosystem = 'npm'
ORDER BY name;
```
## Cross-Manifest Analysis Queries
### Find packages used across multiple manifests
```sql
SELECT p.name, p.version, p.ecosystem, COUNT(DISTINCT mp.report_package_manifest_id) as manifest_count
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
GROUP BY p.name, p.version, p.ecosystem
HAVING manifest_count > 1
ORDER BY manifest_count DESC;
```
### Compare direct dependencies across manifests
```sql
SELECT
m.display_path,
p.name,
p.version,
p.ecosystem
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
WHERE p.is_direct = 1
ORDER BY m.display_path, p.name;
```
## OSS Project and OpenSSF Scorecard Queries
### Find packages with project information
```sql
SELECT p.name, p.version, proj.name as project_name, proj.url, proj.stars, proj.forks
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
ORDER BY proj.stars DESC;
```
### Find packages with low scorecard scores
```sql
SELECT p.name, p.version, s.score, s.scorecard_version, proj.name as project_name
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
WHERE s.score < 5.0
ORDER BY s.score ASC;
```
### Find packages failing specific scorecard checks
```sql
SELECT p.name, p.version, c.name as check_name, c.score, c.reason, proj.name as project_name
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks
WHERE c.name = 'Maintained' AND c.score < 5
ORDER BY c.score ASC;
```
### OpenSSF scorecard check summary
```sql
SELECT
c.name as check_name,
AVG(c.score) as avg_score,
MIN(c.score) as min_score,
MAX(c.score) as max_score,
COUNT(*) as package_count
FROM report_scorecard_checks c
GROUP BY c.name
ORDER BY avg_score ASC;
```
### Find packages with security-related scorecard failures
```sql
SELECT DISTINCT p.name, p.version, c.name as failed_check, c.score, c.reason
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks
WHERE c.name IN ('Security-Policy', 'Vulnerabilities', 'Token-Permissions', 'Dangerous-Workflow')
AND c.score < 5
ORDER BY c.score ASC;
```
### Find popular projects with poor security scores
```sql
SELECT
p.name,
p.version,
proj.name as project_name,
proj.stars,
s.score as scorecard_score,
COUNT(c.id) as failed_checks
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
LEFT JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks AND c.score < 5
WHERE proj.stars > 1000 AND s.score < 6
GROUP BY p.id, proj.id, s.id
ORDER BY proj.stars DESC;
```
### Scorecard overview by ecosystem
```sql
SELECT
p.ecosystem,
COUNT(DISTINCT p.id) as packages_with_scorecard,
AVG(s.score) as avg_scorecard_score,
MIN(s.score) as min_score,
MAX(s.score) as max_score,
AVG(proj.stars) as avg_stars
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
GROUP BY p.ecosystem
ORDER BY avg_scorecard_score DESC;
```
### Find packages with both vulnerabilities and poor scorecard scores
```sql
SELECT
p.name,
p.version,
s.score as scorecard_score,
COUNT(v.id) as vulnerability_count,
SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
proj.name as project_name,
proj.stars
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
WHERE s.score < 6
GROUP BY p.id, proj.id, s.id
HAVING vulnerability_count > 0
ORDER BY critical_vulns DESC, vulnerability_count DESC;
```
### Detailed scorecard check analysis
```sql
SELECT
p.name,
p.version,
proj.name as project_name,
s.score as overall_score,
c.name as check_name,
c.score as check_score,
c.reason
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
JOIN report_scorecard_checks c ON s.id = c.report_scorecard_checks
WHERE p.name = 'example-package'
ORDER BY c.score ASC;
```
### Find projects with best security practices
```sql
SELECT
p.name,
p.version,
proj.name as project_name,
proj.url,
s.score as scorecard_score,
proj.stars,
proj.forks
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
JOIN report_scorecards s ON proj.id = s.report_project_scorecard
WHERE s.score >= 8.0
ORDER BY s.score DESC, proj.stars DESC;
```
## SLSA Provenance Queries
### Find packages WITH SLSA provenance
```sql
SELECT p.name, p.version, p.ecosystem, COUNT(slsa.id) as provenance_count
FROM report_packages p
JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
GROUP BY p.id, p.name, p.version, p.ecosystem
ORDER BY provenance_count DESC;
```
### Find packages WITHOUT SLSA provenance (Missing Supply Chain Security)
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE slsa.id IS NULL
ORDER BY p.name;
```
### Find packages with verified SLSA provenance
```sql
SELECT p.name, p.version, slsa.source_repository, slsa.commit_sha, slsa.url
FROM report_packages p
JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE slsa.verified = true
ORDER BY p.name;
```
### Find packages with unverified SLSA provenance (Security Risk)
```sql
SELECT p.name, p.version, slsa.source_repository, slsa.commit_sha, slsa.verified
FROM report_packages p
JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE slsa.verified = false
ORDER BY p.name;
```
### SLSA provenance summary by ecosystem
```sql
SELECT
p.ecosystem,
COUNT(DISTINCT p.id) as total_packages,
COUNT(DISTINCT CASE WHEN slsa.id IS NOT NULL THEN p.id END) as packages_with_provenance,
COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) as packages_with_verified_provenance,
ROUND(
(COUNT(DISTINCT CASE WHEN slsa.id IS NOT NULL THEN p.id END) * 100.0) / COUNT(DISTINCT p.id), 2
) as provenance_coverage_percent,
ROUND(
(COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) * 100.0) / COUNT(DISTINCT p.id), 2
) as verified_provenance_percent
FROM report_packages p
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
GROUP BY p.ecosystem
ORDER BY provenance_coverage_percent DESC;
```
### Find direct dependencies without SLSA provenance (High Priority Security Risk)
```sql
SELECT p.name, p.version, p.ecosystem, m.display_path
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
WHERE p.is_direct = 1 AND slsa.id IS NULL
ORDER BY p.ecosystem, p.name;
```
### SLSA provenance source repository analysis
```sql
SELECT
slsa.source_repository,
COUNT(DISTINCT p.id) as package_count,
COUNT(CASE WHEN slsa.verified = true THEN 1 END) as verified_count,
COUNT(CASE WHEN slsa.verified = false THEN 1 END) as unverified_count,
GROUP_CONCAT(DISTINCT p.name) as packages
FROM report_slsa_provenances slsa
JOIN report_packages p ON slsa.report_package_slsa_provenances = p.id
GROUP BY slsa.source_repository
ORDER BY package_count DESC;
```
### Combined security analysis: Vulnerabilities + Missing SLSA Provenance
```sql
SELECT
p.name,
p.version,
p.ecosystem,
COUNT(v.id) as vulnerability_count,
CASE WHEN slsa.id IS NULL THEN 'Missing' ELSE 'Present' END as slsa_provenance,
CASE WHEN slsa.verified = true THEN 'Verified'
WHEN slsa.verified = false THEN 'Unverified'
ELSE 'Missing' END as slsa_status,
SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
GROUP BY p.id, p.name, p.version, p.ecosystem, slsa.id, slsa.verified
HAVING vulnerability_count > 0 OR slsa_provenance = 'Missing'
ORDER BY critical_vulns DESC, vulnerability_count DESC;
```
### Supply chain security scorecard
```sql
SELECT
m.display_path,
COUNT(DISTINCT p.id) as total_packages,
COUNT(DISTINCT CASE WHEN slsa.id IS NOT NULL THEN p.id END) as with_slsa_provenance,
COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) as with_verified_slsa,
COUNT(DISTINCT CASE WHEN v.id IS NOT NULL THEN p.id END) as with_vulnerabilities,
COUNT(DISTINCT CASE WHEN p.is_malware = 1 THEN p.id END) as malware_packages,
ROUND(
(COUNT(DISTINCT CASE WHEN slsa.verified = true THEN p.id END) * 100.0) / COUNT(DISTINCT p.id), 2
) as supply_chain_security_score
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
GROUP BY m.id, m.display_path
ORDER BY supply_chain_security_score DESC;
```
## Insights V2 Data Queries
### Query packages with Insights V2 data
```sql
SELECT name, version, ecosystem,
json_extract(insights_v2, '$.deprecated') as is_deprecated
FROM report_packages
WHERE insights_v2 IS NOT NULL;
```
### Extract vulnerability data from Insights V2
```sql
SELECT
name,
version,
json_extract(insights_v2, '$.vulnerabilities') as vulnerabilities
FROM report_packages
WHERE json_extract(insights_v2, '$.vulnerabilities') IS NOT NULL;
```
## Vulnerability Analysis Queries
### Vulnerability summary by manifest
```sql
SELECT
m.display_path,
m.ecosystem,
COUNT(DISTINCT p.id) as total_packages,
COUNT(v.id) as total_vulnerabilities,
SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
SUM(CASE WHEN v.severity = 'HIGH' THEN 1 ELSE 0 END) as high_vulns,
SUM(CASE WHEN v.severity = 'MEDIUM' THEN 1 ELSE 0 END) as medium_vulns,
SUM(CASE WHEN v.severity = 'LOW' THEN 1 ELSE 0 END) as low_vulns
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
GROUP BY m.id, m.display_path, m.ecosystem
ORDER BY critical_vulns DESC, high_vulns DESC;
```
### Vulnerability timeline analysis
```sql
SELECT
DATE(v.created_at) as scan_date,
v.severity,
COUNT(*) as vulnerability_count
FROM report_vulnerabilities v
GROUP BY DATE(v.created_at), v.severity
ORDER BY scan_date DESC, v.severity;
```
### Extract vulnerability data from Insights v2 JSON
```sql
SELECT
p.name,
p.version,
json_extract(p.insights_v2, '$.vulnerabilities') as raw_vulnerabilities
FROM report_packages p
WHERE json_extract(p.insights_v2, '$.vulnerabilities') IS NOT NULL;
```
### Extract license data from Insights v2 JSON
```sql
SELECT
p.name,
p.version,
json_extract(p.insights_v2, '$.licenses') as raw_licenses
FROM report_packages p
WHERE json_extract(p.insights_v2, '$.licenses') IS NOT NULL;
```
### Combined security and license risk analysis
```sql
SELECT
p.name,
p.version,
COUNT(DISTINCT v.id) as vulnerability_count,
GROUP_CONCAT(DISTINCT l.license_id) as licenses,
SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
SUM(CASE WHEN v.severity = 'HIGH' THEN 1 ELSE 0 END) as high_vulns
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
LEFT JOIN report_licenses l ON p.id = l.report_package_licenses
GROUP BY p.id, p.name, p.version
HAVING vulnerability_count > 0 OR licenses IS NOT NULL
ORDER BY critical_vulns DESC, high_vulns DESC, vulnerability_count DESC;
```
## Advanced Analysis Queries
### Security risk overview by manifest
```sql
SELECT
m.display_path,
m.ecosystem,
COUNT(p.id) as total_packages,
SUM(CASE WHEN p.is_malware = 1 THEN 1 ELSE 0 END) as malware_count,
SUM(CASE WHEN p.is_suspicious = 1 THEN 1 ELSE 0 END) as suspicious_count,
SUM(CASE WHEN p.is_direct = 1 THEN 1 ELSE 0 END) as direct_deps
FROM report_package_manifests m
LEFT JOIN report_package_manifest_packages mp ON m.id = mp.report_package_manifest_id
LEFT JOIN report_packages p ON mp.report_package_id = p.id
GROUP BY m.id, m.display_path, m.ecosystem
ORDER BY malware_count DESC, suspicious_count DESC;
```
### Generate package inventory report
```sql
SELECT
p.ecosystem,
p.name,
p.version,
p.is_direct,
p.package_url,
m.display_path as found_in_manifest,
p.created_at
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id
ORDER BY p.ecosystem, p.name, m.display_path;
```
## Working with JSON Data
Since some fields store JSON data, you can use SQLite's JSON functions to query nested data:
### Extract specific fields from package details
```sql
SELECT
name,
version,
json_extract(package_details, '$.commit') as commit_hash,
json_extract(package_details, '$.ecosystem') as detail_ecosystem
FROM report_packages
WHERE package_details IS NOT NULL;
```
### Query code analysis data
```sql
SELECT
name,
version,
json_extract(code_analysis, '$.usage_evidences') as usage_evidences
FROM report_packages
WHERE code_analysis IS NOT NULL;
```
## Tips for Querying
1. **Use EXPLAIN QUERY PLAN** to optimize slow queries
2. **Create indexes** on frequently queried columns for better performance
3. **Use JSON functions** to extract data from JSON fields
4. **Join tables** to get comprehensive views across related data
5. **Use aggregate functions** to generate summary statistics
## Dependency Graph Analysis Queries
The SQLite3 reporter includes a comprehensive dependency graph stored in the `report_dependency_graphs` table, enabling advanced dependency analysis including path tracing, dependent discovery, and graph traversal queries.
### Dependency Graph Schema
The dependency graph is stored in the `report_dependency_graphs` table with the following structure:
```sql
CREATE TABLE report_dependency_graphs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
-- Source package (dependent)
from_package_id TEXT NOT NULL,
from_package_name TEXT NOT NULL,
from_package_version TEXT NOT NULL,
from_package_ecosystem TEXT NOT NULL,
-- Target package (dependency)
to_package_id TEXT NOT NULL,
to_package_name TEXT NOT NULL,
to_package_version TEXT NOT NULL,
to_package_ecosystem TEXT NOT NULL,
-- Edge metadata
dependency_type TEXT,
version_constraint TEXT,
depth INTEGER DEFAULT 0,
is_direct BOOLEAN DEFAULT FALSE,
is_root_edge BOOLEAN DEFAULT FALSE,
manifest_id TEXT NOT NULL,
created_at DATETIME,
updated_at DATETIME
);
```
### Find All Dependencies of a Package
```sql
SELECT
to_package_name,
to_package_version,
to_package_ecosystem,
depth,
is_direct,
dependency_type
FROM report_dependency_graphs
WHERE from_package_name = 'express'
AND from_package_ecosystem = 'npm'
ORDER BY depth, to_package_name;
```
### Find All Dependents of a Package
```sql
SELECT
from_package_name,
from_package_version,
from_package_ecosystem,
depth,
is_direct
FROM report_dependency_graphs
WHERE to_package_name = 'lodash'
AND to_package_ecosystem = 'npm'
ORDER BY depth, from_package_name;
```
### Find Direct Dependencies Only
```sql
SELECT
to_package_name,
to_package_version,
to_package_ecosystem,
dependency_type
FROM report_dependency_graphs
WHERE from_package_name = 'my-app'
AND is_direct = 1
ORDER BY to_package_name;
```
### Find Root Packages (Top-Level Dependencies)
```sql
SELECT DISTINCT
from_package_name,
from_package_version,
from_package_ecosystem,
manifest_id
FROM report_dependency_graphs
WHERE is_root_edge = 1
ORDER BY from_package_name;
```
### Trace Dependency Path (Simple Path from A to B)
```sql
WITH RECURSIVE dependency_path AS (
-- Base case: find direct dependencies
SELECT
from_package_id,
from_package_name,
to_package_id,
to_package_name,
depth,
from_package_name || ' -> ' || to_package_name AS path,
1 AS level
FROM report_dependency_graphs
WHERE from_package_name = 'my-app'
UNION ALL
-- Recursive case: follow the dependency chain
SELECT
rdg.from_package_id,
rdg.from_package_name,
rdg.to_package_id,
rdg.to_package_name,
rdg.depth,
dp.path || ' -> ' || rdg.to_package_name,
dp.level + 1
FROM report_dependency_graphs rdg
JOIN dependency_path dp ON rdg.from_package_id = dp.to_package_id
WHERE dp.level < 10 -- Prevent infinite loops
)
SELECT path, level
FROM dependency_path
WHERE to_package_name = 'target-package'
ORDER BY level;
```
### Find Packages with Vulnerabilities in Dependency Chain
```sql
SELECT DISTINCT
dg.from_package_name,
dg.from_package_version,
dg.to_package_name AS vulnerable_dep,
dg.to_package_version AS vulnerable_version,
rv.vulnerability_id,
rv.severity,
dg.depth
FROM report_dependency_graphs dg
JOIN report_packages rp ON dg.to_package_id = rp.package_id
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
WHERE dg.from_package_name = 'my-app'
AND rv.severity IN ('CRITICAL', 'HIGH')
ORDER BY dg.depth, rv.severity;
```
### Find Dependency Depth Distribution
```sql
SELECT
depth,
COUNT(*) as edge_count,
COUNT(DISTINCT to_package_name) as unique_packages
FROM report_dependency_graphs
GROUP BY depth
ORDER BY depth;
```
### Find Packages with Most Dependencies
```sql
SELECT
from_package_name,
from_package_version,
from_package_ecosystem,
COUNT(*) as dependency_count
FROM report_dependency_graphs
GROUP BY from_package_id, from_package_name, from_package_version, from_package_ecosystem
ORDER BY dependency_count DESC
LIMIT 10;
```
### Find Packages with Most Dependents
```sql
SELECT
to_package_name,
to_package_version,
to_package_ecosystem,
COUNT(*) as dependent_count
FROM report_dependency_graphs
GROUP BY to_package_id, to_package_name, to_package_version, to_package_ecosystem
ORDER BY dependent_count DESC
LIMIT 10;
```
### Find Circular Dependencies
```sql
WITH RECURSIVE circular_deps AS (
SELECT
from_package_id,
to_package_id,
from_package_name,
to_package_name,
from_package_name || ' -> ' || to_package_name AS path,
1 AS level
FROM report_dependency_graphs
UNION ALL
SELECT
rdg.from_package_id,
rdg.to_package_id,
rdg.from_package_name,
rdg.to_package_name,
cd.path || ' -> ' || rdg.to_package_name,
cd.level + 1
FROM report_dependency_graphs rdg
JOIN circular_deps cd ON rdg.from_package_id = cd.to_package_id
WHERE cd.level < 50
AND rdg.to_package_id NOT IN (
SELECT from_package_id
FROM circular_deps c2
WHERE c2.path = cd.path
)
)
SELECT path
FROM circular_deps
WHERE to_package_id = from_package_id
ORDER BY level;
```
### Complex Vulnerability Impact Analysis
```sql
-- Find all packages that depend on packages with high severity vulnerabilities
SELECT DISTINCT
root_pkg.from_package_name as root_package,
vuln_pkg.to_package_name as vulnerable_package,
rv.vulnerability_id,
rv.severity,
MIN(dg.depth) as min_depth
FROM report_dependency_graphs root_pkg
JOIN report_dependency_graphs dg ON root_pkg.from_package_id = dg.from_package_id
JOIN report_packages rp ON dg.to_package_id = rp.package_id
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
JOIN report_dependency_graphs vuln_pkg ON dg.to_package_id = vuln_pkg.to_package_id
WHERE root_pkg.is_root_edge = 1
AND rv.severity IN ('CRITICAL', 'HIGH')
GROUP BY root_pkg.from_package_name, vuln_pkg.to_package_name, rv.vulnerability_id
ORDER BY min_depth, rv.severity;
```
### Find Transitive Dependency Chains
```sql
SELECT
dg1.from_package_name as root_pkg,
dg1.to_package_name as direct_dep,
dg2.to_package_name as transitive_dep,
dg1.depth + dg2.depth as total_depth
FROM report_dependency_graphs dg1
JOIN report_dependency_graphs dg2 ON dg1.to_package_id = dg2.from_package_id
WHERE dg1.is_root_edge = 1 AND dg2.depth > 0
ORDER BY total_depth
LIMIT 10;
```
### Vulnerability Impact Summary
```sql
SELECT
rp.name as vulnerable_package,
rv.vulnerability_id,
rv.severity,
COUNT(dg.from_package_id) as packages_affected
FROM report_packages rp
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
LEFT JOIN report_dependency_graphs dg ON rp.package_id = dg.to_package_id
GROUP BY rp.package_id, rv.vulnerability_id
ORDER BY rv.severity, packages_affected DESC;
```
### Performance Optimization
The dependency graph queries are optimized with the following indexes:
- `from_package_id` - for finding dependencies
- `to_package_id` - for finding dependents
- `manifest_id` - for manifest-specific queries
- `is_direct` - for direct dependency queries
- `is_root_edge` - for root package queries
- `depth` - for depth-based queries
For large dependency graphs, consider:
- Using LIMIT clauses for exploratory queries
- Adding WHERE clauses to filter by ecosystem or manifest
- Using the depth field to limit traversal depth
- Creating additional indexes for frequently queried patterns
## OpenSSF Scorecard Database Schema
The OpenSSF scorecard data is stored in separate, normalized entities for optimal querying:
### ReportProjects Table
```sql
CREATE TABLE report_projects (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
url TEXT,
description TEXT,
stars INTEGER,
forks INTEGER,
created_at DATETIME,
updated_at DATETIME,
report_package_projects INTEGER REFERENCES report_packages(id)
);
```
### ReportScorecards Table
```sql
CREATE TABLE report_scorecards (
id INTEGER PRIMARY KEY AUTOINCREMENT,
score REAL NOT NULL,
scorecard_version TEXT NOT NULL,
repo_name TEXT NOT NULL,
repo_commit TEXT NOT NULL,
date TEXT,
created_at DATETIME,
updated_at DATETIME,
report_project_scorecard INTEGER REFERENCES report_projects(id)
);
```
### ReportScorecardChecks Table
```sql
CREATE TABLE report_scorecard_checks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
name TEXT NOT NULL,
score REAL NOT NULL,
reason TEXT,
created_at DATETIME,
updated_at DATETIME,
report_scorecard_checks INTEGER REFERENCES report_scorecards(id)
);
```
### ReportSlsaProvenances Table
```sql
CREATE TABLE report_slsa_provenances (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_repository TEXT NOT NULL,
commit_sha TEXT NOT NULL,
url TEXT NOT NULL,
verified BOOLEAN NOT NULL DEFAULT FALSE,
created_at DATETIME,
updated_at DATETIME,
report_package_slsa_provenances INTEGER REFERENCES report_packages(id)
);
```
This normalized schema enables:
- Direct queries on scorecard scores without JSON parsing
- Individual analysis of each scorecard check
- Efficient joins across packages → projects → scorecards → checks
- **SLSA provenance tracking** for supply chain security analysis
- **Verification status monitoring** to identify unverified provenance
- **Source repository analysis** to understand package origins
- Proper indexing on scorecard and provenance fields for performance
## Creating Views for Common Queries
You can create views to simplify frequently used queries:
```sql
-- Create a view for security overview
CREATE VIEW security_overview AS
SELECT
p.name,
p.version,
p.ecosystem,
p.is_direct,
p.is_malware,
p.is_suspicious,
m.display_path,
m.ecosystem as manifest_ecosystem
FROM report_packages p
JOIN report_package_manifest_packages mp ON p.id = mp.report_package_id
JOIN report_package_manifests m ON mp.report_package_manifest_id = m.id;
-- Create a view for vulnerability impact analysis
CREATE VIEW vulnerability_impact AS
SELECT
rp.name as vulnerable_package,
rp.version as vulnerable_version,
rv.vulnerability_id,
rv.severity,
dg.from_package_name as affected_package,
dg.depth as dependency_depth,
dg.is_direct,
dg.is_root_edge
FROM report_packages rp
JOIN report_vulnerabilities rv ON rp.id = rv.report_package_vulnerabilities
LEFT JOIN report_dependency_graphs dg ON rp.package_id = dg.to_package_id;
-- Create a view for scorecard analysis
CREATE VIEW scorecard_analysis AS
SELECT
p.name as package_name,
p.version as package_version,
p.ecosystem,
proj.name as project_name,
proj.url as project_url,
proj.stars,
proj.forks,
s.score as scorecard_score,
s.scorecard_version,
s.repo_name,
s.date as scorecard_date
FROM report_packages p
JOIN report_projects proj ON p.id = proj.report_package_projects
LEFT JOIN report_scorecards s ON proj.id = s.report_project_scorecard;
-- Create a view for security posture overview
CREATE VIEW security_posture AS
SELECT
p.name,
p.version,
p.ecosystem,
p.is_malware,
p.is_suspicious,
COUNT(DISTINCT v.id) as vulnerability_count,
SUM(CASE WHEN v.severity = 'CRITICAL' THEN 1 ELSE 0 END) as critical_vulns,
SUM(CASE WHEN v.severity = 'HIGH' THEN 1 ELSE 0 END) as high_vulns,
s.score as scorecard_score,
proj.stars
FROM report_packages p
LEFT JOIN report_vulnerabilities v ON p.id = v.report_package_vulnerabilities
LEFT JOIN report_projects proj ON p.id = proj.report_package_projects
LEFT JOIN report_scorecards s ON proj.id = s.report_project_scorecard
GROUP BY p.id, proj.id, s.id;
-- Create a view for SLSA provenance analysis
CREATE VIEW slsa_provenance_analysis AS
SELECT
p.name as package_name,
p.version as package_version,
p.ecosystem,
p.is_direct,
slsa.source_repository,
slsa.commit_sha,
slsa.url as provenance_url,
slsa.verified,
CASE
WHEN slsa.id IS NULL THEN 'Missing'
WHEN slsa.verified = true THEN 'Verified'
ELSE 'Unverified'
END as provenance_status
FROM report_packages p
LEFT JOIN report_slsa_provenances slsa ON p.id = slsa.report_package_slsa_provenances;
-- Use the views
SELECT * FROM security_overview WHERE is_malware = 1;
SELECT * FROM vulnerability_impact WHERE severity IN ('CRITICAL', 'HIGH');
SELECT * FROM scorecard_analysis WHERE scorecard_score < 5.0;
SELECT * FROM security_posture WHERE scorecard_score < 6 AND vulnerability_count > 0;
SELECT * FROM slsa_provenance_analysis WHERE provenance_status = 'Missing';
SELECT * FROM slsa_provenance_analysis WHERE is_direct = 1 AND provenance_status != 'Verified';
```