competition_metrics.py•12.8 kB
"""Competition Index calculations based on NIH/NSF methodologies."""
import logging
import math
from typing import Dict, List, Optional, Tuple
import numpy as np
from mcp_server.models.grants_schemas import OpportunityV1
from mcp_server.models.analytics_schemas import ScoreBreakdown, IndustryConstants
logger = logging.getLogger(__name__)
class CompetitionIndexCalculator:
"""
Calculate Competition Index (CI) using NIH/NSF methodologies.
The Competition Index measures how competitive a grant opportunity is
based on historical data, funding amounts, and application patterns.
"""
def __init__(self):
self.constants = IndustryConstants()
def calculate_basic_competition_index(
self,
estimated_applications: int,
number_of_awards: int
) -> float:
"""
Calculate basic Competition Index.
Formula: CI = (Total_Applications / Number_of_Awards) * 100
Args:
estimated_applications: Estimated number of applications
number_of_awards: Number of awards to be made
Returns:
Competition Index (higher = more competitive)
"""
if number_of_awards <= 0:
return 100.0 # Maximum competition if no awards
return (estimated_applications / number_of_awards) * 100
def estimate_applications_from_funding(
self,
award_ceiling: Optional[float],
award_floor: Optional[float],
agency_code: str,
funding_category: Optional[str]
) -> int:
"""
Estimate number of applications based on funding amounts and agency patterns.
Uses empirical data from NIH/NSF to estimate application volumes.
Args:
award_ceiling: Maximum award amount
award_floor: Minimum award amount
agency_code: Agency code (NIH, NSF, etc.)
funding_category: Category of funding
Returns:
Estimated number of applications
"""
# Default estimates based on funding tiers
if not award_ceiling:
award_ceiling = award_floor or 100000
# Base estimate on award size (empirical data from NIH/NSF)
if award_ceiling < 50000:
base_applications = 20
elif award_ceiling < 100000:
base_applications = 35
elif award_ceiling < 500000:
base_applications = 60
elif award_ceiling < 1000000:
base_applications = 100
else:
base_applications = 150
# Agency-specific multipliers (based on historical data)
agency_multipliers = {
'NIH': 1.2, # NIH grants tend to be more competitive
'NSF': 1.0, # Baseline
'DOE': 0.8, # Slightly less competitive
'DOD': 0.7, # More specialized, fewer applicants
'NASA': 0.9,
'EPA': 0.8,
'USDA': 0.7,
}
# Extract main agency code (first part)
main_agency = agency_code.split('-')[0] if agency_code else 'OTHER'
multiplier = agency_multipliers.get(main_agency, 1.0)
# Category-specific adjustments
category_multipliers = {
'Health': 1.3, # Very competitive
'Science/Technology': 1.2,
'Education': 1.1,
'Environment': 1.0,
'Agriculture': 0.8,
'Transportation': 0.7,
}
if funding_category:
for cat, mult in category_multipliers.items():
if cat.lower() in funding_category.lower():
multiplier *= mult
break
estimated_apps = int(base_applications * multiplier)
logger.debug(f"Estimated applications: {estimated_apps} for {agency_code} ${award_ceiling}")
return max(5, estimated_apps) # Minimum 5 applications
def calculate_weighted_competition_index(
self,
basic_ci: float,
award_ceiling: Optional[float],
agency_code: str,
deadline_days: Optional[int] = None
) -> float:
"""
Calculate Weighted Competition Index with additional factors.
Formula: WCI = CI * (1 / sqrt(Award_Ceiling)) * Agency_Weight_Factor * Deadline_Factor
Args:
basic_ci: Basic competition index
award_ceiling: Maximum award amount
agency_code: Agency code
deadline_days: Days until deadline
Returns:
Weighted Competition Index
"""
wci = basic_ci
# Award amount factor (higher awards = slightly less competition due to barriers)
if award_ceiling and award_ceiling > 0:
amount_factor = 1 / math.sqrt(award_ceiling / 100000) # Normalize to $100K
amount_factor = max(0.5, min(2.0, amount_factor)) # Clamp between 0.5-2.0
wci *= amount_factor
# Agency prestige factor
agency_factors = {
'NIH': 1.2, # High prestige = more competition
'NSF': 1.1,
'DOE': 0.9,
'DOD': 0.8, # More specialized
'NASA': 1.0,
'EPA': 0.9,
'USDA': 0.8,
}
main_agency = agency_code.split('-')[0] if agency_code else 'OTHER'
agency_factor = agency_factors.get(main_agency, 1.0)
wci *= agency_factor
# Deadline proximity factor (closer deadline = less competition)
if deadline_days is not None:
if deadline_days < 30:
deadline_factor = 0.8 # Less competition for short deadlines
elif deadline_days > 180:
deadline_factor = 1.1 # More time = more competition
else:
deadline_factor = 1.0
wci *= deadline_factor
return wci
def get_competition_interpretation(self, ci: float, agency_code: str) -> str:
"""
Interpret the Competition Index value.
Args:
ci: Competition Index value
agency_code: Agency code for context
Returns:
Human-readable interpretation
"""
main_agency = agency_code.split('-')[0] if agency_code else 'OTHER'
# Agency-specific benchmarks
if main_agency == 'NIH':
if ci < 20:
return "Low competition for NIH (excellent odds)"
elif ci < 30:
return "Moderate competition for NIH (good odds)"
elif ci < 50:
return "High competition for NIH (challenging)"
else:
return "Very high competition for NIH (very challenging)"
elif main_agency == 'NSF':
if ci < 25:
return "Low competition for NSF (excellent odds)"
elif ci < 40:
return "Moderate competition for NSF (good odds)"
elif ci < 60:
return "High competition for NSF (challenging)"
else:
return "Very high competition for NSF (very challenging)"
else:
# General interpretation
if ci < self.constants.LOW_COMPETITION_THRESHOLD:
return "Low competition (excellent opportunity)"
elif ci < self.constants.NIH_AVERAGE_CI:
return "Below average competition (good opportunity)"
elif ci < self.constants.NSF_AVERAGE_CI:
return "Average competition (moderate opportunity)"
elif ci < self.constants.HIGH_COMPETITION_THRESHOLD:
return "Above average competition (challenging)"
else:
return "High competition (very challenging)"
def calculate_percentile_ranking(
self,
ci: float,
reference_values: Optional[List[float]] = None
) -> float:
"""
Calculate percentile ranking for Competition Index.
Args:
ci: Competition Index value
reference_values: Optional list of reference CI values for comparison
Returns:
Percentile ranking (0-100)
"""
if reference_values and len(reference_values) > 0:
# Use provided reference values
return float(np.percentile(reference_values + [ci], 100 * (len(reference_values) / (len(reference_values) + 1))))
# Use industry standard distribution
# Assume log-normal distribution of CI values
industry_mean = self.constants.NIH_AVERAGE_CI
industry_std = 15.0 # Empirical standard deviation
# Convert to percentile using normal approximation
z_score = (ci - industry_mean) / industry_std
# Higher CI = higher percentile (more competitive)
# Use approximation instead of scipy to avoid dependency
# Normal CDF approximation using error function
percentile = 50 * (1 + math.erf(z_score / math.sqrt(2)))
return max(0, min(100, percentile))
def calculate_competition_score(
self,
opportunity: OpportunityV1,
reference_opportunities: Optional[List[OpportunityV1]] = None
) -> ScoreBreakdown:
"""
Calculate comprehensive Competition Index score.
Args:
opportunity: Grant opportunity to score
reference_opportunities: Optional list for percentile calculation
Returns:
ScoreBreakdown with transparent calculation
"""
try:
# Extract key information
awards = opportunity.summary.expected_number_of_awards or 1
ceiling = opportunity.summary.award_ceiling
floor = opportunity.summary.award_floor
agency = opportunity.agency_code
category = opportunity.summary.funding_category
# Estimate applications
estimated_apps = self.estimate_applications_from_funding(
ceiling, floor, agency, category
)
# Calculate basic CI
basic_ci = self.calculate_basic_competition_index(estimated_apps, awards)
# Calculate weighted CI
weighted_ci = self.calculate_weighted_competition_index(
basic_ci, ceiling, agency
)
# Calculate percentile ranking
percentile = self.calculate_percentile_ranking(weighted_ci)
# Get interpretation
interpretation = self.get_competition_interpretation(weighted_ci, agency)
# Competition Index score is inverse (lower CI = higher score)
# Convert CI to 0-100 scale where 100 = best (least competitive)
max_ci = 100.0 # Theoretical maximum
score = max(0, (max_ci - weighted_ci) / max_ci * 100)
# Industry benchmark
main_agency = agency.split('-')[0] if agency else 'OTHER'
if main_agency == 'NIH':
benchmark = f"NIH average: {self.constants.NIH_AVERAGE_CI}"
elif main_agency == 'NSF':
benchmark = f"NSF average: {self.constants.NSF_AVERAGE_CI}"
else:
benchmark = f"Industry range: {self.constants.LOW_COMPETITION_THRESHOLD}-{self.constants.HIGH_COMPETITION_THRESHOLD}"
return ScoreBreakdown(
value=score,
calculation=f"CI = ({estimated_apps} apps / {awards} awards) = {weighted_ci:.1f}",
components={
"estimated_applications": estimated_apps,
"number_of_awards": awards,
"basic_ci": basic_ci,
"weighted_ci": weighted_ci,
"award_ceiling": ceiling,
"agency_code": agency,
"formula": "Weighted CI with agency and amount factors"
},
interpretation=interpretation,
percentile=percentile,
industry_benchmark=benchmark
)
except Exception as e:
logger.error(f"Error calculating competition score: {e}")
return ScoreBreakdown(
value=50.0,
calculation="Error in calculation",
components={"error": str(e)},
interpretation="Unable to calculate competition index",
percentile=None,
industry_benchmark=None
)