"""
Keyword analysis module for App Store Optimization.
Analyzes keyword search volume, competition, and relevance for app discovery.
"""
from typing import Dict, List, Any, Optional, Tuple
import re
from collections import Counter
class KeywordAnalyzer:
    """Analyzes keywords for ASO effectiveness.

    Every call to ``analyze_keyword`` stores its result in
    ``self.analyzed_keywords`` under the keyword string, so analyzing the
    same keyword again overwrites the earlier entry.
    """

    # Competition level thresholds (based on number of competing apps).
    # Each value is the exclusive upper bound of its level; counts at or
    # above the 'high' bound are reported as 'very_high'.
    COMPETITION_THRESHOLDS = {
        'low': 1000,
        'medium': 5000,
        'high': 10000
    }

    # Search volume categories (monthly searches estimate).
    # Each value is the exclusive upper bound of its category; volumes at or
    # above the 'high' bound are reported as 'very_high'. The 'very_high'
    # entry itself is not consulted by _categorize_search_volume.
    VOLUME_CATEGORIES = {
        'very_low': 1000,
        'low': 5000,
        'medium': 20000,
        'high': 100000,
        'very_high': 500000
    }

    # Words dropped by extract_keywords_from_text before counting.
    _STOP_WORDS = frozenset({
        'the', 'and', 'for', 'with', 'this', 'that', 'from', 'have',
        'but', 'not', 'you', 'all', 'can', 'are', 'was', 'were', 'been'
    })

    def __init__(self):
        """Initialize keyword analyzer with an empty result cache."""
        # Maps keyword string -> the analysis dict built by analyze_keyword.
        self.analyzed_keywords: Dict[str, Dict[str, Any]] = {}

    def analyze_keyword(
        self,
        keyword: str,
        search_volume: int = 0,
        competing_apps: int = 0,
        relevance_score: float = 0.0
    ) -> Dict[str, Any]:
        """
        Analyze a single keyword for ASO potential.

        The result is also stored in ``self.analyzed_keywords[keyword]``.

        Args:
            keyword: The keyword to analyze
            search_volume: Estimated monthly search volume
            competing_apps: Number of apps competing for this keyword
            relevance_score: Relevance to your app (0.0-1.0)

        Returns:
            Dictionary with the inputs plus 'volume_category',
            'competition_level', 'difficulty_score', 'potential_score',
            'recommendation', 'keyword_length' (whitespace-separated word
            count) and 'is_long_tail' (True for three or more words).
        """
        difficulty_score = self._calculate_keyword_difficulty(
            search_volume,
            competing_apps
        )
        # Potential score (0-100)
        potential_score = self._calculate_potential_score(
            search_volume,
            competing_apps,
            relevance_score
        )
        word_count = len(keyword.split())

        analysis = {
            'keyword': keyword,
            'search_volume': search_volume,
            'volume_category': self._categorize_search_volume(search_volume),
            'competing_apps': competing_apps,
            'competition_level': self._calculate_competition_level(
                competing_apps
            ),
            'relevance_score': relevance_score,
            'difficulty_score': difficulty_score,
            'potential_score': potential_score,
            'recommendation': self._generate_recommendation(
                potential_score,
                difficulty_score,
                relevance_score
            ),
            'keyword_length': word_count,
            'is_long_tail': word_count >= 3
        }
        self.analyzed_keywords[keyword] = analysis
        return analysis

    def compare_keywords(self, keywords_data: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Compare multiple keywords and rank by potential.

        Args:
            keywords_data: List of dicts with keyword, search_volume,
                competing_apps, relevance_score. Only 'keyword' is required;
                the other fields default to 0 / 0.0.

        Returns:
            Comparison report with ranked keywords, the top primary,
            secondary and long-tail keywords, and a text summary.

        Raises:
            KeyError: If an entry has no 'keyword' field.
        """
        analyses = [
            self.analyze_keyword(
                keyword=kw_data['keyword'],
                search_volume=kw_data.get('search_volume', 0),
                competing_apps=kw_data.get('competing_apps', 0),
                relevance_score=kw_data.get('relevance_score', 0.0)
            )
            for kw_data in keywords_data
        ]

        # Sort by potential score (descending); ties keep input order.
        ranked_keywords = sorted(
            analyses,
            key=lambda x: x['potential_score'],
            reverse=True
        )

        # Categorize keywords. A keyword may appear in the long-tail list in
        # addition to the primary or secondary list.
        primary_keywords = [
            kw for kw in ranked_keywords
            if kw['potential_score'] >= 70 and kw['relevance_score'] >= 0.8
        ]
        secondary_keywords = [
            kw for kw in ranked_keywords
            if 50 <= kw['potential_score'] < 70 and kw['relevance_score'] >= 0.6
        ]
        long_tail_keywords = [
            kw for kw in ranked_keywords
            if kw['is_long_tail'] and kw['relevance_score'] >= 0.7
        ]

        return {
            'total_keywords_analyzed': len(analyses),
            'ranked_keywords': ranked_keywords,
            'primary_keywords': primary_keywords[:5],  # Top 5
            'secondary_keywords': secondary_keywords[:10],  # Top 10
            'long_tail_keywords': long_tail_keywords[:10],  # Top 10
            # The summary is built from the untruncated lists, so its counts
            # can exceed the number of entries returned above.
            'summary': self._generate_comparison_summary(
                primary_keywords,
                secondary_keywords,
                long_tail_keywords
            )
        }

    def find_long_tail_opportunities(
        self,
        base_keyword: str,
        modifiers: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Generate long-tail keyword variations.

        For every modifier two variations are produced ("modifier base" and
        "base modifier"), followed by four fixed prefix variations
        ("how", "what", "best", "top"). The competition labels are fixed
        strings, not measurements.

        Args:
            base_keyword: Core keyword (e.g., "task manager")
            modifiers: List of modifiers (e.g., ["free", "simple", "team"])

        Returns:
            List of long-tail keyword suggestions, each a dict with
            'keyword', 'pattern', 'estimated_competition' and 'rationale'.
        """
        long_tail_keywords = []

        # Generate combinations
        for modifier in modifiers:
            # Modifier + base
            long_tail_keywords.append({
                'keyword': f"{modifier} {base_keyword}",
                'pattern': 'modifier_base',
                'estimated_competition': 'low',
                'rationale': f"Less competitive variation of '{base_keyword}'"
            })
            # Base + modifier
            long_tail_keywords.append({
                'keyword': f"{base_keyword} {modifier}",
                'pattern': 'base_modifier',
                'estimated_competition': 'low',
                'rationale': f"Specific use-case variation of '{base_keyword}'"
            })

        # Add question-based long-tail
        for q_word in ('how', 'what', 'best', 'top'):
            long_tail_keywords.append({
                'keyword': f"{q_word} {base_keyword}",
                'pattern': 'question_based',
                'estimated_competition': 'very_low',
                'rationale': "Informational search query"
            })

        return long_tail_keywords

    def extract_keywords_from_text(
        self,
        text: str,
        min_word_length: int = 3
    ) -> List[Tuple[str, int]]:
        """
        Extract potential keywords from text (descriptions, reviews).

        Args:
            text: Text to analyze
            min_word_length: Minimum word length to consider

        Returns:
            List of (keyword, frequency) tuples for single words and 2-word
            phrases, most frequent first, limited to the top 50.
        """
        # Clean and normalize text: lowercase, punctuation becomes spaces.
        cleaned = re.sub(r'[^\w\s]', ' ', text.lower())

        # Keep words that are long enough and are not stop words.
        words = [
            w for w in cleaned.split()
            if len(w) >= min_word_length and w not in self._STOP_WORDS
        ]
        word_counts = Counter(words)

        # 2-word phrases are built from the filtered list, so two words that
        # were separated only by a removed word or punctuation are paired.
        phrase_counts = Counter(
            f"{first} {second}" for first, second in zip(words, words[1:])
        )

        # Combine and sort. The sort is stable, so among equal counts single
        # words come before phrases, each in first-seen order.
        all_keywords = list(word_counts.items()) + list(phrase_counts.items())
        all_keywords.sort(key=lambda x: x[1], reverse=True)
        return all_keywords[:50]  # Top 50

    def calculate_keyword_density(
        self,
        text: str,
        target_keywords: List[str]
    ) -> Dict[str, float]:
        """
        Calculate keyword density in text.

        Keywords are matched case-insensitively as whole words (or whole
        phrases), so "app" is not counted inside "happy". The words of a
        multi-word keyword may be separated by any run of whitespace.

        Args:
            text: Text to analyze (title, description)
            target_keywords: Keywords to check density for

        Returns:
            Dictionary of keyword: density (percentage), where density is
            occurrences divided by the number of whitespace-separated words
            in the text, rounded to 2 decimals. Empty or whitespace-only
            keywords, and empty text, yield 0.0.
        """
        text_lower = text.lower()
        total_words = len(text_lower.split())
        densities = {}

        for keyword in target_keywords:
            tokens = keyword.lower().split()
            if not tokens or total_words == 0:
                densities[keyword] = 0.0
                continue
            # Lookarounds instead of \b so keywords that begin or end with a
            # non-word character (e.g. "c++") can still match.
            pattern = (
                r'(?<!\w)'
                + r'\s+'.join(re.escape(token) for token in tokens)
                + r'(?!\w)'
            )
            occurrences = len(re.findall(pattern, text_lower))
            densities[keyword] = round((occurrences / total_words) * 100, 2)

        return densities

    def _calculate_competition_level(self, competing_apps: int) -> str:
        """Determine competition level based on number of competing apps."""
        thresholds = self.COMPETITION_THRESHOLDS
        for level in ('low', 'medium', 'high'):
            if competing_apps < thresholds[level]:
                return level
        return 'very_high'

    def _categorize_search_volume(self, search_volume: int) -> str:
        """Categorize search volume."""
        categories = self.VOLUME_CATEGORIES
        for category in ('very_low', 'low', 'medium', 'high'):
            if search_volume < categories[category]:
                return category
        return 'very_high'

    def _calculate_keyword_difficulty(
        self,
        search_volume: int,
        competing_apps: int
    ) -> float:
        """
        Calculate keyword difficulty score (0-100).

        Higher score = harder to rank. A keyword with no competing apps is
        scored 0.0 regardless of volume. Negative inputs are treated as 0 so
        the result stays inside the documented range.
        """
        if competing_apps <= 0:
            return 0.0

        # Competition factor (0-1), saturating at 50,000 apps
        competition_factor = min(competing_apps / 50000, 1.0)
        # Volume factor (0-1) - higher volume = more difficulty,
        # saturating at 1,000,000 searches
        volume_factor = min(max(search_volume, 0) / 1000000, 1.0)

        # Difficulty score (weighted average)
        difficulty = (competition_factor * 0.7 + volume_factor * 0.3) * 100
        return round(difficulty, 1)

    def _calculate_potential_score(
        self,
        search_volume: int,
        competing_apps: int,
        relevance_score: float
    ) -> float:
        """
        Calculate overall keyword potential (0-100).

        Higher score = better opportunity. Each component is clamped to its
        own point range, so out-of-range inputs (negative counts, relevance
        outside 0.0-1.0) cannot push the total outside 0-100.
        """
        # Volume score (0-40 points), saturating at 100,000 searches
        volume_score = min((max(search_volume, 0) / 100000) * 40, 40)

        # Competition score (0-30 points) - inverse relationship,
        # reaching 0 at 15,000 competing apps
        competition_score = max(30 - (max(competing_apps, 0) / 500), 0)

        # Relevance score (0-30 points)
        relevance_points = min(max(relevance_score, 0.0), 1.0) * 30

        total_score = volume_score + competition_score + relevance_points
        return round(min(total_score, 100), 1)

    def _generate_recommendation(
        self,
        potential_score: float,
        difficulty_score: float,
        relevance_score: float
    ) -> str:
        """Generate actionable recommendation for keyword.

        Low relevance (< 0.5) takes precedence over every other signal;
        difficulty only matters in the 50-70 potential band.
        """
        if relevance_score < 0.5:
            return "Low relevance - avoid targeting"
        if potential_score >= 70:
            return "High priority - target immediately"
        if potential_score >= 50:
            if difficulty_score < 50:
                return "Good opportunity - include in metadata"
            return "Competitive - use in description, not title"
        if potential_score >= 30:
            return "Secondary keyword - use for long-tail variations"
        return "Low potential - deprioritize"

    def _generate_comparison_summary(
        self,
        primary_keywords: List[Dict[str, Any]],
        secondary_keywords: List[Dict[str, Any]],
        long_tail_keywords: List[Dict[str, Any]]
    ) -> str:
        """Generate summary of keyword comparison.

        The reported counts are the lengths of the lists passed in; the top
        recommendation is the first primary keyword, when there is one.
        """
        summary_parts = [
            f"Identified {len(primary_keywords)} high-priority primary keywords."
        ]
        if primary_keywords:
            top = primary_keywords[0]
            summary_parts.append(
                f"Top recommendation: '{top['keyword']}' (potential score: {top['potential_score']})."
            )
        summary_parts.append(
            f"Found {len(secondary_keywords)} secondary keywords for description and metadata."
        )
        summary_parts.append(
            f"Discovered {len(long_tail_keywords)} long-tail opportunities with lower competition."
        )
        return " ".join(summary_parts)
def analyze_keyword_set(keywords_data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Rank a batch of keywords using a fresh KeywordAnalyzer.

    The analyzer instance is created for this call only and is not returned.

    Args:
        keywords_data: List of keyword data dictionaries, passed unchanged
            to KeywordAnalyzer.compare_keywords

    Returns:
        The report dictionary produced by compare_keywords
    """
    return KeywordAnalyzer().compare_keywords(keywords_data)