import type { UnifiedPost, Platform, TrendItem } from '../types.js';
import { analyzeSentiment } from './sentiment.js';
/** Granularity at which posts are grouped into time buckets. */
type TimeWindow = 'hourly' | 'daily' | 'weekly';
/** A group of posts that share the same time-bucket key. */
interface TimeBucket {
/** Bucket key string, as produced by `getTimeBucketKey`. */
key: string;
/** Posts whose timestamp maps to `key`. */
posts: UnifiedPost[];
}
/**
 * Builds the grouping key for `date` at the requested granularity.
 *
 * All fields are read with the local-time `Date` getters.
 *
 * - `hourly` yields `YYYY-MM-DDTHH`
 * - `daily`  yields `YYYY-MM-DD`
 * - `weekly` yields the `YYYY-MM-DD` of the Sunday that begins the week
 *   containing `date`
 *
 * @param date - Timestamp to classify; it is not mutated.
 * @param window - Bucket granularity.
 * @returns The bucket key string.
 */
function getTimeBucketKey(date: Date, window: TimeWindow): string {
  const twoDigits = (value: number): string => String(value).padStart(2, '0');
  const formatDay = (when: Date): string =>
    `${when.getFullYear()}-${twoDigits(when.getMonth() + 1)}-${twoDigits(when.getDate())}`;

  switch (window) {
    case 'hourly':
      return `${formatDay(date)}T${twoDigits(date.getHours())}`;
    case 'daily':
      return formatDay(date);
    case 'weekly': {
      // Work on a copy so the caller's Date is left untouched, then step back
      // by the weekday index (0 = Sunday) to land on the start of the week.
      const sunday = new Date(date);
      sunday.setDate(sunday.getDate() - sunday.getDay());
      return formatDay(sunday);
    }
  }
}
/**
 * Groups posts by their time-bucket key and returns the groups ordered by key.
 *
 * @param posts - Posts to group; each post's `timestamp` selects its bucket.
 * @param window - Granularity passed through to `getTimeBucketKey`.
 * @returns One bucket per distinct key, sorted ascending with `localeCompare`.
 */
function bucketize(posts: UnifiedPost[], window: TimeWindow): TimeBucket[] {
  const grouped = new Map<string, UnifiedPost[]>();

  for (const post of posts) {
    const bucketKey = getTimeBucketKey(post.timestamp, window);
    const members = grouped.get(bucketKey);
    if (members) {
      members.push(post);
    } else {
      grouped.set(bucketKey, [post]);
    }
  }

  const buckets: TimeBucket[] = [];
  for (const [key, members] of grouped) {
    buckets.push({ key, posts: members });
  }
  buckets.sort((left, right) => left.key.localeCompare(right.key));
  return buckets;
}
/**
 * Counts candidate topics across a set of posts.
 *
 * Two sources contribute to the tally:
 * - every entry of `post.hashtags`, counted verbatim;
 * - keywords from `post.content`: the text is lower-cased, split on
 *   whitespace, and each token has every character outside `[a-z0-9]`
 *   removed. A token is counted only if the cleaned form is longer than four
 *   characters and is not in `STOP_WORDS`.
 *
 * @param posts - Posts to scan.
 * @returns Map from topic string to number of occurrences.
 */
function extractTopics(posts: UnifiedPost[]): Map<string, number> {
  const topicCounts = new Map<string, number>();
  const bump = (topic: string): void => {
    topicCounts.set(topic, (topicCounts.get(topic) ?? 0) + 1);
  };

  for (const post of posts) {
    // Count hashtags
    for (const tag of post.hashtags) {
      bump(tag);
    }

    // Extract significant words (simple keyword extraction)
    for (const token of post.content.toLowerCase().split(/\s+/)) {
      // Strip punctuation BEFORE filtering. Filtering the raw token let
      // "there," or "about." bypass the stop-word check and let "ok!!!" be
      // counted as the two-letter topic "ok".
      const word = token.replace(/[^a-z0-9]/g, '');
      if (word.length > 4 && !STOP_WORDS.has(word)) {
        bump(word);
      }
    }
  }

  return topicCounts;
}
/**
 * Ranks topics found in `posts`, comparing the most recent time bucket with
 * the earlier ones.
 *
 * - No posts: returns an empty list.
 * - Fewer than two buckets: returns the `topN` most frequent topics over all
 *   posts, each with `growth` set to 0.
 * - Otherwise: every topic of the latest bucket gets a `growth` percentage
 *   relative to its average per-bucket count in the earlier buckets (rounded
 *   to one decimal). A topic absent from earlier buckets gets 100 when it
 *   occurs more than once in the latest bucket, else 0. Results are ordered
 *   by `count * (1 + max(growth, 0) / 100)`, descending, and cut to `topN`.
 *
 * For each topic, `sentiment` and `platforms` are derived from every post in
 * `posts` (not just the latest bucket) that lists the topic as a hashtag or
 * whose lower-cased content contains it as a substring.
 *
 * @param posts - Posts to analyse.
 * @param window - Bucket granularity (default `'daily'`).
 * @param topN - Maximum number of trends to return (default 10).
 * @returns Trend items, best first.
 */
export function detectTrends(posts: UnifiedPost[], window: TimeWindow = 'daily', topN: number = 10): TrendItem[] {
  if (posts.length === 0) return [];

  // Builds one trend entry; the relevance filter runs once and feeds both the
  // sentiment score and the platform list.
  const toTrend = (topic: string, count: number, growth: number): TrendItem => {
    const mentions = posts.filter(
      p => p.hashtags.includes(topic) || p.content.toLowerCase().includes(topic),
    );
    return {
      topic,
      count,
      growth,
      sentiment: analyzeSentiment(mentions.map(p => p.content).join(' ')).score,
      platforms: [...new Set(mentions.map(p => p.platform))],
    };
  };

  const buckets = bucketize(posts, window);

  if (buckets.length < 2) {
    // Not enough data for trend detection, just return top topics
    const ranked = Array.from(extractTopics(posts).entries());
    ranked.sort((a, b) => b[1] - a[1]);
    return ranked.slice(0, topN).map(([topic, count]) => toTrend(topic, count, 0));
  }

  // Compare latest bucket to previous buckets
  const newest = buckets[buckets.length - 1];
  const earlier = buckets.slice(0, -1);
  const newestCounts = extractTopics(newest.posts);
  const earlierCounts = extractTopics(earlier.flatMap(bucket => bucket.posts));

  // Calculate growth for each topic
  const trends: TrendItem[] = Array.from(newestCounts, ([topic, count]) => {
    const priorTotal = earlierCounts.get(topic) || 0;
    const priorAverage = priorTotal / Math.max(earlier.length, 1);

    let growth: number;
    if (priorAverage > 0) {
      growth = ((count - priorAverage) / priorAverage) * 100;
    } else {
      growth = count > 1 ? 100 : 0;
    }

    return toTrend(topic, count, Math.round(growth * 10) / 10);
  });

  // Sort by a composite score of count and growth
  const rankScore = (item: TrendItem): number =>
    item.count * (1 + Math.max(item.growth, 0) / 100);
  trends.sort((a, b) => rankScore(b) - rankScore(a));
  return trends.slice(0, topN);
}
/**
 * Describes how often posts were published, based on the span between the
 * earliest and the latest timestamp.
 *
 * @param posts - Posts to summarise; their order does not matter.
 * @returns
 * - `'insufficient data'` for fewer than two posts, or when any timestamp is
 *   an invalid `Date`;
 * - `'<n> posts in one day'` when all posts fall within a span shorter than
 *   24 hours;
 * - otherwise a rate in posts/day (tagged "very active" at 5 or more),
 *   posts/week, or posts/month, chosen by magnitude.
 */
export function getPostingFrequency(posts: UnifiedPost[]): string {
  if (posts.length < 2) return 'insufficient data';

  const MS_PER_DAY = 1000 * 60 * 60 * 24;

  // Single pass for the earliest/latest timestamp; no need to sort a copy.
  let earliest = Infinity;
  let latest = -Infinity;
  for (const post of posts) {
    const time = post.timestamp.getTime();
    // An invalid Date yields NaN, which would otherwise surface as "NaN posts/month".
    if (Number.isNaN(time)) return 'insufficient data';
    if (time < earliest) earliest = time;
    if (time > latest) latest = time;
  }

  const daySpan = (latest - earliest) / MS_PER_DAY;

  // Dividing by a sub-day span extrapolates absurd rates (two posts a minute
  // apart would read as 2880 posts/day), so anything under 24h is reported as
  // a plain count instead.
  if (daySpan < 1) return `${posts.length} posts in one day`;

  const postsPerDay = posts.length / daySpan;
  if (postsPerDay >= 5) return `${Math.round(postsPerDay)} posts/day (very active)`;
  if (postsPerDay >= 1) return `${Math.round(postsPerDay * 10) / 10} posts/day`;
  if (postsPerDay >= 1 / 7) return `${Math.round(postsPerDay * 7 * 10) / 10} posts/week`;
  return `${Math.round(postsPerDay * 30 * 10) / 10} posts/month`;
}
/**
 * Finds the hours of the day (0-23, read with `Date.getHours()`) that receive
 * more posts than the per-hour average.
 *
 * @param posts - Posts to tally by the hour of their `timestamp`.
 * @returns Hours whose post count is strictly above `posts.length / 24`,
 *   ordered from busiest to least busy.
 */
export function getPeakPostingHours(posts: UnifiedPost[]): number[] {
  const perHour: number[] = Array.from({ length: 24 }, () => 0);
  for (const post of posts) {
    const hour = post.timestamp.getHours();
    perHour[hour] = (perHour[hour] ?? 0) + 1;
  }

  const threshold = posts.length / 24;

  const peaks: Array<{ hour: number; count: number }> = [];
  perHour.forEach((count, hour) => {
    if (count > threshold) {
      peaks.push({ hour, count });
    }
  });

  peaks.sort((left, right) => right.count - left.count);
  return peaks.map(peak => peak.hour);
}
/**
 * Lower-case tokens that keyword extraction ignores: common English function
 * words plus a few URL fragments and filler words.
 */
const STOP_WORDS = new Set<string>([
  // Common English words
  'the', 'be', 'to', 'of', 'and', 'a', 'in', 'that', 'have', 'i',
  'it', 'for', 'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at',
  'this', 'but', 'his', 'by', 'from', 'they', 'we', 'say', 'her', 'she',
  'or', 'an', 'will', 'my', 'one', 'all', 'would', 'there', 'their', 'what',
  'so', 'up', 'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me',
  'when', 'make', 'can', 'like', 'time', 'no', 'just', 'him', 'know', 'take',
  'people', 'into', 'year', 'your', 'good', 'some', 'could', 'them', 'see', 'other',
  'than', 'then', 'now', 'look', 'only', 'come', 'its', 'over', 'think', 'also',
  'back', 'after', 'use', 'two', 'how', 'our', 'work', 'first', 'well', 'way',
  'even', 'new', 'want', 'because', 'any', 'these', 'give', 'day', 'most',
  // URL fragments
  'https', 'http', 'www', 'com',
  // Fillers and auxiliaries
  'really', 'very', 'been', 'more', 'much', 'still', 'should', 'here',
  'being', 'does', 'doing', 'done',
]);