We provide all the information about MCP servers via our MCP API.
curl -X GET 'https://glama.ai/api/mcp/v1/servers/Cicatriiz/pophive-mcp-server'
If you have feedback or need assistance with the MCP directory API, please join our Discord server
import axios from 'axios';
import * as cheerio from 'cheerio';
import { saveData } from '../utils/file-saver.js';
export class RespiratoryScraper {
constructor(dataDir) {
this.dataDir = dataDir;
this.baseUrl = 'https://www.pophive.org';
this.dashboardUrl = 'https://www.pophive.org/respiratory-diseases';
}
async scrapeAll() {
try {
console.error('Scraping respiratory diseases data from PopHIVE...');
// Scrape the main dashboard page
const response = await axios.get(this.dashboardUrl, {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
},
timeout: 30000
});
const $ = cheerio.load(response.data);
// Try to find data download links or embedded data
const downloadLinks = this.findDownloadLinks($);
let edData = [];
let labData = [];
let wastewaterData = [];
let trendsData = [];
if (downloadLinks.length > 0) {
// Try to download data from found links
for (const link of downloadLinks) {
try {
const data = await this.downloadDataFromLink(link);
if (link.includes('ed') || link.includes('emergency')) {
edData = data;
} else if (link.includes('lab') || link.includes('nrevss')) {
labData = data;
} else if (link.includes('wastewater') || link.includes('nwws')) {
wastewaterData = data;
} else if (link.includes('trends') || link.includes('google')) {
trendsData = data;
}
} catch (error) {
console.error(`Error downloading from ${link}:`, error.message);
}
}
}
// If no download links found, parse embedded data
if (edData.length === 0) {
edData = this.parseEDData($);
}
if (labData.length === 0) {
labData = this.parseLabData($);
}
if (wastewaterData.length === 0) {
wastewaterData = this.parseWastewaterData($);
}
if (trendsData.length === 0) {
trendsData = this.parseTrendsData($);
}
// If still no data, use sample data
if (edData.length === 0) {
edData = this.generateSampleEDData();
}
if (labData.length === 0) {
labData = this.generateSampleLabData();
}
if (wastewaterData.length === 0) {
wastewaterData = this.generateSampleWastewaterData();
}
if (trendsData.length === 0) {
trendsData = this.generateSampleTrendsData();
}
// Save the scraped data
await saveData(this.dataDir, 'respiratory_ed.json', edData);
await saveData(this.dataDir, 'respiratory_lab.json', labData);
await saveData(this.dataDir, 'respiratory_wastewater.json', wastewaterData);
await saveData(this.dataDir, 'respiratory_trends.json', trendsData);
console.error(`Respiratory data scraped: ${edData.length} ED records, ${labData.length} lab records, ${wastewaterData.length} wastewater records, ${trendsData.length} trends records`);
return {
ed: edData,
lab: labData,
wastewater: wastewaterData,
trends: trendsData
};
} catch (error) {
console.error('Error scraping respiratory data:', error.message);
// Return sample data if scraping fails
const edData = this.generateSampleEDData();
const labData = this.generateSampleLabData();
const wastewaterData = this.generateSampleWastewaterData();
const trendsData = this.generateSampleTrendsData();
await saveData(this.dataDir, 'respiratory_ed.json', edData);
await saveData(this.dataDir, 'respiratory_lab.json', labData);
await saveData(this.dataDir, 'respiratory_wastewater.json', wastewaterData);
await saveData(this.dataDir, 'respiratory_trends.json', trendsData);
return {
ed: edData,
lab: labData,
wastewater: wastewaterData,
trends: trendsData
};
}
}
findDownloadLinks($) {
const links = [];
// Look for download buttons, CSV links, or data export options
$('a[href*="download"], a[href*=".csv"], a[href*=".json"], button[data-download]').each((i, el) => {
const href = $(el).attr('href') || $(el).attr('data-download');
if (href) {
const fullUrl = href.startsWith('http') ? href : `${this.baseUrl}${href}`;
links.push(fullUrl);
}
});
// Look for API endpoints in script tags
$('script').each((i, el) => {
const scriptContent = $(el).html();
if (scriptContent) {
const apiMatches = scriptContent.match(/["']([^"']*api[^"']*\.(?:csv|json))['"]/gi);
if (apiMatches) {
apiMatches.forEach(match => {
const url = match.replace(/['"]/g, '');
const fullUrl = url.startsWith('http') ? url : `${this.baseUrl}${url}`;
links.push(fullUrl);
});
}
}
});
return [...new Set(links)]; // Remove duplicates
}
async downloadDataFromLink(url) {
try {
const response = await axios.get(url, {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
},
timeout: 30000
});
if (url.endsWith('.csv')) {
return this.parseCSVData(response.data);
} else if (url.endsWith('.json')) {
return JSON.parse(response.data);
} else {
// Try to parse as JSON first, then CSV
try {
return JSON.parse(response.data);
} catch {
return this.parseCSVData(response.data);
}
}
} catch (error) {
console.error(`Error downloading data from ${url}:`, error.message);
return [];
}
}
parseCSVData(csvText) {
const lines = csvText.split('\n').filter(line => line.trim());
if (lines.length < 2) return [];
const headers = lines[0].split(',').map(h => h.trim().replace(/"/g, ''));
const data = [];
for (let i = 1; i < lines.length; i++) {
const values = lines[i].split(',').map(v => v.trim().replace(/"/g, ''));
if (values.length === headers.length) {
const row = {};
headers.forEach((header, index) => {
row[header] = values[index];
});
data.push(row);
}
}
return data;
}
parseEDData($) {
const data = [];
// Look for tables containing ED visit data
$('table').each((i, table) => {
const $table = $(table);
const tableText = $table.text().toLowerCase();
if (tableText.includes('emergency') || tableText.includes('ed') || tableText.includes('visits') || tableText.includes('epic')) {
const rows = $table.find('tr');
const headers = [];
// Extract headers
rows.first().find('th, td').each((j, cell) => {
headers.push($(cell).text().trim());
});
// Extract data rows
rows.slice(1).each((j, row) => {
const cells = $(row).find('td');
if (cells.length > 0) {
const rowData = {};
cells.each((k, cell) => {
const header = headers[k] || `column_${k}`;
rowData[header] = $(cell).text().trim();
});
if (Object.keys(rowData).length > 0) {
data.push(this.normalizeEDRecord(rowData));
}
}
});
}
});
return data.length > 0 ? data : [];
}
parseLabData($) {
const data = [];
// Look for tables containing lab data
$('table').each((i, table) => {
const $table = $(table);
const tableText = $table.text().toLowerCase();
if (tableText.includes('lab') || tableText.includes('nrevss') || tableText.includes('positivity') || tableText.includes('test')) {
const rows = $table.find('tr');
const headers = [];
// Extract headers
rows.first().find('th, td').each((j, cell) => {
headers.push($(cell).text().trim());
});
// Extract data rows
rows.slice(1).each((j, row) => {
const cells = $(row).find('td');
if (cells.length > 0) {
const rowData = {};
cells.each((k, cell) => {
const header = headers[k] || `column_${k}`;
rowData[header] = $(cell).text().trim();
});
if (Object.keys(rowData).length > 0) {
data.push(this.normalizeLabRecord(rowData));
}
}
});
}
});
return data.length > 0 ? data : [];
}
parseWastewaterData($) {
const data = [];
// Look for tables containing wastewater data
$('table').each((i, table) => {
const $table = $(table);
const tableText = $table.text().toLowerCase();
if (tableText.includes('wastewater') || tableText.includes('nwws') || tableText.includes('viral') || tableText.includes('sewage')) {
const rows = $table.find('tr');
const headers = [];
// Extract headers
rows.first().find('th, td').each((j, cell) => {
headers.push($(cell).text().trim());
});
// Extract data rows
rows.slice(1).each((j, row) => {
const cells = $(row).find('td');
if (cells.length > 0) {
const rowData = {};
cells.each((k, cell) => {
const header = headers[k] || `column_${k}`;
rowData[header] = $(cell).text().trim();
});
if (Object.keys(rowData).length > 0) {
data.push(this.normalizeWastewaterRecord(rowData));
}
}
});
}
});
return data.length > 0 ? data : [];
}
parseTrendsData($) {
const data = [];
// Look for tables containing Google Trends data
$('table').each((i, table) => {
const $table = $(table);
const tableText = $table.text().toLowerCase();
if (tableText.includes('trends') || tableText.includes('google') || tableText.includes('search') || tableText.includes('symptoms')) {
const rows = $table.find('tr');
const headers = [];
// Extract headers
rows.first().find('th, td').each((j, cell) => {
headers.push($(cell).text().trim());
});
// Extract data rows
rows.slice(1).each((j, row) => {
const cells = $(row).find('td');
if (cells.length > 0) {
const rowData = {};
cells.each((k, cell) => {
const header = headers[k] || `column_${k}`;
rowData[header] = $(cell).text().trim();
});
if (Object.keys(rowData).length > 0) {
data.push(this.normalizeTrendsRecord(rowData));
}
}
});
}
});
return data.length > 0 ? data : [];
}
normalizeEDRecord(rawData) {
return {
geography: rawData.State || rawData.Geography || rawData.state || 'US',
date: rawData.Date || rawData.date || new Date().toISOString().split('T')[0],
week: rawData.Week || rawData.week || this.getWeekString(new Date()),
virus: rawData.Virus || rawData.virus || rawData.Disease || 'Unknown',
ed_visits: parseInt(rawData['ED Visits'] || rawData.ed_visits || rawData.Visits || '0'),
ed_visits_per_100k: parseFloat(rawData['ED Visits per 100k'] || rawData.ed_visits_per_100k || rawData.Rate || '0'),
percent_change: parseFloat(rawData['Percent Change'] || rawData.percent_change || rawData.Change || '0'),
source: rawData.Source || 'Epic Cosmos',
last_updated: new Date().toISOString().split('T')[0]
};
}
normalizeLabRecord(rawData) {
return {
geography: rawData.Geography || rawData.geography || rawData.Region || 'US',
date: rawData.Date || rawData.date || new Date().toISOString().split('T')[0],
week: rawData.Week || rawData.week || this.getWeekString(new Date()),
virus: rawData.Virus || rawData.virus || rawData.Pathogen || 'Unknown',
tests_positive: parseInt(rawData['Tests Positive'] || rawData.tests_positive || rawData.Positive || '0'),
total_tests: parseInt(rawData['Total Tests'] || rawData.total_tests || rawData.Total || '0'),
positivity_rate: parseFloat(rawData['Positivity Rate'] || rawData.positivity_rate || rawData.Rate || '0'),
source: 'CDC NREVSS',
last_updated: new Date().toISOString().split('T')[0]
};
}
normalizeWastewaterRecord(rawData) {
return {
geography: rawData.Geography || rawData.geography || rawData.Region || 'Region 1',
date: rawData.Date || rawData.date || new Date().toISOString().split('T')[0],
week: rawData.Week || rawData.week || this.getWeekString(new Date()),
virus: rawData.Virus || rawData.virus || rawData.Pathogen || 'SARS-CoV-2',
viral_level: rawData['Viral Level'] || rawData.viral_level || rawData.Level || 'Moderate',
copies_per_ml: parseFloat(rawData['Copies per mL'] || rawData.copies_per_ml || rawData.Concentration || '0'),
percent_change: parseFloat(rawData['Percent Change'] || rawData.percent_change || rawData.Change || '0'),
source: 'CDC NWWS',
last_updated: new Date().toISOString().split('T')[0]
};
}
normalizeTrendsRecord(rawData) {
return {
geography: rawData.Geography || rawData.geography || rawData.State || 'US',
date: rawData.Date || rawData.date || new Date().toISOString().split('T')[0],
week: rawData.Week || rawData.week || this.getWeekString(new Date()),
search_term: rawData['Search Term'] || rawData.search_term || rawData.Term || 'flu symptoms',
relative_search_volume: parseFloat(rawData['Search Volume'] || rawData.relative_search_volume || rawData.Volume || '0'),
percent_change: parseFloat(rawData['Percent Change'] || rawData.percent_change || rawData.Change || '0'),
source: 'Google Trends',
last_updated: new Date().toISOString().split('T')[0]
};
}
getWeekString(date) {
const year = date.getFullYear();
const start = new Date(year, 0, 1);
const days = Math.floor((date - start) / (24 * 60 * 60 * 1000));
const week = Math.ceil((days + start.getDay() + 1) / 7);
return `${year}-${week.toString().padStart(2, '0')}`;
}
generateSampleEDData() {
const states = ['US', 'CA', 'TX', 'FL', 'NY', 'PA', 'IL', 'OH', 'GA', 'NC'];
const viruses = ['RSV', 'COVID-19', 'Influenza', 'Rhinovirus'];
const data = [];
const today = new Date();
// Generate data for the last 12 weeks
for (let weekOffset = 0; weekOffset < 12; weekOffset++) {
const weekDate = new Date(today.getTime() - (weekOffset * 7 * 24 * 60 * 60 * 1000));
const weekString = this.getWeekString(weekDate);
states.forEach(state => {
viruses.forEach(virus => {
data.push({
geography: state,
date: weekDate.toISOString().split('T')[0],
week: weekString,
virus: virus,
ed_visits: Math.floor(Math.random() * 20000) + 1000,
ed_visits_per_100k: Math.random() * 50 + 5,
percent_change: (Math.random() - 0.5) * 40, // -20% to +20%
source: 'Epic Cosmos',
last_updated: new Date().toISOString().split('T')[0]
});
});
});
}
return data;
}
generateSampleLabData() {
const viruses = ['RSV', 'Influenza A', 'Influenza B', 'SARS-CoV-2', 'Rhinovirus'];
const data = [];
const today = new Date();
// Generate data for the last 12 weeks
for (let weekOffset = 0; weekOffset < 12; weekOffset++) {
const weekDate = new Date(today.getTime() - (weekOffset * 7 * 24 * 60 * 60 * 1000));
const weekString = this.getWeekString(weekDate);
viruses.forEach(virus => {
const totalTests = Math.floor(Math.random() * 10000) + 1000;
const positiveTests = Math.floor(totalTests * (Math.random() * 0.3)); // 0-30% positivity
data.push({
geography: 'US',
date: weekDate.toISOString().split('T')[0],
week: weekString,
virus: virus,
tests_positive: positiveTests,
total_tests: totalTests,
positivity_rate: (positiveTests / totalTests) * 100,
source: 'CDC NREVSS',
last_updated: new Date().toISOString().split('T')[0]
});
});
}
return data;
}
generateSampleWastewaterData() {
const regions = ['Region 1', 'Region 2', 'Region 3', 'Region 4', 'Region 5', 'Region 6', 'Region 7', 'Region 8', 'Region 9', 'Region 10'];
const viruses = ['SARS-CoV-2', 'RSV', 'Influenza'];
const levels = ['Low', 'Moderate', 'High', 'Very High'];
const data = [];
const today = new Date();
// Generate data for the last 12 weeks
for (let weekOffset = 0; weekOffset < 12; weekOffset++) {
const weekDate = new Date(today.getTime() - (weekOffset * 7 * 24 * 60 * 60 * 1000));
const weekString = this.getWeekString(weekDate);
regions.forEach(region => {
viruses.forEach(virus => {
data.push({
geography: region,
date: weekDate.toISOString().split('T')[0],
week: weekString,
virus: virus,
viral_level: levels[Math.floor(Math.random() * levels.length)],
copies_per_ml: Math.floor(Math.random() * 200000) + 10000,
percent_change: (Math.random() - 0.5) * 60, // -30% to +30%
source: 'CDC NWWS',
last_updated: new Date().toISOString().split('T')[0]
});
});
});
}
return data;
}
generateSampleTrendsData() {
const states = ['US', 'CA', 'TX', 'FL', 'NY', 'PA', 'IL', 'OH', 'GA', 'NC'];
const searchTerms = ['flu symptoms', 'RSV symptoms', 'COVID symptoms', 'cough', 'fever', 'sore throat'];
const data = [];
const today = new Date();
// Generate data for the last 12 weeks
for (let weekOffset = 0; weekOffset < 12; weekOffset++) {
const weekDate = new Date(today.getTime() - (weekOffset * 7 * 24 * 60 * 60 * 1000));
const weekString = this.getWeekString(weekDate);
states.forEach(state => {
searchTerms.forEach(term => {
data.push({
geography: state,
date: weekDate.toISOString().split('T')[0],
week: weekString,
search_term: term,
relative_search_volume: Math.floor(Math.random() * 100),
percent_change: (Math.random() - 0.5) * 50, // -25% to +25%
source: 'Google Trends',
last_updated: new Date().toISOString().split('T')[0]
});
});
});
}
return data;
}
}