Skip to main content
Glama

Amazon Order History CSV Download MCP

by marcusquinn
transactions-page.ts15.7 kB
/** * Transactions page extraction for Amazon. * Extracts from: https://www.amazon.{domain}/cpe/yourpayments/transactions * * This page contains ALL payment transactions across all orders, * making it much faster than extracting from individual order pages. * * Based on AZAD's transaction.ts, transaction0.ts, and transaction1.ts patterns. */ import { Page } from "playwright"; import { Transaction } from "../../core/types/transaction"; import { parseMoney } from "../../core/types/money"; import { parseDate } from "../../core/utils/date"; import { getRegionByCode } from "../regions"; /** * Get transactions page URL for a region. */ export function getTransactionsPageUrl(region: string): string { const regionConfig = getRegionByCode(region); const domain = regionConfig?.domain || "amazon.com"; return `https://www.${domain}/cpe/yourpayments/transactions`; } /** * Extract all transactions from the transactions page. * Uses scrolling to load all transactions (Amazon lazy-loads them). */ export async function extractTransactionsFromPage( page: Page, region: string, options?: { startDate?: Date; endDate?: Date; maxScrolls?: number; onProgress?: (message: string, count: number) => void; }, ): Promise<Transaction[]> { const regionConfig = getRegionByCode(region); const currency = regionConfig?.currency || "USD"; const url = getTransactionsPageUrl(region); const { startDate, endDate, maxScrolls = 50, onProgress } = options || {}; // Navigate to transactions page onProgress?.("Loading transactions page...", 0); await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 }); // Wait for transactions to load await page .waitForSelector( '[data-testid="transaction-link"], .transaction-date-container, .transactions-line-item', { timeout: 10000 }, ) .catch(() => {}); // Give initial content time to render await page.waitForTimeout(1000); let allTransactions: Transaction[] = []; let previousCount = 0; let scrollCount = 0; let stableCount = 0; // Scroll to load all transactions while (scrollCount < maxScrolls) { // Extract current transactions const pageTransactions = await extractVisibleTransactions(page, currency); // Merge with existing (deduplicate) allTransactions = mergeTransactions(allTransactions, pageTransactions); onProgress?.( `Found ${allTransactions.length} transactions...`, allTransactions.length, ); // Check if we've stopped finding new transactions if (allTransactions.length === previousCount) { stableCount++; if (stableCount >= 3) { // No new transactions after 3 scroll attempts break; } } else { stableCount = 0; } previousCount = allTransactions.length; // Check date range - stop if we've gone past the start date if (startDate && allTransactions.length > 0) { const oldestDate = Math.min( ...allTransactions.map((t) => t.date.getTime()), ); if (oldestDate < startDate.getTime()) { break; } } // Scroll down to load more await scrollToLoadMore(page); scrollCount++; // Wait for new content await page.waitForTimeout(500); } // Filter by date range if specified let filteredTransactions = allTransactions; if (startDate || endDate) { filteredTransactions = allTransactions.filter((t) => { if (startDate && t.date < startDate) return false; if (endDate && t.date > endDate) return false; return true; }); } // Sort by date descending (newest first) filteredTransactions.sort((a, b) => b.date.getTime() - a.date.getTime()); onProgress?.( `Complete: ${filteredTransactions.length} transactions`, filteredTransactions.length, ); return filteredTransactions; } /** * Extract visible transactions from current page state. * Implements multiple strategies based on AZAD's approach. */ async function extractVisibleTransactions( page: Page, currency: string, ): Promise<Transaction[]> { // Try Strategy 0 (transaction-date-container based) let transactions = await extractStrategy0(page, currency); if (transactions.length > 0) { return transactions; } // Try Strategy 1 (component-based parsing) transactions = await extractStrategy1(page, currency); if (transactions.length > 0) { return transactions; } // Fallback: generic extraction return extractGeneric(page, currency); } /** * Strategy 0: Extract from transaction-date-container structure. * Based on AZAD's transaction0.ts */ async function extractStrategy0( page: Page, currency: string, ): Promise<Transaction[]> { const transactions: Transaction[] = []; // Find all date containers const dateContainers = await page .locator(".transaction-date-container") .all(); for (const dateContainer of dateContainers) { try { const dateText = await dateContainer .textContent({ timeout: 300 }) .catch(() => ""); if (!dateText) continue; const date = parseDate(dateText.trim()); if (!date) continue; // Get the sibling container with transaction items const siblingContainer = dateContainer.locator( "xpath=following-sibling::*[1]", ); const transactionItems = await siblingContainer .locator(".transactions-line-item") .all(); for (const item of transactionItems) { const transaction = await extractTransactionItem(item, date, currency); if (transaction) { transactions.push(transaction); } } } catch { continue; } } return transactions; } /** * Strategy 1: Extract using data-testid attributes and modern selectors. * Updated for 2024+ Amazon transactions page structure. */ async function extractStrategy1( page: Page, currency: string, ): Promise<Transaction[]> { const transactions: Transaction[] = []; // Find transaction links (modern Amazon UI with data-testid) const transactionLinks = await page .locator('[data-testid="transaction-link"]') .all(); console.error( `[transactions-page] Strategy 1: Found ${transactionLinks.length} transaction links`, ); for (const link of transactionLinks) { try { // Extract date - first span[data-testid="text"] in the transaction // Date format: "28 Nov 2025" const dateSpans = await link.locator('span[data-testid="text"]').all(); let dateText = ""; if (dateSpans.length > 0) { dateText = (await dateSpans[0].textContent({ timeout: 300 }).catch(() => "")) || ""; } const date = parseDate(dateText.trim()); if (!date) { console.error( `[transactions-page] Could not parse date: "${dateText}"`, ); continue; } // Extract vendor - usually the third span[data-testid="text"] (after date and dot separator) let vendor = ""; if (dateSpans.length >= 3) { vendor = (await dateSpans[2].textContent({ timeout: 300 }).catch(() => "")) || ""; } // Extract card info from method-details fields const cardName = (await link .locator('[data-testid="method-details-name"]') .textContent({ timeout: 300 }) .catch(() => "")) || ""; const cardNumber = (await link .locator('[data-testid="method-details-number"]') .textContent({ timeout: 300 }) .catch(() => "")) || ""; const cardInfo = cardName && cardNumber ? `${cardName} ••••${cardNumber}` : cardName || ""; // Extract order IDs - look for text containing "Order #" const orderTexts = await link .locator('div[data-testid="text"]') .allTextContents(); const orderIds: string[] = []; for (const text of orderTexts) { // Match both standard format (203-1234567-1234567) and digital (D01-1234567-1234567) const matches = text.match(/[D]?\d{3}-\d{7}-\d{7}/g); if (matches) { orderIds.push(...matches); } } if (orderIds.length === 0) { // Try getting order ID from the full link text const fullText = (await link.textContent({ timeout: 300 }).catch(() => "")) || ""; const fallbackMatches = fullText.match(/[D]?\d{3}-\d{7}-\d{7}/g); if (fallbackMatches) { orderIds.push(...fallbackMatches); } } // Extract amount - look for text with currency symbol (bold text) const fullText = (await link.textContent({ timeout: 300 }).catch(() => "")) || ""; // Match amounts like "-£47.64", "+£93.59", "£1,399.13" const amountMatch = fullText.match(/([+-])?\s*([£$€])\s*([\d,]+\.?\d*)/); let amount = parseMoney("0", currency); if (amountMatch) { const sign = amountMatch[1] === "+" ? "" : "-"; const amountStr = `${sign}${amountMatch[2]}${amountMatch[3]}`; amount = parseMoney(amountStr, currency); } // Determine if refund based on amount sign or status text const isRefund = fullText.includes("Refunded") || (amountMatch && amountMatch[1] === "+"); // Extract status const statusMatch = fullText.match( /\b(Pending|Charged|Refunded|Completed)\b/i, ); const status = statusMatch ? statusMatch[1] : undefined; const transaction: Transaction = { date, orderIds: [...new Set(orderIds)], // Deduplicate amount, cardInfo, vendor: vendor.trim(), platformData: { source: "transactions-page", status, isRefund, }, }; transactions.push(transaction); } catch (e) { console.error(`[transactions-page] Error extracting transaction: ${e}`); continue; } } console.error( `[transactions-page] Strategy 1: Extracted ${transactions.length} transactions`, ); return transactions; } /** * Generic extraction fallback. */ async function extractGeneric( page: Page, currency: string, ): Promise<Transaction[]> { const transactions: Transaction[] = []; // Get all text content and try to parse transactions const rows = await page.locator('.a-row, [class*="transaction"]').all(); for (const row of rows) { try { const text = await row.textContent({ timeout: 200 }).catch(() => ""); if (!text) continue; // Must have an order ID pattern to be a transaction if (!text.match(/\d{3}-\d{7}-\d{7}/)) continue; const transaction = parseTransactionText(text, currency); if (transaction) { transactions.push(transaction); } } catch { continue; } } return transactions; } /** * Extract a single transaction item. */ async function extractTransactionItem( item: ReturnType<Page["locator"]>, date: Date, currency: string, ): Promise<Transaction | null> { try { const itemText = await item.textContent({ timeout: 300 }).catch(() => ""); if (!itemText) return null; // Extract order IDs const orderIdMatches = itemText.match(/\d{3}-\d{7}-\d{7}/g) || []; const orderIds = [...new Set(orderIdMatches)]; if (orderIds.length === 0) return null; // Extract amount const amountMatch = itemText.match(/([$£€¥]|USD|GBP|EUR|CAD|AUD)\s*([\d,]+\.?\d*)/i) || itemText.match(/([\d,]+\.?\d*)\s*([$£€¥]|USD|GBP|EUR|CAD|AUD)/i); const amount = amountMatch ? parseMoney(amountMatch[0], currency) : parseMoney("0", currency); // Extract card info (e.g., "Visa ****1234" or "ending in 1234") const cardMatch = itemText.match( /(Visa|Mastercard|Amex|American Express|Discover|Debit)[^*]*(\*{3,4}\d{4}|\d{4})/i, ) || itemText.match(/ending\s*in\s*(\d{4})/i) || itemText.match(/(\*{3,4}\d{4})/); const cardInfo = cardMatch ? cardMatch[0].trim() : ""; // Extract vendor (text that's not order ID, amount, or card) let vendor = itemText .replace(/\d{3}-\d{7}-\d{7}/g, "") .replace(/([$£€¥]|USD|GBP|EUR|CAD|AUD)\s*[\d,]+\.?\d*/gi, "") .replace( /(Visa|Mastercard|Amex|American Express|Discover|Debit)[^*]*\*{3,4}\d{4}/gi, "", ) .replace(/ending\s*in\s*\d{4}/gi, "") .replace(/\*{3,4}\d{4}/g, "") .replace(/Pending|Charged|Completed|Refunded/gi, "") .trim(); // Clean up vendor - take first meaningful segment vendor = vendor.split(/\s{2,}|\n/)[0]?.trim() || ""; return { date, orderIds, amount, cardInfo, vendor, platformData: { source: "transactions-page" }, }; } catch { return null; } } /** * Parse transaction from text content. */ function parseTransactionText( text: string, currency: string, ): Transaction | null { // Extract date const datePatterns = [ /(\w+\s+\d{1,2},?\s+\d{4})/, // "January 15, 2024" /(\d{1,2}\s+\w+\s+\d{4})/, // "15 January 2024" /(\d{1,2}[./-]\d{1,2}[./-]\d{2,4})/, // "01/15/2024" ]; let date: Date | null = null; for (const pattern of datePatterns) { const match = text.match(pattern); if (match) { date = parseDate(match[1]); if (date) break; } } if (!date) return null; // Extract order IDs const orderIdMatches = text.match(/\d{3}-\d{7}-\d{7}/g) || []; const orderIds = [...new Set(orderIdMatches)]; if (orderIds.length === 0) return null; // Extract amount const amountMatch = text.match(/([$£€¥])\s*([\d,]+\.?\d*)/) || text.match(/([\d,]+\.?\d*)\s*([$£€¥])/); const amount = amountMatch ? parseMoney(amountMatch[0], currency) : parseMoney("0", currency); // Extract card info const cardMatch = text.match(/(Visa|Mastercard|Amex|Discover)[^*]*(\*{3,4}\d{4})/i) || text.match(/(\*{3,4}\d{4})/); const cardInfo = cardMatch ? cardMatch[0].trim() : ""; // Extract vendor let vendor = text .replace(/\d{3}-\d{7}-\d{7}/g, "") .replace(/([$£€¥])\s*[\d,]+\.?\d*/g, "") .replace(/([\d,]+\.?\d*)\s*([$£€¥])/g, "") .replace(/(Visa|Mastercard|Amex|Discover)[^*]*\*{3,4}\d{4}/gi, "") .replace(/\*{3,4}\d{4}/g, "") .replace(/\w+\s+\d{1,2},?\s+\d{4}/g, "") .replace(/\d{1,2}\s+\w+\s+\d{4}/g, "") .replace(/Pending|Charged|Completed|Refunded/gi, "") .trim(); vendor = vendor.split(/\s{2,}|\n/)[0]?.trim() || ""; return { date, orderIds, amount, cardInfo, vendor, platformData: { source: "transactions-page" }, }; } /** * Scroll to load more transactions. */ async function scrollToLoadMore(page: Page): Promise<void> { // Find the last transaction element and scroll it into view const lastTransaction = page .locator('[data-testid="transaction-link"], .transactions-line-item') .last(); const exists = await lastTransaction.count().catch(() => 0); if (exists > 0) { await lastTransaction.scrollIntoViewIfNeeded().catch(() => {}); } else { // Fallback: use keyboard to scroll down await page.keyboard.press("End"); } } /** * Merge transactions, removing duplicates. */ function mergeTransactions( existing: Transaction[], newTransactions: Transaction[], ): Transaction[] { const seen = new Set<string>(); const merged: Transaction[] = []; for (const t of [...existing, ...newTransactions]) { // Create unique key from date + orderIds + amount const key = `${t.date.toISOString()}-${t.orderIds.sort().join(",")}-${t.amount.amount}`; if (!seen.has(key)) { seen.add(key); merged.push(t); } } return merged; }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/marcusquinn/amazon-order-history-csv-download-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server