Skip to main content
Glama

Amazon Order History CSV Download MCP

by marcusquinn
shipments.ts (24.7 kB)
/**
 * Shipment extraction from Amazon order pages.
 * Extracts tracking information, delivery status, and shipment items.
 */

import { Page } from "playwright";
import {
  Shipment,
  DeliveryStatus,
  ShipmentTransaction,
} from "../../core/types/shipment";
import { Item } from "../../core/types/item";
import { OrderHeader } from "../../core/types/order";
import { parseMoney } from "../../core/types/money";
import {
  getTextByXPath,
  getAttributeByXPath,
  firstMatchingStrategy,
} from "../../core/utils/extraction";
import { getRegionByCode } from "../regions";
import { extractItems } from "./items";

/**
 * Parse delivery status from text.
 *
 * The check is a case-insensitive substring match. "Delivered" wording
 * (English, Spanish, French) is tested first, so it wins over any
 * in-transit wording that appears in the same text.
 *
 * @param text - Status text scraped from the page.
 * @returns `YES` for delivered wording, `NO` for in-transit wording,
 *   `UNKNOWN` otherwise.
 */
function parseDeliveryStatus(text: string): DeliveryStatus {
  const lowerText = text.toLowerCase();

  if (
    lowerText.includes("delivered") ||
    lowerText.includes("entregado") ||
    lowerText.includes("livré")
  ) {
    return DeliveryStatus.YES;
  }

  if (
    lowerText.includes("shipping") ||
    lowerText.includes("in transit") ||
    lowerText.includes("out for delivery") ||
    lowerText.includes("arriving") ||
    lowerText.includes("expected") ||
    lowerText.includes("on the way")
  ) {
    return DeliveryStatus.NO;
  }

  return DeliveryStatus.UNKNOWN;
}

/**
 * Turn a scraped tracking href into an absolute URL.
 *
 * Absolute http(s) links are returned untouched. Anything else is resolved
 * against `https://www.<region domain>` when the region is known, and against
 * the current page URL otherwise, so an unknown region can never produce
 * `https://www.undefined/...`. If resolution fails the href is returned as-is.
 *
 * @param href - Raw `href` attribute value (may be empty or null).
 * @param page - Page the link was scraped from (fallback base URL).
 * @param header - Order header supplying the region code.
 * @returns Absolute URL, the original href if it cannot be resolved, or "".
 */
function resolveTrackingLink(
  href: string | null | undefined,
  page: Page,
  header: OrderHeader,
): string {
  if (!href) return "";
  if (/^https?:\/\//i.test(href)) return href;

  try {
    const domain = getRegionByCode(header.region)?.domain;
    const base = domain ? `https://www.${domain}` : page.url();
    return new URL(href, base).href;
  } catch {
    // Unresolvable (e.g. opaque base URL): keep the raw value rather than lose it.
    return href;
  }
}

/**
 * Extract tracking link from shipment element using XPath.
 * AZAD patterns for tracking URLs:
 * - /progress-tracker/
 * - /ship-track
 * - trackingId parameter
 *
 * @param page - Page to query.
 * @param shipmentElement - XPath prefix identifying the shipment element.
 * @returns The first non-empty tracking href found (not normalised) and the
 *   tracking ID parsed from its query string; empty strings when none match.
 */
export async function extractTrackingFromElement(
  page: Page,
  shipmentElement: string,
): Promise<{ trackingLink: string; trackingId: string }> {
  // Try multiple selectors for tracking link
  const trackingSelectors = [
    `${shipmentElement}//a[contains(@href, "track")]`,
    `${shipmentElement}//a[contains(text(), "Track")]`,
    `${shipmentElement}//a[contains(@href, "ship-track")]`,
    `${shipmentElement}//a[contains(@class, "track")]`,
  ];

  for (const selector of trackingSelectors) {
    const href = await getAttributeByXPath(page, selector, "href", "");
    if (href) {
      // Extract tracking ID from URL or text
      const trackingMatch =
        href.match(/trackingId=([^&]+)/i) ||
        href.match(/tracking[_-]?id=([^&]+)/i);
      const trackingId = trackingMatch ? trackingMatch[1] : "";
      return { trackingLink: href, trackingId };
    }
  }

  return { trackingLink: "", trackingId: "" };
}

/**
 * Strategy A (AZAD): Traditional shipment boxes with class "shipment"
 * XPath: //div[contains(@class, "a-box shipment")]
 * Then filter to elements where class list includes "shipment" exactly
 *
 * @param page - Order detail page.
 * @param header - Header of the order being processed.
 * @param items - Items already extracted for the order.
 * @returns One shipment per matching box, or `null` when the layout is absent
 *   or every box failed to parse.
 */
async function extractShipmentsStrategyA(
  page: Page,
  header: OrderHeader,
  items: Item[],
): Promise<Shipment[] | null> {
  // AZAD's exact selector - a-box with shipment class
  const candidates = await page
    .locator(
      'xpath=//div[contains(@class, "a-box") and contains(@class, "shipment")]',
    )
    .all();

  // Filter to only those with "shipment" as an exact class
  // (not just containing "shipment" in another class name)
  const shipmentBoxes: typeof candidates = [];
  for (const candidate of candidates) {
    const classAttr = await candidate.getAttribute("class").catch(() => "");
    if (classAttr) {
      const classes = classAttr.split(" ");
      if (classes.includes("shipment")) {
        shipmentBoxes.push(candidate);
      }
    }
  }

  if (shipmentBoxes.length === 0) return null;

  const regionConfig = getRegionByCode(header.region);
  const currency = regionConfig?.currency || "USD";
  const shipments: Shipment[] = [];

  for (let i = 0; i < shipmentBoxes.length; i++) {
    const box = shipmentBoxes[i];

    try {
      // Get delivery status - check class for shipment-is-delivered or text content
      const classAttr = await box.getAttribute("class").catch(() => "");
      let delivered = DeliveryStatus.UNKNOWN;

      if (classAttr?.includes("shipment-is-delivered")) {
        delivered = DeliveryStatus.YES;
      } else {
        // Check text content for delivery indicators
        const text = await box.textContent().catch(() => "");
        delivered = parseDeliveryStatus(text || "");
      }

      // Get status text from AZAD XPath
      const statusText =
        (await box
          .locator(
            'xpath=.//div[contains(@class, "shipment-info-container")]//div[@class="a-row"]/span',
          )
          .first()
          .textContent()
          .catch(() => "")) ||
        (await box
          .locator('[data-component="shipmentStatus"]')
          .first()
          .textContent()
          .catch(() => ""));

      // Get tracking link using AZAD's XPaths
      const rawTrackingLink =
        (await box
          .locator('a[href*="/progress-tracker/"]')
          .first()
          .getAttribute("href")
          .catch(() => "")) ||
        (await box
          .locator('a[href*="/ship-track"]')
          .first()
          .getAttribute("href")
          .catch(() => ""));

      // Normalize tracking link
      const trackingLink = resolveTrackingLink(rawTrackingLink, page, header);

      // Extract shipment ID from tracking link
      let shipmentId = `${header.orderId}-shipment-${i + 1}`;
      if (trackingLink) {
        const shipmentMatch = trackingLink.match(/shipmentId=([^&]+)/);
        if (shipmentMatch) shipmentId = shipmentMatch[1];
      }

      // Get tracking ID (would need to fetch tracking page for full ID)
      const trackingIdMatch = trackingLink.match(/trackingId=([^&]+)/);
      const trackingId = trackingIdMatch ? trackingIdMatch[1] : "";

      // Get items in this shipment (for now, associate all items with first shipment)
      const shipmentItems = i === 0 ? items : [];

      // Try to extract transaction info using AZAD's pattern
      let transaction: ShipmentTransaction | undefined;
      const transactionText = await box
        .locator(
          'xpath=.//span[normalize-space(text())="Transactions"]/../../div[contains(@class, "expander")]//div[contains(@class, "a-row")]',
        )
        .first()
        .textContent()
        .catch(() => "");

      if (transactionText) {
        // Parse "December 17, 2023 - Visa ending in 8489: $41.49" format
        const amountMatch = transactionText.match(/[$£€]\s*[\d,]+\.?\d*/);
        if (amountMatch) {
          transaction = {
            paymentAmount: parseMoney(amountMatch[0], currency),
            infoString: transactionText
              .replace(/[$£€]\s*[\d,]+\.?\d*/, "")
              .trim(),
          };
        }
      }

      // Check for refund using AZAD's XPath
      let refund: ReturnType<typeof parseMoney> | undefined;
      const refundText = await box
        .locator(
          'xpath=.//span[contains(text(), "Refund for this return")]/../../../../..//span',
        )
        .first()
        .textContent()
        .catch(() => "");

      if (refundText) {
        refund = parseMoney(refundText, currency);
        // A zero amount is treated as "no refund".
        if (refund.amount === 0) refund = undefined;
      }

      shipments.push({
        shipmentId,
        orderHeader: header,
        items: shipmentItems,
        delivered,
        status: statusText?.trim() || "Unknown",
        trackingLink: trackingLink || "",
        trackingId,
        transaction,
        refund,
        platformData: {},
      });
    } catch {
      // Best effort: a box that fails to parse is skipped, not fatal.
      continue;
    }
  }

  return shipments.length > 0 ? shipments : null;
}

/**
 * Result from ship-track page extraction.
 */
export interface ShipTrackPageData {
  trackingId: string;
  carrier: string;
}

/**
 * Extract tracking info from a ship-track page.
 * Gets both tracking number and carrier name.
 *
 * Tracking number patterns:
 * - Amazon Logistics: AZ + 9 digits + 2-letter suffix (e.g., AZ218181365JE)
 * - Amazon Logistics: TBA followed by digits (e.g., TBA123456789)
 * - Royal Mail: 2-letter prefix + 9 digits + 2-letter suffix (e.g., AA123456789GB)
 * - Hermes/Evri: 16-digit number
 * - DPD: Various alphanumeric formats
 *
 * Carrier patterns (UK examples):
 * - "Delivery By JERSEY_POST"
 * - "Delivery By Whistl Group"
 * - "Shipped with Royal Mail"
 *
 * @param page - Page already navigated to the tracking URL.
 * @returns Tracking ID and carrier; either field is "" when not found.
 *   Never throws: on any error the partially filled result is returned.
 */
export async function extractTrackingInfoFromPage(
  page: Page,
): Promise<ShipTrackPageData> {
  const result: ShipTrackPageData = { trackingId: "", carrier: "" };

  try {
    // Extract tracking number
    const trackingSelectors = [
      // Progress tracker page selectors
      ".pt-delivery-card-trackingId",
      '[data-test-id="tracking-number"]',
      ".carrierRelatedInfo-trackingId-text",
      // Ship-track page selectors
      ".a-row.pt-carrier-tracking-id",
      ".ship-track-grid-content .a-text-bold",
      // Generic patterns
      '[class*="tracking"] .a-text-bold',
      '[class*="tracking-id"]',
      '[class*="trackingId"]',
    ];

    for (const selector of trackingSelectors) {
      const el = page.locator(selector).first();
      const count = await el.count().catch(() => 0);
      if (count > 0) {
        const text = await el.textContent({ timeout: 500 }).catch(() => "");
        if (text) {
          const cleaned = text.trim();
          const trackingNumber = validateTrackingNumber(cleaned);
          if (trackingNumber) {
            result.trackingId = trackingNumber;
            break;
          }
        }
      }
    }

    // Fallback: search page text for carrier patterns
    const pageText = await page.textContent("body").catch(() => "");
    if (pageText) {
      // Look for tracking ID if not found yet
      if (!result.trackingId) {
        const trackingMatch = pageText.match(
          /Tracking\s*ID:?\s*([A-Z0-9]{10,20})/i,
        );
        if (trackingMatch) {
          const trackingNumber = validateTrackingNumber(trackingMatch[1]);
          if (trackingNumber) result.trackingId = trackingNumber;
        }
      }

      // Look for carrier patterns - be very specific to avoid false matches
      // UK patterns: "Delivery By JERSEY_POST", "Delivery By Whistl Group", "Delivery By Royal Mail"
      // The carrier name should be relatively short (not a product name)
      if (!result.carrier) {
        // Pattern 1: "Delivery By CARRIER_NAME" (uppercase with underscores)
        const deliveryByMatch = pageText.match(
          /Delivery\s+By\s+([A-Z][A-Z0-9_]{2,30})\b/,
        );
        if (deliveryByMatch) {
          result.carrier = deliveryByMatch[1].trim();
        }
      }

      if (!result.carrier) {
        // Pattern 2: "Delivery By Carrier Name" (title case, max 3 words)
        const deliveryByTitleMatch = pageText.match(
          /Delivery\s+By\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,2})\b/,
        );
        if (deliveryByTitleMatch) {
          const carrier = deliveryByTitleMatch[1].trim();
          // Validate it's not a product name (too long or contains certain words)
          if (
            carrier.length <= 30 &&
            !carrier.includes("Amazon") &&
            !carrier.includes("Fujitsu")
          ) {
            result.carrier = carrier;
          }
        }
      }

      if (!result.carrier) {
        // Pattern 3: "Shipped with CARRIER" or "Carrier: CARRIER"
        const shippedMatch = pageText.match(
          /(?:Shipped\s+with|Carrier:?)\s+([A-Z][A-Za-z0-9_\s]{2,25}?)(?:\.|,|\n|$)/i,
        );
        if (shippedMatch) {
          const carrier = shippedMatch[1].trim();
          if (carrier.length <= 25) {
            result.carrier = carrier;
          }
        }
      }
    }

    return result;
  } catch {
    return result;
  }
}

/**
 * Extract carrier tracking number from a ship-track page (legacy function).
 * @deprecated Use extractTrackingInfoFromPage instead
 */
export async function extractTrackingNumberFromPage(
  page: Page,
): Promise<string> {
  const info = await extractTrackingInfoFromPage(page);
  return info.trackingId;
}

/**
 * Validate and return a tracking number if it matches known patterns.
 *
 * @param text - Candidate text; it is trimmed and upper-cased before matching.
 * @returns The normalised tracking number, or `null` when no pattern matches.
 */
function validateTrackingNumber(text: string): string | null {
  const cleaned = text.trim().toUpperCase();

  // Amazon Logistics: AZ + 9 digits + 2 letters (e.g., AZ218181365JE)
  if (/^AZ\d{9}[A-Z]{2}$/.test(cleaned)) return cleaned;

  // Amazon Logistics: TBA + digits (e.g., TBA123456789)
  if (/^TBA\d+$/.test(cleaned)) return cleaned;

  // Royal Mail: 2 letters + 9 digits + 2 letters (e.g., AA123456789GB)
  if (/^[A-Z]{2}\d{9}[A-Z]{2}$/.test(cleaned)) return cleaned;

  // Hermes/Evri: 16 digits
  if (/^\d{16}$/.test(cleaned)) return cleaned;

  // DPD UK: 14 digits or alphanumeric
  if (/^\d{14}$/.test(cleaned)) return cleaned;

  // Generic alphanumeric 10-20 chars (covers DPD, UPS, FedEx, etc.).
  // At least one digit is required so that ordinary words scraped from bold
  // text (e.g. "INSTRUCTIONS", "UNAVAILABLE") are not mistaken for an ID.
  if (/^(?=.*\d)[A-Z0-9]{10,20}$/.test(cleaned)) return cleaned;

  // Generic long number (10+ digits)
  if (/^\d{10,}$/.test(cleaned)) return cleaned;

  return null;
}

/**
 * Strategy B (AZAD): 2024+ layout with data-component attributes
 * This is the primary strategy for modern Amazon pages.
 *
 * Structure from HTML:
 * - [data-component="shipments"] - main shipments container
 * - [data-component="shipmentStatus"] - delivery status ("Arriving Tuesday", "Delivered", etc.)
 * - [data-component="shipmentConnections"] - tracking buttons
 * - a[href*="ship-track"] - "Track package" button with tracking URL
 * - [data-component="purchasedItems"] - items in this shipment
 *
 * @param page - Order detail page.
 * @param header - Header of the order being processed.
 * @param items - Items already extracted for the order.
 * @returns One shipment per container, or `null` when the layout is absent
 *   or every container failed to parse.
 */
async function extractShipmentsStrategyB(
  page: Page,
  header: OrderHeader,
  items: Item[],
): Promise<Shipment[] | null> {
  // First try the shipments container directly
  let containers = await page.locator('[data-component="shipments"]').all();

  if (containers.length === 0) {
    // Fallback: try the XPath approach (queried once and reused below)
    containers = await page
      .locator(
        'xpath=//div[div[@data-component="shipmentsLeftGrid"]/div[div[@data-component="shipmentStatus"]]]',
      )
      .all();
    if (containers.length === 0) return null;
  }

  const shipments: Shipment[] = [];

  // Each shipments container represents one shipment
  for (let i = 0; i < containers.length; i++) {
    const box = containers[i];

    try {
      // Get status from data-component="shipmentStatus"
      const statusEl = box.locator('[data-component="shipmentStatus"]').first();
      let statusText = "";

      const statusCount = await statusEl.count().catch(() => 0);
      if (statusCount > 0) {
        // Get the status message text (e.g., "Arriving Tuesday")
        const statusMsgEl = statusEl
          .locator(".od-status-message, h4, .a-text-bold")
          .first();
        statusText = (await statusMsgEl.textContent().catch(() => "")) || "";
        if (!statusText) {
          statusText = (await statusEl.textContent().catch(() => "")) || "";
        }
      }

      const delivered = parseDeliveryStatus(statusText || "");

      // Get tracking link from shipmentConnections or any tracking link
      let rawTrackingLink = "";

      // Primary: look in shipmentConnections for "Track package" button
      const trackBtn = box
        .locator(
          '[data-component="shipmentConnections"] a[href*="ship-track"], a[href*="ship-track"]',
        )
        .first();
      const trackBtnCount = await trackBtn.count().catch(() => 0);
      if (trackBtnCount > 0) {
        rawTrackingLink =
          (await trackBtn.getAttribute("href").catch(() => "")) || "";
      }

      // Fallback: look for progress-tracker links
      if (!rawTrackingLink) {
        rawTrackingLink =
          (await box
            .locator('a[href*="/progress-tracker/"]')
            .first()
            .getAttribute("href")
            .catch(() => "")) || "";
      }

      // Normalize tracking link
      const trackingLink = resolveTrackingLink(rawTrackingLink, page, header);

      // Extract shipment ID from tracking link
      let shipmentId = `${header.orderId}-shipment-${i + 1}`;
      if (trackingLink) {
        const shipmentMatch = trackingLink.match(/shipmentId=([^&]+)/);
        if (shipmentMatch) shipmentId = shipmentMatch[1];
      }

      // Extract tracking ID
      const trackingIdMatch = trackingLink.match(/trackingId=([^&]+)/);
      const trackingId = trackingIdMatch ? trackingIdMatch[1] : "";

      // Count items in this shipment from purchasedItems containers
      const shipmentPurchasedItems = await box
        .locator('[data-component="purchasedItems"]')
        .all();
      const itemCount = shipmentPurchasedItems.length;

      // Associate items with shipments (first shipment gets all items if only one shipment).
      // NOTE(review): with several shipments only the first receives items;
      // the rest get an empty list even when itemCount > 0.
      const shipmentItems =
        containers.length === 1
          ? items
          : i === 0
            ? items.slice(0, itemCount || items.length)
            : [];

      shipments.push({
        shipmentId,
        orderHeader: header,
        items: shipmentItems,
        delivered,
        status: statusText?.trim() || "Unknown",
        trackingLink: trackingLink || "",
        trackingId,
        platformData: { itemCount },
      });
    } catch {
      // Best effort: a container that fails to parse is skipped, not fatal.
      continue;
    }
  }

  return shipments.length > 0 ? shipments : null;
}

/**
 * Strategy 1: Tracking package sections
 *
 * @param page - Order detail page.
 * @param header - Header of the order being processed.
 * @param items - Items already extracted; all are attached to the first shipment.
 * @returns One shipment per tracking section, or `null` when none are found.
 */
async function extractShipmentsStrategy1(
  page: Page,
  header: OrderHeader,
  items: Item[],
): Promise<Shipment[] | null> {
  const trackingSections = await page
    .locator('[id*="tracking"], [class*="tracking-package"]')
    .all();

  if (trackingSections.length === 0) return null;

  const shipments: Shipment[] = [];

  for (let i = 0; i < trackingSections.length; i++) {
    const section = trackingSections[i];

    try {
      const statusText = await section.textContent().catch(() => "");
      const delivered = parseDeliveryStatus(statusText || "");

      // Look for tracking link; made absolute so it can be navigated to later
      const rawTrackingLink =
        (await section
          .locator('a[href*="track"]')
          .first()
          .getAttribute("href")
          .catch(() => "")) || "";
      const trackingLink = resolveTrackingLink(rawTrackingLink, page, header);

      const trackingIdMatch = trackingLink.match(/trackingId=([^&]+)/i);
      const trackingId = trackingIdMatch ? trackingIdMatch[1] : "";

      shipments.push({
        shipmentId: `${header.orderId}-tracking-${i + 1}`,
        orderHeader: header,
        items: i === 0 ? items : [],
        delivered,
        status: statusText?.slice(0, 100).trim() || "Unknown",
        trackingLink,
        trackingId,
        platformData: {},
      });
    } catch {
      continue;
    }
  }

  return shipments.length > 0 ? shipments : null;
}

/**
 * Strategy 2: Delivery status sections (2024+ layout)
 *
 * @param page - Order detail page.
 * @param header - Header of the order being processed.
 * @param items - Items already extracted; all are attached to the first shipment.
 * @returns One shipment per delivery section, or `null` when none are found.
 */
async function extractShipmentsStrategy2(
  page: Page,
  header: OrderHeader,
  items: Item[],
): Promise<Shipment[] | null> {
  const deliverySections = await page
    .locator('[data-component="deliveryStatus"], .delivery-box')
    .all();

  if (deliverySections.length === 0) return null;

  const shipments: Shipment[] = [];

  for (let i = 0; i < deliverySections.length; i++) {
    const section = deliverySections[i];

    try {
      // Get primary status message
      const statusText = await section
        .locator('.a-color-success, .a-color-state, [class*="status"]')
        .first()
        .textContent()
        .catch(() => "");

      const delivered = parseDeliveryStatus(statusText || "");

      // Get tracking info; link made absolute so it can be navigated to later
      const rawTrackingLink =
        (await section
          .locator('a[href*="track"]')
          .first()
          .getAttribute("href")
          .catch(() => "")) || "";
      const trackingLink = resolveTrackingLink(rawTrackingLink, page, header);

      let trackingId = "";

      // Try to get tracking number from visible text
      const trackingNumText = await section
        .locator(':text-matches("\\\\d{10,}")')
        .first()
        .textContent()
        .catch(() => "");

      if (trackingNumText) {
        const match = trackingNumText.match(/\d{10,}/);
        if (match) trackingId = match[0];
      }

      shipments.push({
        shipmentId: `${header.orderId}-delivery-${i + 1}`,
        orderHeader: header,
        items: i === 0 ? items : [],
        delivered,
        status: statusText?.trim() || "Unknown",
        trackingLink,
        trackingId,
        platformData: {},
      });
    } catch {
      continue;
    }
  }

  return shipments.length > 0 ? shipments : null;
}

/**
 * Strategy 3: Single shipment fallback (create from order details)
 *
 * @param page - Order detail page.
 * @param header - Header of the order being processed.
 * @param items - Items already extracted; all are attached to the single shipment.
 * @returns A one-element array, or `null` when there is neither a delivery
 *   message on the page nor any item.
 */
async function extractShipmentsStrategy3(
  page: Page,
  header: OrderHeader,
  items: Item[],
): Promise<Shipment[] | null> {
  // Look for any delivery message on the page
  const deliveryText = await getTextByXPath(
    page,
    '//*[contains(text(), "Delivered") or contains(text(), "Arriving") or contains(text(), "Shipped")]',
    "",
  );

  if (!deliveryText && items.length === 0) return null;

  const delivered = parseDeliveryStatus(deliveryText);

  // Try to find any tracking link on the page
  const rawTrackingLink = await getAttributeByXPath(
    page,
    '//a[contains(@href, "track") or contains(text(), "Track")]',
    "href",
    "",
  );
  const trackingLink = resolveTrackingLink(rawTrackingLink, page, header);

  return [
    {
      shipmentId: `${header.orderId}-shipment-1`,
      orderHeader: header,
      items,
      delivered,
      status: deliveryText.slice(0, 100).trim() || "Unknown",
      trackingLink,
      trackingId: "",
      platformData: {},
    },
  ];
}

/**
 * Options for shipment extraction.
 */
export interface ExtractShipmentsOptions {
  /** Visit ship-track pages to get actual carrier tracking numbers (slower but complete) */
  fetchTrackingNumbers?: boolean;
}

/**
 * Extract shipments from an order detail page.
 *
 * Items are extracted first, then the layout strategies are tried in the
 * order A, B, 2, 1, 3 via `firstMatchingStrategy` (presumably the first
 * non-null result wins, defaulting to `[]` - confirm against the helper).
 * With `fetchTrackingNumbers` set, each shipment's tracking page is visited
 * to fill in a missing tracking ID and carrier, and the page is returned to
 * the order detail URL afterwards.
 *
 * @param page - Page showing the order detail view.
 * @param header - Header of the order being processed.
 * @param options - See {@link ExtractShipmentsOptions}.
 * @returns The extracted shipments (possibly empty).
 */
export async function extractShipments(
  page: Page,
  header: OrderHeader,
  options: ExtractShipmentsOptions = {},
): Promise<Shipment[]> {
  const { fetchTrackingNumbers = false } = options;

  // First, extract items for this order
  const items = await extractItems(page, header);

  // Try each extraction strategy (AZAD order: A, B, then fallbacks)
  const shipments = await firstMatchingStrategy<Shipment[]>(
    [
      () => extractShipmentsStrategyA(page, header, items), // AZAD's a-box.shipment
      () => extractShipmentsStrategyB(page, header, items), // AZAD's data-component 2024+
      () => extractShipmentsStrategy2(page, header, items), // Delivery status sections
      () => extractShipmentsStrategy1(page, header, items), // Tracking sections
      () => extractShipmentsStrategy3(page, header, items), // Fallback single shipment
    ],
    [],
  );

  // If requested, visit each ship-track page to get tracking numbers and carrier info
  if (fetchTrackingNumbers) {
    const detailUrl = page.url(); // Save current URL to return to

    for (const shipment of shipments) {
      // Nothing to visit without a tracking link
      if (!shipment.trackingLink) continue;

      // Skip if we already have both tracking ID and carrier
      if (shipment.trackingId && shipment.carrier) continue;

      try {
        // Navigate to ship-track page
        await page.goto(shipment.trackingLink, {
          waitUntil: "domcontentloaded",
          timeout: 10000,
        });

        // Wait for tracking info to load (best effort; a timeout is ignored)
        await page
          .waitForSelector(
            '.carrierRelatedInfo, [class*="tracking"], .pt-delivery-card',
            {
              timeout: 3000,
            },
          )
          .catch(() => {});

        // Extract tracking number and carrier from page
        const trackingInfo = await extractTrackingInfoFromPage(page);
        if (trackingInfo.trackingId && !shipment.trackingId) {
          shipment.trackingId = trackingInfo.trackingId;
        }
        if (trackingInfo.carrier && !shipment.carrier) {
          shipment.carrier = trackingInfo.carrier;
        }
      } catch {
        // Continue if tracking page fails
      }
    }

    // Return to detail page only if we actually navigated away from it.
    if (detailUrl && page.url() !== detailUrl) {
      await page
        .goto(detailUrl, { waitUntil: "domcontentloaded", timeout: 10000 })
        .catch(() => {});
    }
  }

  return shipments;
}

/**
 * Check if an order has been fully delivered.
 *
 * @param shipments - Shipments of a single order.
 * @returns `true` only when there is at least one shipment and every
 *   shipment has status `DeliveryStatus.YES`.
 */
export function isFullyDelivered(shipments: Shipment[]): boolean {
  if (shipments.length === 0) return false;
  return shipments.every((s) => s.delivered === DeliveryStatus.YES);
}

/**
 * Get combined tracking IDs for an order.
 *
 * @param shipments - Shipments of a single order.
 * @returns The non-empty tracking IDs, in shipment order.
 */
export function getTrackingIds(shipments: Shipment[]): string[] {
  return shipments.map((s) => s.trackingId).filter((id) => id.length > 0);
}

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/marcusquinn/amazon-order-history-csv-download-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server.