analyze_csv
Analyze review CSV or Excel files from any source with automatic column matching. Works for eBay, AliExpress, Shopify exports and more.
Instructions
Analyze any review CSV / Excel — not just Amazon.
Drag in a Helium 10 export, an eBay / AliExpress scrape, or your own
Shopify export. The loader fuzzy-matches column names (内容 / 评价 /
body / review / content all detected automatically) so you don't
have to reformat the file.
Use this when:
The product is NOT on Amazon (eBay / AliExpress / D2C)
You already have a reviews file from another source
You want to bypass the Shulex VOC API entirely
Args: csv_path: Local path or HTTP(S) URL to a .csv / .xls / .xlsx file. product_name: Optional friendly name for the report header. market: Optional marketplace tag (US / GB / OTHER, etc.).
Returns: Same shape as analyze_reviews, with meta.columns_detected
showing which columns the loader matched.
Input Schema
| Name | Required | Description | Default |
|---|---|---|---|
| csv_path | Yes | ||
| product_name | No | ||
| market | No | OTHER |
Implementation Reference
- mcp_server/tools.py:320-356 (handler)The core implementation of the analyze_csv tool. Loads reviews from a CSV/Excel file using csv_loader.load_reviews(), then delegates to analyze_reviews() with a synthetic ASIN placeholder. Returns the analysis result with the product name and column detection metadata.
def analyze_csv( csv_path: str, *, product_name: str | None = None, market: str = "OTHER", ) -> dict[str, Any]: """Analyze a user-provided CSV / Excel of reviews. No Amazon scrape required. Drag in any CSV (Helium 10 export, eBay / Shopify export, custom scrape). The loader fuzzy-matches column names — `内容` / `评价` / `body` / `review` are all detected automatically. Use this when: - The product is NOT on Amazon (eBay / AliExpress / D2C) - You already scraped reviews and just want the VOC report - You want to bypass the Shulex API entirely """ from . import csv_loader loaded = csv_loader.load_reviews(csv_path) reviews = loaded["reviews"] if not reviews: raise ValueError( "No reviews extracted from the file. Check the column names — " f"detected: {loaded['meta']['columns_detected']}" ) # Reuse analyze_reviews with a synthetic ASIN placeholder. fake_asin = "B0CSV00000" # not used as a real ASIN, just for the renderer result = analyze_reviews( reviews_json={"reviews": reviews, "meta": {"asin": fake_asin, "market": market}}, asin=fake_asin, ) result["asin"] = product_name or "CSV input" result["market"] = market result["meta"] = loaded["meta"] return result - mcp_server/schemas.py:38-54 (schema)The AnalyzeReport Pydantic model defines the return shape that analyze_csv produces (same as analyze_reviews). Includes asin, market, report_markdown, sentiment, pain_points, selling_points, tips, summary_zh, summary_en.
class AnalyzeReport(BaseModel): """Report returned by `analyze_reviews` / `voc_full`. Always includes the raw markdown (`report_markdown`). When the report can be parsed cleanly, structured fields are populated; on parse failure the structured fields are None and the markdown is still returned verbatim. """ asin: str market: str = "US" report_markdown: str sentiment: Optional[dict] = None pain_points: list[dict] = Field(default_factory=list) selling_points: list[dict] = Field(default_factory=list) tips: list[dict] = Field(default_factory=list) summary_zh: str = "" summary_en: str = "" - mcp_server/server.py:105-131 (registration)The MCP tool registration for analyze_csv using the @mcp.tool() decorator. Defines the schema (csv_path, product_name, market) and delegates to tools.analyze_csv().
@mcp.tool() def analyze_csv( csv_path: str, product_name: str | None = None, market: str = "OTHER", ) -> dict: """Analyze any review CSV / Excel — not just Amazon. Drag in a Helium 10 export, an eBay / AliExpress scrape, or your own Shopify export. The loader fuzzy-matches column names (`内容` / `评价` / `body` / `review` / `content` all detected automatically) so you don't have to reformat the file. Use this when: - The product is NOT on Amazon (eBay / AliExpress / D2C) - You already have a reviews file from another source - You want to bypass the Shulex VOC API entirely Args: csv_path: Local path or HTTP(S) URL to a .csv / .xls / .xlsx file. product_name: Optional friendly name for the report header. market: Optional marketplace tag (US / GB / OTHER, etc.). Returns: Same shape as `analyze_reviews`, with `meta.columns_detected` showing which columns the loader matched. """ return tools.analyze_csv(csv_path=csv_path, product_name=product_name, market=market) - mcp_server/csv_loader.py:77-126 (helper)The csv_loader.load_reviews() helper function that loads and normalizes CSV/Excel files. Handles URL downloads, fuzzy column detection for body/rating/date columns, and returns reviews in the same format expected by analyze_reviews.
def load_reviews(src: str) -> dict: """Load a CSV/Excel (local path or URL) and return a normalized review array. Returns the same `{reviews, meta}` envelope that fetch_reviews uses, so the downstream analyze_reviews tool can consume either source uniformly. """ path = download_if_url(src) if not os.path.exists(path): raise FileNotFoundError(path) df = _read(path) body_col = _find(df, BODY_KEYS) if not body_col: raise ValueError( f"no review-body column found. Expected one of: {list(BODY_KEYS)}. " f"Got columns: {list(df.columns)}" ) rating_col = _find(df, RATING_KEYS) date_col = _find(df, DATE_KEYS) reviews = [] dropped = 0 for _, row in df.iterrows(): import pandas as pd body = str(row[body_col]).strip() if pd.isna(row[body_col]) or body.lower() == "nan" or len(body) < 3: dropped += 1 continue try: rating = float(row[rating_col]) if rating_col and pd.notna(row[rating_col]) else 0.0 except (TypeError, ValueError): rating = 0.0 date = str(row[date_col]) if date_col and pd.notna(row[date_col]) else "" reviews.append({ "review_id": str(uuid.uuid4())[:12], "body": body, "rating": rating, "date": date, }) return { "reviews": reviews, "meta": { "source": src, "columns_detected": {"body": body_col, "rating": rating_col, "date": date_col}, "rows_in_file": int(len(df)), "rows_used": len(reviews), "rows_dropped": dropped, }, } - mcp_server/csv_loader.py:27-51 (helper)The download_if_url() helper that supports both local file paths and HTTP(S) URLs, handling temp downloads with cleanup.
def download_if_url(src: str, *, max_mb: int = 100) -> str: """Local path → local path. URL → tmp download → local path.""" if not (src.startswith("http://") or src.startswith("https://")): return src ext = Path(urllib.parse.urlparse(src).path).suffix.lower() or ".csv" if ext not in {".csv", ".xls", ".xlsx"}: ext = ".csv" tmp_fd, tmp_path = tempfile.mkstemp(suffix=ext, prefix="review_") os.close(tmp_fd) atexit.register(lambda p=tmp_path: os.path.exists(p) and os.unlink(p)) req = urllib.request.Request(src, headers={ "User-Agent": "review-analyzer/1.0 (+https://github.com/mguozhen/voc-amazon-reviews)" }) try: with urllib.request.urlopen(req, timeout=60) as resp: cl = resp.headers.get("Content-Length") if cl and int(cl) > max_mb * 1024 * 1024: raise ValueError(f"file >{max_mb}MB, refusing: {int(cl)/1024/1024:.1f}MB") with open(tmp_path, "wb") as f: shutil.copyfileobj(resp, f) except urllib.error.URLError as e: raise ValueError(f"download failed: {e}") from e return tmp_path