gp_reviews

Retrieve Google Play app reviews with pagination to analyze user feedback and ratings for any Android application.

Instructions

[Google Play] Get app reviews with pagination

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`appId`	Yes	Google Play app ID
`country`	No	Two-letter country code (default: us)	us
`lang`	No	Language code (default: en)	en
`page`	No	Page number (default: 0)	0
`sort`	No	Sort order: 0 = most recent, 2 = most helpful (default: 0)	0

Implementation Reference

src/server.js:656-696 (handler)

Main handler function for gp_reviews tool. Fetches reviews HTML using buildGPReviewsUrl and parses with parseGPReviews, returning formatted JSON response.

/**
 * Tool handler for `gp_reviews`.
 *
 * Builds the reviews URL, downloads it as text, parses it, and returns the
 * parsed result (prefixed with the requested `page`) as pretty-printed JSON
 * inside a single text content item. Any failure is reported through the same
 * envelope with `isError: true` instead of being thrown to the caller.
 *
 * @param {object} args - Tool arguments.
 * @param {string} args.appId - Google Play app ID (required).
 * @param {string} [args.country='us'] - Country code passed to the URL builder.
 * @param {string} [args.lang='en'] - Language code passed to the URL builder.
 * @param {number} [args.page=0] - Page number; echoed back in the response.
 * @param {number} [args.sort=0] - Sort order passed to the URL builder.
 * @returns {Promise<{content: Array<{type: string, text: string}>, isError?: boolean}>}
 */
async function handleGPReviews(args) {
  // Both the success and the failure path share this one-item text envelope.
  const toTextContent = (payload) => [
    { type: 'text', text: JSON.stringify(payload, null, 2) },
  ];

  try {
    const { appId, country = 'us', lang = 'en', page = 0, sort = 0 } = args;
    if (!appId) {
      throw new Error('appId is required');
    }

    const reviewsUrl = buildGPReviewsUrl({ appId, country, lang, page, sort });
    const markup = await fetchText(reviewsUrl);
    const parsed = parseGPReviews(markup);

    return { content: toTextContent({ page, ...parsed }) };
  } catch (failure) {
    return {
      content: toTextContent({ error: failure.message }),
      isError: true,
    };
  }
}

src/server.js:1270-1302 (schema)

Tool schema definition including name, description, and input schema for gp_reviews in ListToolsRequestSchema handler.

  name: 'gp_reviews',
  description: '[Google Play] Get app reviews with pagination',
  inputSchema: {
    type: 'object',
    properties: {
      appId: {
        type: 'string',
        description: 'Google Play app ID',
      },
      country: {
        type: 'string',
        description: 'Two-letter country code (default: us)',
        default: 'us',
      },
      lang: {
        type: 'string',
        description: 'Language code (default: en)',
        default: 'en',
      },
      page: {
        type: 'number',
        description: 'Page number (default: 0)',
        default: 0,
      },
      sort: {
        type: 'number',
        description: 'Sort order: 0 = most recent, 2 = most helpful (default: 0)',
        default: 0,
      },
    },
    required: ['appId'],
  },
},

src/server.js:1472-1473 (registration)
Dispatch/registration of gp_reviews handler in the CallToolRequestSchema switch statement.
```
case 'gp_reviews':
  return await handleGPReviews(args);
```

src/parsers/googlePlay/reviews.js:11-224 (helper)

Core parsing helper function that extracts reviews from Google Play HTML using multiple strategies (JSON-LD, embedded JSON, HTML DOM patterns). Imported as parseGPReviews.

/**
 * Parse reviews and a pagination token out of a Google Play HTML document.
 *
 * Four passes run in order over the same HTML and feed one de-duplicated list:
 *   1. JSON-LD `<script type="application/ld+json">` blocks (`Review` / `ItemList`).
 *   2. JSON embedded in any `<script>` whose text mentions `review`, `rating` or `_df_`.
 *   3. Regex scraping of review-looking `<div>` / `<article>` elements.
 *   4. A search for a pagination token.
 *
 * Passes 1 and 2 delegate to `extractFromJsonLd` / `normalizeReview`, which are
 * defined outside this block; their results are assumed to expose `reviewId`
 * and/or `text`, which are used as the de-duplication key — TODO confirm.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {{ data: Array<object>, nextPaginationToken: (string|null) }}
 *   Collected reviews and the first pagination token found. Empty or
 *   non-string input, or an unexpected parsing error, yields
 *   `{ data: [], nextPaginationToken: null }`.
 */
export function parseReviews(html) {
  if (!html || typeof html !== 'string') {
    return {
      data: [],
      nextPaginationToken: null,
    };
  }

  const reviews = [];
  const seenReviewIds = new Set();

  // Append `review` unless a review with the same key was already collected.
  // The key defaults to the review's id, falling back to its text.
  const addUnique = (review, explicitKey) => {
    if (!review) {
      return;
    }
    const key = explicitKey ?? (review.reviewId || review.text);
    if (!seenReviewIds.has(key)) {
      seenReviewIds.add(key);
      reviews.push(review);
    }
  };

  // Decode the basic HTML entities found in scraped text. `&amp;` is handled
  // last so that `&amp;lt;` becomes the literal text `&lt;` rather than `<`.
  const decodeEntities = (value) => value
    .replace(/&quot;/g, '"')
    .replace(/&#39;|&apos;/g, "'")
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&amp;/g, '&');

  try {
    // Strategy 1: Look for JSON-LD structured data
    const jsonLdMatches = html.matchAll(/<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi);
    for (const match of jsonLdMatches) {
      try {
        const jsonLd = JSON.parse(match[1]);
        if (jsonLd['@type'] === 'Review' || (jsonLd['@type'] === 'ItemList' && jsonLd.itemListElement)) {
          const reviewList = jsonLd['@type'] === 'Review' ? [jsonLd] : jsonLd.itemListElement;
          reviewList.forEach((item) => {
            if (item['@type'] === 'Review' || item.reviewBody) {
              addUnique(extractFromJsonLd(item));
            }
          });
        }
      } catch (e) {
        // Best effort: a script that is not usable JSON-LD is skipped on purpose.
      }
    }

    // Strategy 2: Look for embedded JSON data in script tags (Google Play uses _df_ prefix)
    const jsonPatterns = [
      /\[([\s\S]{100,}?)\]/g,  // Array of reviews
      /\{[\s\S]*?"reviews"[\s\S]*?:[\s\S]*?\[([\s\S]*?)\][\s\S]*?\}/g,  // Object with reviews array
      /\{[\s\S]*?"data"[\s\S]*?:[\s\S]*?\[([\s\S]*?)\][\s\S]*?\}/g,  // Object with data array
    ];
    const scriptMatches = html.matchAll(/<script[^>]*>([\s\S]*?)<\/script>/gi);

    for (const match of scriptMatches) {
      const scriptContent = match[1];

      // Cheap keyword filter so unrelated scripts are not scanned.
      if (!(scriptContent.includes('review') || scriptContent.includes('rating') || scriptContent.includes('_df_'))) {
        continue;
      }

      // Try to find JSON arrays inside the script text.
      for (const pattern of jsonPatterns) {
        // matchAll works on a clone of the regex, so sharing the /g literals is safe.
        for (const jsonMatch of scriptContent.matchAll(pattern)) {
          try {
            // The capture group holds the array's interior; re-wrap it before parsing.
            const jsonData = JSON.parse(`[${jsonMatch[1]}]`);
            if (Array.isArray(jsonData)) {
              jsonData.forEach((item) => {
                if (item && (item.reviewId || item.text || item.rating || item.comment)) {
                  addUnique(normalizeReview(item));
                }
              });
            }
          } catch (e) {
            // Not valid JSON, try the next candidate.
          }
        }
      }

      // Try direct JSON parsing if the whole script looks like JSON.
      const trimmed = scriptContent.trim();
      if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        try {
          const jsonData = JSON.parse(scriptContent);
          if (Array.isArray(jsonData)) {
            jsonData.forEach((item) => {
              if (item && (item.reviewId || item.text || item.rating)) {
                addUnique(normalizeReview(item));
              }
            });
          } else if (jsonData.reviews || jsonData.data || jsonData[0]) {
            // A non-array value here makes forEach throw, which the catch below absorbs.
            const reviewList = jsonData.reviews || jsonData.data || [];
            reviewList.forEach((item) => {
              addUnique(normalizeReview(item));
            });
          }
        } catch (e) {
          // Not JSON, continue
        }
      }
    }

    // Strategy 3: Extract from visible HTML structure with improved patterns
    const reviewPatterns = [
      /<div[^>]*class=["'][^"']*review["'][^>]*>([\s\S]*?)<\/div>/gi,
      /<div[^>]*itemprop=["']review["'][^>]*>([\s\S]*?)<\/div>/gi,
      /<article[^>]*class=["'][^"']*review["'][^>]*>([\s\S]*?)<\/article>/gi,
      /<div[^>]*data-review-id=["'][^"']*["'][^>]*>([\s\S]*?)<\/div>/gi,
    ];

    for (const pattern of reviewPatterns) {
      for (const blockMatch of html.matchAll(pattern)) {
        const reviewHtml = blockMatch[1];

        // Extract rating with multiple patterns
        const ratingMatch = reviewHtml.match(/aria-label=["'](\d+)\s*stars?["']/i) ||
                           reviewHtml.match(/<div[^>]*class=["'][^"']*rating["'][^>]*>(\d+)[^<]*<\/div>/i) ||
                           reviewHtml.match(/ratingValue["']:\s*["']?(\d+)/i) ||
                           reviewHtml.match(/<meta[^>]*itemprop=["']ratingValue["'][^>]*content=["'](\d+)["']/i) ||
                           reviewHtml.match(/<span[^>]*class=["'][^"']*star-rating["'][^>]*>(\d+)/i);
        const rating = ratingMatch ? Number.parseInt(ratingMatch[1], 10) : null;

        // Extract text with multiple patterns
        const textMatch = reviewHtml.match(/<span[^>]*class=["'][^"']*review-body["'][^>]*>([\s\S]*?)<\/span>/i) ||
                         reviewHtml.match(/<div[^>]*class=["'][^"']*review-text["'][^>]*>([\s\S]*?)<\/div>/i) ||
                         reviewHtml.match(/<p[^>]*class=["'][^"']*review-text["'][^>]*>([\s\S]*?)<\/p>/i) ||
                         reviewHtml.match(/reviewBody["']:\s*["']([^"']+)["']/i) ||
                         reviewHtml.match(/<span[^>]*itemprop=["']reviewBody["'][^>]*>([\s\S]*?)<\/span>/i);
        // Strip nested tags first, then decode entities so decoded `<`/`>` survive.
        let text = textMatch ? textMatch[1].replace(/<[^>]+>/g, '').trim() : null;
        if (text) {
          text = decodeEntities(text);
        }

        // Extract author with multiple patterns
        const authorMatch = reviewHtml.match(/<span[^>]*class=["'][^"']*author-name["'][^>]*>([^<]+)<\/span>/i) ||
                           reviewHtml.match(/<a[^>]*class=["'][^"']*author["'][^>]*>([^<]+)<\/a>/i) ||
                           reviewHtml.match(/<span[^>]*itemprop=["']author["'][^>]*>([^<]+)<\/span>/i) ||
                           reviewHtml.match(/author["']:\s*["']([^"']+)["']/i);
        const author = authorMatch ? authorMatch[1].trim() : null;

        // Extract date with multiple patterns
        const dateMatch = reviewHtml.match(/<span[^>]*class=["'][^"']*review-date["'][^>]*>([^<]+)<\/span>/i) ||
                         reviewHtml.match(/<time[^>]*datetime=["']([^"']+)["']/i) ||
                         reviewHtml.match(/<span[^>]*itemprop=["']datePublished["'][^>]*>([^<]+)<\/span>/i) ||
                         reviewHtml.match(/datePublished["']:\s*["']([^"']+)["']/i);
        const date = dateMatch ? dateMatch[1].trim() : null;

        // Extract review ID
        const reviewIdMatch = reviewHtml.match(/data-review-id=["']([^"']+)["']/i) ||
                             reviewHtml.match(/reviewId["']:\s*["']([^"']+)["']/i);
        const reviewId = reviewIdMatch ? reviewIdMatch[1] : null;

        // Extract thumbs up
        const thumbsUpMatch = reviewHtml.match(/(\d+)\s*(?:thumbs?|helpful|útil)/i) ||
                             reviewHtml.match(/thumbsUp["']:\s*["']?(\d+)/i);
        const thumbsUp = thumbsUpMatch ? Number.parseInt(thumbsUpMatch[1], 10) : 0;

        // Only add if we have meaningful data
        if (rating || text || author) {
          addUnique(
            {
              reviewId,
              userName: author || 'Anonymous',
              userImage: null,
              date,
              dateText: date,
              score: rating || 0,
              scoreText: rating ? rating.toString() : '0',
              title: null,
              text,
              replyDate: null,
              replyText: null,
              version: null,
              thumbsUp,
              criterias: [],
            },
            // Scraped blocks may lack both id and text, so fall back to author+date.
            reviewId || text || `${author}-${date}`,
          );
        }
      }
    }

    // Strategy 4: Look for pagination token
    const paginationPatterns = [
      /nextPaginationToken["']:\s*["']([^"']+)["']/i,
      // The key is followed by exactly one closing quote; the previous pattern
      // demanded two, so it could never match `"paginationToken": "..."`.
      /paginationToken["']:\s*["']([^"']+)["']/i,
      /data-pagination-token=["']([^"']+)["']/i,
    ];

    let nextToken = null;
    for (const pattern of paginationPatterns) {
      const match = html.match(pattern);
      if (match) {
        nextToken = match[1];
        break;
      }
    }

    return {
      data: reviews,
      nextPaginationToken: nextToken,
    };
  } catch (error) {
    console.error('Error parsing Google Play reviews:', error);
    return {
      data: [],
      nextPaginationToken: null,
    };
  }
}

src/endpoints/googlePlay.js:80-104 (helper)

URL builder helper for Google Play reviews endpoint. Imported as buildGPReviewsUrl.

/**
 * Build the Google Play URL used to fetch an app's reviews.
 *
 * The query string is produced by `URLSearchParams`, so every value is
 * percent-encoded and `sort` / `page` are actually sent (as
 * `reviewSortOrder` / `pageNum`) instead of being dropped.
 * NOTE(review): whether Google Play honours `reviewSortOrder`, `reviewType`
 * and `pageNum` on the details page is not verifiable from this file — confirm.
 *
 * @param {object} params
 * @param {string} params.appId - Google Play app ID (required).
 * @param {string} [params.lang='en'] - Sent as `hl`.
 * @param {string} [params.country='us'] - Sent as `gl`.
 * @param {number} [params.page=0] - Sent as `pageNum`.
 * @param {number} [params.sort=0] - Sent as `reviewSortOrder` (0 = most recent, 2 = most helpful).
 * @returns {string} The app details URL with the `#Reviews` fragment.
 * @throws {Error} If `appId` is missing or empty.
 */
export function buildReviewsUrl(params) {
  const {
    appId,
    lang = 'en',
    country = 'us',
    page = 0,
    sort = 0, // 0 = most recent, 2 = most helpful
  } = params;

  if (!appId) {
    throw new Error('appId is required');
  }

  // Google Play uses pagination tokens, but we can use page numbers as approximation
  const queryParams = new URLSearchParams({
    id: appId,
    gl: country,
    hl: lang,
    reviewSortOrder: String(sort),
    reviewType: '0', // All reviews
    pageNum: String(page),
  });

  return `${GOOGLE_PLAY_BASE}/store/apps/details?${queryParams.toString()}#Reviews`;
}

Store Scraper MCP

gp_reviews

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API