Skip to main content
Glama
MiguelAlvRed

Store Scraper MCP

by MiguelAlvRed

gp_datasafety

Retrieve Google Play app data safety details including permissions and security information to assess privacy and compliance requirements.

Instructions

[Google Play] Get app data safety information

Input Schema

Table | JSON Schema
Name | Required | Description | Default
appId | Yes | Google Play app ID | (none)
lang | No | Language code (default: en) | en

Implementation Reference

  • Main handler function for gp_datasafety tool: fetches app data safety HTML and parses it into structured JSON.
    /**
     * Tool handler for `gp_datasafety`.
     *
     * Reads `appId` (required) and `lang` (defaults to 'en') from `args`, builds
     * the URL with `buildDataSafetyUrl`, downloads it with `fetchText`, and runs
     * the text through `parseDataSafety`.
     *
     * @param {{ appId: string, lang?: string }} args - Tool call arguments.
     * @returns {Promise<{ content: Array<{ type: string, text: string }>, isError?: boolean }>}
     *   A single text item holding pretty-printed JSON: the parsed result, an
     *   `{ error }` object when the parser yields nothing, or an `{ error }`
     *   object with `isError: true` when anything in the pipeline throws.
     */
    async function handleGPDataSafety(args) {
      // Every outcome shares the same envelope; only the payload and flags differ.
      const asTextResponse = (payload, flags = {}) => ({
        content: [
          {
            type: 'text',
            text: JSON.stringify(payload, null, 2),
          },
        ],
        ...flags,
      });

      try {
        // Destructure inside the try so a missing `args` is reported like any other failure.
        const { appId, lang = 'en' } = args;
        if (!appId) {
          throw new Error('appId is required');
        }

        const pageText = await fetchText(buildDataSafetyUrl({ appId, lang }));
        const parsed = parseDataSafety(pageText);

        // A falsy parse result is reported as a plain (non-isError) message.
        return asTextResponse(parsed ? parsed : { error: 'Data safety information not available' });
      } catch (error) {
        return asTextResponse({ error: error.message }, { isError: true });
      }
    }
  • Tool schema definition including input parameters (appId required, lang optional) for gp_datasafety.
    // Tool descriptor for `gp_datasafety`: the tool name, its human-readable
    // description, and the JSON Schema describing the accepted arguments.
    {
      name: 'gp_datasafety',
      description: '[Google Play] Get app data safety information',
      inputSchema: {
        type: 'object',
        properties: {
          // The only mandatory argument (see `required` below).
          appId: {
            type: 'string',
            description: 'Google Play app ID',
          },
          // Optional; the schema advertises 'en' as the default value.
          lang: {
            type: 'string',
            description: 'Language code (default: en)',
            default: 'en',
          },
        },
        required: ['appId'],
      },
    },
  • Registration in the tool dispatcher switch statement that routes calls to the handler.
    // Route 'gp_datasafety' calls to the handler, passing the arguments through unchanged.
    case 'gp_datasafety':
      return await handleGPDataSafety(args);
  • Core parsing logic that extracts data safety details (shared/collected data, security practices, privacy policy) from HTML using multiple strategies.
    /**
     * Copies the `dataShared`, `dataCollected` and `securityPractices` fields of
     * a parsed JSON value onto `result`.
     *
     * A truthy field that is an array replaces the current value; a truthy field
     * that is not an array resets the value to `[]`; a missing or falsy field
     * leaves `result` untouched.
     *
     * @param {object} result - Accumulator being filled by `parseDataSafety`.
     * @param {object} source - Parsed JSON carrying the optional fields.
     */
    function applyDataSafetyArrays(result, source) {
      for (const key of ['dataShared', 'dataCollected', 'securityPractices']) {
        if (source[key]) {
          result[key] = Array.isArray(source[key]) ? source[key] : [];
        }
      }
    }

    /**
     * Scans `sectionHtml` with every block pattern and appends each newly found
     * data item to `target` (deduplicated on the `data` text).
     *
     * Markup patterns (those starting with `<`) yield an HTML fragment whose
     * items are read from `<span>` / `<div class="...item">` text. JSON-like
     * patterns yield the contents of an array literal whose items are the
     * double-quoted strings. Keeping the two apart prevents HTML attribute
     * values (e.g. `class="chip"`) from being reported as data items.
     *
     * @param {string} sectionHtml - Inner HTML of the data safety section.
     * @param {RegExp[]} blockPatterns - Global regexes; capture group 1 is the block body.
     * @param {object[]} target - Array that receives `{ data, optional, purpose, type }` entries.
     */
    function collectDataSafetyItems(sectionHtml, blockPatterns, target) {
      const markupItemPatterns = [
        /<span[^>]*>([^<]+)<\/span>/gi,
        /<div[^>]*class=["'][^"']*item["'][^>]*>([^<]+)<\/div>/gi,
      ];
      const quotedItemPatterns = [/"([^"]+)"/g];
      const stopWords = ['and', 'or', 'the', 'a', 'an'];

      for (const blockPattern of blockPatterns) {
        const itemPatterns = blockPattern.source.startsWith('<')
          ? markupItemPatterns
          : quotedItemPatterns;

        for (const blockMatch of sectionHtml.matchAll(blockPattern)) {
          const blockText = blockMatch[1];

          for (const itemPattern of itemPatterns) {
            for (const itemMatch of blockText.matchAll(itemPattern)) {
              const dataText = itemMatch[1].trim();

              // Skip fragments that are too short or too long to be a data type label.
              if (dataText.length <= 2 || dataText.length >= 100) {
                continue;
              }
              // Skip leftover markup and filler words.
              if (dataText.startsWith('<') || stopWords.includes(dataText.toLowerCase())) {
                continue;
              }
              // Entries copied from embedded JSON may have any shape, so guard the lookup.
              if (target.some((entry) => entry && entry.data === dataText)) {
                continue;
              }

              target.push({
                data: dataText,
                optional: blockText.includes('optional'),
                purpose: extractPurpose(blockText),
                type: extractDataType(dataText),
              });
            }
          }
        }
      }
    }

    /**
     * Extracts data safety details from a Google Play HTML page.
     *
     * Four strategies are applied in order: a privacy-policy URL search,
     * JSON-LD `<script>` blocks, JSON fragments embedded in any `<script>`, and
     * finally a dedicated `data-safety` HTML section.
     *
     * @param {string} html - Raw page HTML.
     * @returns {{ dataShared: Array, dataCollected: Array, securityPractices: Array,
     *   privacyPolicyUrl: (string|null) } | null} The collected details, or `null`
     *   when `html` is not a non-empty string, nothing was found, or an
     *   unexpected error occurred (the error is logged with `console.error`).
     */
    export function parseDataSafety(html) {
      if (!html || typeof html !== 'string') {
        return null;
      }

      try {
        const result = {
          dataShared: [],
          dataCollected: [],
          securityPractices: [],
          privacyPolicyUrl: null,
        };

        // Strategy 1: Extract privacy policy URL with multiple patterns (first hit wins)
        const privacyPolicyPatterns = [
          /<a[^>]*href=["']([^"']*privacy[^"']*)["'][^>]*>/i,
          /privacy[^"']*policy["'][^>]*href=["']([^"']+)["']/i,
          /<link[^>]*rel=["']privacy-policy["'][^>]*href=["']([^"']+)["']/i,
          /privacyPolicyUrl["']:\s*["']([^"']+)["']/i,
          /privacy.*?policy.*?url["']:\s*["']([^"']+)["']/i,
        ];

        for (const pattern of privacyPolicyPatterns) {
          const match = html.match(pattern);
          if (match) {
            result.privacyPolicyUrl = match[1];
            break;
          }
        }

        // Strategy 2: Look for JSON-LD structured data
        const jsonLdMatches = html.matchAll(/<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi);

        for (const match of jsonLdMatches) {
          try {
            const jsonLd = JSON.parse(match[1]);
            const ds = jsonLd.dataSafety || jsonLd['data-safety'] || jsonLd.data_safety;
            if (ds) {
              applyDataSafetyArrays(result, ds);
            }
          } catch {
            // Best effort: malformed or non-object JSON-LD is skipped on purpose.
          }
        }

        // Strategy 3: Extract from script tags with embedded JSON
        const scriptMatches = html.matchAll(/<script[^>]*>([\s\S]*?)<\/script>/gi);
        const jsonPatterns = [
          /dataSafety["']?\s*:\s*\{([\s\S]*?)\}/i,
          /"data-safety"["']?\s*:\s*\{([\s\S]*?)\}/i,
          /data_safety["']?\s*:\s*\{([\s\S]*?)\}/i,
        ];

        for (const match of scriptMatches) {
          const scriptContent = match[1];
          const mentionsDataSafety =
            scriptContent.includes('dataSafety') ||
            scriptContent.includes('data-safety') ||
            scriptContent.includes('data_safety');
          if (!mentionsDataSafety) {
            continue;
          }

          for (const pattern of jsonPatterns) {
            const jsonMatch = scriptContent.match(pattern);
            if (!jsonMatch) {
              continue;
            }
            try {
              // The lazy capture stops at the first `}`, so only flat objects parse.
              applyDataSafetyArrays(result, JSON.parse(`{${jsonMatch[1]}}`));
            } catch {
              // Best effort: fragments that are not valid JSON are skipped on purpose.
            }
          }
        }

        // Strategy 4: Extract from HTML sections with multiple patterns
        const dataSafetySectionPatterns = [
          /<div[^>]*class=["'][^"']*data-safety["'][^>]*>([\s\S]*?)<\/div>/i,
          /<section[^>]*id=["']data-safety["'][^>]*>([\s\S]*?)<\/section>/i,
          /<div[^>]*id=["']data-safety["'][^>]*>([\s\S]*?)<\/div>/i,
          /<section[^>]*class=["'][^"']*data-safety["'][^>]*>([\s\S]*?)<\/section>/i,
        ];

        let sectionHtml = null;
        for (const pattern of dataSafetySectionPatterns) {
          const match = html.match(pattern);
          if (match) {
            sectionHtml = match[1];
            break;
          }
        }

        if (sectionHtml) {
          // Every pattern below carries the `g` flag: String.prototype.matchAll
          // throws a TypeError for a non-global RegExp, which would abort the
          // whole parse and discard the results of the earlier strategies.

          // Extract "Data shared" section
          collectDataSafetyItems(
            sectionHtml,
            [
              /data.*?shared["']?\s*:\s*\[([\s\S]*?)\]/gi,
              /<div[^>]*class=["'][^"']*data-shared["'][^>]*>([\s\S]*?)<\/div>/gi,
              /<section[^>]*class=["'][^"']*data-shared["'][^>]*>([\s\S]*?)<\/section>/gi,
            ],
            result.dataShared,
          );

          // Extract "Data collected" section
          collectDataSafetyItems(
            sectionHtml,
            [
              /data.*?collected["']?\s*:\s*\[([\s\S]*?)\]/gi,
              /<div[^>]*class=["'][^"']*data-collected["'][^>]*>([\s\S]*?)<\/div>/gi,
              /<section[^>]*class=["'][^"']*data-collected["'][^>]*>([\s\S]*?)<\/section>/gi,
            ],
            result.dataCollected,
          );

          // Extract security practices
          const securityPatterns = [
            /security.*?practices["']?\s*:\s*\[([\s\S]*?)\]/gi,
            /<div[^>]*class=["'][^"']*security-practices["'][^>]*>([\s\S]*?)<\/div>/gi,
          ];

          for (const pattern of securityPatterns) {
            for (const match of sectionHtml.matchAll(pattern)) {
              const practiceHtml = match[1];
              // The first run of text before any tag is taken as the practice label.
              const practiceMatch = practiceHtml.match(/([^<]+)/);
              if (!practiceMatch) {
                continue;
              }
              const practice = practiceMatch[1].trim();
              if (practice.length > 5) {
                result.securityPractices.push({
                  practice,
                  description: extractDescription(practiceHtml),
                });
              }
            }
          }
        }

        // Return result only if we found at least some data
        const foundSomething =
          result.dataShared.length > 0 ||
          result.dataCollected.length > 0 ||
          result.securityPractices.length > 0 ||
          Boolean(result.privacyPolicyUrl);

        return foundSomething ? result : null;
      } catch (error) {
        console.error('Error parsing Google Play data safety:', error);
        return null;
      }
    }
  • URL builder function that constructs the Google Play app details page URL (with `id` and `hl` query parameters) from which the data safety information is parsed.
    /**
     * Builds the Google Play URL that is fetched for an app's data safety details.
     *
     * Both query values are percent-encoded so that characters such as `&`, `=`,
     * `#` or spaces cannot inject extra parameters or produce a malformed URL.
     * Ordinary app IDs and language codes are left unchanged by the encoding.
     *
     * NOTE(review): the path targets `/store/apps/details`, not a dedicated
     * data-safety page; confirm this is the intended page.
     *
     * @param {{ appId: string, lang?: string }} params - `appId` is required; `lang` defaults to 'en'.
     * @returns {string} The URL, rooted at `GOOGLE_PLAY_BASE`.
     * @throws {Error} When `appId` is missing or empty.
     */
    export function buildDataSafetyUrl(params = {}) {
      const { appId, lang = 'en' } = params;

      if (!appId) {
        throw new Error('appId is required');
      }

      const id = encodeURIComponent(appId);
      const hl = encodeURIComponent(lang);

      return `${GOOGLE_PLAY_BASE}/store/apps/details?id=${id}&hl=${hl}`;
    }

Latest Blog Posts

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/MiguelAlvRed/mobile-store-scraper-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server