gp_datasafety

Retrieve Google Play app data safety details including permissions and security information to assess privacy and compliance requirements.

Instructions

[Google Play] Get app data safety information

Input Schema

Table | JSON Schema

Name	Required	Description	Default
`appId`	Yes	Google Play app ID
`lang`	No	Language code (default: en)	en

Implementation Reference

src/server.js:820-862 (handler)

Main handler function for gp_datasafety tool: fetches app data safety HTML and parses it into structured JSON.

/**
 * Tool handler for `gp_datasafety`.
 *
 * Builds the data safety URL for the requested app, downloads the page as
 * text, and runs it through `parseDataSafety`. Every outcome is reported as a
 * single text content item holding pretty-printed JSON.
 *
 * @param {{ appId: string, lang?: string }} args - Tool arguments; `lang` falls back to 'en'.
 * @returns {Promise<{ content: Array<{ type: string, text: string }>, isError?: boolean }>}
 *   The parsed data on success, an `{ error }` payload when the parser yields
 *   nothing, or an `{ error }` payload flagged with `isError: true` when
 *   anything throws (including a missing `appId`).
 */
async function handleGPDataSafety(args) {
  // All return paths share the same envelope: one text item containing JSON.
  const asTextContent = (payload) => ({
    content: [
      {
        type: 'text',
        text: JSON.stringify(payload, null, 2),
      },
    ],
  });

  try {
    const { appId, lang = 'en' } = args;

    if (!appId) {
      throw new Error('appId is required');
    }

    const url = buildDataSafetyUrl({ appId, lang });
    const html = await fetchText(url);
    const parsed = parseDataSafety(html);

    // A falsy parse result is reported as a plain payload, not as a tool error.
    if (!parsed) {
      return asTextContent({ error: 'Data safety information not available' });
    }

    return asTextContent(parsed);
  } catch (failure) {
    return {
      ...asTextContent({ error: failure.message }),
      isError: true,
    };
  }
}

src/server.js:1385-1403 (schema)

Tool schema definition including input parameters (appId required, lang optional) for gp_datasafety.

// Tool descriptor for gp_datasafety: its name, a human-readable description,
// and the JSON Schema describing the arguments it accepts.
{
  name: 'gp_datasafety',
  description: '[Google Play] Get app data safety information',
  inputSchema: {
    type: 'object',
    properties: {
      // The only mandatory argument (see `required` below).
      appId: {
        type: 'string',
        description: 'Google Play app ID',
      },
      // Optional; the schema declares 'en' as the default.
      lang: {
        type: 'string',
        description: 'Language code (default: en)',
        default: 'en',
      },
    },
    required: ['appId'],
  },
},

src/server.js:1480-1481 (registration)
Registration in the tool dispatcher switch statement that routes calls to the handler.
```
// Route gp_datasafety calls to handleGPDataSafety and return its result.
case 'gp_datasafety':
  return await handleGPDataSafety(args);
```

src/parsers/googlePlay/datasafety.js:11-240 (helper)

Core parsing logic that extracts data safety details (shared/collected data, security practices, privacy policy) from HTML using multiple strategies.

/**
 * Extract data safety details from a page's HTML.
 *
 * Four best-effort strategies run in order and write into one result object:
 *   1. privacy policy URL, from the first matching link/attribute/JSON pattern;
 *   2. JSON-LD `<script>` blocks exposing a dataSafety / data-safety / data_safety object;
 *   3. any `<script>` whose text embeds such an object as a JSON literal;
 *   4. a data-safety `<div>`/`<section>`, scanned for shared data, collected
 *      data and security practices.
 *
 * Items produced by strategy 4 are built with the module's `extractPurpose`,
 * `extractDataType` and `extractDescription` helpers.
 *
 * @param {string} html - Raw page HTML.
 * @returns {{ dataShared: Array, dataCollected: Array, securityPractices: Array,
 *   privacyPolicyUrl: (string|null) } | null} The collected details, or `null`
 *   when `html` is not a non-empty string, when nothing at all was found, or
 *   when parsing throws (the error is logged with `console.error`).
 */
export function parseDataSafety(html) {
  if (!html || typeof html !== 'string') {
    return null;
  }

  // Fields that structured (JSON) sources may supply directly.
  const structuredKeys = ['dataShared', 'dataCollected', 'securityPractices'];

  // Words that never count as a data type on their own.
  const stopWords = ['and', 'or', 'the', 'a', 'an'];

  // Ways a single data item can appear inside a matched fragment.
  const dataItemPatterns = [
    /<span[^>]*>([^<]+)<\/span>/gi,
    /<div[^>]*class=["'][^"']*item["'][^>]*>([^<]+)<\/div>/gi,
    /"([^"]+)"/g,
  ];

  // Copy the known array fields from a structured source; a present but
  // non-array value resets the field to an empty list.
  const applyStructured = (source, target) => {
    for (const key of structuredKeys) {
      if (source[key]) {
        target[key] = Array.isArray(source[key]) ? source[key] : [];
      }
    }
  };

  // Scan `sectionHtml` with each { regex, group } source and append every new,
  // plausible data item found in the selected capture group to `target`.
  const collectDataItems = (sectionHtml, sources, target) => {
    for (const { regex, group } of sources) {
      for (const match of sectionHtml.matchAll(regex)) {
        const itemHtml = match[group];

        for (const itemPattern of dataItemPatterns) {
          for (const itemMatch of itemHtml.matchAll(itemPattern)) {
            const dataText = itemMatch[1].trim();

            if (dataText.length <= 2 || dataText.length >= 100) {
              continue;
            }
            if (dataText.startsWith('<') || stopWords.includes(dataText.toLowerCase())) {
              continue;
            }
            // Entries copied from embedded JSON may not be objects, hence `?.`.
            if (target.some((entry) => entry?.data === dataText)) {
              continue;
            }

            target.push({
              data: dataText,
              optional: itemHtml.includes('optional'),
              purpose: extractPurpose(itemHtml),
              type: extractDataType(dataText),
            });
          }
        }
      }
    }
  };

  try {
    const result = {
      dataShared: [],
      dataCollected: [],
      securityPractices: [],
      privacyPolicyUrl: null,
    };

    // Strategy 1: Extract privacy policy URL with multiple patterns
    const privacyPolicyPatterns = [
      /<a[^>]*href=["']([^"']*privacy[^"']*)["'][^>]*>/i,
      /privacy[^"']*policy["'][^>]*href=["']([^"']+)["']/i,
      /<link[^>]*rel=["']privacy-policy["'][^>]*href=["']([^"']+)["']/i,
      /privacyPolicyUrl["']:\s*["']([^"']+)["']/i,
      /privacy.*?policy.*?url["']:\s*["']([^"']+)["']/i,
    ];

    for (const pattern of privacyPolicyPatterns) {
      const match = html.match(pattern);
      if (match) {
        result.privacyPolicyUrl = match[1];
        break;
      }
    }

    // Strategy 2: Look for JSON-LD structured data
    const jsonLdMatches = html.matchAll(
      /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi,
    );

    for (const match of jsonLdMatches) {
      try {
        const jsonLd = JSON.parse(match[1]);
        const ds = jsonLd.dataSafety || jsonLd['data-safety'] || jsonLd.data_safety;

        if (ds) {
          applyStructured(ds, result);
        }
      } catch (e) {
        // Best effort: this block is not usable JSON-LD, try the next one.
      }
    }

    // Strategy 3: Extract from script tags with embedded JSON
    const embeddedJsonPatterns = [
      /dataSafety["']?\s*:\s*\{([\s\S]*?)\}/i,
      /"data-safety"["']?\s*:\s*\{([\s\S]*?)\}/i,
      /data_safety["']?\s*:\s*\{([\s\S]*?)\}/i,
    ];

    for (const match of html.matchAll(/<script[^>]*>([\s\S]*?)<\/script>/gi)) {
      const scriptContent = match[1];
      const mentionsDataSafety =
        scriptContent.includes('dataSafety') ||
        scriptContent.includes('data-safety') ||
        scriptContent.includes('data_safety');

      if (!mentionsDataSafety) {
        continue;
      }

      for (const pattern of embeddedJsonPatterns) {
        const jsonMatch = scriptContent.match(pattern);
        if (!jsonMatch) {
          continue;
        }

        try {
          // The capture is the object's interior (up to the first `}`), so
          // re-wrap it in braces before parsing.
          applyStructured(JSON.parse(`{${jsonMatch[1]}}`), result);
        } catch (e) {
          // Best effort: the fragment is not valid JSON, try the next pattern.
        }
      }
    }

    // Strategy 4: Extract from HTML sections with multiple patterns
    const dataSafetySectionPatterns = [
      /<div[^>]*class=["'][^"']*data-safety["'][^>]*>([\s\S]*?)<\/div>/i,
      /<section[^>]*id=["']data-safety["'][^>]*>([\s\S]*?)<\/section>/i,
      /<div[^>]*id=["']data-safety["'][^>]*>([\s\S]*?)<\/div>/i,
      /<section[^>]*class=["'][^"']*data-safety["'][^>]*>([\s\S]*?)<\/section>/i,
    ];

    let sectionHtml = null;
    for (const pattern of dataSafetySectionPatterns) {
      const match = html.match(pattern);
      if (match) {
        sectionHtml = match[1];
        break;
      }
    }

    if (sectionHtml) {
      // Every regex handed to matchAll must carry the `g` flag: matchAll throws
      // a TypeError for a non-global regex, which would land in the outer catch
      // and discard everything found so far.
      // `group` selects the text scanned for items: the whole match for the
      // inline-list form, the element's inner HTML for the tag forms.
      collectDataItems(
        sectionHtml,
        [
          { regex: /data.*?shared["']?\s*:\s*\[([\s\S]*?)\]/gi, group: 0 },
          { regex: /<div[^>]*class=["'][^"']*data-shared["'][^>]*>([\s\S]*?)<\/div>/gi, group: 1 },
          { regex: /<section[^>]*class=["'][^"']*data-shared["'][^>]*>([\s\S]*?)<\/section>/gi, group: 1 },
        ],
        result.dataShared,
      );

      collectDataItems(
        sectionHtml,
        [
          { regex: /data.*?collected["']?\s*:\s*\[([\s\S]*?)\]/gi, group: 0 },
          { regex: /<div[^>]*class=["'][^"']*data-collected["'][^>]*>([\s\S]*?)<\/div>/gi, group: 1 },
          { regex: /<section[^>]*class=["'][^"']*data-collected["'][^>]*>([\s\S]*?)<\/section>/gi, group: 1 },
        ],
        result.dataCollected,
      );

      // Extract security practices
      const securityPatterns = [
        /security.*?practices["']?\s*:\s*\[([\s\S]*?)\]/gi,
        /<div[^>]*class=["'][^"']*security-practices["'][^>]*>([\s\S]*?)<\/div>/gi,
      ];

      for (const pattern of securityPatterns) {
        for (const match of sectionHtml.matchAll(pattern)) {
          const practiceHtml = match[1];
          // The practice name is the first run of text before any tag.
          const practiceMatch = practiceHtml.match(/([^<]+)/);
          if (!practiceMatch) {
            continue;
          }

          const practice = practiceMatch[1].trim();
          if (practice.length > 5) {
            result.securityPractices.push({
              practice,
              description: extractDescription(practiceHtml),
            });
          }
        }
      }
    }

    // Return result only if we found at least some data
    const foundSomething =
      result.dataShared.length > 0 ||
      result.dataCollected.length > 0 ||
      result.securityPractices.length > 0 ||
      Boolean(result.privacyPolicyUrl);

    return foundSomething ? result : null;
  } catch (error) {
    console.error('Error parsing Google Play data safety:', error);
    return null;
  }
}

src/endpoints/googlePlay.js:166-174 (helper)

URL builder function that constructs the Google Play app details page URL (with the `id` and `hl` query parameters) from which the data safety information is parsed.

/**
 * Build the URL that is fetched for an app's data safety information.
 *
 * Both values are percent-encoded before being placed in the query string, so
 * characters such as `&`, `=`, `#` or spaces cannot corrupt or inject query
 * parameters. Typical package ids and language codes are unaffected.
 *
 * NOTE(review): this targets the app details page (`/store/apps/details`);
 * confirm that this, rather than a dedicated data safety page, is intended.
 *
 * @param {{ appId: string, lang?: string }} params - `lang` defaults to 'en'.
 * @returns {string} URL under `GOOGLE_PLAY_BASE` with `id` and `hl` query parameters.
 * @throws {Error} If `appId` is missing or empty.
 */
export function buildDataSafetyUrl(params) {
  const { appId, lang = 'en' } = params;

  if (!appId) {
    throw new Error('appId is required');
  }

  const id = encodeURIComponent(appId);
  const hl = encodeURIComponent(lang);

  return `${GOOGLE_PLAY_BASE}/store/apps/details?id=${id}&hl=${hl}`;
}

Store Scraper MCP

gp_datasafety

Instructions

Input Schema

Implementation Reference

Other Tools

Latest Blog Posts

MCP directory API