Skip to main content
Glama

Rishi's Interactive Resume MCP Server

by RishiA
test_pii_redaction.py9.83 kB
#!/usr/bin/env python3 """ Automated PII Detection Tests Ensures no phone numbers or sensitive information in resume data """ import re import json import sys from pathlib import Path from typing import List, Tuple, Dict class PIIDetector: """Detect PII in resume files""" # Phone number patterns PHONE_PATTERNS = [ # US phone formats r'(?<!\d)(?:\+?1[\s.-]?)?(?:\(\d{3}\)|\d{3})[\s.-]?\d{3}[\s.-]?\d{4}(?!\d)', # Spaced format r'\d\s\d\s\d\s\d\s\d\s\d\s\d\s\d\s\d\s\d', # International formats r'\+\d{1,3}[\s.-]?\d{2,4}[\s.-]?\d{3,4}[\s.-]?\d{4}', # Generic 10-digit r'(?<!\d)\d{10}(?!\d)', # With extensions r'(?<!\d)(?:\+?1[\s.-]?)?(?:\(\d{3}\)|\d{3})[\s.-]?\d{3}[\s.-]?\d{4}(?:\s*(?:ext|x|extension)[\s.:]*\d{1,5})?(?!\d)' ] # SSN patterns (should never be in resume) SSN_PATTERNS = [ r'(?<!\d)\d{3}[-\s]?\d{2}[-\s]?\d{4}(?!\d)', r'(?<!\d)\d{9}(?!\d)' ] # Credit card patterns (should never be in resume) CC_PATTERNS = [ r'(?<!\d)\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}(?!\d)', r'(?<!\d)\d{16}(?!\d)' ] def __init__(self): self.issues_found = [] def check_text(self, text: str, filename: str) -> List[Dict]: """Check text for PII patterns""" issues = [] # Check for phone numbers for pattern in self.PHONE_PATTERNS: matches = re.findall(pattern, text) if matches: # Filter out false positives (like years or percentages) real_phones = [m for m in matches if not self._is_false_positive(m)] if real_phones: issues.append({ "type": "phone_number", "file": filename, "pattern": pattern, "matches": real_phones, "severity": "HIGH" }) # Check for SSN for pattern in self.SSN_PATTERNS: matches = re.findall(pattern, text) if matches: # Filter out false positives real_ssns = [m for m in matches if self._looks_like_ssn(m)] if real_ssns: issues.append({ "type": "ssn", "file": filename, "pattern": pattern, "matches": real_ssns, "severity": "CRITICAL" }) # Check for credit cards for pattern in self.CC_PATTERNS: matches = re.findall(pattern, text) if matches: if any(self._luhn_check(m.replace(" ", "").replace("-", "")) for m in matches): issues.append({ "type": "credit_card", "file": filename, "pattern": pattern, "matches": matches, "severity": "CRITICAL" }) return issues def _is_false_positive(self, match: str) -> bool: """Check if a phone pattern match is actually something else""" # Remove non-digits digits_only = re.sub(r'\D', '', match) # Years (1900-2099) if 1900 <= int(digits_only[:4]) <= 2099 and len(digits_only) == 4: return True # Percentages or metrics (like "92% accuracy") if len(digits_only) <= 3: return True # Version numbers if '.' in match and len(digits_only) <= 6: return True return False def _looks_like_ssn(self, match: str) -> bool: """Check if pattern really looks like SSN""" digits_only = re.sub(r'\D', '', match) if len(digits_only) != 9: return False # SSN rules: first 3 digits not 000, 666, or 900-999 first_three = int(digits_only[:3]) if first_three == 0 or first_three == 666 or first_three >= 900: return False return True def _luhn_check(self, number: str) -> bool: """Luhn algorithm to validate credit card numbers""" try: digits = [int(d) for d in number] checksum = 0 for i, digit in enumerate(reversed(digits[:-1])): if i % 2 == 0: digit *= 2 if digit > 9: digit -= 9 checksum += digit return (checksum + digits[-1]) % 10 == 0 except: return False def check_file(self, filepath: Path) -> List[Dict]: """Check a file for PII""" try: if filepath.suffix == '.json': with open(filepath, 'r') as f: # Check both raw JSON and string values content = f.read() issues = self.check_text(content, str(filepath)) # Also check parsed JSON values f.seek(0) data = json.load(f) json_text = json.dumps(data, indent=2) issues.extend(self.check_text(json_text, f"{filepath} (parsed)")) else: with open(filepath, 'r') as f: content = f.read() issues = self.check_text(content, str(filepath)) return issues except Exception as e: print(f"Error checking {filepath}: {e}") return [] def check_directory(self, directory: Path) -> Tuple[bool, List[Dict]]: """Check all relevant files in directory""" all_issues = [] # Files to check patterns = ['*.json', '*.md', '*.py', '*.txt'] for pattern in patterns: for filepath in directory.rglob(pattern): # Skip test files and node_modules if 'node_modules' in str(filepath) or '__pycache__' in str(filepath): continue issues = self.check_file(filepath) all_issues.extend(issues) return len(all_issues) == 0, all_issues def test_no_phone_numbers(): """Test that no phone numbers exist in resume files""" detector = PIIDetector() project_root = Path(__file__).parent.parent # Check specific resume files resume_files = [ project_root / "resume.md", project_root / "resume_data.json", project_root / "README.md" ] all_issues = [] for filepath in resume_files: if filepath.exists(): issues = detector.check_file(filepath) all_issues.extend(issues) # Filter to just phone numbers phone_issues = [i for i in all_issues if i['type'] == 'phone_number'] if phone_issues: print("❌ PHONE NUMBERS DETECTED:") for issue in phone_issues: print(f" File: {issue['file']}") print(f" Matches: {issue['matches']}") assert False, f"Found {len(phone_issues)} phone number(s) in resume files" print("✅ No phone numbers detected") def test_no_ssn(): """Test that no SSNs exist in any files""" detector = PIIDetector() project_root = Path(__file__).parent.parent passed, all_issues = detector.check_directory(project_root) ssn_issues = [i for i in all_issues if i['type'] == 'ssn'] if ssn_issues: print("❌ SSN PATTERN DETECTED:") for issue in ssn_issues: print(f" File: {issue['file']}") print(f" Matches: {issue['matches']}") assert False, f"Found {len(ssn_issues)} SSN pattern(s)" print("✅ No SSN patterns detected") def test_no_credit_cards(): """Test that no credit card numbers exist""" detector = PIIDetector() project_root = Path(__file__).parent.parent passed, all_issues = detector.check_directory(project_root) cc_issues = [i for i in all_issues if i['type'] == 'credit_card'] if cc_issues: print("❌ CREDIT CARD PATTERN DETECTED:") for issue in cc_issues: print(f" File: {issue['file']}") assert False, f"Found {len(cc_issues)} credit card pattern(s)" print("✅ No credit card patterns detected") def test_email_allowed(): """Test that email addresses ARE allowed (not PII for professional use)""" project_root = Path(__file__).parent.parent readme = project_root / "README.md" if readme.exists(): with open(readme, 'r') as f: content = f.read() # Email pattern email_pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}' matches = re.findall(email_pattern, content) # Note: We removed email from README, but this test shows emails are OK if present print(f"ℹ️ Emails are allowed. Found {len(matches)} email(s)") def run_all_tests(): """Run all PII detection tests""" print("🔍 Running PII Detection Tests") print("=" * 50) tests = [ ("Phone Numbers", test_no_phone_numbers), ("SSN Patterns", test_no_ssn), ("Credit Cards", test_no_credit_cards), ("Email Policy", test_email_allowed) ] failed = [] for test_name, test_func in tests: print(f"\nTesting: {test_name}") try: test_func() except AssertionError as e: failed.append((test_name, str(e))) print(f" ❌ FAILED: {e}") print("\n" + "=" * 50) if failed: print(f"❌ {len(failed)} test(s) failed:") for name, error in failed: print(f" - {name}: {error}") return False else: print("✅ All PII detection tests passed!") return True if __name__ == "__main__": success = run_all_tests() sys.exit(0 if success else 1)

MCP directory API

We provide all the information about MCP servers via our MCP API.

curl -X GET 'https://glama.ai/api/mcp/v1/servers/RishiA/rishi-resume-mcp'

If you have feedback or need assistance with the MCP directory API, please join our Discord server