// parameter-extraction.ts • 22.8 kB
import { exec, execFile } from 'child_process';
import { promisify } from 'util';
import axios from 'axios';
import * as cheerio from 'cheerio';
import { ScanResult } from './recon.js';
// Promise-returning wrapper around child_process.exec so shell commands can be awaited.
const execAsync = promisify(exec);
/**
 * One request parameter discovered on a target, together with where it was
 * found and which vulnerability classes its name/value hint at.
 */
export interface ExtractedParameter {
/** Parameter name as it appears in the query string, form, script or API document. */
name: string;
/** Where in the request the parameter travels. */
type: 'GET' | 'POST' | 'COOKIE' | 'HEADER' | 'JSON' | 'XML' | 'MULTIPART';
/** Extraction technique that produced this entry (e.g. 'katana', 'manual_crawl', 'javascript', 'ajax', 'api', 'swagger'). */
source: string;
/** URL the parameter is associated with (request URL or form action). */
url: string;
/** HTTP method associated with the parameter. */
method: string;
/** Sample value observed alongside the parameter, when one was available. */
example_value?: string;
/** Human-readable description of the place the parameter was found. */
context: string;
/** Heuristic vulnerability labels (e.g. 'SQL Injection', 'XSS') derived from the parameter. */
potential_vuln_types: string[];
}
/**
 * Aggregated outcome of a parameter-extraction run.
 */
export interface ParameterExtractionResult {
/** Number of parameters collected before de-duplication. */
total_parameters: number;
/** Number of parameters remaining after de-duplication. */
unique_parameters: number;
/** Count of unique parameters keyed by their `type`. */
parameters_by_type: Record<string, number>;
/** Count of unique parameters keyed by each potential vulnerability label. */
parameters_by_vuln_type: Record<string, number>;
/** Subset of the unique parameters considered worth testing first. */
high_interest_params: ExtractedParameter[];
/** Every unique parameter that was found. */
all_parameters: ExtractedParameter[];
}
/**
 * Discovers request parameters exposed by a web target.
 *
 * Four independent techniques are combined: the external `katana` crawler,
 * an in-process HTML crawl (forms and inline jQuery calls), regex analysis of
 * JavaScript, and probing of well-known API / OpenAPI endpoints. The merged
 * list is de-duplicated and each parameter is tagged with heuristic
 * vulnerability labels.
 */
export class ParameterExtractionEngine {
  /** Browser-like User-Agent sent with every HTTP request issued by the engine. */
  private static readonly USER_AGENT =
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';

  /** Safety cap on the number of pages the in-process crawler will fetch. */
  private static readonly MAX_CRAWL_PAGES = 50;

  /** Maximum number of external script files that are downloaded and analysed. */
  private static readonly MAX_JS_FILES = 10;

  /** Well-known paths probed for JSON APIs and OpenAPI/Swagger documents. */
  private static readonly API_ENDPOINTS: readonly string[] = [
    '/api',
    '/api/v1',
    '/api/v2',
    '/rest',
    '/graphql',
    '/swagger',
    '/openapi.json',
    '/api-docs',
    '/docs'
  ];

  /** Keys of an OpenAPI path item that denote operations (as opposed to `parameters`, `summary`, ...). */
  private static readonly HTTP_METHODS: ReadonlySet<string> = new Set([
    'get', 'put', 'post', 'delete', 'options', 'head', 'patch', 'trace'
  ]);

  /** Name fragments that make a parameter "high interest" regardless of other heuristics. */
  private static readonly HIGH_INTEREST_NAMES: readonly string[] = [
    'id', 'userid', 'user_id', 'uid', 'admin', 'role', 'auth', 'token',
    'password', 'passwd', 'pwd', 'pass', 'key', 'secret', 'api_key',
    'file', 'filename', 'path', 'dir', 'directory', 'folder',
    'cmd', 'command', 'exec', 'system', 'shell', 'eval',
    'search', 'query', 'sql', 'select', 'where', 'order',
    'url', 'uri', 'link', 'redirect', 'callback', 'referer',
    'upload', 'attach', 'document', 'image', 'pdf'
  ];

  /**
   * Heuristic rules, evaluated in order. A rule fires when the lower-cased
   * parameter name contains one of `names`, the form input type is one of
   * `inputTypes`, or the lower-cased value contains one of `values`.
   */
  private static readonly VULN_RULES: ReadonlyArray<{
    label: string;
    names: readonly string[];
    inputTypes?: readonly string[];
    values?: readonly string[];
  }> = [
    { label: 'SQL Injection', names: ['id', 'user', 'search', 'query', 'filter'] },
    {
      label: 'XSS',
      names: ['comment', 'message', 'content', 'text', 'search'],
      inputTypes: ['text', 'textarea']
    },
    { label: 'Command Injection', names: ['cmd', 'command', 'exec', 'system', 'shell'] },
    { label: 'File Upload', names: ['file', 'upload', 'attach'], inputTypes: ['file'] },
    { label: 'LDAP Injection', names: ['ldap', 'dn', 'cn'] },
    {
      label: 'Path Traversal',
      names: ['file', 'path', 'dir', 'folder'],
      values: ['../', '..\\']
    },
    { label: 'SSRF', names: ['url', 'uri', 'link', 'callback', 'redirect'] },
    { label: 'Authentication Bypass', names: ['token', 'session', 'auth', 'key', 'password'] }
  ];

  /**
   * Runs every extraction technique against `target` and returns the merged report.
   *
   * @param target Base URL of the application to analyse.
   * @param depth  Link depth for crawling (1 = only the target page). Non-finite
   *               values fall back to 2; negative values are treated as 0.
   * @returns A scan result whose `results` is a {@link ParameterExtractionResult}
   *          on success, or an empty object plus `error` on failure. Never rejects.
   */
  async extractParameters(target: string, depth: number = 2): Promise<ScanResult> {
    try {
      console.error(`Extracting parameters from ${target} (depth: ${depth})`);
      const crawlDepth = Number.isFinite(depth) ? Math.max(0, Math.trunc(depth)) : 2;

      const allParameters: ExtractedParameter[] = [];
      // Method 1: Katana crawling and parameter extraction
      allParameters.push(...(await this.runKatanaExtraction(target, crawlDepth)));
      // Method 2: Manual crawling and form analysis
      allParameters.push(...(await this.manualParameterExtraction(target, crawlDepth)));
      // Method 3: JavaScript analysis
      allParameters.push(...(await this.extractFromJavaScript(target)));
      // Method 4: API endpoint discovery
      allParameters.push(...(await this.discoverAPIParameters(target)));

      // The analysed list (not the raw de-duplicated one) feeds every report field,
      // so the clean-up done by analyzeParameters is actually visible to callers.
      const uniqueParams = this.analyzeParameters(this.deduplicateParameters(allParameters));

      const result: ParameterExtractionResult = {
        total_parameters: allParameters.length,
        unique_parameters: uniqueParams.length,
        parameters_by_type: this.categorizeByType(uniqueParams),
        parameters_by_vuln_type: this.categorizeByVulnType(uniqueParams),
        high_interest_params: this.identifyHighInterestParameters(uniqueParams),
        all_parameters: uniqueParams
      };

      return {
        target,
        timestamp: new Date().toISOString(),
        tool: 'parameter_extraction',
        results: result,
        status: 'success'
      };
    } catch (error) {
      return {
        target,
        timestamp: new Date().toISOString(),
        tool: 'parameter_extraction',
        results: {},
        status: 'error',
        error: error instanceof Error ? error.message : String(error)
      };
    }
  }

  /**
   * Crawls the target with the external `katana` binary (passive sources) and
   * extracts the query parameters of every URL it prints.
   *
   * Returns an empty list when katana is not installed or the run fails.
   */
  private async runKatanaExtraction(target: string, depth: number): Promise<ExtractedParameter[]> {
    try {
      // Check if Katana is installed
      try {
        await execAsync('katana -version', { timeout: 5000 });
      } catch {
        console.warn('Katana not found, skipping Katana extraction');
        return [];
      }

      // The target is caller-supplied: pass it as a discrete argument via execFile
      // so no shell ever interprets it (avoids command injection).
      const args = [
        '-u', target,
        '-d', String(depth),
        '-ps',
        '-pss', 'waybackarchive,commoncrawl,alienvault',
        '-f', 'qurl',
        '-o', '-'
      ];
      console.error(`Executing: katana ${args.join(' ')}`);

      const execFileAsync = promisify(execFile);
      const { stdout } = await execFileAsync('katana', args, {
        encoding: 'utf8',
        timeout: 120000, // 2 minutes
        maxBuffer: 1024 * 1024 * 5 // 5MB buffer
      });

      const parameters: ExtractedParameter[] = [];
      for (const rawLine of String(stdout).split('\n')) {
        const line = rawLine.trim();
        if (!line) continue;
        try {
          const url = new URL(line);
          parameters.push(
            ...this.queryParametersOf(url, 'katana', `Query parameter in ${url.pathname}`)
          );
        } catch {
          // Skip lines that are not valid absolute URLs
        }
      }

      console.error(`Katana extracted ${parameters.length} parameters`);
      return parameters;
    } catch (error) {
      console.error('Katana extraction error:', error);
      return [];
    }
  }

  /**
   * Breadth-first crawl of the target that collects form fields and the data
   * of jQuery AJAX calls found in script tags.
   *
   * `depth` is a link depth: pages at depth 0 .. depth-1 are fetched, where the
   * target itself is depth 0. At most MAX_CRAWL_PAGES pages are fetched, and
   * only links on the target's origin and under its path are followed.
   */
  private async manualParameterExtraction(target: string, depth: number): Promise<ExtractedParameter[]> {
    try {
      const parameters: ExtractedParameter[] = [];
      const maxPages = ParameterExtractionEngine.MAX_CRAWL_PAGES;

      let scope: URL | undefined;
      try {
        scope = new URL(target);
      } catch {
        scope = undefined;
      }
      const inScope = (link: URL): boolean =>
        scope !== undefined
          ? link.origin === scope.origin && link.pathname.startsWith(scope.pathname)
          : link.toString().startsWith(target);

      // Each queue entry remembers its own depth, so the limit applies to link
      // depth rather than to the number of pages fetched.
      const queue: Array<{ url: string; level: number }> =
        depth > 0 ? [{ url: target, level: 0 }] : [];
      const seen = new Set<string>(queue.map(entry => entry.url));

      for (let head = 0; head < queue.length; head++) {
        const entry = queue[head];
        if (!entry) continue;
        const { url: currentUrl, level } = entry;

        try {
          const response = await axios.get(currentUrl, {
            timeout: 10000,
            headers: { 'User-Agent': ParameterExtractionEngine.USER_AGENT }
          });
          const $ = cheerio.load(response.data);

          // Extract form parameters
          $('form').each((_, form) => {
            const $form = $(form);
            const method = $form.attr('method')?.toUpperCase() || 'GET';
            // Relative actions are resolved against the page that contains the form.
            const action = this.resolveUrl($form.attr('action') || currentUrl, currentUrl);

            $form.find('input, select, textarea').each((_index, field) => {
              const $field = $(field);
              const name = $field.attr('name');
              if (!name) return;

              // <textarea> and <select> have no type attribute; use the tag name for them.
              const type = $field.is('textarea')
                ? 'textarea'
                : $field.is('select')
                  ? 'select'
                  : ($field.attr('type') || 'text').toLowerCase();
              const value = $field.attr('value') || $field.text();

              parameters.push({
                name,
                type: method === 'GET' ? 'GET' : 'POST',
                source: 'manual_crawl',
                url: action,
                method,
                example_value: value,
                context: `Form input (${type}) in ${currentUrl}`,
                potential_vuln_types: this.identifyVulnTypes(name, value, type)
              });
            });
          });

          // Extract links for deeper crawling
          if (level < depth - 1) {
            $('a[href]').each((_, anchor) => {
              const href = $(anchor).attr('href');
              if (!href || queue.length >= maxPages) return;
              try {
                const link = new URL(href, currentUrl);
                link.hash = ''; // fragments address the same document
                const absoluteUrl = link.toString();
                if (inScope(link) && !seen.has(absoluteUrl)) {
                  seen.add(absoluteUrl);
                  queue.push({ url: absoluteUrl, level: level + 1 });
                }
              } catch {
                // Skip invalid URLs
              }
            });
          }

          // Extract AJAX endpoints from script tags
          $('script').each((_, script) => {
            const scriptContent = $(script).html();
            if (scriptContent) {
              parameters.push(...this.extractAjaxParameters(scriptContent, currentUrl));
            }
          });
        } catch (error) {
          console.error(`Error crawling ${currentUrl}:`, error);
        }
      }

      console.error(`Manual extraction found ${parameters.length} parameters`);
      return parameters;
    } catch (error) {
      console.error('Manual extraction error:', error);
      return [];
    }
  }

  /**
   * Fetches the target page and analyses its inline scripts plus up to
   * MAX_JS_FILES external scripts for parameters.
   */
  private async extractFromJavaScript(target: string): Promise<ExtractedParameter[]> {
    try {
      const parameters: ExtractedParameter[] = [];
      const response = await axios.get(target, {
        timeout: 10000,
        headers: { 'User-Agent': ParameterExtractionEngine.USER_AGENT }
      });
      const $ = cheerio.load(response.data);

      // Collect external script URLs (a Set avoids downloading the same file twice)
      const jsUrls = new Set<string>();
      $('script[src]').each((_, script) => {
        const src = $(script).attr('src');
        if (!src) return;
        try {
          jsUrls.add(new URL(src, target).toString());
        } catch {
          // Skip invalid URLs
        }
      });

      // Analyze inline JavaScript
      $('script:not([src])').each((_, script) => {
        const content = $(script).html();
        if (content) {
          parameters.push(...this.extractParametersFromJS(content, target));
        }
      });

      // Analyze external JavaScript files; the downloads are independent, so run them together.
      const selected = Array.from(jsUrls).slice(0, ParameterExtractionEngine.MAX_JS_FILES);
      const perFile = await Promise.all(
        selected.map(async (jsUrl): Promise<ExtractedParameter[]> => {
          try {
            const jsResponse = await axios.get(jsUrl, { timeout: 10000 });
            // axios may hand back a parsed object (e.g. JSON served from a .js URL).
            return typeof jsResponse.data === 'string'
              ? this.extractParametersFromJS(jsResponse.data, jsUrl)
              : [];
          } catch {
            return []; // best effort: a script that cannot be downloaded is skipped
          }
        })
      );
      parameters.push(...perFile.flat());

      console.error(`JavaScript analysis found ${parameters.length} parameters`);
      return parameters;
    } catch (error) {
      console.error('JavaScript extraction error:', error);
      return [];
    }
  }

  /**
   * Regex-based extraction of parameters from JavaScript source text.
   *
   * Recognises jQuery `$.ajax/$.post/$.get` calls, `fetch` with a
   * `JSON.stringify({...})` body, string literals containing a query string,
   * a string passed to `send(...)` after `XMLHttpRequest`, and `FormData ... append(name, value)`.
   *
   * @param jsContent JavaScript source to scan.
   * @param sourceUrl URL the script came from; used to resolve relative URLs and in `context`.
   */
  private extractParametersFromJS(jsContent: string, sourceUrl: string): ExtractedParameter[] {
    if (typeof jsContent !== 'string') return [];

    type Rule =
      | { kind: 'request'; regex: RegExp; dataType: ExtractedParameter['type']; dataMethod: string }
      | { kind: 'url' | 'xhrBody' | 'formData'; regex: RegExp };

    // For 'request' rules group 1 is the URL and group 2 the object-literal body.
    const rules: Rule[] = [
      // AJAX requests
      {
        kind: 'request',
        regex: /\$\.ajax\s*\(\s*\{[^}]*url\s*:\s*['"]([^'"]+)['"][^}]*data\s*:\s*\{([^}]+)\}/gi,
        dataType: 'POST',
        dataMethod: 'POST'
      },
      {
        kind: 'request',
        regex: /\$\.post\s*\(\s*['"]([^'"]+)['"],\s*\{([^}]+)\}/gi,
        dataType: 'POST',
        dataMethod: 'POST'
      },
      {
        // $.get sends its data object as query parameters
        kind: 'request',
        regex: /\$\.get\s*\(\s*['"]([^'"]+)['"],\s*\{([^}]+)\}/gi,
        dataType: 'GET',
        dataMethod: 'GET'
      },
      // Fetch API
      {
        kind: 'request',
        regex: /fetch\s*\(\s*['"]([^'"]+)['"][^}]*body\s*:\s*JSON\.stringify\s*\(\s*\{([^}]+)\}/gi,
        dataType: 'JSON',
        dataMethod: 'POST'
      },
      // XMLHttpRequest: group 1 is the request body, not a URL
      { kind: 'xhrBody', regex: /XMLHttpRequest[^;]*send\s*\(\s*['"]([^'"]+)['"]\s*\)/gi },
      // URL construction
      { kind: 'url', regex: /['"]([^'"]*\?[^'"]*)['"]/gi },
      // Form data: group 1 is the field name, group 2 the value expression
      { kind: 'formData', regex: /FormData[^;]*append\s*\(\s*['"]([^'"]+)['"],\s*([^)]+)\)/gi }
    ];

    const parameters: ExtractedParameter[] = [];

    for (const rule of rules) {
      for (const match of jsContent.matchAll(rule.regex)) {
        const first = match[1];
        if (!first) continue;

        if (rule.kind === 'formData') {
          const value = (match[2] ?? '').trim().replace(/^['"]|['"]$/g, '');
          parameters.push({
            name: first,
            type: 'MULTIPART',
            source: 'javascript',
            url: sourceUrl,
            method: 'POST',
            example_value: value,
            context: `JavaScript FormData field in ${sourceUrl}`,
            potential_vuln_types: this.identifyVulnTypes(first, value)
          });
          continue;
        }

        if (rule.kind === 'xhrBody') {
          // Only url-encoded bodies ("a=1&b=2") carry named parameters.
          if (!first.includes('=')) continue;
          for (const [name, value] of new URLSearchParams(first).entries()) {
            if (!name) continue;
            parameters.push({
              name,
              type: 'POST',
              source: 'javascript',
              url: sourceUrl,
              method: 'POST',
              example_value: value,
              context: `JavaScript XHR body in ${sourceUrl}`,
              potential_vuln_types: this.identifyVulnTypes(name, value)
            });
          }
          continue;
        }

        let url: URL;
        try {
          url = new URL(first, sourceUrl);
        } catch {
          continue; // Skip invalid URLs
        }

        // Extract query parameters
        parameters.push(
          ...this.queryParametersOf(url, 'javascript', `JavaScript query parameter in ${sourceUrl}`)
        );

        // Extract request data parameters
        const dataBody = match[2];
        if (rule.kind === 'request' && dataBody) {
          for (const [name, value] of Object.entries(this.parseJSObjectString(dataBody))) {
            parameters.push({
              name,
              type: rule.dataType,
              source: 'javascript',
              url: url.toString(),
              method: rule.dataMethod,
              example_value: String(value),
              context: `JavaScript ${rule.dataMethod} data in ${sourceUrl}`,
              potential_vuln_types: this.identifyVulnTypes(name, String(value))
            });
          }
        }
      }
    }

    return parameters;
  }

  /**
   * Probes well-known API paths on the target's origin. Responses that look
   * like an OpenAPI/Swagger document (an object with a `paths` object) are
   * parsed as such; other JSON responses are walked for key names.
   */
  private async discoverAPIParameters(target: string): Promise<ExtractedParameter[]> {
    try {
      const baseUrl = new URL(target).origin;

      // The probes are independent of each other, so issue them together.
      const perEndpoint = await Promise.all(
        ParameterExtractionEngine.API_ENDPOINTS.map(
          async (endpoint): Promise<ExtractedParameter[]> => {
            const apiUrl = `${baseUrl}${endpoint}`;
            try {
              const response = await axios.get(apiUrl, {
                timeout: 5000,
                headers: {
                  'Accept': 'application/json',
                  'User-Agent': ParameterExtractionEngine.USER_AGENT
                }
              });

              const payload = this.parseJsonIfString(response.data);

              // Check for Swagger/OpenAPI documentation
              if (
                ParameterExtractionEngine.isRecord(payload) &&
                ParameterExtractionEngine.isRecord(payload.paths)
              ) {
                return this.extractParametersFromSwagger(payload, apiUrl);
              }

              // Otherwise treat JSON responses as sample API data
              const contentType = String(response.headers['content-type'] ?? '');
              if (contentType.includes('application/json')) {
                return this.extractParametersFromAPI(payload, apiUrl);
              }
              return [];
            } catch {
              // API endpoint not found or not accessible: expected for most probes
              return [];
            }
          }
        )
      );

      const parameters = perEndpoint.flat();
      console.error(`API discovery found ${parameters.length} parameters`);
      return parameters;
    } catch (error) {
      console.error('API parameter discovery error:', error);
      return [];
    }
  }

  /**
   * Extracts the data-object keys of jQuery `$.ajax`, `$.post` and `$.get`
   * calls found in a script.
   *
   * For `$.ajax` the URL comes from the `url:` option and the method from an
   * explicit `type:`/`method:` option (POST when none is visible). For the
   * shorthand calls the URL is the first string argument and the method is
   * implied by the function name. URLs are resolved against `sourceUrl`.
   */
  private extractAjaxParameters(scriptContent: string, sourceUrl: string): ExtractedParameter[] {
    const parameters: ExtractedParameter[] = [];
    if (typeof scriptContent !== 'string') return parameters;

    const calls: Array<{ regex: RegExp; fixedMethod?: 'GET' | 'POST' }> = [
      { regex: /\$\.ajax\s*\(\s*\{[^}]*\}/gi },
      { regex: /\$\.post\s*\([^)]*\)/gi, fixedMethod: 'POST' },
      { regex: /\$\.get\s*\([^)]*\)/gi, fixedMethod: 'GET' }
    ];

    for (const { regex, fixedMethod } of calls) {
      for (const match of scriptContent.matchAll(regex)) {
        const ajaxCall = match[0];
        const isShorthand = fixedMethod !== undefined;

        // Extract URL
        const rawUrl = (isShorthand
          ? ajaxCall.match(/\(\s*['"]([^'"]+)['"]/)
          : ajaxCall.match(/url\s*:\s*['"]([^'"]+)['"]/))?.[1];
        // Extract data parameters
        const dataString = (isShorthand
          ? ajaxCall.match(/\{([^}]+)\}/)
          : ajaxCall.match(/data\s*:\s*\{([^}]+)\}/))?.[1];
        if (!rawUrl || !dataString) continue;

        // \b keeps "dataType"/"contentType" from being mistaken for the method option.
        const declared = ajaxCall
          .match(/\b(?:type|method)\s*:\s*['"]([a-z]+)['"]/i)?.[1]
          ?.toUpperCase();
        const declaredMethod =
          declared !== undefined &&
          ParameterExtractionEngine.HTTP_METHODS.has(declared.toLowerCase())
            ? declared
            : undefined;
        const method = fixedMethod ?? declaredMethod ?? 'POST';
        const url = this.resolveUrl(rawUrl, sourceUrl);

        for (const [name, value] of Object.entries(this.parseJSObjectString(dataString))) {
          parameters.push({
            name,
            type: method === 'GET' ? 'GET' : 'POST',
            source: 'ajax',
            url,
            method,
            example_value: String(value),
            context: `AJAX request in ${sourceUrl}`,
            potential_vuln_types: this.identifyVulnTypes(name, String(value))
          });
        }
      }
    }

    return parameters;
  }

  /**
   * Walks a JSON document and reports every object key as a JSON parameter.
   * Arrays are traversed, but their numeric indices are not reported as names.
   *
   * @param apiData Parsed JSON, or a JSON string.
   * @param apiUrl  URL the document was fetched from.
   */
  private extractParametersFromAPI(apiData: any, apiUrl: string): ExtractedParameter[] {
    const parameters: ExtractedParameter[] = [];

    const walk = (node: unknown, path: string): void => {
      if (Array.isArray(node)) {
        node.forEach((item, index) => walk(item, `${path}[${index}]`));
        return;
      }
      if (typeof node !== 'object' || node === null) return;

      for (const [key, value] of Object.entries(node)) {
        const currentPath = path ? `${path}.${key}` : key;
        const isContainer = typeof value === 'object' && value !== null;
        const example = typeof value === 'object' ? JSON.stringify(value) : String(value);

        parameters.push({
          name: key,
          type: 'JSON',
          source: 'api',
          url: apiUrl,
          method: 'POST',
          example_value: example,
          context: `JSON API parameter at ${currentPath}`,
          // Containers are judged by name only; their children are visited individually.
          potential_vuln_types: this.identifyVulnTypes(key, isContainer ? '' : example)
        });

        if (isContainer) {
          walk(value, currentPath);
        }
      }
    };

    try {
      const document: unknown = typeof apiData === 'string' ? JSON.parse(apiData) : apiData;
      walk(document, '');
    } catch {
      // Not valid JSON or parsing error
    }

    return parameters;
  }

  /**
   * Extracts declared parameters from a Swagger 2 / OpenAPI 3 document.
   *
   * Both path-level and operation-level `parameters` arrays are read. Entries
   * without a string `name` (for example `$ref` objects) are skipped. The
   * parameter location (`in`) is mapped onto the `ExtractedParameter` type
   * union and kept verbatim in `context`.
   *
   * @param swaggerData Parsed document, or a JSON string.
   * @param apiUrl      URL the document was fetched from.
   */
  private extractParametersFromSwagger(swaggerData: any, apiUrl: string): ExtractedParameter[] {
    const parameters: ExtractedParameter[] = [];
    const isRecord = ParameterExtractionEngine.isRecord;

    try {
      const spec: unknown =
        typeof swaggerData === 'string' ? JSON.parse(swaggerData) : swaggerData;
      if (!isRecord(spec) || !isRecord(spec.paths)) return parameters;

      const baseUrl = this.resolveSwaggerBaseUrl(spec, apiUrl);

      for (const [path, pathItem] of Object.entries(spec.paths)) {
        if (!isRecord(pathItem)) continue;
        const shared: unknown[] = Array.isArray(pathItem.parameters) ? pathItem.parameters : [];

        for (const [method, operation] of Object.entries(pathItem)) {
          // Path items also hold non-operation keys such as `parameters` or `summary`.
          if (!ParameterExtractionEngine.HTTP_METHODS.has(method.toLowerCase())) continue;
          if (!isRecord(operation)) continue;
          const own: unknown[] = Array.isArray(operation.parameters) ? operation.parameters : [];

          for (const param of [...shared, ...own]) {
            if (!isRecord(param) || typeof param.name !== 'string') continue;

            const schema = isRecord(param.schema) ? param.schema : {};
            const rawExample =
              param.example ?? param.default ?? schema.example ?? schema.default ?? '';
            // Examples may be numbers, booleans or objects; normalise to a string.
            const example =
              typeof rawExample === 'object' ? JSON.stringify(rawExample) : String(rawExample);
            const location = typeof param.in === 'string' ? param.in : undefined;

            parameters.push({
              name: param.name,
              type: this.mapSwaggerLocation(location),
              source: 'swagger',
              url: `${baseUrl}${path}`,
              method: method.toUpperCase(),
              example_value: example,
              context: `Swagger API parameter (${location}) in ${path}`,
              potential_vuln_types: this.identifyVulnTypes(param.name, example)
            });
          }
        }
      }
    } catch {
      // Not valid Swagger/OpenAPI or parsing error
    }

    return parameters;
  }

  /**
   * Parses the inside of a simple object literal (`a: 1, 'b': "x"`) into a
   * key/value map using regular expressions. Quoted values may contain commas
   * and may be empty; unquoted values run up to the next `,` or `}`.
   * Nested objects and shorthand properties are not supported.
   */
  private parseJSObjectString(objString: string): Record<string, string> {
    const params: Record<string, string> = {};
    if (typeof objString !== 'string') return params;

    const pairRegex = /['"]?([\w$]+)['"]?\s*:\s*(?:'([^']*)'|"([^"]*)"|([^,}]+))/g;
    for (const match of objString.matchAll(pairRegex)) {
      const key = match[1];
      if (!key || key === '__proto__') continue;
      params[key] = match[2] ?? match[3] ?? (match[4] ?? '').trim();
    }

    return params;
  }

  /**
   * Returns the heuristic vulnerability labels that apply to a parameter,
   * in the fixed order of VULN_RULES.
   *
   * @param paramName  Parameter name (matched case-insensitively by substring).
   * @param paramValue Example value; non-string input is converted with String().
   * @param inputType  HTML input type, when the parameter came from a form field.
   */
  private identifyVulnTypes(paramName: string, paramValue: string, inputType?: string): string[] {
    const nameLower = String(paramName ?? '').toLowerCase();
    const valueLower = String(paramValue ?? '').toLowerCase();

    return ParameterExtractionEngine.VULN_RULES
      .filter(rule =>
        rule.names.some(fragment => nameLower.includes(fragment)) ||
        (inputType !== undefined && (rule.inputTypes?.includes(inputType) ?? false)) ||
        (rule.values?.some(fragment => valueLower.includes(fragment)) ?? false)
      )
      .map(rule => rule.label);
  }

  /**
   * Collapses parameters that share name, type and URL.
   *
   * The first occurrence is kept, except that an entry found by the manual
   * crawl is replaced by a later duplicate. The vulnerability labels of all
   * duplicates are merged into the surviving entry. Input objects are not mutated.
   */
  private deduplicateParameters(parameters: ExtractedParameter[]): ExtractedParameter[] {
    const seen = new Map<string, ExtractedParameter>();

    for (const param of parameters) {
      // JSON encoding keeps the three components unambiguous even if they contain separators.
      const key = JSON.stringify([param.name, param.type, param.url]);
      const existing = seen.get(key);
      if (existing === undefined) {
        seen.set(key, param);
        continue;
      }
      const winner = existing.source === 'manual_crawl' ? param : existing;
      seen.set(key, {
        ...winner,
        potential_vuln_types: [
          ...new Set([...existing.potential_vuln_types, ...param.potential_vuln_types])
        ]
      });
    }

    return Array.from(seen.values());
  }

  /** Returns copies of the parameters with duplicate vulnerability labels removed. */
  private analyzeParameters(parameters: ExtractedParameter[]): ExtractedParameter[] {
    return parameters.map(param => ({
      ...param,
      potential_vuln_types: [...new Set(param.potential_vuln_types)]
    }));
  }

  /**
   * Selects parameters whose name contains a high-interest fragment or that
   * carry at least one vulnerability label.
   */
  private identifyHighInterestParameters(parameters: ExtractedParameter[]): ExtractedParameter[] {
    return parameters.filter(param => {
      if (param.potential_vuln_types.length > 0) return true;
      const nameLower = param.name.toLowerCase();
      return ParameterExtractionEngine.HIGH_INTEREST_NAMES.some(fragment =>
        nameLower.includes(fragment)
      );
    });
  }

  /** Counts parameters per `type`. */
  private categorizeByType(parameters: ExtractedParameter[]): Record<string, number> {
    const categories: Record<string, number> = {};
    for (const param of parameters) {
      categories[param.type] = (categories[param.type] ?? 0) + 1;
    }
    return categories;
  }

  /** Counts parameters per vulnerability label (a parameter counts once for each of its labels). */
  private categorizeByVulnType(parameters: ExtractedParameter[]): Record<string, number> {
    const categories: Record<string, number> = {};
    for (const vulnType of parameters.flatMap(param => param.potential_vuln_types)) {
      categories[vulnType] = (categories[vulnType] ?? 0) + 1;
    }
    return categories;
  }

  /** Builds one GET parameter entry for every query-string pair of `url`. */
  private queryParametersOf(url: URL, source: string, context: string): ExtractedParameter[] {
    return Array.from(url.searchParams.entries(), ([name, value]): ExtractedParameter => ({
      name,
      type: 'GET',
      source,
      url: url.toString(),
      method: 'GET',
      example_value: value,
      context,
      potential_vuln_types: this.identifyVulnTypes(name, value)
    }));
  }

  /** Resolves `raw` against `base`; returns `raw` unchanged when it cannot be resolved. */
  private resolveUrl(raw: string, base: string): string {
    try {
      return new URL(raw, base).toString();
    } catch {
      return raw;
    }
  }

  /** Parses `data` as JSON when it is a string; returns the input unchanged otherwise or on failure. */
  private parseJsonIfString(data: unknown): unknown {
    if (typeof data !== 'string') return data;
    try {
      return JSON.parse(data) as unknown;
    } catch {
      return data;
    }
  }

  /**
   * Determines the URL prefix that API paths of a spec are relative to:
   * the first OpenAPI 3 server URL when usable, otherwise the origin of the
   * spec URL plus the Swagger 2 `basePath`. Falls back to `apiUrl` itself
   * when `apiUrl` is not an absolute URL.
   */
  private resolveSwaggerBaseUrl(spec: Record<string, unknown>, apiUrl: string): string {
    let origin: string;
    try {
      origin = new URL(apiUrl).origin;
    } catch {
      return apiUrl;
    }

    const firstServer: unknown = Array.isArray(spec.servers) ? spec.servers[0] : undefined;
    if (
      ParameterExtractionEngine.isRecord(firstServer) &&
      typeof firstServer.url === 'string' &&
      !firstServer.url.includes('{') // templated server URLs cannot be resolved statically
    ) {
      try {
        return new URL(firstServer.url, apiUrl).toString().replace(/\/+$/, '');
      } catch {
        // fall through to origin + basePath
      }
    }

    const basePath = typeof spec.basePath === 'string' ? spec.basePath.replace(/\/+$/, '') : '';
    return `${origin}${basePath}`;
  }

  /** Maps an OpenAPI parameter location (`in`) onto the ExtractedParameter type union. */
  private mapSwaggerLocation(location: string | undefined): ExtractedParameter['type'] {
    switch (location?.toLowerCase()) {
      case 'header':
        return 'HEADER';
      case 'cookie':
        return 'COOKIE';
      case 'body':
        return 'JSON';
      case 'formdata':
        return 'POST';
      default:
        // 'query', 'path' and unknown locations all travel in the request URL
        return 'GET';
    }
  }

  /** Type guard for plain (non-null, non-array) objects. */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null && !Array.isArray(value);
  }
}