output-parser.ts•11.8 kB
/**
 * @fileoverview Git CLI output parsing utilities
 * @module services/git/providers/cli/utils/output-parser
 */
import type { GitBranchInfo, GitStatusResult } from '../../../types.js';
/**
 * Delimiter for fields within a single record in porcelain formats.
 * The value is the ASCII Unit Separator control character (0x1F).
 */
export const GIT_FIELD_DELIMITER = '\x1F';
/**
 * Delimiter for records in porcelain formats.
 * The value is the ASCII Record Separator control character (0x1E).
 */
export const GIT_RECORD_DELIMITER = '\x1E';
/**
 * Split delimiter-separated git output into records and fields.
 *
 * The output is cut on `recordDelimiter`; chunks that are blank after
 * trimming are discarded. Each surviving chunk is trimmed and then cut on
 * `delimiter` to produce its list of fields.
 *
 * @param output - Raw git output
 * @param delimiter - Field delimiter (default: GIT_FIELD_DELIMITER)
 * @param recordDelimiter - Record delimiter (default: GIT_RECORD_DELIMITER)
 * @returns One string array per record, in input order
 */
export function parsePorcelainOutput(
  output: string,
  delimiter = GIT_FIELD_DELIMITER,
  recordDelimiter = GIT_RECORD_DELIMITER,
): string[][] {
  return output
    .split(recordDelimiter)
    .filter((chunk) => chunk.trim())
    .map((chunk) => chunk.trim().split(delimiter));
}
/**
 * Parse `git status --porcelain=v2 -b` output (newline-terminated, no `-z`).
 *
 * Line layouts handled (space-separated, path last):
 * - `# branch.head <name>`: current branch, `(detached)` maps to `null`
 * - `1 <XY> <sub> <mH> <mI> <mW> <hH> <hI> <path>`: ordinary change
 * - `2 <XY> <sub> <mH> <mI> <mW> <hH> <hI> <X><score> <path>\t<origPath>`:
 *   rename or copy
 * - `u <XY> <sub> <m1> <m2> <m3> <mW> <h1> <h2> <h3> <path>`: unmerged
 * - `? <path>`: untracked
 * - `! <path>`: ignored (does not make the tree dirty)
 *
 * In `<XY>`, `X` is the staged status and `Y` the worktree status; `.` means
 * unchanged. Renames and copies are reported as `"<origPath> -> <path>"`.
 *
 * @param output - Git status --porcelain=v2 -b output
 * @returns Parsed status information
 */
export function parseGitStatus(output: string): GitStatusResult {
  const result: GitStatusResult = {
    currentBranch: null,
    stagedChanges: {},
    unstagedChanges: {},
    untrackedFiles: [],
    conflictedFiles: [],
    isClean: true,
  };

  // Worktree-side (Y column) statuses are recorded the same way for type 1
  // and type 2 entries.
  const recordUnstaged = (status: string | undefined, path: string): void => {
    if (status === 'M') (result.unstagedChanges.modified ??= []).push(path);
    if (status === 'D') (result.unstagedChanges.deleted ??= []).push(path);
  };

  // Accept CRLF as well as LF so a trailing '\r' never leaks into names.
  for (const line of output.split(/\r?\n/)) {
    if (!line) continue;

    if (line.startsWith('#')) {
      // Header line, e.g., '# branch.head main'
      const header = line.split(' ');
      if (header[1] === 'branch.head' && header[2]) {
        result.currentBranch = header[2] === '(detached)' ? null : header[2];
      }
      continue;
    }

    const parts = line.split(' ');
    const statusType = parts[0];

    // Ignored files (only listed with --ignored) are not changes.
    if (statusType === '!') continue;

    // Any other entry line means the working tree is not clean, even when
    // its status letter is one this parser does not categorise.
    result.isClean = false;

    const xy = parts[1];
    const stagedStatus = xy?.[0];
    const unstagedStatus = xy?.[1];

    if (statusType === '1') {
      // Eight metadata fields precede the path; re-join so paths containing
      // spaces survive the split above.
      const path = parts.slice(8).join(' ').trim();
      if (stagedStatus === 'A') (result.stagedChanges.added ??= []).push(path);
      if (stagedStatus === 'M')
        (result.stagedChanges.modified ??= []).push(path);
      if (stagedStatus === 'D')
        (result.stagedChanges.deleted ??= []).push(path);
      if (stagedStatus === 'R')
        (result.stagedChanges.renamed ??= []).push(path);
      if (stagedStatus === 'C') (result.stagedChanges.copied ??= []).push(path);
      recordUnstaged(unstagedStatus, path);
    } else if (statusType === '2') {
      // Nine metadata fields (the extra one is the rename/copy score) precede
      // '<path>\t<origPath>'. The current path comes BEFORE the tab.
      const [newPath = '', origPath = ''] = parts
        .slice(9)
        .join(' ')
        .split('\t');
      const entry = `${origPath} -> ${newPath}`;
      if (stagedStatus === 'C') {
        (result.stagedChanges.copied ??= []).push(entry);
      } else {
        (result.stagedChanges.renamed ??= []).push(entry);
      }
      // A renamed file can additionally be modified/deleted in the worktree.
      recordUnstaged(unstagedStatus, newPath);
    } else if (statusType === 'u') {
      // Unmerged entries carry ten metadata fields (three stage modes, the
      // worktree mode and three object names) before the path.
      const path = parts.slice(10).join(' ').trim();
      result.conflictedFiles.push(path);
    } else if (statusType === '?') {
      // Untracked: everything after the '? ' prefix is the path.
      result.untrackedFiles.push(line.substring(2));
    }
  }

  return result;
}
/**
 * Turn delimiter-formatted `git log` output into commit records.
 *
 * Records are separated by GIT_RECORD_DELIMITER and fields by
 * GIT_FIELD_DELIMITER, matching the format string
 * `%H%x1F%an%x1F%ae%x1F%ad%x1F%s%x1E`
 * (hash, author name, author email, date, subject). Records that are blank
 * or carry fewer than five fields are dropped; fields beyond the fifth are
 * ignored.
 *
 * @param output - Git log output with custom format
 * @returns One object per commit with `hash`, `authorName`, `authorEmail`,
 *   `date` and `subject` keys
 */
export function parseGitLog(output: string): Array<Record<string, string>> {
  return output
    .split(GIT_RECORD_DELIMITER)
    .map((chunk) => chunk.trim())
    .filter((chunk) => chunk.length > 0)
    .map((chunk) => chunk.split(GIT_FIELD_DELIMITER))
    .filter((columns) => columns.length >= 5)
    .map(
      ([
        hash = '',
        authorName = '',
        authorEmail = '',
        date = '',
        subject = '',
      ]) => ({ hash, authorName, authorEmail, date, subject }),
    );
}
/**
 * Parse `git diff --stat` output.
 *
 * Per-file lines look like ` path/to/file | 10 +++++-----`. The number is the
 * total of changed lines; the `+`/`-` histogram is scaled down by git when it
 * would not fit the output width. When the histogram length equals the
 * number, the per-file counts are exact. Otherwise they are an estimate: the
 * total is split in the same proportion as the histogram symbols. Lines
 * without a numeric count (e.g. binary files shown as `Bin`) are skipped.
 *
 * The summary line (` 3 files changed, 25 insertions(+), 10 deletions(-)`)
 * is authoritative for the totals. Git omits the insertions or the deletions
 * part when it is zero, so each part is matched independently and a missing
 * part counts as 0. Without a summary line the totals are the per-file sums.
 *
 * @param output - Git diff --stat output
 * @returns Per-file statistics plus total additions and deletions
 */
export function parseGitDiffStat(output: string): {
  files: Array<{ path: string; additions: number; deletions: number }>;
  totalAdditions: number;
  totalDeletions: number;
} {
  // Patterns are built once instead of on every loop iteration.
  const fileLineRe = /^\s*(.+?)\s*\|\s*(\d+)\s*([+-]*)/;
  const insertionsRe = /(\d+) insertions?\(\+\)/;
  const deletionsRe = /(\d+) deletions?\(-\)/;

  const files: Array<{ path: string; additions: number; deletions: number }> =
    [];
  let totalAdditions = 0;
  let totalDeletions = 0;

  for (const line of output.split(/\r?\n/)) {
    const fileMatch = fileLineRe.exec(line);
    if (fileMatch) {
      const path = fileMatch[1]!.trim();
      const changed = parseInt(fileMatch[2]!, 10);
      const symbols = fileMatch[3] ?? '';
      const plusCount = (symbols.match(/\+/g) ?? []).length;
      const minusCount = (symbols.match(/-/g) ?? []).length;
      const symbolCount = plusCount + minusCount;

      let additions = plusCount;
      let deletions = minusCount;
      if (symbolCount > 0 && symbolCount !== changed) {
        // Histogram was scaled: distribute the real total proportionally.
        additions = Math.round((changed * plusCount) / symbolCount);
        deletions = changed - additions;
      }

      files.push({ path, additions, deletions });
      totalAdditions += additions;
      totalDeletions += deletions;
      continue;
    }

    const insertionsMatch = insertionsRe.exec(line);
    const deletionsMatch = deletionsRe.exec(line);
    if (insertionsMatch || deletionsMatch) {
      // Summary line: exact totals replace the per-file sums.
      totalAdditions = insertionsMatch ? parseInt(insertionsMatch[1]!, 10) : 0;
      totalDeletions = deletionsMatch ? parseInt(deletionsMatch[1]!, 10) : 0;
    }
  }

  return { files, totalAdditions, totalDeletions };
}
/**
 * Parse plain `git branch` listing output.
 *
 * Blank lines are skipped. For every other line the first two characters
 * (the marker column) are dropped and the remainder is trimmed. A leading
 * `*` marks the line as the current branch. When the remainder has the shape
 * `<left> -> <right>`, `<left>` becomes the name and `<right>` is reported
 * as `upstream`; otherwise the whole remainder is the name.
 *
 * @param output - Git branch output
 * @returns Array of branch information
 */
export function parseGitBranch(output: string): Array<{
  name: string;
  current: boolean;
  upstream?: string;
}> {
  return output
    .split('\n')
    .filter((row) => row.trim())
    .map((row): { name: string; current: boolean; upstream?: string } => {
      const current = row.startsWith('*');
      const label = row.substring(2).trim();
      const arrow = /^(.+?)\s+->\s+(.+)$/.exec(label);
      if (!arrow) {
        return { name: label, current };
      }
      return {
        name: arrow[1]!.trim(),
        current,
        upstream: arrow[2]!.trim(),
      };
    });
}
/**
 * Parse `git remote -v` output.
 *
 * Each line has the form `<name><whitespace><url> (fetch|push)`, e.g.
 * `origin  https://github.com/user/repo.git (fetch)`. The URL may contain
 * spaces (local filesystem remotes), so it is taken as everything between
 * the name and the trailing ` (fetch)` / ` (push)` marker. Lines that do not
 * match, including lines with any other marker, are skipped so the returned
 * `type` is always one of the two declared values. Text after the marker is
 * ignored.
 *
 * @param output - Git remote -v output
 * @returns Array of remote information
 */
export function parseGitRemote(output: string): Array<{
  name: string;
  url: string;
  type: 'fetch' | 'push';
}> {
  const remoteLineRe = /^(\S+)\s+(.+?)\s+\((fetch|push)\)/;
  const remotes: Array<{ name: string; url: string; type: 'fetch' | 'push' }> =
    [];

  for (const line of output.split('\n')) {
    const match = remoteLineRe.exec(line);
    if (!match) continue;

    const [, name, url, type] = match;
    // Runtime check (rather than a type assertion) narrows `type` safely.
    if (!name || !url || (type !== 'fetch' && type !== 'push')) continue;

    remotes.push({ name, url, type });
  }

  return remotes;
}
/**
 * Parse `git tag -l` output into a list of tag lines.
 *
 * The output is split on newlines and whitespace-only lines are dropped.
 * Kept lines are returned exactly as they appeared (they are not trimmed).
 *
 * @param output - Git tag -l output
 * @returns Array of tag names
 */
export function parseGitTag(output: string): string[] {
  const tags: string[] = [];
  for (const entry of output.split('\n')) {
    if (entry.trim() !== '') {
      tags.push(entry);
    }
  }
  return tags;
}
/**
 * Parse `git for-each-ref` output into detailed branch records.
 *
 * Each non-blank line is expected to hold five GIT_FIELD_DELIMITER-separated
 * fields:
 * refname<delim>objectname<delim>upstream:short<delim>upstream:track<delim>HEAD
 *
 * - `refname`: `refs/heads/...` or `refs/remotes/...`; that prefix is removed
 *   to form `name`. Lines with an empty refname are skipped.
 * - `objectname`: stored as `commitHash` (empty string when missing).
 * - `upstream:short`: stored as `upstream` only when non-empty.
 * - `upstream:track`: text such as "ahead 2, behind 1"; absent counts are 0.
 * - `HEAD`: the branch is `current` when this field is exactly `*`.
 *
 * @param output - The raw stdout from the git for-each-ref command
 * @returns An array of branch information objects
 *
 * @example
 * ```typescript
 * // Command: git for-each-ref --format='%(refname)\x1F%(objectname)...' refs/heads
 * const branches = parseBranchRef(output);
 * console.log(branches[0].name); // 'main'
 * console.log(branches[0].ahead); // 2
 * ```
 */
export function parseBranchRef(output: string): GitBranchInfo[] {
  const remotePrefix = 'refs/remotes/';
  const localPrefix = 'refs/heads/';

  // Pull the first captured number out of the tracking text, defaulting to 0.
  const countFrom = (text: string | undefined, pattern: RegExp): number => {
    if (!text) return 0;
    const hit = pattern.exec(text);
    return hit ? parseInt(hit[1]!, 10) : 0;
  };

  const parsed: GitBranchInfo[] = [];
  for (const row of output.split('\n')) {
    if (!row.trim()) continue;

    const columns = row.split(GIT_FIELD_DELIMITER);
    const refname = columns[0];
    if (!refname) continue;

    const prefix = refname.startsWith(remotePrefix)
      ? remotePrefix
      : localPrefix;
    const tracking = columns[3];

    const entry: GitBranchInfo = {
      name: refname.replace(prefix, ''),
      commitHash: columns[1] || '',
      current: columns[4] === '*',
      ahead: countFrom(tracking, /ahead (\d+)/),
      behind: countFrom(tracking, /behind (\d+)/),
    };

    // The upstream key is left off entirely when no upstream is configured.
    const upstream = columns[2];
    if (upstream) {
      entry.upstream = upstream;
    }

    parsed.push(entry);
  }
  return parsed;
}
/**
 * Parse the output of `git branch -v --no-abbrev` (also tolerates `-vv`).
 *
 * Row layout after the two-character marker column:
 * `<name> <hash> [(<worktree path>)] [[<tracking>]] <subject>`
 *
 * - `<name>` is a single token, or a parenthesised phrase for a detached
 *   HEAD such as `(HEAD detached at abc1234)`, which is kept whole.
 * - The tracking bracket is only recognised directly after the hash (and the
 *   optional worktree path), so bracketed text later in the commit subject
 *   is not mistaken for tracking information.
 * - `-vv` style `[origin/main: ahead 1, behind 2]` yields `upstream` plus
 *   counts; `[origin/main]` yields `upstream` only.
 * - `-v` style `[ahead 1]`, `[behind 2]`, `[ahead 1, behind 2]` and `[gone]`
 *   carry no upstream name, so only `ahead`/`behind` are set.
 *
 * Limitation: a subject that itself begins with a bracket immediately after
 * the hash (e.g. `[WIP] ...`) cannot be told apart from a tracking bracket in
 * this human-oriented format.
 *
 * @deprecated Use parseBranchRef() instead for more robust parsing
 * @param output - The raw stdout from the git command.
 * @returns An array of branch information objects.
 */
export function parseGitBranchList(output: string): GitBranchInfo[] {
  // 1: name (token or parenthesised phrase), 2: hash, 3: tracking text.
  const rowRe =
    /^(\([^)]*\)|\S+)(?:\s+(\S+))?(?:\s+\([^)]*\))?(?:\s+\[([^\]]+)\])?/;
  // Tracking text that holds only divergence info and no upstream name.
  const statusOnlyRe = /^(?:gone|ahead \d+(?:, behind \d+)?|behind \d+)$/;

  return output
    .split('\n')
    .filter((line) => line.trim())
    .map((line) => {
      const current = line.startsWith('*');
      const trimmed = line.substring(2).trim(); // Skip '* ' or '  '
      const row = rowRe.exec(trimmed);

      const branch: GitBranchInfo = {
        name: row?.[1] ?? '',
        current,
        commitHash: row?.[2] ?? '',
      };

      const tracking = row?.[3];
      if (!tracking) return branch;

      let statusText: string | undefined;
      if (statusOnlyRe.test(tracking)) {
        statusText = tracking;
      } else {
        // Example: 'origin/main: ahead 1' -> upstream before the first ':'.
        const separator = tracking.indexOf(':');
        const remoteBranch =
          separator === -1 ? tracking : tracking.slice(0, separator);
        if (remoteBranch) {
          branch.upstream = remoteBranch;
        }
        if (separator !== -1) {
          statusText = tracking.slice(separator + 1);
        }
      }

      if (statusText !== undefined) {
        const aheadMatch = statusText.match(/ahead (\d+)/);
        const behindMatch = statusText.match(/behind (\d+)/);
        branch.ahead = aheadMatch?.[1] ? parseInt(aheadMatch[1], 10) : 0;
        branch.behind = behindMatch?.[1] ? parseInt(behindMatch[1], 10) : 0;
      }
      return branch;
    });
}