// format-detector.ts • 3.8 kB
/**
* Format detection utilities for content processing
*/
/**
 * Describes how a single MIME type is classified for content processing.
 */
export interface FormatInfo {
  /** MIME type this entry describes, e.g. `application/pdf`. */
  mimeType: string;

  /** File extension associated with the format (e.g. `.pdf`). */
  extension: string;

  /** Coarse grouping the format belongs to. */
  category:
    | 'document'
    | 'text'
    | 'pdf'
    | 'image'
    | 'unknown';

  /** Whether the format is flagged as supported for processing. */
  supported: boolean;

  /** Identifier of the processor assigned to the format; may be omitted. */
  processor?: string;
}
/**
 * Static lookup utilities that classify content by MIME type.
 *
 * All methods reference the class by name rather than through `this`, so they
 * remain usable when detached from the class, e.g.
 * `mimeTypes.filter(FormatDetector.isSupported)`.
 */
export class FormatDetector {
  /**
   * Registry of explicitly known formats, keyed by lowercase MIME type.
   * Entries with `supported: false` are recognised but cannot be processed.
   */
  private static readonly SUPPORTED_FORMATS: ReadonlyMap<string, FormatInfo> = new Map<string, FormatInfo>([
    // Google Workspace formats
    ['application/vnd.google-apps.document', {
      mimeType: 'application/vnd.google-apps.document',
      extension: '.gdoc',
      category: 'document',
      supported: true,
      processor: 'google-docs'
    }],
    ['application/vnd.google-apps.spreadsheet', {
      mimeType: 'application/vnd.google-apps.spreadsheet',
      extension: '.gsheet',
      category: 'document',
      supported: false // Not implemented yet
    }],
    ['application/vnd.google-apps.presentation', {
      mimeType: 'application/vnd.google-apps.presentation',
      extension: '.gslides',
      category: 'document',
      supported: false // Not implemented yet
    }],
    // Microsoft Office formats
    ['application/vnd.openxmlformats-officedocument.wordprocessingml.document', {
      mimeType: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
      extension: '.docx',
      category: 'document',
      supported: true,
      processor: 'docx'
    }],
    ['application/msword', {
      mimeType: 'application/msword',
      extension: '.doc',
      category: 'document',
      supported: false // Legacy format not supported
    }],
    // PDF
    ['application/pdf', {
      mimeType: 'application/pdf',
      extension: '.pdf',
      category: 'pdf',
      supported: true,
      processor: 'pdf'
    }],
    // Text formats
    ['text/plain', {
      mimeType: 'text/plain',
      extension: '.txt',
      category: 'text',
      supported: true,
      processor: 'text'
    }],
    ['text/markdown', {
      mimeType: 'text/markdown',
      extension: '.md',
      category: 'text',
      supported: true,
      processor: 'text'
    }],
    ['text/html', {
      mimeType: 'text/html',
      extension: '.html',
      category: 'text',
      supported: true,
      processor: 'text'
    }],
    // Other common formats
    ['application/rtf', {
      mimeType: 'application/rtf',
      extension: '.rtf',
      category: 'document',
      supported: false // Not implemented yet
    }]
  ]);

  /**
   * Reduce a MIME type string to the form used as a registry key: parameters
   * (everything from the first `;`) are dropped, surrounding whitespace is
   * trimmed and the result is lowercased, because media type names are
   * case-insensitive.
   *
   * @param mimeType - Raw MIME type, e.g. `Text/HTML; charset=UTF-8`.
   * @returns The bare `type/subtype`, e.g. `text/html`.
   */
  private static normalizeMimeType(mimeType: string): string {
    const [essence = ''] = mimeType.split(';', 1);
    return essence.trim().toLowerCase();
  }

  /**
   * Detect format information from a MIME type.
   *
   * Resolution order:
   * 1. an explicit registry entry for the normalized MIME type;
   * 2. any other `text/*` type, which is handled as generic plain text;
   * 3. otherwise an `unknown`, unsupported descriptor.
   *
   * @param mimeType - MIME type to classify; case and parameters are ignored.
   * @returns A fresh `FormatInfo` object. Registry hits carry the canonical
   *   MIME type of the entry; fallback results echo the string that was passed in.
   */
  static detectFormat(mimeType: string): FormatInfo {
    const essence = FormatDetector.normalizeMimeType(mimeType);

    const known = FormatDetector.SUPPORTED_FORMATS.get(essence);
    if (known) {
      // Hand out a copy so callers cannot mutate the shared registry entry.
      return { ...known };
    }

    // Try to infer from MIME type patterns
    if (essence.startsWith('text/')) {
      return {
        mimeType,
        extension: '.txt',
        category: 'text',
        supported: true,
        processor: 'text'
      };
    }

    // Unknown format
    return {
      mimeType,
      extension: '',
      category: 'unknown',
      supported: false
    };
  }

  /**
   * Check if a MIME type is supported for processing.
   *
   * @param mimeType - MIME type to check.
   * @returns `true` when the detected format is flagged as supported.
   */
  static isSupported(mimeType: string): boolean {
    return FormatDetector.detectFormat(mimeType).supported;
  }

  /**
   * Get the processor type for a MIME type.
   *
   * @param mimeType - MIME type to look up.
   * @returns The processor identifier, or `null` when the format has none.
   */
  static getProcessor(mimeType: string): string | null {
    return FormatDetector.detectFormat(mimeType).processor ?? null;
  }

  /**
   * Get all supported MIME types.
   *
   * Only explicitly registered types are listed; `text/*` types accepted via
   * the generic text fallback of `detectFormat` are not enumerated.
   *
   * @returns Registry keys whose entry is flagged as supported, in
   *   registration order.
   */
  static getSupportedMimeTypes(): string[] {
    return Array.from(FormatDetector.SUPPORTED_FORMATS.entries())
      .filter(([, format]) => format.supported)
      .map(([mimeType]) => mimeType);
  }

  /**
   * Get format category.
   *
   * @param mimeType - MIME type to classify.
   * @returns The category of the detected format (`'unknown'` when unrecognised).
   */
  static getCategory(mimeType: string): FormatInfo['category'] {
    return FormatDetector.detectFormat(mimeType).category;
  }
}