// search-engine.ts
import { google, drive_v3 } from 'googleapis';
import { AuthService } from '../auth/auth-service.js';
import {
SearchEngine,
SearchQuery,
SearchResult,
ContentMatch,
SearchFilters,
SearchOptions
} from '../types/search.js';
import { DriveFile } from '../types/drive.js';
import { ResilientExecutor } from '../utils/retry-handler.js';
import { ErrorHandler } from '../utils/error-handler.js';
import { ValidationError, InvalidFileIdError } from '../types/errors.js';
/**
 * Google Drive search engine.
 *
 * Translates high-level {@link SearchQuery} objects into Drive v3 `files.list`
 * query strings, executes them through the injected auth/retry wrappers and
 * decorates each hit with a locally computed relevance score, the list of
 * matched metadata fields and (when possible) a description snippet.
 */
export class GoogleDriveSearchEngine implements SearchEngine {
  /** Page size used when the caller gives no usable `limit`. */
  private static readonly DEFAULT_PAGE_SIZE = 100;

  /** Largest `pageSize` accepted by Drive `files.list`. */
  private static readonly MAX_PAGE_SIZE = 1000;

  /**
   * Short file-type aliases accepted in `SearchFilters.mimeType`.
   * Values ending in `/` are MIME type prefixes and are matched with `contains`.
   * A `Map` is used so that inherited object keys such as `constructor` are
   * never mistaken for an alias.
   */
  private static readonly MIME_TYPE_SHORTCUTS: ReadonlyMap<string, string> = new Map([
    ['pdf', 'application/pdf'],
    ['doc', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['gdoc', 'application/vnd.google-apps.document'],
    ['gsheet', 'application/vnd.google-apps.spreadsheet'],
    ['gslide', 'application/vnd.google-apps.presentation'],
    ['folder', 'application/vnd.google-apps.folder'],
    ['image', 'image/'],
    ['video', 'video/'],
    ['audio', 'audio/']
  ]);

  private readonly driveApi: drive_v3.Drive;
  private readonly authService: AuthService;
  private readonly resilientExecutor: ResilientExecutor;

  /**
   * @param authService Supplies the OAuth2 client and wraps calls via `executeWithAuth`.
   * @param resilientExecutor Optional executor wrapping every API call; a default
   *   instance is created when omitted.
   * @param _errorHandler Accepted for signature compatibility; currently unused.
   */
  constructor(
    authService: AuthService,
    resilientExecutor?: ResilientExecutor,
    _errorHandler?: ErrorHandler
  ) {
    this.authService = authService;
    this.driveApi = google.drive({
      version: 'v3',
      auth: authService.getOAuthManager().getOAuth2Client()
    });
    this.resilientExecutor = resilientExecutor || new ResilientExecutor();
  }

  /**
   * Search files on Google Drive with advanced filtering.
   *
   * Only a single page of results is fetched; its size is `query.limit`
   * clamped to the range Drive accepts (default 100).
   *
   * @param query Search criteria (text, file type, folder, dates, owner, ordering, limit).
   * @returns One result per file returned by Drive; empty when Drive returns no file list.
   * @throws ValidationError when `query` is not an object.
   */
  async searchFiles(query: SearchQuery): Promise<SearchResult[]> {
    if (!query || typeof query !== 'object') {
      throw new ValidationError('Search query must be a valid object');
    }
    return this.resilientExecutor.execute(async () => {
      return this.authService.executeWithAuth(async () => {
        const filters = this.buildFiltersFromQuery(query);
        const options: SearchOptions = {
          includeItemsFromAllDrives: true,
          supportsAllDrives: true,
          corpora: 'allDrives',
          pageSize: this.resolvePageSize(query.limit)
        };
        // Kept as `any`: the exact shape of SearchOptions is declared elsewhere
        // and may not line up field-for-field with the Drive parameter type.
        const listParams: any = {
          q: this.buildSearchQuery(filters),
          pageSize: options.pageSize,
          fields: 'nextPageToken, files(id, name, mimeType, size, modifiedTime, createdTime, owners, permissions, webViewLink, webContentLink, parents, description)',
          supportsAllDrives: options.supportsAllDrives,
          includeItemsFromAllDrives: options.includeItemsFromAllDrives
        };
        if (options.corpora) {
          listParams.corpora = options.corpora;
        }
        // orderBy is only sent when Drive will accept it (see resolveOrderBy).
        const orderBy = this.resolveOrderBy(query, Boolean(filters.fullText));
        if (orderBy) {
          listParams.orderBy = orderBy;
        }

        const response = await this.driveApi.files.list(listParams);
        const files = response.data.files;
        if (!files) {
          return [];
        }

        const text = query.text || '';
        return files.map((file: drive_v3.Schema$File) => {
          const snippet = this.generateSnippet(file, text);
          return {
            file: this.mapGoogleFileToDriveFile(file),
            relevanceScore: this.calculateRelevanceScore(file, text),
            matchedFields: this.getMatchedFields(file, text),
            ...(snippet ? { snippet } : {})
          };
        });
      }, 'searchFiles');
    }, { operation: 'searchFiles', query });
  }

  /**
   * Search within file content (for supported file types).
   *
   * Currently only Google Docs are dispatched to a content search, and that
   * search is itself a placeholder; every other MIME type yields no matches.
   *
   * @param fileId Drive file ID.
   * @param query Text to look for.
   * @returns Content matches (currently always empty).
   * @throws ValidationError when `fileId` or `query` is empty or not a string.
   * @throws InvalidFileIdError when Drive answers the metadata lookup with 404.
   */
  async searchInContent(fileId: string, query: string): Promise<ContentMatch[]> {
    if (!fileId || typeof fileId !== 'string') {
      throw new ValidationError('File ID must be a non-empty string');
    }
    if (!query || typeof query !== 'string') {
      throw new ValidationError('Search query must be a non-empty string');
    }
    return this.resilientExecutor.execute(async () => {
      return this.authService.executeWithAuth(async () => {
        try {
          // Fetch metadata first to decide how the content can be searched.
          const fileResponse = await this.driveApi.files.get({
            fileId,
            fields: 'mimeType, name',
            supportsAllDrives: true
          });
          const mimeType = fileResponse.data.mimeType;
          if (mimeType === 'application/vnd.google-apps.document') {
            return this.searchInGoogleDoc(fileId, query);
          }
          // Other file types would need to be downloaded and processed;
          // not implemented yet.
          return [];
        } catch (error: unknown) {
          if (this.isNotFoundError(error)) {
            throw new InvalidFileIdError(`File not found: ${fileId}`);
          }
          throw error;
        }
      }, 'searchInContent');
    }, { operation: 'searchInContent', fileId, query });
  }

  /**
   * Build a Google Drive search query string from SearchFilters.
   *
   * Every filter that is set contributes one condition; conditions are joined
   * with `and`, multi-valued filters are OR-ed inside parentheses. Trashed
   * files are excluded unless `filters.trashed` is set explicitly. All string
   * values (including folder IDs and MIME types) are escaped before being
   * placed inside single quotes.
   *
   * @param filters Structured filter set.
   * @returns Query string suitable for the `q` parameter of `files.list`.
   */
  buildSearchQuery(filters: SearchFilters): string {
    const conditions: string[] = [];
    const quote = (value: string): string => `'${this.escapeQueryString(value)}'`;
    const anyOf = (clauses: string[]): string => `(${clauses.join(' or ')})`;

    if (filters.name) {
      conditions.push(`name contains ${quote(filters.name)}`);
    }
    if (filters.fullText) {
      conditions.push(`fullText contains ${quote(filters.fullText)}`);
    }
    if (filters.mimeType && filters.mimeType.length > 0) {
      conditions.push(anyOf(filters.mimeType.map(type => {
        const mimeType =
          GoogleDriveSearchEngine.MIME_TYPE_SHORTCUTS.get(type.toLowerCase()) || type;
        // A trailing slash marks a MIME type prefix such as 'image/'.
        const operator = mimeType.endsWith('/') ? 'contains' : '=';
        return `mimeType ${operator} ${quote(mimeType)}`;
      })));
    }
    if (filters.parents && filters.parents.length > 0) {
      conditions.push(anyOf(filters.parents.map(parentId => `${quote(parentId)} in parents`)));
    }
    if (filters.owners && filters.owners.length > 0) {
      conditions.push(anyOf(filters.owners.map(owner => `${quote(owner)} in owners`)));
    }
    if (filters.writers && filters.writers.length > 0) {
      conditions.push(anyOf(filters.writers.map(writer => `${quote(writer)} in writers`)));
    }
    if (filters.readers && filters.readers.length > 0) {
      conditions.push(anyOf(filters.readers.map(reader => `${quote(reader)} in readers`)));
    }
    if (filters.sharedWithMe !== undefined) {
      conditions.push(`sharedWithMe = ${filters.sharedWithMe}`);
    }
    if (filters.starred !== undefined) {
      conditions.push(`starred = ${filters.starred}`);
    }
    // Exclude trashed files unless the caller says otherwise. This condition
    // is always present, so the resulting query is never empty.
    conditions.push(`trashed = ${filters.trashed !== undefined ? filters.trashed : false}`);

    if (filters.modifiedTime) {
      if (filters.modifiedTime.after) {
        conditions.push(`modifiedTime > '${filters.modifiedTime.after.toISOString()}'`);
      }
      if (filters.modifiedTime.before) {
        conditions.push(`modifiedTime < '${filters.modifiedTime.before.toISOString()}'`);
      }
    }
    if (filters.createdTime) {
      if (filters.createdTime.after) {
        conditions.push(`createdTime > '${filters.createdTime.after.toISOString()}'`);
      }
      if (filters.createdTime.before) {
        conditions.push(`createdTime < '${filters.createdTime.before.toISOString()}'`);
      }
    }
    if (filters.properties) {
      for (const [key, value] of Object.entries(filters.properties)) {
        conditions.push(`properties has { key=${quote(key)} and value=${quote(value)} }`);
      }
    }
    return conditions.join(' and ');
  }

  /**
   * Translate a SearchQuery into the SearchFilters understood by buildSearchQuery.
   */
  private buildFiltersFromQuery(query: SearchQuery): SearchFilters {
    const filters: SearchFilters = {};
    if (query.text) {
      // NOTE(review): both conditions are AND-ed by buildSearchQuery, so only
      // files whose *name* matches are returned even though fullText also
      // covers content and description. Confirm whether OR was intended.
      filters.name = query.text;
      filters.fullText = query.text;
    }
    if (query.fileType) {
      filters.mimeType = query.fileType;
    }
    if (query.folderId) {
      filters.parents = [query.folderId];
    }
    if (query.modifiedAfter || query.modifiedBefore) {
      filters.modifiedTime = {};
      if (query.modifiedAfter) {
        filters.modifiedTime.after = query.modifiedAfter;
      }
      if (query.modifiedBefore) {
        filters.modifiedTime.before = query.modifiedBefore;
      }
    }
    if (query.owner) {
      filters.owners = [query.owner];
    }
    return filters;
  }

  /**
   * Decide which `orderBy` value, if any, to send to Drive.
   *
   * Returns `undefined` (let Drive use its default ordering) when the caller
   * requested no ordering, or when the query has a `fullText` term: Drive
   * rejects explicit sorting for such queries and always returns them by
   * descending relevance. Sort keys are ascending unless suffixed with `desc`;
   * as before, the direction defaults to descending.
   */
  private resolveOrderBy(query: SearchQuery, hasFullTextTerm: boolean): string | undefined {
    if (!query.orderBy || hasFullTextTerm) {
      return undefined;
    }
    const direction = query.orderDirection || 'desc';
    return direction === 'desc' ? `${query.orderBy} desc` : `${query.orderBy}`;
  }

  /**
   * Clamp a requested result limit to the page sizes Drive accepts (1..1000).
   * Missing, non-numeric, non-finite or sub-1 limits fall back to the default.
   */
  private resolvePageSize(limit: unknown): number {
    if (typeof limit !== 'number' || !Number.isFinite(limit) || limit < 1) {
      return GoogleDriveSearchEngine.DEFAULT_PAGE_SIZE;
    }
    return Math.min(Math.floor(limit), GoogleDriveSearchEngine.MAX_PAGE_SIZE);
  }

  /**
   * Whether a thrown value represents an HTTP 404. Depending on library
   * version the status may be exposed as `status`, `code` or `response.status`.
   */
  private isNotFoundError(error: unknown): boolean {
    if (typeof error !== 'object' || error === null) {
      return false;
    }
    const candidate = error as {
      status?: unknown;
      code?: unknown;
      response?: { status?: unknown } | null;
    };
    return [candidate.status, candidate.code, candidate.response?.status]
      .some(value => Number(value) === 404);
  }

  /**
   * Search within Google Docs content.
   * Placeholder: would require Google Docs API integration; returns no matches.
   */
  private async searchInGoogleDoc(_fileId: string, _query: string): Promise<ContentMatch[]> {
    return [];
  }

  /**
   * Calculate a heuristic relevance score in the range [0, 1].
   *
   * Without a query every file scores 1.0. Otherwise: exact (case-insensitive)
   * name match +1.0, partial name match +0.8, description match +0.3,
   * modified within 7 days +0.2 (within 30 days +0.1), and +0.1 when the MIME
   * type contains 'document' or 'pdf'. The sum is capped at 1.0.
   */
  private calculateRelevanceScore(file: drive_v3.Schema$File, query: string): number {
    if (!query) return 1.0;
    const needle = query.toLowerCase();
    const fileName = (file.name || '').toLowerCase();
    const description = (file.description || '').toLowerCase();
    let score = 0;

    if (fileName === needle) {
      score += 1.0;
    } else if (fileName.includes(needle)) {
      score += 0.8;
    }
    if (description.includes(needle)) {
      score += 0.3;
    }

    // A missing modifiedTime is treated as the epoch, i.e. "very old".
    const modifiedAt = new Date(file.modifiedTime || 0).getTime();
    const daysSinceModified = (Date.now() - modifiedAt) / (1000 * 60 * 60 * 24);
    if (daysSinceModified < 7) {
      score += 0.2;
    } else if (daysSinceModified < 30) {
      score += 0.1;
    }

    const mimeType = file.mimeType || '';
    if (mimeType.includes('document') || mimeType.includes('pdf')) {
      score += 0.1;
    }
    return Math.min(score, 1.0);
  }

  /**
   * List the metadata fields ('name', 'description') that contain the query,
   * compared case-insensitively. Empty when there is no query.
   */
  private getMatchedFields(file: drive_v3.Schema$File, query: string): string[] {
    if (!query) return [];
    const needle = query.toLowerCase();
    const matchedFields: string[] = [];
    if ((file.name || '').toLowerCase().includes(needle)) {
      matchedFields.push('name');
    }
    if ((file.description || '').toLowerCase().includes(needle)) {
      matchedFields.push('description');
    }
    return matchedFields;
  }

  /**
   * Build a snippet of the file description around the first case-insensitive
   * occurrence of the query: up to 50 characters of context on each side,
   * with '...' marking truncation.
   *
   * @returns The snippet, or `undefined` when there is no query or no match.
   */
  private generateSnippet(file: drive_v3.Schema$File, query: string): string | undefined {
    if (!query) return undefined;
    const description = file.description || '';
    const index = description.toLowerCase().indexOf(query.toLowerCase());
    if (index === -1) {
      return undefined;
    }
    const start = Math.max(0, index - 50);
    const end = Math.min(description.length, index + query.length + 50);
    const prefix = start > 0 ? '...' : '';
    const suffix = end < description.length ? '...' : '';
    return `${prefix}${description.substring(start, end)}${suffix}`;
  }

  /**
   * Map a Google Drive API file object to our DriveFile interface, filling in
   * defaults for fields Drive did not return.
   */
  private mapGoogleFileToDriveFile(file: drive_v3.Schema$File): DriveFile {
    return {
      id: file.id!,
      name: file.name || 'Untitled',
      mimeType: file.mimeType || 'application/octet-stream',
      // Drive reports size as a decimal string; fall back to 0 if it is
      // missing or unparsable rather than propagating NaN.
      size: Number.parseInt(file.size || '0', 10) || 0,
      modifiedTime: new Date(file.modifiedTime || Date.now()),
      createdTime: new Date(file.createdTime || Date.now()),
      owners: (file.owners || []).map(owner => ({
        displayName: owner.displayName || 'Unknown User',
        emailAddress: owner.emailAddress || '',
        ...(owner.photoLink && { photoLink: owner.photoLink }),
        me: owner.me || false
      })),
      permissions: (file.permissions || []).map(permission => ({
        id: permission.id!,
        type: (permission.type as any) || 'user',
        role: (permission.role as any) || 'reader',
        ...(permission.emailAddress && { emailAddress: permission.emailAddress }),
        ...(permission.domain && { domain: permission.domain }),
        ...(permission.displayName && { displayName: permission.displayName })
      })),
      webViewLink: file.webViewLink || '',
      ...(file.webContentLink && { webContentLink: file.webContentLink }),
      parents: file.parents || []
    };
  }

  /**
   * Escape backslashes and single quotes so a value can be embedded in a
   * single-quoted Drive query literal. Backslashes are escaped first so the
   * escapes added for quotes are not doubled.
   */
  private escapeQueryString(query: string): string {
    return query.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  }
}