/**
* AWS S3 / S3-Compatible Provider
*
* This provider implements the Provider interface for AWS S3 and S3-compatible services.
* Can be used for: AWS S3, MinIO, Backblaze B2, DigitalOcean Spaces, etc.
*/
import {
S3Client,
ListBucketsCommand,
ListObjectsV2Command,
GetObjectCommand,
HeadObjectCommand,
HeadBucketCommand,
} from "@aws-sdk/client-s3";
import { Provider, ProviderRegistry } from "../interface.js";
import type { SourceConfig, ProviderType } from "../../types/config.js";
import type {
BucketInfo,
ObjectInfo,
ObjectContent,
ObjectMetadata,
ListObjectsResult,
SearchFilter,
SearchResult,
} from "../../types/storage.js";
import type { ListObjectsOptions, GetObjectOptions } from "../interface.js";
import * as mime from "mime-types";
/**
* AWS S3 / S3-Compatible Provider Implementation
*/
class S3Provider implements Provider {
  /** Connect/socket timeout in milliseconds used when the source config sets none. */
  private static readonly DEFAULT_TIMEOUT_MS = 60000;

  /** Default cap, in bytes, on the body returned by getObject() (10 MiB). */
  private static readonly DEFAULT_MAX_CONTENT_BYTES = 10 * 1024 * 1024;

  /** Page size requested from ListObjectsV2. */
  private static readonly PAGE_SIZE = 1000;

  /** Substrings that mark a Content-Type as text for getObject(). */
  private static readonly TEXT_TYPE_MARKERS: readonly string[] = [
    "text/",
    "application/json",
    "application/xml",
    "application/javascript",
    "application/typescript",
    "application/x-yaml",
    "application/yaml",
    "application/toml",
    "application/x-sh",
    "application/x-python",
  ];

  readonly id: ProviderType = "s3";
  readonly name = "AWS S3 / S3-Compatible";
  private client: S3Client | null = null;
  private sourceId: string = "";
  private config: SourceConfig | null = null;

  /** Returns the id of the source passed to the last connect() call ("" before that). */
  getId(): string {
    return this.sourceId;
  }

  /** Creates a fresh, unconnected provider instance. */
  clone(): Provider {
    return new S3Provider();
  }

  /**
   * Builds an S3 client from `config` and verifies it by listing buckets.
   *
   * Any client left over from an earlier connect() is destroyed first so its
   * sockets are not leaked.
   *
   * @throws Error prefixed with "Failed to connect to S3:" when the probe request fails;
   *   the provider is left disconnected in that case.
   */
  async connect(config: SourceConfig): Promise<void> {
    if (this.client) {
      this.client.destroy();
      this.client = null;
    }

    this.config = config;
    this.sourceId = config.id;

    // connection_timeout is given in seconds; the handler options take milliseconds.
    const timeoutMs = config.connection_timeout
      ? config.connection_timeout * 1000
      : S3Provider.DEFAULT_TIMEOUT_MS;

    const client = new S3Client({
      endpoint: config.endpoint,
      region: config.region || "us-east-1",
      credentials: {
        accessKeyId: config.access_key,
        secretAccessKey: config.secret_key,
        sessionToken: config.security_token,
      },
      forcePathStyle: config.path_style ?? true, // Default to path style for compatibility
      // A plain options object is passed instead of a handler instance, hence the cast.
      // NOTE(review): confirm the installed SDK version builds its handler from these options.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      requestHandler: {
        connectionTimeout: timeoutMs,
        socketTimeout: timeoutMs,
      } as any,
    });

    // Test connection by listing buckets
    try {
      await client.send(new ListBucketsCommand({}));
    } catch (error) {
      // Release the half-initialised client instead of just dropping the reference.
      client.destroy();
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to connect to S3: ${reason}`);
    }

    this.client = client;
  }

  /** Destroys the current client, if any. Safe to call when not connected. */
  async disconnect(): Promise<void> {
    if (this.client) {
      this.client.destroy();
      this.client = null;
    }
  }

  /**
   * Returns the connected client.
   *
   * @throws Error when connect() has not succeeded yet (or disconnect() was called).
   */
  private ensureClient(): S3Client {
    if (!this.client) {
      throw new Error("S3 client not connected. Call connect() first.");
    }
    return this.client;
  }

  /** Removes the double quotes the service wraps around ETag values. */
  private static unquoteEtag(etag: string | undefined): string | undefined {
    return etag?.replace(/"/g, "");
  }

  /** Lists the buckets visible to the configured credentials. */
  async listBuckets(): Promise<BucketInfo[]> {
    const client = this.ensureClient();
    const result = await client.send(new ListBucketsCommand({}));
    return (result.Buckets || []).map((bucket) => ({
      name: bucket.Name || "",
      creationDate: bucket.CreationDate?.toISOString(),
    }));
  }

  /**
   * Fetches a single page of objects from `bucket`.
   *
   * @param options Optional prefix, delimiter, page size (default 1000) and
   *   continuation token from a previous page.
   * @returns The page's objects and common prefixes plus paging information;
   *   `commonPrefixes` is omitted when the page has none.
   */
  async listObjects(bucket: string, options?: ListObjectsOptions): Promise<ListObjectsResult> {
    const client = this.ensureClient();
    const command = new ListObjectsV2Command({
      Bucket: bucket,
      MaxKeys: options?.maxKeys || S3Provider.PAGE_SIZE,
      Prefix: options?.prefix,
      Delimiter: options?.delimiter,
      ContinuationToken: options?.continuationToken,
    });
    const result = await client.send(command);

    const objects: ObjectInfo[] = (result.Contents || []).map((obj) => ({
      key: obj.Key || "",
      size: obj.Size || 0,
      lastModified: obj.LastModified?.toISOString(),
      etag: S3Provider.unquoteEtag(obj.ETag),
      storageClass: obj.StorageClass,
      owner: obj.Owner
        ? {
            id: obj.Owner.ID,
            displayName: obj.Owner.DisplayName,
          }
        : undefined,
    }));

    const commonPrefixes = (result.CommonPrefixes || [])
      .map((p) => p.Prefix)
      .filter((p): p is string => !!p);

    return {
      objects,
      prefix: result.Prefix,
      delimiter: result.Delimiter,
      isTruncated: result.IsTruncated || false,
      nextContinuationToken: result.NextContinuationToken,
      commonPrefixes: commonPrefixes.length > 0 ? commonPrefixes : undefined,
      keyCount: result.KeyCount || objects.length,
      maxKeys: result.MaxKeys || S3Provider.PAGE_SIZE,
    };
  }

  /**
   * Downloads an object (or a byte range of it) and returns its content.
   *
   * Text content types are returned in `content` decoded as UTF-8, everything
   * else in `contentBase64`. At most `options.maxSize` bytes (default 10 MiB)
   * are returned; when the body is larger, `truncated` is true and reading
   * stops early instead of buffering the whole object. Truncated text is cut
   * on a UTF-8 character boundary, so it may be up to three bytes shorter
   * than the cap.
   *
   * `contentLength` is the length reported by the service, not the number of
   * bytes actually returned.
   */
  async getObject(bucket: string, key: string, options?: GetObjectOptions): Promise<ObjectContent> {
    const client = this.ensureClient();

    const wantsRange = options?.rangeStart !== undefined || options?.rangeEnd !== undefined;
    const command = new GetObjectCommand({
      Bucket: bucket,
      Key: key,
      // An open-ended range ("bytes=N-") is sent when only rangeStart is given.
      Range: wantsRange ? `bytes=${options?.rangeStart ?? 0}-${options?.rangeEnd ?? ""}` : undefined,
    });
    const result = await client.send(command);

    const contentType = result.ContentType || mime.lookup(key) || "application/octet-stream";
    const contentLength = result.ContentLength || 0;
    const maxSize = options?.maxSize || S3Provider.DEFAULT_MAX_CONTENT_BYTES; // Default 10MB

    // Read body, stopping as soon as more than maxSize bytes have arrived.
    let bodyBuffer: Buffer | undefined;
    if (result.Body) {
      // In Node.js the body is a Readable stream, which is async-iterable; the
      // SDK's declared type also covers browser payloads, hence the cast.
      const stream = result.Body as unknown as AsyncIterable<Uint8Array>;
      const chunks: Uint8Array[] = [];
      let received = 0;
      for await (const chunk of stream) {
        chunks.push(chunk);
        received += chunk.length;
        if (received > maxSize) {
          // One byte past the cap is enough to know the result is truncated.
          break;
        }
      }
      bodyBuffer = Buffer.concat(chunks);
    }

    let content: string | undefined;
    let contentBase64: string | undefined;
    let truncated = false;

    if (bodyBuffer) {
      // Determine if content is text-based
      const isText = this.isTextContent(contentType);
      truncated = bodyBuffer.length > maxSize;

      let payload = bodyBuffer;
      if (truncated) {
        const end = isText ? S3Provider.utf8SafeLength(bodyBuffer, maxSize) : maxSize;
        payload = bodyBuffer.subarray(0, end);
      }

      if (isText) {
        content = payload.toString("utf-8");
      } else {
        contentBase64 = payload.toString("base64");
      }
    }

    return {
      key,
      content,
      contentBase64,
      contentType,
      contentLength,
      truncated,
      lastModified: result.LastModified?.toISOString(),
      etag: S3Provider.unquoteEtag(result.ETag),
      metadata: result.Metadata,
    };
  }

  /** Retrieves an object's headers and user metadata via a HEAD request (no body download). */
  async getObjectMetadata(bucket: string, key: string): Promise<ObjectMetadata> {
    const client = this.ensureClient();
    const command = new HeadObjectCommand({
      Bucket: bucket,
      Key: key,
    });
    const result = await client.send(command);
    return {
      key,
      contentType: result.ContentType || "application/octet-stream",
      contentLength: result.ContentLength || 0,
      lastModified: result.LastModified?.toISOString(),
      etag: S3Provider.unquoteEtag(result.ETag),
      storageClass: result.StorageClass,
      cacheControl: result.CacheControl,
      contentDisposition: result.ContentDisposition,
      contentEncoding: result.ContentEncoding,
      contentLanguage: result.ContentLanguage,
      expires: result.Expires?.toISOString(),
      metadata: result.Metadata,
    };
  }

  /**
   * Reports whether a HEAD request on `bucket` succeeds.
   *
   * Best-effort: every failure of the request yields `false`, so `false`
   * does not by itself prove the bucket is absent.
   */
  async bucketExists(bucket: string): Promise<boolean> {
    const client = this.ensureClient();
    try {
      await client.send(new HeadBucketCommand({ Bucket: bucket }));
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Reports whether a HEAD request on `key` in `bucket` succeeds.
   *
   * Best-effort: every failure of the request yields `false`, so `false`
   * does not by itself prove the object is absent.
   */
  async objectExists(bucket: string, key: string): Promise<boolean> {
    const client = this.ensureClient();
    try {
      await client.send(new HeadObjectCommand({ Bucket: bucket, Key: key }));
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Scans `bucket` (optionally below `filter.prefix`) page by page and collects
   * the objects that satisfy `filter`, up to `filter.maxResults` (default 100).
   *
   * @returns The matches, with `hasMore` set when scanning stopped at the
   *   result limit while unscanned objects remained. Those remaining objects
   *   have not been tested against the filter.
   */
  async searchObjects(bucket: string, filter: SearchFilter): Promise<SearchResult> {
    const client = this.ensureClient();
    const maxResults = filter.maxResults || 100;
    // Compile the glob once instead of once per listed object.
    const pattern = filter.pattern ? this.globToRegex(filter.pattern) : undefined;

    const matchingObjects: ObjectInfo[] = [];
    let continuationToken: string | undefined;
    let hasMore = false;

    // List all objects and filter
    do {
      const result = await client.send(
        new ListObjectsV2Command({
          Bucket: bucket,
          MaxKeys: S3Provider.PAGE_SIZE,
          Prefix: filter.prefix,
          ContinuationToken: continuationToken,
        }),
      );

      for (const obj of result.Contents || []) {
        if (matchingObjects.length >= maxResults) {
          // Limit reached with objects still left on this page.
          hasMore = true;
          break;
        }
        const objInfo: ObjectInfo = {
          key: obj.Key || "",
          size: obj.Size || 0,
          lastModified: obj.LastModified?.toISOString(),
          etag: S3Provider.unquoteEtag(obj.ETag),
          storageClass: obj.StorageClass,
        };
        if (this.matchesFilter(objInfo, filter, pattern)) {
          matchingObjects.push(objInfo);
        }
      }

      if (matchingObjects.length >= maxResults) {
        // The limit may have been hit on the page's last object; further pages
        // then still count as "more".
        hasMore = hasMore || Boolean(result.IsTruncated);
        break;
      }
      if (!result.IsTruncated) {
        break;
      }
      continuationToken = result.NextContinuationToken;
    } while (continuationToken);

    return {
      objects: matchingObjects,
      totalCount: matchingObjects.length,
      hasMore,
      filter,
    };
  }

  /** Returns an example endpoint URL for this provider type. */
  getSampleEndpoint(): string {
    return "https://s3.amazonaws.com";
  }

  /** Accepts only endpoints that parse as absolute http(s) URLs. */
  isValidEndpoint(endpoint: string): boolean {
    try {
      const { protocol } = new URL(endpoint);
      return protocol === "http:" || protocol === "https:";
    } catch {
      return false;
    }
  }

  /**
   * Check if content type is text-based
   *
   * The comparison ignores case and matches a marker anywhere in the value,
   * so parameters such as "; charset=utf-8" do not matter.
   */
  private isTextContent(contentType: string): boolean {
    const normalized = contentType.toLowerCase();
    return S3Provider.TEXT_TYPE_MARKERS.some((marker) => normalized.includes(marker));
  }

  /**
   * Returns the largest length <= `limit` at which `bytes` can be cut without
   * splitting a UTF-8 encoded character. Input that does not look like UTF-8
   * around the cut is left at `limit`.
   */
  private static utf8SafeLength(bytes: Uint8Array, limit: number): number {
    if (limit >= bytes.length) {
      return bytes.length;
    }
    if (limit <= 0) {
      return 0;
    }

    // Step back over at most three continuation bytes (10xxxxxx) to the lead
    // byte of the character that straddles or ends at the cut.
    let lead = limit - 1;
    while (lead > 0 && limit - lead < 4 && ((bytes[lead] ?? 0) & 0xc0) === 0x80) {
      lead--;
    }

    const first = bytes[lead] ?? 0;
    let width: number;
    if (first < 0x80) {
      width = 1;
    } else if ((first & 0xe0) === 0xc0) {
      width = 2;
    } else if ((first & 0xf0) === 0xe0) {
      width = 3;
    } else if ((first & 0xf8) === 0xf0) {
      width = 4;
    } else {
      return limit; // Not a lead byte: nothing sensible to align to.
    }

    // Drop the last character when its encoding extends past the cut.
    return lead + width > limit ? lead : limit;
  }

  /**
   * Returns the lower-cased extension (including the dot) of the last path
   * segment of `key`, or undefined when that segment contains no dot.
   */
  private static extensionOf(key: string): string | undefined {
    const baseName = key.slice(key.lastIndexOf("/") + 1);
    const dot = baseName.lastIndexOf(".");
    return dot === -1 ? undefined : baseName.slice(dot).toLowerCase();
  }

  /**
   * Check if an object matches the search filter
   *
   * All criteria present in `filter` must hold. Date criteria are skipped for
   * objects without a `lastModified` value.
   *
   * @param compiledPattern Optional pre-compiled form of `filter.pattern`;
   *   when omitted the pattern is compiled on the fly.
   */
  private matchesFilter(obj: ObjectInfo, filter: SearchFilter, compiledPattern?: RegExp): boolean {
    // Extension filter (case-insensitive; entries may be written ".txt" or "txt")
    if (filter.extensions && filter.extensions.length > 0) {
      const ext = S3Provider.extensionOf(obj.key);
      if (ext === undefined) {
        return false;
      }
      const accepted = filter.extensions.some((candidate) => {
        const wanted = candidate.toLowerCase();
        return wanted === ext || (wanted !== "" && `.${wanted}` === ext);
      });
      if (!accepted) {
        return false;
      }
    }

    // Suffix filter
    if (filter.suffix && !obj.key.endsWith(filter.suffix)) {
      return false;
    }

    // Size filters
    if (filter.minSize !== undefined && obj.size < filter.minSize) {
      return false;
    }
    if (filter.maxSize !== undefined && obj.size > filter.maxSize) {
      return false;
    }

    // Date filters
    if (filter.modifiedAfter && obj.lastModified) {
      if (new Date(obj.lastModified) < new Date(filter.modifiedAfter)) {
        return false;
      }
    }
    if (filter.modifiedBefore && obj.lastModified) {
      if (new Date(obj.lastModified) > new Date(filter.modifiedBefore)) {
        return false;
      }
    }

    // Pattern filter (simple glob matching against the whole key)
    if (filter.pattern) {
      const regex = compiledPattern ?? this.globToRegex(filter.pattern);
      if (!regex.test(obj.key)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Convert glob pattern to regex
   *
   * `*` matches any run of characters (including "/"), `?` matches exactly
   * one character; everything else is literal. The result is anchored at both
   * ends and case-insensitive.
   */
  private globToRegex(glob: string): RegExp {
    const escaped = glob
      .replace(/[.+^${}()|[\]\\]/g, "\\$&")
      // A run of stars means the same as one; collapsing it avoids stacked
      // ".*" groups that backtrack badly on long keys.
      .replace(/\*+/g, ".*")
      .replace(/\?/g, ".");
    return new RegExp(`^${escaped}$`, "i");
  }
}
// Self-registration: loading this module adds one S3Provider instance to the
// shared ProviderRegistry. The class is also exported by name.
ProviderRegistry.register(new S3Provider());

export { S3Provider };