files.py•30.4 kB
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import time
import typing_extensions
from typing import Mapping, cast
from typing_extensions import Literal
import httpx
from .. import _legacy_response
from ..types import FilePurpose, file_list_params, file_create_params
from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given
from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
StreamedBinaryAPIResponse,
AsyncStreamedBinaryAPIResponse,
to_streamed_response_wrapper,
async_to_streamed_response_wrapper,
to_custom_streamed_response_wrapper,
async_to_custom_streamed_response_wrapper,
)
from ..pagination import SyncCursorPage, AsyncCursorPage
from .._base_client import AsyncPaginator, make_request_options
from ..types.file_object import FileObject
from ..types.file_deleted import FileDeleted
from ..types.file_purpose import FilePurpose
__all__ = ["Files", "AsyncFiles"]
class Files(SyncAPIResource):
@cached_property
def with_raw_response(self) -> FilesWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
"""
return FilesWithRawResponse(self)
@cached_property
def with_streaming_response(self) -> FilesWithStreamingResponse:
"""
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
For more information, see https://www.github.com/openai/openai-python#with_streaming_response
"""
return FilesWithStreamingResponse(self)
def create(
self,
*,
file: FileTypes,
purpose: FilePurpose,
expires_after: file_create_params.ExpiresAfter | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> FileObject:
"""Upload a file that can be used across various endpoints.
Individual files can be
up to 512 MB, and the size of all files uploaded by one organization can be up
to 1 TB.
- The Assistants API supports files up to 2 million tokens and of specific file
types. See the
[Assistants Tools guide](https://platform.openai.com/docs/assistants/tools)
for details.
- The Fine-tuning API only supports `.jsonl` files. The input also has certain
required formats for fine-tuning
[chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input)
or
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
models.
- The Batch API only supports `.jsonl` files up to 200 MB in size. The input
also has a specific required
[format](https://platform.openai.com/docs/api-reference/batch/request-input).
Please [contact us](https://help.openai.com/) if you need to increase these
storage limits.
Args:
file: The File object (not file name) to be uploaded.
purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the
Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for
fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`:
Flexible file type for any purpose - `evals`: Used for eval data sets
expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire
after 30 days and all other files are persisted until they are manually deleted.
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
body = deepcopy_minimal(
{
"file": file,
"purpose": purpose,
"expires_after": expires_after,
}
)
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return self._post(
"/files",
body=maybe_transform(body, file_create_params.FileCreateParams),
files=files,
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=FileObject,
)
def retrieve(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> FileObject:
"""
Returns information about a specific file.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return self._get(
f"/files/{file_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=FileObject,
)
def list(
self,
*,
after: str | Omit = omit,
limit: int | Omit = omit,
order: Literal["asc", "desc"] | Omit = omit,
purpose: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> SyncCursorPage[FileObject]:
"""Returns a list of files.
Args:
after: A cursor for use in pagination.
`after` is an object ID that defines your place
in the list. For instance, if you make a list request and receive 100 objects,
ending with obj_foo, your subsequent call can include after=obj_foo in order to
fetch the next page of the list.
limit: A limit on the number of objects to be returned. Limit can range between 1 and
10,000, and the default is 10,000.
order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
order and `desc` for descending order.
purpose: Only return files with the given purpose.
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get_api_list(
"/files",
page=SyncCursorPage[FileObject],
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
query=maybe_transform(
{
"after": after,
"limit": limit,
"order": order,
"purpose": purpose,
},
file_list_params.FileListParams,
),
),
model=FileObject,
)
def delete(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> FileDeleted:
"""
Delete a file and remove it from all vector stores.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return self._delete(
f"/files/{file_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=FileDeleted,
)
def content(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> _legacy_response.HttpxBinaryResponseContent:
"""
Returns the contents of the specified file.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
return self._get(
f"/files/{file_id}/content",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=_legacy_response.HttpxBinaryResponseContent,
)
@typing_extensions.deprecated("The `.content()` method should be used instead")
def retrieve_content(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> str:
"""
Returns the contents of the specified file.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return self._get(
f"/files/{file_id}/content",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=str,
)
def wait_for_processing(
self,
id: str,
*,
poll_interval: float = 5.0,
max_wait_seconds: float = 30 * 60,
) -> FileObject:
"""Waits for the given file to be processed, default timeout is 30 mins."""
TERMINAL_STATES = {"processed", "error", "deleted"}
start = time.time()
file = self.retrieve(id)
while file.status not in TERMINAL_STATES:
self._sleep(poll_interval)
file = self.retrieve(id)
if time.time() - start > max_wait_seconds:
raise RuntimeError(
f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
)
return file
class AsyncFiles(AsyncAPIResource):
@cached_property
def with_raw_response(self) -> AsyncFilesWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
"""
return AsyncFilesWithRawResponse(self)
@cached_property
def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
"""
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
For more information, see https://www.github.com/openai/openai-python#with_streaming_response
"""
return AsyncFilesWithStreamingResponse(self)
async def create(
self,
*,
file: FileTypes,
purpose: FilePurpose,
expires_after: file_create_params.ExpiresAfter | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> FileObject:
"""Upload a file that can be used across various endpoints.
Individual files can be
up to 512 MB, and the size of all files uploaded by one organization can be up
to 1 TB.
- The Assistants API supports files up to 2 million tokens and of specific file
types. See the
[Assistants Tools guide](https://platform.openai.com/docs/assistants/tools)
for details.
- The Fine-tuning API only supports `.jsonl` files. The input also has certain
required formats for fine-tuning
[chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input)
or
[completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input)
models.
- The Batch API only supports `.jsonl` files up to 200 MB in size. The input
also has a specific required
[format](https://platform.openai.com/docs/api-reference/batch/request-input).
Please [contact us](https://help.openai.com/) if you need to increase these
storage limits.
Args:
file: The File object (not file name) to be uploaded.
purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the
Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for
fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`:
Flexible file type for any purpose - `evals`: Used for eval data sets
expires_after: The expiration policy for a file. By default, files with `purpose=batch` expire
after 30 days and all other files are persisted until they are manually deleted.
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
body = deepcopy_minimal(
{
"file": file,
"purpose": purpose,
"expires_after": expires_after,
}
)
files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return await self._post(
"/files",
body=await async_maybe_transform(body, file_create_params.FileCreateParams),
files=files,
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=FileObject,
)
async def retrieve(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> FileObject:
"""
Returns information about a specific file.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return await self._get(
f"/files/{file_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=FileObject,
)
def list(
self,
*,
after: str | Omit = omit,
limit: int | Omit = omit,
order: Literal["asc", "desc"] | Omit = omit,
purpose: str | Omit = omit,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]:
"""Returns a list of files.
Args:
after: A cursor for use in pagination.
`after` is an object ID that defines your place
in the list. For instance, if you make a list request and receive 100 objects,
ending with obj_foo, your subsequent call can include after=obj_foo in order to
fetch the next page of the list.
limit: A limit on the number of objects to be returned. Limit can range between 1 and
10,000, and the default is 10,000.
order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
order and `desc` for descending order.
purpose: Only return files with the given purpose.
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
return self._get_api_list(
"/files",
page=AsyncCursorPage[FileObject],
options=make_request_options(
extra_headers=extra_headers,
extra_query=extra_query,
extra_body=extra_body,
timeout=timeout,
query=maybe_transform(
{
"after": after,
"limit": limit,
"order": order,
"purpose": purpose,
},
file_list_params.FileListParams,
),
),
model=FileObject,
)
async def delete(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> FileDeleted:
"""
Delete a file and remove it from all vector stores.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return await self._delete(
f"/files/{file_id}",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=FileDeleted,
)
async def content(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> _legacy_response.HttpxBinaryResponseContent:
"""
Returns the contents of the specified file.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
return await self._get(
f"/files/{file_id}/content",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=_legacy_response.HttpxBinaryResponseContent,
)
@typing_extensions.deprecated("The `.content()` method should be used instead")
async def retrieve_content(
self,
file_id: str,
*,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = not_given,
) -> str:
"""
Returns the contents of the specified file.
Args:
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
extra_body: Add additional JSON properties to the request
timeout: Override the client-level default timeout for this request, in seconds
"""
if not file_id:
raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
return await self._get(
f"/files/{file_id}/content",
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
),
cast_to=str,
)
async def wait_for_processing(
self,
id: str,
*,
poll_interval: float = 5.0,
max_wait_seconds: float = 30 * 60,
) -> FileObject:
"""Waits for the given file to be processed, default timeout is 30 mins."""
TERMINAL_STATES = {"processed", "error", "deleted"}
start = time.time()
file = await self.retrieve(id)
while file.status not in TERMINAL_STATES:
await self._sleep(poll_interval)
file = await self.retrieve(id)
if time.time() - start > max_wait_seconds:
raise RuntimeError(
f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
)
return file
class FilesWithRawResponse:
def __init__(self, files: Files) -> None:
self._files = files
self.create = _legacy_response.to_raw_response_wrapper(
files.create,
)
self.retrieve = _legacy_response.to_raw_response_wrapper(
files.retrieve,
)
self.list = _legacy_response.to_raw_response_wrapper(
files.list,
)
self.delete = _legacy_response.to_raw_response_wrapper(
files.delete,
)
self.content = _legacy_response.to_raw_response_wrapper(
files.content,
)
self.retrieve_content = ( # pyright: ignore[reportDeprecated]
_legacy_response.to_raw_response_wrapper(
files.retrieve_content, # pyright: ignore[reportDeprecated],
)
)
class AsyncFilesWithRawResponse:
def __init__(self, files: AsyncFiles) -> None:
self._files = files
self.create = _legacy_response.async_to_raw_response_wrapper(
files.create,
)
self.retrieve = _legacy_response.async_to_raw_response_wrapper(
files.retrieve,
)
self.list = _legacy_response.async_to_raw_response_wrapper(
files.list,
)
self.delete = _legacy_response.async_to_raw_response_wrapper(
files.delete,
)
self.content = _legacy_response.async_to_raw_response_wrapper(
files.content,
)
self.retrieve_content = ( # pyright: ignore[reportDeprecated]
_legacy_response.async_to_raw_response_wrapper(
files.retrieve_content, # pyright: ignore[reportDeprecated],
)
)
class FilesWithStreamingResponse:
def __init__(self, files: Files) -> None:
self._files = files
self.create = to_streamed_response_wrapper(
files.create,
)
self.retrieve = to_streamed_response_wrapper(
files.retrieve,
)
self.list = to_streamed_response_wrapper(
files.list,
)
self.delete = to_streamed_response_wrapper(
files.delete,
)
self.content = to_custom_streamed_response_wrapper(
files.content,
StreamedBinaryAPIResponse,
)
self.retrieve_content = ( # pyright: ignore[reportDeprecated]
to_streamed_response_wrapper(
files.retrieve_content, # pyright: ignore[reportDeprecated],
)
)
class AsyncFilesWithStreamingResponse:
def __init__(self, files: AsyncFiles) -> None:
self._files = files
self.create = async_to_streamed_response_wrapper(
files.create,
)
self.retrieve = async_to_streamed_response_wrapper(
files.retrieve,
)
self.list = async_to_streamed_response_wrapper(
files.list,
)
self.delete = async_to_streamed_response_wrapper(
files.delete,
)
self.content = async_to_custom_streamed_response_wrapper(
files.content,
AsyncStreamedBinaryAPIResponse,
)
self.retrieve_content = ( # pyright: ignore[reportDeprecated]
async_to_streamed_response_wrapper(
files.retrieve_content, # pyright: ignore[reportDeprecated],
)
)