cognee_multimedia_demo.ipynb•28.7 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cognee GraphRAG with Multimedia files"
]
},
{
"cell_type": "markdown",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"source": [
"## Load Data\n",
"\n",
"We will use a few sample multimedia files which we have on GitHub for easy access."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:54:44.613431Z",
"start_time": "2025-06-30T11:54:44.606687Z"
}
},
"outputs": [],
"source": [
"import os\n",
"import pathlib\n",
"\n",
"# cognee knowledge graph will be created based on the text\n",
"# and description of these files\n",
"mp3_file_path = os.path.join(\n",
" os.path.abspath(\"\"),\n",
" \"../\",\n",
" \"examples/data/multimedia/text_to_speech.mp3\",\n",
")\n",
"png_file_path = os.path.join(\n",
" os.path.abspath(\"\"),\n",
" \"../\",\n",
" \"examples/data/multimedia/example.png\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set environment variables"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:54:46.739157Z",
"start_time": "2025-06-30T11:54:46.734808Z"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"if \"LLM_API_KEY\" not in os.environ:\n",
" os.environ[\"LLM_API_KEY\"] = \"\"\n",
"\n",
"# \"neo4j\" or \"networkx\"\n",
"os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"kuzu\"\n",
"# Not needed if using networkx\n",
"# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n",
"# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n",
"\n",
"# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n",
"os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n",
"# Not needed if using \"lancedb\" or \"pgvector\"\n",
"# os.environ[\"VECTOR_DB_URL\"]=\"\"\n",
"# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n",
"\n",
"# Relational Database provider \"sqlite\" or \"postgres\"\n",
"os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n",
"\n",
"# Database name\n",
"os.environ[\"DB_NAME\"] = \"cognee_db\"\n",
"\n",
"# Postgres specific parameters (Only if Postgres or PGVector is used)\n",
"# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n",
"# os.environ[\"DB_PORT\"]=\"5432\"\n",
"# os.environ[\"DB_USERNAME\"]=\"cognee\"\n",
"# os.environ[\"DB_PASSWORD\"]=\"cognee\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run Cognee with multimedia files"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[2m2025-08-27T14:33:41.256195\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted old log file: /Users/daulet/Desktop/dev/cognee-claude/logs/2025-08-27_14-00-27.log\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"/Users/daulet/Desktop/dev/cognee-claude/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"\n",
"\u001b[2m2025-08-27T14:33:42.133224\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mLogging initialized \u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m \u001b[36mcognee_version\u001b[0m=\u001b[35m0.2.4-local\u001b[0m \u001b[36mdatabase_path\u001b[0m=\u001b[35m/Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases\u001b[0m \u001b[36mgraph_database_name\u001b[0m=\u001b[35m\u001b[0m \u001b[36mos_info\u001b[0m=\u001b[35m'Darwin 24.5.0 (Darwin Kernel Version 24.5.0: Tue Apr 22 19:54:43 PDT 2025; root:xnu-11417.121.6~2/RELEASE_ARM64_T8132)'\u001b[0m \u001b[36mpython_version\u001b[0m=\u001b[35m3.12.7\u001b[0m \u001b[36mrelational_config\u001b[0m=\u001b[35mcognee_db\u001b[0m \u001b[36mstructlog_version\u001b[0m=\u001b[35m25.4.0\u001b[0m \u001b[36mvector_config\u001b[0m=\u001b[35mlancedb\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:42.133667\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase storage: /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:43.785214\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDeleted Kuzu database files at /Users/daulet/Desktop/dev/cognee-claude/cognee/.cognee_system/databases/cognee_graph_kuzu\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:44.215920\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mDatabase deleted successfully.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.shared.logging_utils\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[1mLangfuse client is disabled since no public_key was provided as a parameter or environment variable 'LANGFUSE_PUBLIC_KEY'. See our docs: https://langfuse.com/docs/sdk/python/low-level-sdk#initialize-client\u001b[0m\n",
"\u001b[92m15:33:44 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"User 37ea34fa-cae7-4bea-8cb3-1ba234688771 has registered.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[1mEmbeddingRateLimiter initialized: enabled=False, requests_limit=60, interval_seconds=60\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:50.440270\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:50.690756\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:50.996600\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.287352\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: pypdf_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.287759\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: text_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.288078\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: image_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.288341\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: audio_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:51.288576\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mRegistered loader: unstructured_loader\u001b[0m [\u001b[0m\u001b[1m\u001b[34mcognee.infrastructure.loaders.LoaderEngine\u001b[0m]\u001b[0m\n",
"\u001b[92m15:33:52 - LiteLLM:INFO\u001b[0m: utils.py:1274 - Wrapper: Completed Call, calling success_handler\n",
"\n",
"\u001b[1mWrapper: Completed Call, calling success_handler\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:52.455447\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:52.599686\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:52.806593\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:53.075106\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:53.209912\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:53.355890\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:33:53 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\u001b[92m15:33:57 - LiteLLM:INFO\u001b[0m: utils.py:1274 - Wrapper: Completed Call, calling success_handler\n",
"\n",
"\u001b[1mWrapper: Completed Call, calling success_handler\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.407561\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `ingest_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.560808\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `resolve_data_directories`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.713507\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `017311b3-90e5-53ce-9974-00c4d9551248`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.897060\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mOntology file 'None' not found. No owl ontology will be attached to the graph.\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:57.938027\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.093101\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.255165\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.428623\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:33:58.588682\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:33:58 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.706892\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.707703\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmer' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.708083\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.708440\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.708802\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.709129\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:19.709475\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'how many programmers does it take to change a light bulb? none, thats a hardware problem.' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:24.553989\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:34:24 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:32.883579\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:35.680233\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:35.825933\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:35.975352\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.126720\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.275404\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.424984\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.576258\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.754472\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run started: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:36.912219\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:37.053036\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:37.220157\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task started: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:34:37.388094\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:34:37 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.010321\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'person' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.012394\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmers' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.012794\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'object' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013111\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'light bulb' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013378\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'concept' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013598\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'hardware problem' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.013914\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'joke' in category 'classes'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:00.014215\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mNo close match found for 'programmer joke' in category 'individuals'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mOntologyAdapter\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:02.040520\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\u001b[92m15:35:02 - LiteLLM:INFO\u001b[0m: utils.py:3341 - \n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\n",
"\n",
"\u001b[1m\n",
"LiteLLM completion() model= gpt-5-mini; provider = openai\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:11.589828\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task started: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:14.446614\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `add_data_points`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:14.622281\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `summarize_text`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:14.820192\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `extract_graph_from_data`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.004173\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mAsync Generator task completed: `extract_chunks_from_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.518803\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `check_permissions_on_dataset`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.756519\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mCoroutine task completed: `classify_documents`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_base\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:35:15.978364\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mPipeline run completed: `9bd0d908-8e9e-5780-b4c2-09fc8d471f1b`\u001b[0m [\u001b[0m\u001b[1m\u001b[34mrun_tasks_with_telemetry()\u001b[0m]\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"{UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'): PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('69b78d3d-4d27-5d9f-918f-57e77b3cb10a'), dataset_id=UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'), dataset_name='main_dataset', payload=None, data_ingestion_info=[{'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('69b78d3d-4d27-5d9f-918f-57e77b3cb10a'), dataset_id=UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('17b5c469-a8ce-5347-bea5-ab3dba767d13')}, {'run_info': PipelineRunCompleted(status='PipelineRunCompleted', pipeline_run_id=UUID('69b78d3d-4d27-5d9f-918f-57e77b3cb10a'), dataset_id=UUID('a08926db-6319-5cd9-adc9-2cf9dfbc75e0'), dataset_name='main_dataset', payload=None, data_ingestion_info=None), 'data_id': UUID('5b1e3c7e-d837-5704-a3b3-53abdda3a84f')}])}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import cognee\n",
"\n",
"# Create a clean slate for cognee -- reset data and system state\n",
"await cognee.prune.prune_data()\n",
"await cognee.prune.prune_system(metadata=True)\n",
"\n",
"# Add multimedia files and make them available for cognify\n",
"await cognee.add([mp3_file_path, png_file_path])\n",
"\n",
"# Create knowledge graph with cognee\n",
"await cognee.cognify()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query Cognee for summaries related to multimedia files"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-30T11:44:56.372628Z",
"start_time": "2025-06-30T11:44:55.978258Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\u001b[2m2025-08-27T14:36:09.273837\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mStarting completion generation for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.275355\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mStarting summary retrieval for query: 'What is in the multimedia files?'\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.691101\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mFound 2 summaries from vector search\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.691827\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReturning 2 summary payloads \u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n",
"\n",
"\u001b[2m2025-08-27T14:36:09.692207\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mReturning context with 2 item(s)\u001b[0m [\u001b[0m\u001b[1m\u001b[34mSummariesRetriever\u001b[0m]\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': 'facab42e-12fc-557e-aaf4-09c02ae1cd4f', 'created_at': 1756305273061, 'updated_at': 1756305273061, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': \"Programmers won't change a light bulb — it's considered a hardware issue.\"}\n",
"{'id': '958f2bc9-060b-5500-b14a-19b300cc99aa', 'created_at': 1756305311791, 'updated_at': 1756305311791, 'ontology_valid': False, 'version': 1, 'topological_rank': 0, 'type': 'IndexSchema', 'text': 'One-line programmer joke: changing a light bulb is labeled a hardware issue.'}\n"
]
}
],
"source": [
"from cognee.api.v1.search import SearchType\n",
"\n",
"# Query cognee for summaries of the data in the multimedia files\n",
"search_results = await cognee.search(\n",
" query_type=SearchType.SUMMARIES,\n",
" query_text=\"What is in the multimedia files?\",\n",
")\n",
"\n",
"# Display search results\n",
"for result_text in search_results:\n",
" print(result_text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
]
}
],
"source": [
"# Only exit in interactive mode, not during GitHub Actions\n",
"import os\n",
"\n",
"# Skip exit if we're running in GitHub Actions\n",
"if not os.environ.get('GITHUB_ACTIONS'):\n",
" print(\"Exiting kernel to clean up resources...\")\n",
" os._exit(0)\n",
"else:\n",
" print(\"Skipping kernel exit - running in GitHub Actions\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}