benchmark_results.json•4.41 kB
{
"model": "gpt-4.1-2025-04-14",
"timestamp": "2025-07-01T09:41:10-07:00",
"tasks": [
{
"task": "Write JavaScript code to publish a message to a PubNub channel named 'chat-room' with the message 'Hello World'",
"with_context": {
"score": 97,
"response_time": 8.012726000,
"tokens_used": 6973,
"context_length": 30360
},
"without_context": {
"score": 72,
"response_time": 5.579726000,
"tokens_used": 471
}
}
,
{
"task": "Create Python code that subscribes to multiple PubNub channels and handles incoming messages with error handling",
"with_context": {
"score": 97,
"response_time": 20.352000000,
"tokens_used": 8534,
"context_length": 35522
},
"without_context": {
"score": 72,
"response_time": 10.411436000,
"tokens_used": 529
}
}
,
{
"task": "Write a Node.js function that implements presence detection on a PubNub channel and logs when users join/leave",
"with_context": {
"score": 97,
"response_time": 7.927724000,
"tokens_used": 4976,
"context_length": 20687
},
"without_context": {
"score": 78,
"response_time": 13.039602000,
"tokens_used": 496
}
}
,
{
"task": "Create JavaScript code that uses PubNub's message persistence feature to retrieve the last 10 messages from a channel",
"with_context": {
"score": 97,
"response_time": 18.645841000,
"tokens_used": 5022,
"context_length": 20342
},
"without_context": {
"score": 68,
"response_time": 8.419415000,
"tokens_used": 669
}
}
,
{
"task": "Write Python code that implements PubNub access control to grant read/write permissions to specific users",
"with_context": {
"score": 97,
"response_time": 13.044834000,
"tokens_used": 7191,
"context_length": 28877
},
"without_context": {
"score": 72,
"response_time": 8.554783000,
"tokens_used": 717
}
}
,
{
"task": "Create a JavaScript function that uses PubNub Functions to filter messages based on content before delivery",
"with_context": {
"score": 97,
"response_time": 14.675023000,
"tokens_used": 11335,
"context_length": 48016
},
"without_context": {
"score": 68,
"response_time": 8.612057000,
"tokens_used": 563
}
}
,
{
"task": "Write code that implements PubNub's file sharing feature to upload and share a file with other users",
"with_context": {
"score": 97,
"response_time": 18.520890000,
"tokens_used": 4650,
"context_length": 17744
},
"without_context": {
"score": 72,
"response_time": 14.312381000,
"tokens_used": 743
}
}
,
{
"task": "Create a real-time location tracking system using PubNub that updates user positions on a map",
"with_context": {
"score": 94,
"response_time": 11.827520000,
"tokens_used": 5463,
"context_length": 20687
},
"without_context": {
"score": 68,
"response_time": 28.107076000,
"tokens_used": 1043
}
}
,
{
"task": "Create a PubNub-powered web-based social mapping app with OpenStreetMap, user markers with image uploads via PubNub Files API, user data stored in PubNub AppContext, real-time synchronization, and a global chat window",
"with_context": {
"score": 95,
"response_time": 12.948944000,
"tokens_used": 31915,
"context_length": 140284
},
"without_context": {
"score": 68,
"response_time": 14.106186000,
"tokens_used": 1072
}
}
],
"summary": {
"total_tasks": 9,
"averages": {
"with_context": {
"score": 96.44,
"response_time": 13.995,
"tokens_used": 9562.11
},
"without_context": {
"score": 70.88,
"response_time": 12.349,
"tokens_used": 700.33
}
},
"improvements": {
"score_improvement_percent": 36.00,
"time_difference_percent": 13.00,
"token_usage_difference_percent": 1265.00
}
}
}