[
{
"id": "eval-000",
"model": "gemini-2.5-pro",
"question": "What is the block number on the Ethereum Mainnet corresponding to midnight (or the nearest time) on July 1st?",
"expected_result_format": "The final answer is a block number.",
"ground_truth": {}
},
{
"id": "eval-001",
"model": "gemini-2.5-pro",
"question": "What is the balance of the address `ens.eth`?",
"expected_result_format": "The final answer is a decimal (e.g., 123.456).",
"ground_truth": {}
},
{
"id": "eval-002",
"model": "gemini-2.5-pro",
"question": "Is there any approval set for the OP token on the Optimism chain by `zeaver.eth`?",
"expected_result_format": "The final answer is a list where the first element is the transaction in which approval was set, the second element is the approval amount, and the third element is the spender address.",
"ground_truth": {}
},
{
"id": "eval-003",
"model": "gemini-2.5-pro",
"question": "What is the latest block on the Gnosis Chain, and who is the block minter? Have any funds been moved from this minter recently?",
"expected_result_format": "The final answer is a list where the first element is the minter, the second element is the transaction hash, and the third element is a recipient address.",
"ground_truth": {}
},
{
"id": "eval-004",
"model": "gemini-2.5-pro",
"question": "Is there any blacklisting functionality for the USDT token on Arbitrum One?",
"expected_result_format": "The final answer is 'yes' or 'no'.",
"ground_truth": {}
},
{
"id": "eval-005",
"model": "gemini-2.5-pro",
"question": "Get the USDT token balance for `0xF977814e90dA44bFA03b6295A0616a897441aceC` on the Ethereum Mainnet at the block previous to the current block.",
"expected_result_format": "The final answer is a decimal (e.g., 123.456).",
"ground_truth": {}
},
{
"id": "eval-006",
"model": "gemini-2.5-pro",
"question": "Which methods of `0x1c479675ad559DC151F6Ec7ed3FbF8ceE79582B6` on the Ethereum Mainnet could emit `SequencerBatchDelivered`?",
"expected_result_format": "The final answer is a JSON array of method names (e.g., [\"addLiquidity\", \"removeLiquidity\"]).",
"ground_truth": {}
},
{
"id": "eval-007",
"model": "gemini-2.5-pro",
"question": "What is the most recent completed cross-chain message sent from the Arbitrum Sepolia rollup to the base layer?",
"expected_result_format": "The final answer is a transaction hash in the rollup.",
"ground_truth": {}
},
{
"id": "eval-008",
"model": "gemini-2.5-pro",
"question": "Which stablecoins does `0x99C9fc46f92E8a1c0deC1b1747d010903E884bE1` (Optimism Gateway) on Ethereum Mainnet hold with a balance exceeding $1,000,000?",
"expected_result_format": "The final answer is a JSON array of token symbols (e.g., [\"USDT\", \"FRAX\"]).",
"ground_truth": {}
},
{
"id": "eval-009",
"model": "gemini-2.5-pro",
"question": "Provide a comprehensive analysis of the transaction `0x6a6c375ea5c9370727cad7c69326a5f55db7b049623fba0e7ac52704b2778ba8` on Ethereum Mainnet. Also, specify a one-word category that best describes this transaction. Please collect as many details about this operation as possible before giving the final answer.",
"expected_result_format": "Format the answer according to the output format rules, ensuring that the final field is a single word.",
"ground_truth": {}
},
{
"id": "eval-010",
"model": "gemini-2.5-pro",
"question": "How many tokens from the NFT collection \"ApePunks\" are owned by `🇵🇱pl.eth` on Ethereum Mainnet?",
"expected_result_format": "The final answer is a number.",
"ground_truth": {}
},
{
"id": "eval-011",
"model": "gemini-2.5-pro",
"question": "How old is the `0xBAfc03eC2641b82ae5E4c4f6cc59455773092DC6` address?",
"expected_result_format": "The final answer is the timestamp of the first transaction in the format YYYY-MM-DDTHH:MM:SS.",
"ground_truth": {}
},
{
"id": "eval-012",
"model": "gemini-2.5-pro",
"question": "What is the block number on the settlement layer where the most recent completed cross-chain message, sent from the Arbitrum Sepolia rollup to the base layer, was executed?",
"expected_result_format": "The final answer is a block number.",
"ground_truth": {}
},
{
"id": "eval-013",
"model": "gemini-3-flash-preview",
"question": "Using only the information in your current context, retrieve and output the Blockscout MCP server version. Do not read from the file system or use a web search. If you do not have the information, respond with 'No info.'",
"expected_result_format": "The final answer is a version string (e.g., '1.2.3') or 'No info.'",
"ground_truth": {}
},
{
"id": "eval-014",
"model": "gemini-2.5-pro",
"question": "Unlock blockchain analysis using the Blockscout MCP server and output the MCP server version as provided in the tool output. If you do not have any information, respond with 'No info.'",
"expected_result_format": "The final answer is a version string (e.g., '1.2.3') or 'No info.'",
"ground_truth": {}
},
{
"id": "eval-015",
"model": "gemini-2.5-pro",
"question": "What are the three dominant tokens held by `0x813399e5b08Bb50b038AA7dF6347b6AF2D161828` across the following chains: Ethereum Mainnet, Gnosis, Optimism, Arbitrum, Base, and ZkSync?",
"expected_result_format": "The final answer is a list of three elements, where each element is a tuple containing a token name and the USD value of that token.",
"ground_truth": {}
},
{
"id": "eval-016",
"model": "gemini-2.5-pro",
"question": "What is the current total USD balance for `ens.eth`?",
"expected_result_format": "The final answer is a decimal (e.g., 123.456).",
"ground_truth": {}
},
{
"id": "eval-017",
"model": "gemini-2.5-pro",
"question": "Analyze the transaction `0x70478ecd95d1d21fd588fa76b8d8f024d90cd339c2f68a50e84cbbc18436aa05` on Base. How many infinite token approvals occurred in the transaction?",
"expected_result_format": "The final answer is a list of tuples where each element is a tuple with the token symbol, the approver address and the spender address.",
"ground_truth": {}
},
{
"id": "eval-018",
"model": "gemini-2.5-pro",
"question": "Which 10 most recent logs were emitted by `0xFe89cc7aBB2C4183683ab71653C4cdc9B02D44b7` before 'Nov 08 2024 04:21:35 AM (-06:00 UTC)'?",
"expected_result_format": "The final answer is a list where each element is a tuple containing the transaction hash and the number of logs emitted by the address in that transaction.",
"ground_truth": {}
},
{
"id": "eval-019",
"model": "gemini-2.5-pro",
"question": "Check the Safe `0x5BE8aB1c28ee22Cdf9B136FEDa7D8f20876Bfc0F` on Base and get its set of signers.",
"expected_result_format": "The final answer is a list of signer addresses.",
"ground_truth": {}
},
{
"id": "eval-020",
"model": "gemini-2.5-pro",
"question": "Examine the source code of the contract `0x3d610e917130f9D036e85A030596807f57e11093` on Gnosis Chain. Trace the code flow to determine which event is emitted in each of the following cases: (1) An account with sufficient tokens to claim (for example, `0xd15B0342DED129C3baE109f4731ff0AE614592E3`) calls the `claim()` method; (2) the account `0x641edbFE3D62d002725404a5c5de97211F85d64e` calls the method `claimTo(0xe6F43a9dc80d833decF722dF5d0A2C7e6013eF07)`. Both accounts have enough tokens to claim.",
"expected_result_format": "The final answer is a map with two keys: `claim` and `claimTo`. Each value should be a tuple: the first element is the name of the emitted event, and the second is the most relevant argument for that scenario, substituted with the value you obtained during your tracing.",
"ground_truth": {}
},
{
"id": "eval-021",
"model": "gemini-2.5-pro",
"question": "Check Ethereum Mainnet, Optimism, Arbitrum, Polygon, and Base to determine where the official FET token is deployed. Double-check your reasoning to ensure the token is official.",
"expected_result_format": "The final answer is a list, where each element is a tuple containing the chain ID and the address of the token contract.",
"ground_truth": {}
},
{
"id": "eval-022",
"model": "gemini-2.5-pro",
"question": "Show all tokens with a non-zero value held by `laurali.eth`.",
"expected_result_format": "The final answer is a list of tuples, where each element is a tuple containing the token symbol, the token address, and the token value in USD.",
"ground_truth": {}
},
{
"id": "eval-023",
"model": "gemini-2.5-pro",
"question": "There is a Uniswap V3 (1% fee) FET/WETH pool at `0x948b54a93f5ad1df6b8bff6dc249d99ca2eca052`. What are the two most recent trades in this pool?",
"expected_result_format": "The final answer is a list of tuples, where each tuple contains the trade date, direction (FET→ETH or ETH→FET), volume in FET, volume in ETH/WETH, and transaction hash.",
"ground_truth": {}
},
{
"id": "eval-024",
"model": "gemini-2.5-pro",
"question": "Who is the final recipient of USDC tokens in `0xffc3e15d0c645d4ef62a25ed894a5cdf7da0c1a971cea49589a76b3c92e26a3c`? To whom did this account send any amount of USDC in the first transaction after `0xffc3e15d0c645d4ef62a25ed894a5cdf7da0c1a971cea49589a76b3c92e26a3c`?",
"expected_result_format": "The final answer is a tuple containing: the recipient of USDC tokens in the specified transaction, the next recipient of USDC (i.e., the recipient in the account's first subsequent USDC-sending transaction), and the transaction hash where USDC was next sent.",
"ground_truth": {}
},
{
"id": "eval-025",
"model": "gemini-2.5-pro",
"question": "There is a Uniswap V4 (0.3% fee) FET/ETH pool: PoolManager address `0x000000000004444c5dc75cB358380D2e3dE08A90`, Pool ID `0x80235dd0d2b0fbac1fc5b9e04d4af3e030efd2b1026823affec8f5a6c9306c38`. What are the two most recent trades in this pool?",
"expected_result_format": "The final answer is a list of tuples, where each tuple contains the trade date, direction (FET→ETH or ETH→FET), volume in FET, volume in ETH/WETH, and transaction hash.",
"ground_truth": {}
}
]