raw_benchmark_results.json•562 kB
{
"metadata": {
"top_models": [
"meta-llama/Meta-Llama-3.1-70B-Instruct",
"meta-llama/Meta-Llama-3.1-8B-Instruct",
"Qwen/Qwen2.5-72B-Instruct",
"mistralai/Mixtral-8x22B-Instruct-v0.1",
"mistralai/Mistral-7B-Instruct-v0.3"
],
"total_questions": 500,
"fetched_at": "1760849328.0299566"
},
"questions": {
"mmlu_pro_1017": {
"question_id": "mmlu_pro_1017",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A highway patrol officer stopped a driver for speeding and found that her license was suspended. He arrested her for driving while under suspension and secured her in the rear seat of the squad car. He then searched the car where he found several bags of cocaine inside a coat that was setting on the rear seat; he arrested her for possession and possession with intent to deliver cocaine. Prior to trial, the driver moved for suppression of the cocaine. Is it likely that the trial court will grant the suppression motion?",
"correct_answer": "A",
"choices": [
"Yes, it must be suppressed because this was an unreasonable, warrantless search.",
"No, the motion will be denied because the initial traffic stop was legal and everything that flowed therefrom was a reasonable step to search for crime.",
"No, the motion will be denied because the search was permissible as incident to a proper arrest.",
"Yes, the motion will be granted because a search of a vehicle requires a search warrant unless there are exceptional exigent circumstances.",
"Yes, the motion will be granted because the officer did not have probable cause to search the car.",
"No, the motion will be denied because the officer had the right to search the car for his own safety.",
"No, the motion will be denied because the officer had probable cause to search the car due to the driver's suspended license.",
"Yes, the motion will be granted because the officer did not see the cocaine in plain view before searching the car.",
"Yes, the motion will be granted because the officer did not have the driver's consent to search the car."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_840": {
"question_id": "mmlu_pro_840",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A shop owner domiciled in State A sued a distributor in a federal district court in State A for breach of a contract. The shop owner sought $100,000 in damages for allegedly defective goods that the distributor had provided under the contract. The distributor is incorporated in State B, with its principal place of business in State C. The distributor brought in as a third-party defendant the wholesaler that had provided the goods to the distributor, alleging that the wholesaler had a duty to indemnify the distributor for any damages recovered by the shop owner. The wholesaler is incorporated in State B, with its principal place of business in State A. The wholesaler has asserted a $60,000 counterclaim against the distributor for payment for the goods at issue, and the distributor has moved to dismiss the counterclaim for lack of subject-matter jurisdiction. Should the motion to dismiss be granted?",
"correct_answer": "I",
"choices": [
"No, because the distributor and the wholesaler conduct business in different states.",
"Yes, because there is no diversity of citizenship between the distributor and the wholesaler.",
"Yes, because the wholesaler's counterclaim is not directly related to the distributor's claim.",
"No, because the wholesaler's and the distributor's principal places of business are diverse.",
"No, because the distributor has invoked the jurisdiction of the court.",
"Yes, because the amount in controversy does not meet the jurisdictional limit.",
"Yes, because the wholesaler's counterclaim is not a compulsory counterclaim.",
"No, because the wholesaler and distributor are incorporated in the same state.",
"No, because there is supplemental jurisdiction over the wholesaler's counterclaim.",
"Yes, because there is no diversity of citizenship between the shop owner and the wholesaler."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1352": {
"question_id": "mmlu_pro_1352",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A farmer conveyed 100 acres of his farm to a landscaper. The deed contained the following covenants: (1) seisin, (2) right to convey, and (3) against encumbrances. Subsequently, the landscaper conveyed the property to a buyer by warranty deed. However, the buyer is later evicted by a woman because of paramount title. The buyer now brings suit against the farmer for breach of covenants in the deed. Judgment should be for",
"correct_answer": "H",
"choices": [
"the buyer, but only for the covenants of seisin and right to convey.",
"the buyer, because the covenants of seisin and right to convey were breached by the farmer.",
"the farmer, because the buyer should have conducted a thorough title search before purchasing the property.",
"the buyer, because the covenant against encumbrances was breached by the farmer.",
"the farmer, because the buyer did not directly purchase the land from him.",
"the farmer, because no privity of estate exists between the buyer and the farmer.",
"the farmer, because the buyer was evicted due to a problem not related to the covenants in the deed.",
"the farmer, because the covenants are personal in nature and do not run with the land.",
"the buyer, because the warranty deed guarantees clear title."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1290": {
"question_id": "mmlu_pro_1290",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "\u2018Law consists of set of principles or body of rules recognized and applied by the State in the administration of justice\u2019. The statement is made by :",
"correct_answer": "G",
"choices": [
"Blackstone",
"Bentham",
"Gray",
"Austin",
"Kelsen",
"Dworkin",
"Salmond",
"H.L.A. Hart",
"Roscoe Pound",
"Holland"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1246": {
"question_id": "mmlu_pro_1246",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A debtor owed a creditor $12,000 under a promissory note. Under the applicable statute of limitations, a suit to collect on the promissory note had to have been filed by September 30 of last year. On June 1 of this year, the creditor received a letter from the debtor stating, \"I shall pay you $5,000 on July 1 in full satisfaction of what I owe you. \" However, the debtor failed to make the payment on July 1. If, on August 1, the creditor brings suit against the debtor and the debtor asserts the statute of limitations as an affirmative defense and refuses to pay the creditor anything, which of the following accurately states the creditor's legal rights against the debtor?",
"correct_answer": "H",
"choices": [
"On July 1, not June 1, the creditor will be entitled to a judgment against the debtor for $12,000.",
"On June 1, the creditor became entitled to a judgment against the debtor for $12,000.",
"The creditor is entitled to a judgment against the debtor for $5,000, regardless of the date.",
"On August 1, the creditor became entitled to a judgment against the debtor for $12,000.",
"The creditor is entitled to a judgment against the debtor for $7,000, the remaining amount after the promised $5,000 payment.",
"On July 1, the creditor became entitled to a judgment against the debtor for the full $12,000, due to the debtor's failure to pay.",
"The creditor is not entitled to anything, due to the statute of limitations expiring.",
"On July 1, not June 1, the creditor became entitled to a judgment against the debtor for $5,000 only.",
"The creditor is not entitled to anything, on either June 1 or on July 1."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1074": {
"question_id": "mmlu_pro_1074",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Proposed legislation was offered to a state legislature that would reorganize the state police. The bill created a great deal of controversy, both in and outside the state government. Several leaders of the minority party in the legislature decided to oppose the legislation. One member of the minority party disagreed with his party's opposition to the bill and publicly announced his support for the legislation. The minority party leaders called a caucus to discuss and determine their legislative strategy for floor debate on the bill. When the disagreeing member appeared at the door of the caucus room, he was denied admission because of his anti-party stance. He was also informed that he would be removed from all of his committee assignments. During the caucus, the party members discussed other means of disciplining the member for his party insubordination. It was suggested that they issue a press release in which the party would publicly castigate him for his actions. The leader of the party said that \"the member is a cutthroat politician who is only looking out for where his next buck will come from.\" Which of the following constitutional provisions would give the ousted member his best grounds for challenging his exclusion from the party caucus?",
"correct_answer": "C",
"choices": [
"The speech and debate clause.",
"The establishment clause of the First Amendment.",
"The due process clause of the Fourteenth Amendment.",
"The right to petition as guaranteed by the First Amendment.",
"The right to a jury trial as guaranteed by the Sixth Amendment.",
"The right of assembly as guaranteed by the First Amendment.",
"The equal protection clause of the Fourteenth Amendment.",
"The protection from ex post facto laws.",
"The cruel and unusual punishment clause of the Eighth Amendment.",
"The privileges and immunities clause of the Fourteenth Amendment."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_998": {
"question_id": "mmlu_pro_998",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A defendant was suspected of having burglarized his neighbor's apartment. The neighbor reported that his apartment had been ransacked and several items of clothing had been stolen. During the course of their investigation, two police detectives went to the defendant's place of work to interview him. After being advised of his Miranda rights, the defendant requested permission to call his attorney. Although his attorney was unavailable, the attorney's receptionist admonished him not to say anything. The defendant told the detectives he would have nothing further to say unless his attorney was present. The detectives then asked him if he would accompany them to the police station to answer some questions about an unrelated robbery. The defendant agreed. As they were driving to the police station, the defendant was asked where he had purchased the boots that he was wearing. He refused to answer. When they arrived at the police station, the detectives requested that he sign a waiver of his right to counsel. The defendant replied that he would not sign anything in the absence of his attorney. He was then placed in an interrogation room. Shortly thereafter, the defendant was told he could leave if he would be willing to say where he had purchased his boots. The defendant admitted that he had bought the boots and some shirts from a friend. From this information, the detectives obtained a search warrant and went to the defendant's home where they found the stolen clothing. The defendant is charged with burglarizing the apartment. At trial, the court should",
"correct_answer": "B",
"choices": [
"admit the confession because it was voluntary, and the clothing because it was obtained pursuant to a valid search warrant.",
"suppress the confession because it was obtained in violation of his Fifth Amendment right to counsel, and the clothing because the search warrant was secured as a result of the confession.",
"suppress both the confession and the clothing because the defendant's right to counsel was violated.",
"admit the confession because the defendant voluntarily accompanied the detectives, but suppress the clothing because the search warrant was based on the confession.",
"suppress the confession because the defendant failed to sign the waiver, but admit the clothing because it was obtained pursuant to a valid search warrant.",
"admit the confession because the defendant was not under arrest when he made it, and the clothing because it was obtained pursuant to a valid search warrant.",
"admit the confession because the question about the boots was about an unrelated robbery, but suppress the clothing because the search warrant was based on the confession.",
"suppress the confession because the defendant's attorney was not present, and the clothing because it was obtained pursuant to a valid search warrant.",
"suppress the confession because the detectives continued questioning after the defendant invoked his right to counsel, but admit the clothing because it was obtained pursuant to a valid search warrant.",
"suppress the confession because it was obtained in violation of his Sixth Amendment right to counsel, but admit the clothing because it was obtained pursuant to a valid search warrant."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_967": {
"question_id": "mmlu_pro_967",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "An avowed communist was elected vice president of a union. A senator, in his investigation of communist infiltration of national labor unions, found a provision in a statute passed by Congress, which makes it a crime for a member of the communist party to act as an official of a labor union. After a subsequent legislative hearing, the communist is dismissed from his position by the union. Which of the following most accurately summarizes the applicable rule of constitutional law regarding the aforementioned provision of the statute?",
"correct_answer": "C",
"choices": [
"The statutory provision is a violation of the person's Eighth Amendment right against cruel and unusual punishment.",
"The statutory provision is a violation of the person's Sixth Amendment right to a fair trial.",
"The statutory provision in the act is a form of legislative punishment violative of the Constitution as a bill of attainder.",
"Making it a crime for a Communist to hold a union office is a suspect classification, which violates the equal protection clause.",
"The statutory prohibition is a reasonable method of discrimination since the benefit to the public outweighs the injury or restrictions that would be inflicted upon the person.",
"The statute is valid as it is a reasonable restriction on the First Amendment rights of an individual in the interest of national security.",
"The statutory provision is unconstitutional as it violates the person's right to due process under the Fifth Amendment.",
"The statutory provision is a form of unconstitutional prior restraint on a person's First Amendment right of free association.",
"The statutory prohibition is a valid exercise of Congress' power to regulate interstate commerce.",
"The statute is unconstitutional because it infringes upon the individual's right to political affiliation, protected under the First Amendment."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1653": {
"question_id": "mmlu_pro_1653",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A ceramics studio contracted with an artist to produce cups and saucers designed by the artist. The artist was an established designer of collectible ceramic dinnerware, and the studio did production work for many artists who created ceramic dinnerware. The price and quantity term of the contract read: \"2,000 sets of the cups and saucers at $5 each, payable on delivery.\" The contract contained a merger clause. The studio produced the cups and saucers and delivered them along with a bill for $20,000 (4,000 pieces at $5 per piece). The artist refused to pay more than $10,000 (2,000 sets at $5 per set). At the trial of the studio's action against the artist for breach of contract, the studio introduced evidence of an established practice in the studio industry to price cup-and-saucer sets on a per-piece, not a per-set, basis. Is the studio's evidence admissible?",
"correct_answer": "D",
"choices": [
"No, because such evidence would vary an unambiguous term of the contract.",
"No, because the evidence would contradict the explicit terms of the contract.",
"Yes, because evidence of trade usage is always admissible.",
"Yes, because the usage of trade is offered to give meaning to the contract.",
"No, because the contract contained a merger clause which makes extrinsic evidence inadmissible.",
"Yes, because the studio's interpretation of the contract aligns with the industry practice.",
"Yes, because the court should consider industry norms in interpreting the contract.",
"No, because the agreement was completely integrated.",
"No, because the artist was unaware of the industry practice.",
"Yes, because the evidence of established industry practice may explain the ambiguity in the contract."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_854": {
"question_id": "mmlu_pro_854",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A 15-year-old boy was killed during a gang fight. Two days after his funeral, the boy's mother saw a television program about gang violence and was shocked to see video of herself weeping over the boy's body. The video had been shot by the television reporting team while the boy's body was still lying on a public street. The mother suffered severe emotional distress as a result of seeing the video. If the mother sues the television station for invasion of her privacy and that of her son, will the mother be likely to prevail?",
"correct_answer": "B",
"choices": [
"No, because the television station has the right to broadcast any footage shot in public.",
"No, because the street was open to the public and the subject was newsworthy.",
"No, because the broadcast of the video falls under freedom of the press.",
"Yes, because the video footage was used without any prior notice to the mother.",
"No, because a person has no right to privacy after his or her death.",
"Yes, because the television station capitalized on the mother's grief for their program.",
"Yes, because the video footage intruded on the mother's private grief.",
"Yes, because the mother did not give permission to have the video used in the program.",
"Yes, because the mother suffered severe emotional distress as a result of viewing the video."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_850": {
"question_id": "mmlu_pro_850",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "On February 15, a company that manufactures metal sidings for home exteriors received the following order from a builder: \"Please ship 300 sheets of 1/4-inch refabricated aluminum siding. Delivery by April 1.\" On March 8, the company shipped 300 sheets of 1/2-inch refabricated aluminum siding, which were received by the builder on March 10. The following day, the builder sent the following fax to the company: \"Be advised that your shipment is rejected. Order stipulated 1/4-inch sheets.\" This fax was received by the company, but the builder did not ship the nonconforming aluminum sheets back to the company. Did the builder properly reject the shipment delivered on March 10?",
"correct_answer": "E",
"choices": [
"Yes, because the company did not notify the builder that the 1/2-inch sheets were for accommodation only.",
"No, because the company could accept the builder's offer by prompt shipment of either conforming or nonconforming goods.",
"No, because the builder accepted the goods by not returning them immediately.",
"Yes, because the company failed to deliver by the stipulated April 1 deadline.",
"Yes, because the aluminum sheets were nonconforming goods.",
"No, because the builder waived its right to reject the nonconforming goods by not returning them promptly to the company.",
"No, because the company has the right to ship nonconforming goods as long as they meet the overall order requirements.",
"No, because the builder did not specify the reason for rejection in the fax."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_980": {
"question_id": "mmlu_pro_980",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Owner has a property in fee simple absolute. He executes a deed to Friend with the words \"To Friend for life and then to the heirs of Owner.\" Under common law principles the conveyance to the heirs is not effective. Thus, the deed would result in a life estate to Friend with a reversion back to Owner. That result is due to what common law doctrine?",
"correct_answer": "G",
"choices": [
"The doctrine of eminent domain",
"The rule of survivorship",
"The rule of destructibility of contingent remainders",
"The rule in Shelley's Case",
"The doctrine of escheat",
"The doctrine of concurrent ownership",
"The doctrine of worthier title",
"The rule of lapse",
"The rule against perpetuities",
"The doctrine of adverse possession"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1236": {
"question_id": "mmlu_pro_1236",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A homeowner wished to have his house painted. He contacted a number of house painters in his area and asked them to submit bids to do the work. The homeowner received 10 bids. The first nine offered to paint the house for amounts ranging from $10,000 to $12,500. The 10th bid was sent by telegram and was supposed to be for $10,000. However, the telegraph company made a mistake and transmitted the bid as $1,000. The homeowner immediately accepted the 10th bid, but the 10th painter refused to perform. The 10th painter's best defense in an action for breach of contract by the homeowner would be",
"correct_answer": "A",
"choices": [
"that the homeowner should have been aware of the mistaken transmission, because of the disparity between its bid and the others.",
"that the telegraph company should be liable as an independent contractor.",
"that the homeowner deliberately exploited the telegraph company's mistake.",
"that the telegraph company's mistake nullifies the contractual agreement.",
"that the homeowner's immediate acceptance of the bid was unreasonable.",
"that the discrepancy between the bid and the actual cost of painting is too large.",
"that the mistake made the contract unconscionable.",
"that the homeowner was under an affirmative duty to investigate all submitted bids.",
"that the bid was not legally binding as it was sent via telegram.",
"that the homeowner failed to provide accurate specifications for the job."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1265": {
"question_id": "mmlu_pro_1265",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Late one night, an accountant walked into a bar and ordered a whiskey sour. The bartender served the drink, even though the accountant looked and acted as though he was already very intoxicated. The accountant soon had consumed five more cocktails, which the bartender served, despite the accountant's obviously and unmistakably drunken condition. After finishing his sixth drink in the bar, the accountant said good night to the bartender, staggered out of the bar, got into his car, and drove away. After weaving back and forth across the road for several blocks, the accountant crashed his car into a pedestrian who was walking on the sidewalk next to the road. The pedestrian suffered serious injuries to his back and legs. The bartender's act of serving drinks to the accountant would most likely be viewed as the",
"correct_answer": "E",
"choices": [
"nominal cause of the pedestrian's injuries.",
"intervening cause of the pedestrian's injuries.",
"contributing cause of the pedestrian's injuries.",
"remote cause of the pedestrian's injuries.",
"proximate cause of the pedestrian's injuries.",
"superseding cause of the pedestrian's injuries.",
"incidental cause of the pedestrian's injuries.",
"secondary cause of the pedestrian's injuries.",
"underlying cause of the pedestrian's injuries.",
"direct cause of the pedestrian's injuries."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1823": {
"question_id": "mmlu_pro_1823",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Defendant is on trial. Which of the following, if occurs, would NOT cause a due process violation in the trial?",
"correct_answer": "I",
"choices": [
"The defendant is not provided with a competent interpreter when needed.",
"The defendant is not given the opportunity to testify on their own behalf.",
"Jurors are exposed to influence favorable to the prosecution.",
"The trial is held in a location that is prejudiced against the defendant.",
"The defendant is compelled to stand trial or appear at penalty phase proceedings visibly shackled (unless there are security concerns).",
"The defendant is denied the opportunity to present evidence.",
"The defendant's past convictions are disclosed to the jury before they reach a verdict.",
"The defendant is not allowed to cross-examine witnesses.",
"The defendant is compelled to stand trial in street clothing.",
"The trial is conducted in a way that jurors would not be able to give evidence reasonable consideration."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_843": {
"question_id": "mmlu_pro_843",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Which of the following powers standing alone cannot support federal law?",
"correct_answer": "H",
"choices": [
"defense power",
"treaty power",
"legislative power",
"executive power",
"judicial power",
"commerce power",
"spending power",
"necessary and proper power",
"taxing power",
"police power"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1196": {
"question_id": "mmlu_pro_1196",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "The Traditional Religion Advocacy Party (TRAP) has successfully lobbied the State of Texarkana for a statute named \"Equal Rights for All Religions.\" The law requires broadcasters that air segments discussing Christianity to allow equal time for segments on Judaism, Islam, Hinduism, and Buddhism. The American Atheism Association (AAA) has put together a broadcast segment about atheism, rejecting all religions, but four of the five Texarkana stations have declined to air the segment because they believe it would violate the statute. AAA has filed suit against the state, challenging the constitutionality of the statute. What is the strongest argument AAA can make in support of striking down the Equal Rights for All Religions statute?",
"correct_answer": "D",
"choices": [
"The statute discriminates against atheism, therefore violating the Equal Protection Clause.",
"The fact that the statute involves only broadcast media and not print media is an Equal Protection violation.",
"The statute is unconstitutional because it interferes with the AAA's right to reject all religions.",
"The statute is not narrowly tailored to promote a compelling governmental interest.",
"The First Amendment prohibits government interference with the free exercise of religion.",
"The statute violates the Due Process Clause by restricting AAA's right to express their beliefs."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1648": {
"question_id": "mmlu_pro_1648",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A man was arrested and convicted of publishing paid newspaper advertisements giving information on how to get an abortion in another state and how to find a referral service. A criminal statute made it a crime to disseminate such information. Will the courts uphold the conviction?",
"correct_answer": "A",
"choices": [
"No, the statute violates the First Amendment right to publish protected commercial speech.",
"Yes, the right to an abortion does not include the right to publicize where to get one.",
"Yes, the statute properly restricts speech that promotes unlawful activity.",
"No, the statute violates the Fourth Amendment protection against unreasonable searches and seizures.",
"No, the statute violates the Fifth Amendment right to due process.",
"No, the statute violates the sixth amendment right to privacy.",
"No, the statute violates the Eighth Amendment prohibition of cruel and unusual punishment.",
"Yes, the right to an abortion does not include the right to advertise one.",
"Yes, the statute does not violate any constitutional rights.",
"Yes, there is no right to publish or disseminate speech that is commercial in nature."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1240": {
"question_id": "mmlu_pro_1240",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "One evening, a defendant was at a party and offered to sell an ounce of marijuana to a partygoer. The partygoer agreed to purchase the marijuana and gave the defendant $200. In return, the defendant handed the partygoer a bag containing what appeared to be marijuana. At the time of the transaction, the defendant knew that the bag did not contain marijuana but, instead, was oregano. The defendant is guilty for which, if any, of the following crimes?",
"correct_answer": "B",
"choices": [
"Solicitation, attempted sale of narcotics, and false pretenses.",
"False pretenses.",
"Solicitation and false pretenses.",
"Attempted sale of narcotics and false pretenses.",
"Attempted sale of narcotics.",
"Attempted fraud.",
"No crimes, as no actual narcotics were involved.",
"Theft by deception.",
"Solicitation and attempted sale of narcotics.",
"Solicitation, theft by deception, and false pretenses."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1708": {
"question_id": "mmlu_pro_1708",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A person owned property next to a highway. After raking leaves on his property into a large pile, the landowner loaded the leaves into several large metal barrels so that he could burn the leaves. Before starting the fire, the landowner telephoned the local weather bureau to determine which direction the wind would be blowing. Because a highway was located on the southern edge of his property, he was aware that it would be unsafe to burn the leaves if the wind was blowing in that direction. The weather bureau, however, indicated that the wind would be gusting in a northerly direction. Shortly after the landowner set the leaves on fire, the wind current unexpectedly shifted and started gusting in a southerly direction. As a consequence, the smoke and ashes blew over the highway, resulting in poor visibility. Moments later, a motorist was driving his automobile on the highway in a westerly direction. The posted speed limit was 45 m.p.h., although the driver was traveling about 55 m.p.h. The driver could not see the highway clearly and crashed his vehicle into a median strip. If the driver asserts a claim against the landowner, the most likely result is",
"correct_answer": "J",
"choices": [
"the driver will prevail, if he can prove that the landowner's actions directly resulted in the accident.",
"the driver will prevail, because the landowner should have anticipated the wind change.",
"the landowner will prevail, if the driver cannot prove that the smoke caused the accident.",
"the driver will prevail, if the smoke from the burning leaves prevented him from clearly seeing the roadway.",
"the driver will prevail, because the landowner will be strictly liable for causing the accident.",
"the landowner will prevail, because burning leaves on private property is not against the law.",
"the landowner will prevail, because he called the weather bureau before starting the fire.",
"the driver will prevail, because the landowner should have placed warnings on the highway.",
"the landowner will prevail, because the driver was driving in excess of the speed limit.",
"the landowner will prevail, if his decision to burn the leaves was reasonable under the circumstances."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1358": {
"question_id": "mmlu_pro_1358",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A construction company was doing repairs and replacing portions of a sidewalk and railing next to a lake. The construction crew started tearing out the old sidewalk and railing, but stopped work when it started to get dark. The construction crew left without putting up a warning sign or barrier around the work area. A few hours later, a jogger came along the sidewalk. Not realizing the construction work was in progress there, the jogger stumbled and fell at the spot where the construction crew had torn up the sidewalk and railing. The jogger fell into the lake. As the jogger was attempting to stay afloat, he began screaming, \"Help! Help! I can't swim. I'm drowning. \" His screams attracted the attention of a person who was passing on his bicycle. The cyclist immediately hurried to assist the jogger. As the cyclist was leaning over the edge of the lake, trying to help the jogger get out of the water, he lost his balance and fell into the lake. Both the jogger and cyclist suffered serious bodily injuries before they were pulled out of the water by police. In a negligence action by the cyclist to recover for his personal injuries, the construction company will most probably",
"correct_answer": "F",
"choices": [
"not be held liable, because the construction company could not foresee that anyone would be hurt while trying to rescue someone from the lake.",
"not be held liable, because the cyclist assumed the risk by leaning over the edge of the lake.",
"not be held liable, because it was the jogger's responsibility to avoid the construction area.",
"be held liable, because they did not properly secure the worksite after finishing for the day.",
"be held liable, because the construction company would be strictly liable to anyone injured by the failure to put adequate warnings or barriers around the site of the sidewalk repairs.",
"be held liable, because the cyclist's attempt to rescue the jogger was foreseeable.",
"not be held liable, because the jogger should have been aware of the construction work.",
"be held liable, because they failed to provide adequate lighting for the construction site.",
"not be held liable, because the cyclist voluntarily put himself in danger."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_802": {
"question_id": "mmlu_pro_802",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A high school junior was charged by the school administration with violating certain sections of the disciplinary code, specifically, he was charged with being disrespectful to a teacher by using profanity and with using abusive language to a fellow student. The principal, sent the student's parents a letter notifying them of the three-day suspension for the above-mentioned charges. The suspension was to take effect on February 1. The principal also included a copy of the disciplinary code in the letter. On January 19, the student and his mother met with the principal in his office to discuss the matter, and the student admitted that he used abusive language to a fellow student. On January 22, the student's parents received a letter informing them that his teacher had upheld the school administration's decision to suspend their son. They were then notified of a hearing on the recommended suspension to be held at the school. The parents did not attend this hearing, but were advised that the school board upheld the suspension, effective February 1. Which of the following most accurately summarizes the applicable rule of constitutional law with respect to the student's suspension?",
"correct_answer": "A",
"choices": [
"The student's suspension did not constitute a denial of due process.",
"The student's suspension was a violation of his Fifth Amendment rights to due process.",
"The school did not have the right to suspend the student without the presence of his parents at the hearing.",
"The student's suspension was a violation of the Eighth Amendment's prohibition of cruel and unusual punishment.",
"The student's suspension violated his right to a fair trial.",
"The disciplinary code violated the student's Fourth Amendment rights against unreasonable searches and seizures.",
"The disciplinary code violated the student's right to a compulsory school education.",
"The school administration's decision was protected under the Tenth Amendment's guarantee of states' rights.",
"The student's suspension deprived him of liberty and property without due process, as guaranteed by the Fourteenth Amendment.",
"The student's conduct was protected under the First Amendment's guarantee of freedom of speech."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1115": {
"question_id": "mmlu_pro_1115",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A man informed the police that he wanted to confess to a murder. After they read him his constitutional rights, he insisted that he was acting voluntarily. He then took them to the scene of the murder where they found the victim's body. By the next day, the man claimed that \"voices\" had ordered him to confess and he didn't act voluntarily. A physician verified that the man suffered from hallucinations that impaired his volitional abilities, preventing a voluntary confession. Will the court suppress the statements?",
"correct_answer": "B",
"choices": [
"Yes, because the man was suffering from hallucinations at the time of his confession.",
"No, there was no police misconduct or overreaching, and as a result the confession is voluntary.",
"No, a confession is admissible when accompanied by actions by the defendant that prove the truth of the statements.",
"No, because the man insisted initially that he was acting voluntarily.",
"Yes, because the man later retracted his confession.",
"Yes, the police overreached by not having him mentally evaluated before questioning him.",
"Yes, he was insane and could not have given a voluntary confession.",
"No, because the man led the police to the victim's body which supports his confession.",
"No, because the police read the man his constitutional rights before he confessed."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1654": {
"question_id": "mmlu_pro_1654",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A brick mason was hired by a builder under a written one-year contract, at an annual salary of $45,000, with employment to begin on March 1. Because the builder was unable to secure enough building contracts to keep all its employees busy during the season beginning March 1, it notified the brick mason on February 15 that it could not afford to employ him as a mason. At the same time, however, the builder offered to employ the mason, for the same contract period, as a night guard at an annual salary of $25,000. The mason declined the offer and remained unemployed during the year. No offer and remained unemployed during the year. No employment for brick masons was available in the community during the year, but the mason could have obtained other employment as a day laborer that would have paid up to $25,000 a year. At the end of the year, in an action against the builder for breach of contract, how much, if anything, is the mason entitled to recover?",
"correct_answer": "B",
"choices": [
"$20,000 (the difference between the mason's contract price and the salary offered for the night guard position).",
"$45,000 (the contract price).",
"Nothing, because the mason turned down a reasonable alternative employment offer.",
"Nothing, because the mason did not mitigate his damages.",
"$20,000 (the $45,000 contract price less the $25,000 the mason could have earned in other employment).",
"$45,000 plus damages for emotional distress.",
"Nothing, because the builder did not act in bad faith when it discharged the mason.",
"$25,000 (the amount he could have earned as a day laborer).",
"$45,000 minus any unemployment benefits the mason received during the year.",
"$25,000 (the amount he could have earned as a night guard)."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1485": {
"question_id": "mmlu_pro_1485",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Which one of the following statements is false?",
"correct_answer": "J",
"choices": [
"Protocol 14 established the Human Rights Council",
"Protocol 14 mandated a two judge formation for hearing admissibility cases",
"Protocol 14 reduced the jurisdiction of the European Court of Human Rights",
"Protocol 14 shortened the judicial term of office for the European Court of Human Rights",
"Protocol 14 introduced a requirement for unanimous decision-making in admissibility cases",
"Protocol 14 added a new criterion for admissibility",
"Protocol 14 eliminated the right to individual petition",
"Protocol 14 changed the committee that hears admissibility cases from a three judge to a one judge formation",
"Protocol 14 extended the judicial term of office for the European Court of Human Rights",
"Protocol 14 abolished the Commission of Human Rights"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1107": {
"question_id": "mmlu_pro_1107",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A company contracted with a builder to construct a new corporate headquarters for a fixed price of $100 million. At the time of the contract, structural steel was widely available and was included in the contract as a $6 million item. Before work began on the project, tornado damage shut down the production facility of the biggest structural steel supplier in the country, and the price of structural steel increased by 20% as a result. The builder informed the company of the steel price increase, and the parties then orally agreed to increase the project price to $101 million. The builder proceeded with construction and delivered the project on time. The company paid the builder $100 million but refused to pay the additional $1 million. If the builder sues the company for $1 million, is the builder likely to prevail?",
"correct_answer": "E",
"choices": [
"No, because the price change of structural steel was not significant enough to warrant a contract modification.",
"No, because there was no consideration for the modification of the contract.",
"No, because the modification was never reduced to a writing signed by the party to be charged.",
"No, because the company did not sign a new contract reflecting the price change.",
"Yes, because the modification was fair and equitable in view of the unanticipated increase in the price of structural steel.",
"Yes, because the builder completed the project on time despite the unforeseen circumstances.",
"Yes, because the builder had no control over the price increase of structural steel.",
"Yes, because the company\u00d5s promise was supported by consideration.",
"Yes, because the company was aware of the price increase and verbally agreed to it.",
"No, because the builder should have anticipated potential price fluctuations in materials."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1229": {
"question_id": "mmlu_pro_1229",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A rancher, being owner in fee simpleconveyed the property by warranty deed to a woman. The woman gave her niece a mortgage on the ranch to secure a loan from the niece to the woman in the amount of $500,000. The mortgage was recorded immediately. Two years later, the woman conveyed the ranch to a farmer by quitclaim deed. The woman then defaulted on the mortgage, and the niece brought an in personam action against the farmer to recover the amount of the mortgage due. Assume that the woman's quitclaim deed to the farmer made no reference to the mortgage. The woman then defaulted on the mortgage, and the niece brought an in personam action against the farmer to recover the amount of the mortgage due. The mortgagee will probably",
"correct_answer": "H",
"choices": [
"succeed, because the mortgage was recorded immediately.",
"not succeed, because the quitclaim deed did not reference the mortgage.",
"succeed, because an implied delegation of duties resulted from the woman's conveyance to the farmer.",
"succeed, because the quitclaim deed does not absolve the farmer of the mortgage debt.",
"not succeed, unless the farmer had constructive notice of the existence of the mortgage.",
"succeed, because the niece was a third-party beneficiary under the conveyance between the woman and the farmer.",
"not succeed, because the woman was still legally responsible for the mortgage debt.",
"not succeed, because the farmer did not promise to pay the mortgage debt.",
"succeed, because the farmer implicitly accepted responsibility for the mortgage by accepting the quitclaim deed."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1478": {
"question_id": "mmlu_pro_1478",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "In 1993, a rancher had good record title to a 20-acre orange grove in fee simple absolute. In 1994, the rancher delivered to his son, for a sum of $1,000, a deed signed by the rancher, naming the son and his heirs as grantee, and appearing valid on its face. The son neglected to record the deed. In 1998, a farmer, aware of the existence of the rancher-to-son deed, sought out the rancher and asked to buy for $10,000 a deed to the orange grove from the rancher to the fanner and his heirs. The rancher executed such a deed, and the fanner promptly recorded it. The farmer's intent was to acquire color of title and obtain ownership of the orange grove by adverse possession. In 1998, the farmer constructed a fence around the orange grove. In 1999, the son presented his deed of the orange grove to a retiree, and for $15,000, paid by the retiree, signed and delivered a deed of the orange grove in favor of the retiree and his heirs. After receiving the deed, the retiree made no effort to search the title, to examine the property, or to record the deed. In 2003, a buyer paid the fanner $20,000, and the farmer delivered to the buyer a deed of the orange grove in favor of the buyer and his heirs. The buyer had examined the property, had searched the title, and had no knowledge of the farmer's awareness of the prior rancher-to-son instrument. Although the buyer did not reside on the property, he regularly visited the orange grove twice a week. The buyer recorded his deed. In 2007 for $25,000 paid by an orange grower, the retiree signed and delivered adeed of the orange grove naming the orange grower and his heirs as grantees. Before the grower had paid the retiree and taken his deed, the grower visited the orange grove and observed the fence. However, the buyer was not present when the grower visited the property and nothing suggested who \u0080\u0094if anyone \u0080\u0094was using it. 
In any case, the grower did not attempt to search the title before making his purchase. This jurisdiction uses Grantor \u0080\u0094Grantee Indices and has no Tract Index. In 2008, what is the present state of title to the orange grove if the jurisdiction's recording act provides: \"Every conveyance of real property shall be invalid as against any person, except the grantor, his heirs, and assigns, having actual notice thereof, unless it is recorded as provided by statute\"?",
"correct_answer": "A",
"choices": [
"In a notice jurisdiction, the grower, as a subsequent bonafide purchaser, is only chargeable with notice of what appears in his chain of title and, therefore, would acquire record title to the orange grove.",
"In a race \u0080\u0094notice jurisdiction, the farmer would acquire equitable title to the orange grove, because he erected the fence and failed to inform the buyer of the prior rancher-to-son instrument.",
"In a race-notice jurisdiction, the farmer, having recorded his deed first, would acquire record title to the orange grove.",
"In a race-notice jurisdiction, the grower, being the last to record, would acquire record title to the orange grove.",
"In a notice jurisdiction, the retiree would acquire record title to the orange grove even though he failed to record his deed.",
"In a race-notice jurisdiction, the son would acquire record title to the orange grove, because he is the original grantee.",
"In a race jurisdiction, the buyer, having recorded his deed before the grower, would acquire record title to the orange grove.",
"In a notice jurisdiction, the retiree, despite not recording his deed, would acquire equitable title to the orange grove due to his payment and receipt of the deed.",
"In a notice jurisdiction, the rancher retains record title to the orange grove, as he never properly conveyed his title."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_978": {
"question_id": "mmlu_pro_978",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A patent holder brought a patent infringement action in federal court against a licensee of the patent. The patent holder believed that a jury would be more sympathetic to his claims than a judge, and asked his lawyer to obtain a jury trial. What should the lawyer do to secure the patent holder's right to a jury trial?",
"correct_answer": "I",
"choices": [
"File and serve a jury trial demand within 60 days after the close of the pleadings.",
"File and serve a jury trial demand at the close of discovery.",
"Submit a jury trial demand by email to the judge's office.",
"File and serve a jury trial demand within 30 days after the close of the pleadings.",
"File and serve a jury trial demand at the initial court hearing.",
"File and serve a complaint that excludes a jury trial demand.",
"Make a verbal request for a jury trial during the first court hearing.",
"Make a jury trial demand at the initial pretrial conference.",
"File and serve a complaint that includes a jury trial demand.",
"File and serve a jury trial demand within 14 days after the complaint was served."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1567": {
"question_id": "mmlu_pro_1567",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Who said that \u201cJurisprudence is the eye of law\u201d ",
"correct_answer": "G",
"choices": [
"Savigny",
"Kelsen",
"Austin",
"Hart",
"Bentham",
"Dworkin",
"Laski",
"Pound",
"Blackstone",
"Maine"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_987": {
"question_id": "mmlu_pro_987",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "An investor sued a corporation for stock fraud. In presenting his case-in-chief, the investor sought to introduce an issue of a newspaper to show the corporation's stock price on that given day. Upon objection by the corporation's attorney, this evidence should be",
"correct_answer": "J",
"choices": [
"admitted, because it is relevant to the case.",
"admitted, under the business records exception.",
"excluded, because it is considered hearsay.",
"excluded, because the newspaper is not a credible source.",
"admitted, under the past recollection recorded exception.",
"admitted, under the public records exception.",
"excluded, because it violates the original document rule.",
"excluded, because the newspaper copy does not fit within any established exception to the hearsay rule.",
"excluded, because it violates the best evidence rule.",
"admitted, under the market reports exception."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1524": {
"question_id": "mmlu_pro_1524",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A buyer, located on the west coast contacted a seller, located on the east coast, about purchasing flanges that the buyer used in their manufacturing process. The parties entered into a contract whereby the seller agreed to deliver to the buyer 1,000 flanges for $10,000. The contract stipulated that it was FOB at the seller's place of business. The seller then delivered the flanges to a delivery company, which was to transport them to the buyer. While en route, the vehicle that was transporting the flanges was involved in an accident and the flanges were destroyed. When the buyer received notice of the accident, the buyer immediately contacted the seller and demanded that it ship replacement flanges. The seller refused to do so. In an action by the buyer against the seller for breach of contract, the buyer will",
"correct_answer": "H",
"choices": [
"succeed, because the accident happened before the goods reached the buyer.",
"succeed, because the risk of loss was on the seller.",
"not succeed, because the accident was an unforeseen event.",
"not succeed, because of frustration of purpose.",
"succeed, because the carrier was the seller's agent.",
"succeed, because the seller has a responsibility to ensure safe delivery of goods.",
"succeed, because the seller should have insured the goods.",
"not succeed, because the risk of loss was on the buyer."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1493": {
"question_id": "mmlu_pro_1493",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "Bob Wilson borrowed $20,000 from Ted Lamar to open a hardware store. Ted's only interest in the business was the repayment of his 5-year unsecured loan. Bob was so grateful for the loan that he named his business \"Wilson and Lamar Hardware\" and purchased signs and advertising displaying this name. He also listed Bob Wilson and Ted Lamar as \"partners\" on his stationery. When Ted found out, he was flattered to the point that he voluntarily reduced Bob's interest rate from 9 percent to 8 percent per annum. A few weeks later, Pete Smith, who had assumed that both Wilson and Lamar were operating the hardware store and was not familiar with the true situation, sold goods to Wilson and Lamar Hardware. Pete Smith has been unable to collect for the goods and he seeks your advice. Your advice to Pete is",
"correct_answer": "C",
"choices": [
"The Wilson and Lamar Hardware business entity is solely liable.",
"Neither Bob Wilson nor Ted Lamar is liable.",
"Bob Wilson and Ted Lamar are liable jointly.",
"Bob Wilson is liable up to the amount of the initial loan, and Ted Lamar is liable for the remainder.",
"Bob Wilson is liable for the entire amount and Ted Lamar is liable only to the extent the debt cannot be collected from Bob Wilson.",
"Ted Lamar is solely liable.",
"Only the de jure partnership arising from the relationship between Wilson and Lamar is liable.",
"Both Bob Wilson and Ted Lamar are independently liable.",
"only the de facto partnership arising from the relationship between Wilson and Lamar is liable.",
"only Bob Wilson is liable."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1330": {
"question_id": "mmlu_pro_1330",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A college student initiated a criminal case against his former roommate, claiming the former roommate snuck in and removed his state-of-the-art home theater equipment from his house. The college student took the complaint papers that were approved by the prosecution and filed them with a magistrate court. An ongoing police investigation later determined, however, that the college student was a vengeful person not averse to creating stories, and that he and the former roommate had been arguing over several related issues. They also determined that he never had any quality sound or video equipment. The police dropped the prosecution, and the criminal case was dismissed at the preliminary hearing. When the former roommate filed a civil case against the college student alleging the tort of malicious prosecution, will evidence of the student's reputation for dishonesty be admissible and why?",
"correct_answer": "F",
"choices": [
"Yes, because the student's reputation for dishonesty directly relates to the tort of malicious prosecution.",
"No, because reputation evidence is considered too prejudicial to be admissible.",
"Yes, because dishonesty at any time in the past is always relevant to a malicious prosecution complaint.",
"Yes, because the student's dishonesty directly affects the credibility of his initial criminal complaint.",
"Yes, because reputation evidence, just like most character evidence, is virtually always admissible.",
"Yes, because even though character evidence is too collateral to be admitted for circumstantial evidence, it is admissible if it is directly at issue in the case.",
"No, because character evidence is only admissible if it is directly related to the act in question.",
"No, because the past reputation of a person is not considered as evidence in court.",
"Yes, because while most character evidence is not admissible, some is, and reputation evidence is always admissible.",
"No, because character evidence is never admissible in any case."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_877": {
"question_id": "mmlu_pro_877",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A plaintiff sued the insurer of her home after the insurer denied coverage for water damage to the home allegedly caused by a frozen plastic pipe that burst. At trial, the insurer called as an expert witness an engineer, who testified that the pipe had burst because of age rather than freezing. On cross-examination, the engineer admitted that five years earlier, he had been convicted of tax fraud, even though he had asserted that it was his accountant's error. In response, the insurer calls a witness, who is well acquainted with the engineer and his reputation, to testify that (1) in the witness's opinion, the engineer is a truthful person, and (2) the engineer's neighbors all describe him as a truthful person. How much, if any, of the witness's testimony is admissible?",
"correct_answer": "E",
"choices": [
"None of the testimony is admissible, because it is collateral, having no bearing on the engineer's qualifications as an expert.",
"Only the portion concerning the engineer's reputation is admissible, because where both opinion and reputation evidence are available, only the latter is admissible under a rule of preference.",
"Both portions of the testimony are admissible, but only as they relate to the engineer's truthfulness, not his expertise.",
"None of the testimony is admissible, because character evidence is not relevant to the engineer's expertise.",
"All of the testimony is admissible to support the engineer's credibility.",
"Only the portion concerning the engineer's reputation is admissible, as it is directly relevant to his character for truthfulness.",
"Only the portion concerning the witness's opinion of the engineer's character is admissible, as it directly relates to his credibility.",
"Only the portion concerning the witness's opinion of the engineer's character, because the witness's reporting of the neighbors' comments is hearsay.",
"None of the testimony is admissible, because the witness is biased due to their acquaintance with the engineer.",
"All of the testimony is admissible, but the judge must instruct the jury to consider the engineer's past conviction."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2544": {
"question_id": "mmlu_pro_2544",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "There are several methods for estimating the test reliability, like the INTERNAL CONSISTENCY RELIABILITY for example. From your knowledge about this topic, find the correct definition of it.",
"correct_answer": "B",
"choices": [
"correlating the test results with the age of the test takers",
"correlations among individual test items",
"administering the test to two different groups and then correlating scores",
"correlating the test results with the test takers' socio-economic status",
"correlating the test results with another test",
"they are not used for an unstable trait",
"administering the test in multiple languages and then correlating scores",
"administer 2 equivalent forms of a test to same people and correlate",
"measuring the correlation between test results and the time it takes to complete the test",
"administer test twice to same people and then correlate scores"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2004": {
"question_id": "mmlu_pro_2004",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Which of the following illustrates why most people can detect the difference between Coke and Pepsi most of the time?",
"correct_answer": "J",
"choices": [
"Depth perception",
"Sensory adaptation",
"Subliminal perception",
"Perceptual constancy",
"Gestalt principles",
"Absolute threshold",
"Signal detection theory",
"Divided attention",
"Selective attention",
"Difference threshold"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1915": {
"question_id": "mmlu_pro_1915",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Ethnic identity refers to",
"correct_answer": "C",
"choices": [
"a rejection of Caucasian social, cultural, and institutional standards",
"the rejection of all cultural traditions and practices",
"the extent to which an individual endorses and practices the ethnocultural traditions of a given group",
"cultural practices and beliefs char are non-Western in orientation and perspective",
"the adherence to only Western cultural traditions and practices",
"the extent to which an individual rejects their own ethnocultural traditions and practices",
"the adoption of multiple ethnic identities and practices",
"the extent to which an individual practices the traditions of the majority culture",
"the collective cultural practices, traditions, and orientations of ethnic minority communities",
"the adoption of the cultural practices of the dominant group"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2649": {
"question_id": "mmlu_pro_2649",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "A group's performance on a(n) __________ task is limited by the performance of the least skilled or knowledgeable member of the group.",
"correct_answer": "B",
"choices": [
"substitutable",
"conjunctive",
"additive",
"divisible",
"disjunctive",
"complementary",
"sequential",
"cooperative",
"synchronized",
"compensatory"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2171": {
"question_id": "mmlu_pro_2171",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Which of the following is most likely a violation of ethical guidelines?",
"correct_answer": "E",
"choices": [
"a psychotherapist accepting gifts from a client as a form of payment for their services",
"a brochure containing client testimonials mailed to potential attendees at a conference for adult children of alcoholics",
"a therapist offering group therapy sessions at a discounted rate for college students during midterms",
"a display ad for psychotherapy services in the yellow pages of the local telephone directory",
"uninvited in-person solicitations for therapy of hurricane victims that destroyed their homes",
"an informational flyer about mental health services distributed at a community health fair",
"a counselor offering free stress management workshops at a local library",
"an advertisement offering a free initial therapy session that is published in the campus newspaper just prior to final exams",
"a psychiatrist advertising their services on their own private practice website"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2140": {
"question_id": "mmlu_pro_2140",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Describe scheme, assimilation and accommodation as defined by Jean Piaget.",
"correct_answer": "H",
"choices": [
"Schemes are learned responses to new stimuli. Assimilation is ignoring new environment. Accommodation is early infant behaviors.",
"Schemes are complex behaviors learned in adulthood. Assimilation is learning completely new responses to stimuli. Accommodation is ignoring new stimuli.",
"Schemes are innate reflexes that cannot be changed. Assimilation is adapting new stimuli to fit into existing schemes. Accommodation is the rejection of new stimuli that don't fit into existing schemes.",
"Schemes are mental representations of the world. Assimilation is the process of integrating new information into existing schemes without changing the schemes. Accommodation is the adjustment of existing schemes to incorporate new information.",
"Schemes are the only form of infant learning. Assimilation is the process of ignoring differences between stimuli. Accommodation is the process of retaining old behaviors in the face of new challenges.",
"Schemes are unconscious behaviors. Assimilation is changing previous behaviors to fit new environment. Accommodation is learning previously known methods.",
"Schemes are fixed psychological structures that determine behavior. Assimilation is the modification of the environment to fit pre-existing schemes. Accommodation is the alteration of schemes to adhere to societal norms.",
"Schemes are early behaviors such as sucking, looking, crying. Assimilation is when an individual uses previously learned responses to new stimuli. Accommodation is when an individual must learn new methods to cope with a new environment.",
"Schemes are the final stage of cognitive development. Assimilation is the refusal to alter cognitive structures in response to new information. Accommodation is the persistence of using known behaviors despite their ineffectiveness.",
"Schemes are temporary cognitive structures. Assimilation is the blending of new experiences with old memories. Accommodation is the discarding of obsolete schemes in favor of new ones."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2118": {
"question_id": "mmlu_pro_2118",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "For most children, stranger anxiety begins at about _____ months of age.",
"correct_answer": "C",
"choices": [
"4 to 6",
"11 to 13",
"8 to 10",
"2 to 3",
"3 to 5",
"15 to 18",
"1 to 2",
"12 to 14",
"7 to 9",
"6 to 7"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2032": {
"question_id": "mmlu_pro_2032",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "The concept of bureaucracy has come to have very negative connotations today. Discuss those factors you believe are responsible for this negative attitude.",
"correct_answer": "C",
"choices": [
"The bureaucratic approach is considered negative due to its overemphasis on innovation and risk-taking, leading to instability.",
"The bureaucratic approach is outdated and inefficient.",
"Weber's bureaucratic approach, with its lack of regard for the individual, assumption that humans are predictably alike, confusing rules and regulations, 'red tape', and clearly defined job descriptions detracting from cooperation and problem solving.",
"The perception of bureaucracy is poor because it prioritizes speed and efficiency over thoroughness and accuracy.",
"Negative attitudes towards bureaucracy arise from its encouragement of personal connections and favoritism over merit-based advancement.",
"Bureaucracies often receive criticism for promoting equality and fairness, which is seen as a hindrance to competitive advantage.",
"Bureaucracies are viewed negatively because they are too decentralized, allowing for a lack of accountability and coordination.",
"The bureaucratic approach is too flexible and lacks clear rules and procedures.",
"Bureaucracies are often disliked for their flexibility in adapting to new situations, which leads to inconsistency and confusion.",
"Weber's bureaucratic approach encourages individuality and creativity."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2644": {
"question_id": "mmlu_pro_2644",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "As used in personnel selection, \u201cadverse impact\u201d means that",
"correct_answer": "E",
"choices": [
"the total selection process results in equal results for all groups covered by law",
"a group covered by law is hired at a rate more than 80% of that of the group with the best selection",
"adverse conditions exist for selection of groups covered by law",
"a group not covered by law is hired at a higher rate than that of the group with the best selection",
"a group covered by law is hired at a rate less than 80% of that of the group with the best selection",
"a group not covered by law is hired at a rate less than 80% of that of the group with the best selection",
"the total selection process results in unequal results for one or more groups covered by law",
"selection interviews are \u201cadverse\u201d for groups covered by law and result in differential selection rates",
"adverse conditions exist for selection of groups not covered by law",
"selection interviews are \u201cfavorable\u201d for groups covered by law and result in differential selection rates"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1994": {
"question_id": "mmlu_pro_1994",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "The highest levels of learning and performance are usually associated with:",
"correct_answer": "F",
"choices": [
"constant changes in levels of arousal.",
"low levels of arousal.",
"arousal levels that are consistently low.",
"high levels of arousal.",
"complete absence of arousal.",
"moderate levels of arousal.",
"extreme levels of arousal.",
"variability in levels of arousal.",
"gradually increasing levels of arousal.",
"arousal levels that are consistently high."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2582": {
"question_id": "mmlu_pro_2582",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "If you believe another psychologist has committed an ethical violation, you should first:",
"correct_answer": "G",
"choices": [
"Report it to his or her state professional standards board",
"Ignore it as it's none of your business",
"Report it to the APA Ethics Committee",
"Write an anonymous letter to the psychologist's employer",
"Wait to see whether he or she commits a second violation",
"Confront the psychologist in a public forum",
"Discuss your concerns with him or her",
"Report it to the police",
"Report it to the media",
"Discuss it with your colleagues"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2648": {
"question_id": "mmlu_pro_2648",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "For practitioners of humanistic psychotherapy, psychopathology is the result of:",
"correct_answer": "A",
"choices": [
"blocked potential.",
"severe trauma.",
"genetic predisposition.",
"cognitive distortions.",
"lack of emotional intelligence.",
"unresolved conflicts.",
"chemical imbalances in the brain.",
"suppressed memories.",
"improper socialization."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2448": {
"question_id": "mmlu_pro_2448",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Nine rats run through a maze. The time each rat took to traverse the maze is recorded and these times are listed below. 1 min., 2.5 min., 3 min., 1.5 min., 2 min., 1.25 min., 1 min., .9 min., 30 min. Which of the three measures of central tendency would be the most appropriate in this case?",
"correct_answer": "D",
"choices": [
"range",
"trimmed mean",
"harmonic mean",
"median or mode",
"interquartile range",
"standard deviation",
"geometric mean",
"mean",
"weighted mean",
"midrange"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1979": {
"question_id": "mmlu_pro_1979",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "You receive an e-mail from Dr. Brenda Browne, a licensed psychologist, who is currently seeing one of your former clients for a substance abuse disorder. Dr. Browne asks you to forward the client\u2019s file to her and states that the client has signed a release authorizing you to do so. You should:",
"correct_answer": "J",
"choices": [
"Refuse to forward the file as it violates patient privacy laws.",
"wait until you receive a request from the client before taking any action.",
"Directly send the original file to the psychologist without any prior discussion with the client.",
"forward a photocopy of the file to the psychologist as requested.",
"Delete all the records related to the client's substance abuse disorder before sending the file.",
"Send only the parts of the file that were created during your treatment of the client.",
"forward a summary of the file to the psychologist that includes only information related to the client\u2019s current condition.",
"Ask Dr. Browne for a copy of the signed release before sending the file.",
"Forward the entire file digitally to the psychologist as requested.",
"contact the client to discuss the release of information."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2494": {
"question_id": "mmlu_pro_2494",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Discuss the advantages and disadvantages of doing field research and laboratory research with human subjects.",
"correct_answer": "F",
"choices": [
"Laboratory research results are less valid than field research because they are always artificial.",
"Laboratory research is always preferable over field research",
"Field research is always better than laboratory research",
"Field research is less ethical than laboratory research because it involves real-world scenarios.",
"Field research yields quicker results than laboratory research.",
"Field research focuses on real people in real-world situations offering more impact but less control and laboratory research provides controlled situations with less impact but more control. There is a basic tradeoff between impact and control in these two methods.",
"Field research provides more control than laboratory research",
"Laboratory research can be conducted in any environment, while field research is limited to specific settings.",
"Field research typically requires more resources than laboratory research.",
"Laboratory research allows for studying larger groups than field research does."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2322": {
"question_id": "mmlu_pro_2322",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Describe Spitz's (1945) study of sensory deprivation in a social setting. What do his findings suggest?",
"correct_answer": "I",
"choices": [
"Social deprivation has no significant impact on emotional growth",
"Sensory and social deprivation enhance development",
"Sensory deprivation can improve problem-solving abilities",
"Spitz's study suggests that social deprivation can lead to accelerated cognitive development",
"Sensory and social deprivation only affect physical growth",
"Sensory and social deprivation only affect language development",
"Sensory deprivation is beneficial for short periods",
"Sensory deprivation has no effect on development",
"Sensory and social deprivation can adversely affect normal development",
"Sensory and social deprivation lead to improved social skills later in life"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1922": {
"question_id": "mmlu_pro_1922",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Antonia has a cat. The first time she sees a rabbit, she calls it a cat. Her mistake is due to the process of",
"correct_answer": "I",
"choices": [
"association.",
"generalization.",
"recognition.",
"classification.",
"interpretation.",
"discrimination.",
"accommodation.",
"confirmation.",
"assimilation.",
"differentiation."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1920": {
"question_id": "mmlu_pro_1920",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Learned helplessness is an example of the power of",
"correct_answer": "G",
"choices": [
"cognitive dissonance",
"positive reinforcement",
"classical conditioning",
"intrinsic motivation",
"observational learning",
"modeling",
"expectation",
"operant conditioning",
"negative reinforcement",
"social learning"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1985": {
"question_id": "mmlu_pro_1985",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "According to Bronfenbrenner\u2019s (1979) ecological model, the ______ consists of interactions between elements of the microsystem (e.g., between the family and the school).",
"correct_answer": "G",
"choices": [
"nanosystem",
"ecosystem",
"psychosystem",
"biosystem",
"endosystem",
"chronosystem",
"mesosystem",
"macrosystem",
"exosystem",
"sociosystem"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2113": {
"question_id": "mmlu_pro_2113",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Operant extinction would NOT be the treatment-of-choice when:",
"correct_answer": "E",
"choices": [
"the behavior has been reinforced on a continuous schedule.",
"the behavior is not causing significant distress or dysfunction.",
"the behavior is a part of the individual's cultural or religious practices.",
"an alternative behavior cannot be identified.",
"a temporary increase in the behavior cannot be tolerated.",
"the baseline levels of the behavior cannot be established."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2128": {
"question_id": "mmlu_pro_2128",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Which best represents the path of an impulse over a reflex arc?",
"correct_answer": "J",
"choices": [
"interneuron, afferent neuron, efferent neuron, receptor, effector",
"effector, sensory neuron, afferent neuron, interneuron, receptor",
"sensory neuron, interneuron, afferent neuron, efferent neuron, effector",
"sensory neuron, afferent neuron, interneuron, efferent neuron, receptor",
"receptor, efferent neuron, interneuron, afferent neuron, affector",
"efferent neuron, interneuron, afferent neuron, receptor, effector",
"receptor, afferent neuron, efferent neuron, interneuron, effector",
"receptor, efferent neuron, afferent neuron, interneuron, effector",
"afferent neuron, receptor, efferent neuron, interneuron, effector",
"receptor, afferent neuron, interneuron, efferent neuron, effector"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2407": {
"question_id": "mmlu_pro_2407",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Which of the following values is least characteristic of Asians and Asian Americans?",
"correct_answer": "G",
"choices": [
"fatalism",
"shame",
"pessimism",
"individualism",
"materialism",
"competitiveness",
"egalitarianism",
"extraversion",
"hedonism"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2506": {
"question_id": "mmlu_pro_2506",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Discuss the function of a theory of personality.",
"correct_answer": "J",
"choices": [
"Theories of personality are purely speculative and do not have any empirical backing.",
"Theories of personality are temporary trends that change with each new generation.",
"Theories of personality are unchangeable and always accurate.",
"Theories of personality are irrelevant for understanding human behavior.",
"Theories of personality are synonymous with psychological disorders.",
"Theories of personality are concrete facts about human behavior.",
"Theories of personality are fixed sets of traits that apply universally to all individuals.",
"Theories of personality dictate an individual's destiny and cannot be altered.",
"Theories of personality are solely based on biological factors and disregard environmental influences.",
"Theories of personality are useful, though expendable guidelines to research and thinking. They help us make sense of human conduct, discover uniformities of character among individuals, and devise general principles to explain particular motives."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1917": {
"question_id": "mmlu_pro_1917",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Which classical conditioning term best describes the following scenario: Later in his classical conditioning experiments, Ivan Pavlov's dogs began to salivate whenever they heard any sound similar to a bell, such as a doorbell or someone accidentally clinking a water glass.",
"correct_answer": "B",
"choices": [
"negative reinforcement",
"generalization",
"acquisition",
"discrimination",
"counter conditioning",
"trace conditioning",
"spontaneous recovery",
"stimulus control",
"extinction",
"operant conditioning"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2464": {
"question_id": "mmlu_pro_2464",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "The primary goal of a newly-developed community-based mental health program is to help people recently released from a psychiatric hospital adjust to life in the community. This is an example of:",
"correct_answer": "F",
"choices": [
"early intervention.",
"reactive intervention.",
"quaternary prevention.",
"emergency intervention.",
"crisis intervention.",
"tertiary prevention.",
"primary prevention.",
"preventive care."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2093": {
"question_id": "mmlu_pro_2093",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "In a family where all male members have been doctors for generations, Peter decides to become an artist. Although his parents claim to love him they are very upset over his choice. How would Rogers explain this attitude shown by Peter's parents?",
"correct_answer": "B",
"choices": [
"Peter's parents wanted him to continue the family legacy.",
"Peter's parents' attitude can be explained by the threat to their self-structure, which contains the notion that to be of value, a male family member must be a doctor. This idea is incongruent with their love for their son, causing distress.",
"Peter's parents believe that being a doctor is the only respectable profession in society.",
"Peter's parents are afraid that Peter's choice indicates a rejection of family values.",
"Peter's parents think Peter is not talented enough to be a successful artist.",
"Peter's parents are worried about the social stigma associated with not following family traditions.",
"Peter's parents are concerned about the financial stability of being an artist.",
"Peter's parents are upset because they don't value the arts.",
"Peter's parents are upset because they fear he will not be able to contribute to the family's reputation.",
"Peter's parents assume that Peter's decision is a phase and he will eventually become a doctor."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2623": {
"question_id": "mmlu_pro_2623",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "What are the four basic factors in learning?",
"correct_answer": "B",
"choices": [
"perception, insight, retention, recall",
"arousal, motivation, reinforcement, and association",
"analysis, synthesis, evaluation, creation",
"encoding, storage, retrieval, adaptation",
"observation, imitation, practice, feedback",
"cognition, emotion, behavior, environment",
"comprehension, experimentation, reflection, adaptation",
"attention, emotion, motivation, repetition",
"exposure, response, consolidation, transformation",
"memory, attention, understanding, application"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2555": {
"question_id": "mmlu_pro_2555",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Some contemporary intelligence researchers like Howard Gardner and Robert Sternberg complain that schools focus too much on",
"correct_answer": "C",
"choices": [
"nonessential subjects like art and music.",
"fostering collaborative learning environments.",
"traditional subjects and methods.",
"promoting physical education and sports.",
"cultivating social intelligence.",
"the development of emotional intelligence.",
"individual learning styles and preferences.",
"encouraging creative thought.",
"the use of technology in education.",
"the environmental factors that influence the expression of intelligence."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2608": {
"question_id": "mmlu_pro_2608",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Although a man watched in horror as his wife and children were killed by a speeding truck as they crossed the street, he has no memory of the event and gets upset when people tell him he must remember. The man is most likely suffering from",
"correct_answer": "D",
"choices": [
"major depressive disorder",
"antisocial personality disorder",
"bipolar disorder",
"dissociative amnesia",
"conversion disorder",
"obsessive-compulsive disorder",
"schizophrenia",
"generalized anxiety disorder",
"post-traumatic stress disorder",
"panic disorder"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2319": {
"question_id": "mmlu_pro_2319",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "A couple has six children whose ages are 6, 8, 10, 12, 14, and 16. Find the variance in ages.",
"correct_answer": "D",
"choices": [
"10",
"13.2",
"9.5",
"11.7",
"15",
"16.8",
"20",
"12.5",
"14",
"18.3"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2115": {
"question_id": "mmlu_pro_2115",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Carol and Jim are experiencing marital difficulties. They have two school-aged children, a boy and a girl. According to meta-analysis of research on the relationship between marital discord and child behavior problems, it would be expected that",
"correct_answer": "E",
"choices": [
"the girl is more likely than the boy to manifest an immediate behavior problem",
"the boy is less likely than the girl to manifest an immediate behavior problem",
"neither child is more likely to manifest an immediate behavior problem than are children of stable marriages",
"neither child is expected to manifest any behavior problem",
"the boy is more likely than the girl to manifest an immediate behavior problem",
"both children are less likely to manifest an immediate behavior problem than are children of stable marriages",
"neither child is less likely to manifest an immediate behavior problem than are children of stable marriages",
"the girl is less likely than the boy to manifest an immediate behavior problem",
"both children are equally likely to manifest an immediate behavior problem",
"both children are more likely to manifest a delayed behavior problem"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2349": {
"question_id": "mmlu_pro_2349",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "A score of 50 on the Beck Depression Inventory-II suggests:",
"correct_answer": "J",
"choices": [
"moderate depression.",
"extremely severe depression.",
"mild to moderate depression.",
"depression with anxiety disorder.",
"mild depression.",
"severe anxiety, not depression.",
"depression with psychotic features.",
"borderline clinical depression.",
"no or minimal depression.",
"severe depression."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2493": {
"question_id": "mmlu_pro_2493",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Discuss and distinguish between discrete and continuous values.",
"correct_answer": "A",
"choices": [
"Continuous values can take on any fractional or integer value between specified limits, while discrete values are usually restricted to whole-number values.",
"Continuous values are countable in finite steps, while discrete values can represent an infinite range.",
"Continuous values are restricted to a specific set of numbers, while discrete values can be any number within a range.",
"Continuous values are always integers, while discrete values can be fractions.",
"Discrete values have a clear distinction between each other, while continuous values overlap.",
"Discrete values are used for categorization, while continuous values cannot be categorized.",
"Continuous values are only applicable in digital systems, while discrete values are used in analog systems.",
"Discrete values can only be whole numbers, while continuous values can only be fractions.",
"Discrete values are measured in intervals, while continuous values are not measurable.",
"Discrete values can take any value, while continuous values are restricted to whole numbers."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2174": {
"question_id": "mmlu_pro_2174",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Erich Fromm states that the society prevents you from realizing true nature to be loving, creative, and productive. Which of the responses to societal demands allow a person to realize that?",
"correct_answer": "B",
"choices": [
"The authoritative allows a person to realize its true nature, to be loving, creative, productive",
"The productive allows a person to realize its true nature, to be loving, creative, etc.",
"The destructive allows a person to realize its true nature, to be loving, creative, productive",
"The exploitative allows a person to realize its true nature, to be loving, creative, productive",
"The manipulative allows a person to realize its true nature, to be loving, creative, productive",
"The marketing surely allows a person to realize its true nature, to be loving, creative, productive",
"The hoarding allows a person to realize its true nature, to be loving, creative, productive",
"The submissive allows a person to realize its true nature, to be loving, creative, productive",
"The controlling allows a person to realize its true nature, to be loving, creative, productive",
"The receptive definitely allows a person to realize its true nature, to be loving, creative, productive"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1896": {
"question_id": "mmlu_pro_1896",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "The _______ is the least developed area of the brain at birth.",
"correct_answer": "D",
"choices": [
"occipital lobe",
"temporal lobe",
"parietal lobe",
"cerebral cortex",
"limbic system",
"hypothalamus",
"thalamus",
"cerebellum",
"brain stem"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4864": {
"question_id": "mmlu_pro_4864",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "The Egyptian system of hieroglyphics:",
"correct_answer": "C",
"choices": [
"was a simple form of writing with one symbol representing one word",
"all of the above",
"appears to have developed suddenly",
"did not use pictographs",
"was borrowed from the Sumerians",
"was only deciphered in the 20th century",
"was only used for religious texts",
"was replaced by the Latin alphabet during the Roman period",
"was the earliest form of writing in the world",
"was only used by the royal family"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4594": {
"question_id": "mmlu_pro_4594",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nNo task is more urgent than that of preserving peace. Without peace our independence means little. The rehabilitation and upbuilding of our countries will have little meaning. Our revolutions will not be allowed to run their course. What can we do? We can do much! We can inject the voice of reason into world affairs. We can mobilize all the spiritual, all the moral, all the political strength of Asia and Africa on the side of peace. Yes, we! We, the peoples of Asia and Africa, 1.4 billion strong.\nIndonesian leader Sukarno, keynote address to the Bandung Conference, 1955\nLike numerous other leaders in Africa, Asia, and the Middle East, Sukarno attempted, but did not completely succeed, in maintaining a neutral stance during",
"correct_answer": "C",
"choices": [
"the Indo-Pakistani Wars.",
"the Vietnam conflicts.",
"the Cold War.",
"the Arab-Israeli conflict.",
"the Algerian War.",
"the Cuban Missile Crisis.",
"the Iran-Iraq War.",
"the Korean War.",
"the Suez Crisis.",
"the Gulf War."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4549": {
"question_id": "mmlu_pro_4549",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"Those whose condition is such that their function is the use of their bodies and nothing better can be expected of them, those, I say, are slaves of nature. It is better for them to be ruled thus.\"\nJuan de Sepulveda, Politics, 1522\n\"When Latin American nations gained independence in the 19th century, those two strains converged, and merged with an older, more universalist, natural law tradition. The result was a distinctively Latin American form of rights discourse. Paolo Carozza traces the roots of that discourse to a distinctive application, and extension, of Thomistic moral philosophy to the injustices of Spanish conquests in the New World. The key figure in that development seems to have been Bartolom\u00e9 de Las Casas, a 16th-century Spanish bishop who condemned slavery and championed the cause of Indians on the basis of a natural right to liberty grounded in their membership in a single common humanity. 'All the peoples of the world are humans,' Las Casas wrote, and 'all the races of humankind are one.' According to Brian Tierney, Las Casas and other Spanish Dominican philosophers laid the groundwork for a doctrine of natural rights that was independent of religious revelation 'by drawing on a juridical tradition that derived natural rights and natural law from human rationality and free will, and by appealing to Aristotelian philosophy.'\"\nMary Ann Glendon, \"The Forgotten Crucible: The Latin American Influence on the Universal Human Rights Idea,\u201d 2003\nWhich one of the following statements about the Spanish conquest of the Americas is most accurate?",
"correct_answer": "I",
"choices": [
"African slavery was a direct result of Spanish settlements in Florida.",
"The Spanish conquest of the Americas was motivated by a desire to spread Aristotelian philosophy.",
"Due to racial prejudice, Spanish explorers shunned intermarriage with native people.",
"The Spanish conquest of the Americas was universally condemned by the Catholic Church.",
"Juan de Sepulveda was a major critic of the Spanish conquest, due to his belief in natural law.",
"Bartolom\u00e9 de Las Casas supported the Spanish conquest because he believed it would improve the lives of the indigenous people.",
"Early native civilizations in Mexico introduced Spanish explorers to cattle ranching and wheat cultivation.",
"The Spanish conquest of the Americas led directly to the abolition of slavery.",
"Christopher Columbus was not the first European to have explored North America.",
"Spanish conquerors were influenced by the Native American belief in natural rights."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4916": {
"question_id": "mmlu_pro_4916",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nSource 1:\n\"Moreover, Sir, in our Kingdoms there is another great inconvenience which is of little service to God, and this is that many of our people, keenly desirous as they are of the wares and things of your Kingdoms, which are brought here by your people, and in order to satisfy their voracious appetite, seize many of our people, freed and exempt men, and very often it happens that they kidnap even noblemen and our relatives, and take them to be sold to the white men who are in our kingdoms.\"\nLetter from King Afonso I of Kongo to King John III of Portugal, 1526\nSource 2:\n\"This expedition has cost us much: it would be unreasonable to send it home with empty hands. Although our [principal] wish is to serve God and the pleasure of the king of Kongo, none the less you will make him understand\u2014as though speaking in our name\u2014what he should do to fill the ships, whether with slaves, or copper, or ivory.\"\nInstructions from King Manuel of Portugal to his envoy in Kongo, 1512\nThe two letters best support which of the following conclusions?",
"correct_answer": "D",
"choices": [
"Diplomatic relations between Portugal and Kongo were often cordial.",
"Kongolese noblemen were frequently taken as slaves by the Portuguese.",
"Trade between Portugal and Kongo was highly regulated.",
"Trade between Portugal and Kongo was brisk with both parties."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4677": {
"question_id": "mmlu_pro_4677",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "The _______ developed one of the earliest kingdoms in South America, reaching its height at around _________.",
"correct_answer": "B",
"choices": [
"Inca; AD 200",
"Moche; AD 400",
"Olmec; AD 100",
"Maya; AD 400",
"Zapotec; AD 500",
"Chavin; AD 600",
"Wari; AD 100",
"Aztec; AD 200",
"Toltec; AD 400"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4662": {
"question_id": "mmlu_pro_4662",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nRead the the following poem to answer questions.\nTake up the White Man's burden\u2014\nSend forth the best ye breed\u2014\nGo bind your sons to exile\nTo serve your captives' need;\nTo wait in heavy harness,\nOn fluttered folk and wild\u2014\nYour new-caught, sullen peoples,\nHalf-devil and half-child.\n...\nTake up the White Man's burden\u2014\nThe savage wars of peace\u2014\nFill full the mouth of Famine\nAnd bid the sickness cease;\nAnd when your goal is nearest\nThe end for others sought\u2026\nWatch sloth and heathen Folly\nBring all your hopes to naught.\nRudyard Kipling, The White Man's Burden, 1899\nIt is generally acknowledged that Kipling fashioned White Man's Burden to address the subject of the American colonization of the Philippines, which the United States had recently won from Spain in the Spanish-American War. With that information in mind, what message can Kipling be said to be offering the Americans in this excerpt?",
"correct_answer": "I",
"choices": [
"A call for all Western nations to unite in the effort of colonization",
"A critique of the American government for engaging in the act of colonization",
"An appeal to respect the cultural differences and practices of colonized peoples",
"An exhortation to Christianize the heathen peoples of the world",
"A warning to avoid the war and hardships of imperialism",
"An endorsement of the use of force and military might in colonization",
"A plea to the Americans to return the colonized peoples to their homelands",
"An encouragement to Americans to seize the economic opportunities offered by the colonies",
"A reminder of the responsibility of advanced civilizations to bring the benefits of modern civilization to less-developed peoples",
"An ironic presentation of the sheer folly of imperialism"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4651": {
"question_id": "mmlu_pro_4651",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "The native peoples of the northwest coast of North America were:",
"correct_answer": "H",
"choices": [
"simple foragers.",
"maize agriculturalists.",
"conquered by the Aztec.",
"dependent on hunting large game.",
"primarily seafaring traders.",
"primarily desert dwellers.",
"known for their pottery skills.",
"affluent foragers.",
"nomadic herders.",
"early industrialists."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4608": {
"question_id": "mmlu_pro_4608",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nThe 1980s have been born in turmoil, strife, and change. This is a time of challenge to our interests and our values and it's a time that tests our wisdom and skills.\nAt this time in Iran, 50 Americans are still held captive, innocent victims of terrorism and anarchy. Also at this moment, massive Soviet troops are attempting to subjugate the fiercely independent and deeply religious people of Afghanistan. These two acts\u2014one of international terrorism and one of military aggression\u2014present a serious challenge to the United States of America and indeed to all the nations of the world. Together we will meet these threats to peace.\u2026\nThree basic developments have helped to shape our challenges: the steady growth and increased projection of Soviet military power beyond its own borders; the overwhelming dependence of the Western democracies on oil supplies from the Middle East; and the press of social and religious and economic and political change in the many nations of the developing world, exemplified by the revolution in Iran.\nEach of these factors is important in its own right. Each interacts with the others. All must be faced together, squarely and courageously. We will face these challenges, and we will meet them with the best that is in us. And we will not fail.\n\u2014Jimmy Carter, State of the Union Address, January 23, 1980\nThe situation Carter described led most directly to which of the following?",
"correct_answer": "C",
"choices": [
"The withdrawal of Soviet troops from Afghanistan",
"Carter's victory in the next presidential election",
"Carter's defeat in the next presidential election",
"An American invasion in the Middle East",
"The establishment of a new government in Afghanistan",
"An economic boom in the United States",
"The creation of the North Atlantic Treaty Organization (NATO)",
"A diplomatic resolution with the Soviet Union",
"The establishment of the United Nations",
"The signing of a peace treaty with Iran"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4914": {
"question_id": "mmlu_pro_4914",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nThe text below is the government proclamation.\nOn the basis of the above-mentioned new arrangements, the serfs will receive in time the full rights of free rural inhabitants.\nThe nobles, while retaining their property rights to all the lands belonging to them, grant the peasants perpetual use of their household plots in return for a specified obligation[; . . . the nobles] grant them a portion of arable land fixed by the said arrangements as well as other property. . . . While enjoying these land allotments, the peasants are obliged, in return, to fulfill obligations to the noblemen fixed by the same arrangements. In this status, which is temporary, the peasants are temporarily bound. . . .\n[T]hey are granted the right to purchase their household plots, and, with the consent of the nobles, they may acquire in full ownership the arable lands and other properties which are allotted them for permanent use. Following such acquisition of full ownership of land, the peasants will be freed from their obligations to the nobles for the land thus purchased and will become free peasant landowners.\nWE have deemed it advisable:\n3. To organize Peace Offices on the estates of the nobles, leaving the village communes as they are, and to open cantonal offices in the large villages and unite small village communes.\n4. To formulate, verify, and confirm in each village commune or estate a charter which will specify, on the basis of local conditions, the amount of land allotted to the peasants for permanent use, and the scope of their obligations to the nobleman for the land.\n6. Until that time, peasants and household serfs must be obedient towards their nobles, and scrupulously fulfill their former obligations.\n7. 
The nobles will continue to keep order on their estates, with the right of jurisdiction and of police, until the organization of cantons and of cantonal courts.\n\u2014Alexander II, \"The Abolition of Serfdom in Russia,\" Manifesto of February 19, 1861\nWhich of the following was a major impetus in convincing Tsar Alexander II of the necessity of freeing the serfs?",
"correct_answer": "A",
"choices": [
"Recent defeat in the Crimean War convinced the tsar some domestic reforms were necessary.",
"The Tsar wanted to improve his popularity among the Russian people.",
"The spread of socialist ideas among the serfs was causing unrest.",
"The increasing population of serfs was becoming too difficult to manage.",
"A labor force to complete the Trans-Siberian Railroad was needed as well as military recruits.",
"The Tsar was motivated by a desire to modernize and industrialize Russia.",
"The Decembrist Revolt and its aftermath had convinced the young tsar to make reforms.",
"The Tsar believed that freeing the serfs would help Russia in its competition with Western powers.",
"The Tsar was influenced by the writings of liberal philosophers.",
"Enlightened rulers in Prussia and Austria had recently done the same, which pressured Alexander II to act."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4589": {
"question_id": "mmlu_pro_4589",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"Those whose condition is such that their function is the use of their bodies and nothing better can be expected of them, those, I say, are slaves of nature. It is better for them to be ruled thus.\"\nJuan de Sepulveda, Politics, 1522\n\"When Latin American nations gained independence in the 19th century, those two strains converged, and merged with an older, more universalist, natural law tradition. The result was a distinctively Latin American form of rights discourse. Paolo Carozza traces the roots of that discourse to a distinctive application, and extension, of Thomistic moral philosophy to the injustices of Spanish conquests in the New World. The key figure in that development seems to have been Bartolom\u00e9 de Las Casas, a 16th-century Spanish bishop who condemned slavery and championed the cause of Indians on the basis of a natural right to liberty grounded in their membership in a single common humanity. 'All the peoples of the world are humans,' Las Casas wrote, and 'all the races of humankind are one.' According to Brian Tierney, Las Casas and other Spanish Dominican philosophers laid the groundwork for a doctrine of natural rights that was independent of religious revelation 'by drawing on a juridical tradition that derived natural rights and natural law from human rationality and free will, and by appealing to Aristotelian philosophy.'\"\nMary Ann Glendon, \"The Forgotten Crucible: The Latin American Influence on the Universal Human Rights Idea,\u201d 2003\nMaize cultivation among the native peoples of Mexico is most analogous to which of the following?",
"correct_answer": "G",
"choices": [
"Buffalo hunting among the Lakota Sioux",
"Whaling by the Makah",
"Seal hunting among the Inuit",
"Salmon fishing among the Chinook",
"Cattle herding by the Apache",
"Wolf domestication by the Algonquians",
"Mixed agriculture among the Iroquois",
"Deer hunting by the Seminole"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4883": {
"question_id": "mmlu_pro_4883",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"But you, my dear Pangloss,\" said Candide, \"how can it be that I behold you again?\"\n\"It is true,\" said Pangloss, \"that you saw me hanged&\u2026.A surgeon purchased my body, carried home, and dissected me. He began with making a crucial incision on me from the navel to the clavicula. One could not have been worse hanged than I was. The executioner of the Holy Inquisition was a sub-deacon, and knew how to burn people marvellously well, but he was not accustomed to hanging. The cord was wet and did not slip properly, and besides it was badly tied; in short, I still drew my breath, when the crucial incision made me give such a frightful scream that my surgeon fell flat upon his back&\u2026[At length he] sewed up my wounds; his wife even nursed me. I was upon my legs at the end of fifteen days&\u2026.\nOne day I took it into my head to step into a mosque, where I saw an old Iman and a very pretty young devotee who was saying her paternosters&\u2026.She dropped her bouquet; I picked it up, and presented it to her with a profound reverence. I was so long in delivering it that the Iman began to get angry, and seeing that I was a Christian he called out for help. They carried me before the cadi, who ordered me a hundred lashes on the soles of the feet and sent me to the galleys. I was chained to the very same galley and the same bench as the young Baron. On board this galley there were four young men from Marseilles, five Neapolitan priests, and two monks from Corfu, who told us similar adventures happened daily. 
The Baron maintained that he had suffered greater injustice than I&\u2026.We were continually disputing, and received twenty lashes with a bull's pizzle when the concatenation of universal events brought you to our galley, and you were good enough to ransom us.\"\n\"Well, my dear Pangloss,\" said Candide to him, \"when you had been hanged, dissected, whipped, and were tugging at the oar, did you always think that everything happens for the best?\"\n\"I am still of my first opinion,\" answered Pangloss, \"for I am a philosopher and I cannot retract, especially as Leibnitz could never be wrong; and besides, the pre-established harmony is the finest thing in the world, and so is his plenum and materia subtilis.\"\nVoltaire, French Enlightenment writer, Candide, 1759\nThe critiques offered by Voltaire through Candide are most closely shared by what other philosopher?",
"correct_answer": "A",
"choices": [
"David Hume",
"Cesare Baccaria",
"Karl Marx",
"Adam Smith",
"Jean-Jacques Rousseau",
"Friedrich Nietzsche",
"John Locke",
"S\u00f8ren Kierkegaard",
"Immanuel Kant",
"Thomas Hobbes"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4816": {
"question_id": "mmlu_pro_4816",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"The spontaneous forces of capitalism have been steadily growing in the countryside in recent years, with new rich peasants springing up everywhere and many well-to-do middle peasants striving to become rich peasants. On the other hand, many poor peasants are still living in poverty for lack of sufficient means of production, with some in debt and others selling or renting out their land. If this tendency goes unchecked, the polarization in the countryside will inevitably be aggravated day by day. Those peasants who lose their land and those who remain in poverty will complain that we are doing nothing to save them from ruin or to help them overcome their difficulties. Nor will the well-to-do middle peasants who are heading in the capitalist direction be pleased with us, for we shall never be able to satisfy their demands unless we intend to take the capitalist road. Can the worker-peasant alliance continue to stand in these circumstances? Obviously not! There is no solution to this problem except on a new basis. And that means to bring about, step by step, the socialist transformation of the whole of agriculture simultaneously with the gradual realization of socialist industrialization and the socialist transformation of handicrafts and capitalist industry and commerce; in other words, it means to carry out co-operation and eliminate the rich-peasant economy and the individual economy in the countryside so that all the rural people will become increasingly well off together. We maintain that this is the only way to consolidate the worker-peasant alliance.\"\nMao Zedong, On the Question of Agricultural Co-operation, 1955\nMao's view of the cooperation of peasant labor most directly reflects the influence of which of the following?",
"correct_answer": "A",
"choices": [
"The ideals of communism as stated by Joseph Stalin",
"The ideals of democratic socialism as stated by Salvador Allende",
"The ideals of social democracy as stated by Bernie Sanders",
"The ideals of globalization as evidenced by multinational trade blocs such as NAFTA or the European Union",
"The ideals of religious fundamentalism as stated by the Ayatollah Khomeini",
"The ideals of capitalist theory as stated by Milton Friedman",
"The ideals of classical liberalism as stated by Adam Smith",
"The ideals of nationalism as evidenced by the rise of populist movements in the 21st century",
"The ideals of the Enlightenment as stated by political revolutionaries such as Maximilian Robespierre",
"The ideals of fascism as stated by Benito Mussolini"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4581": {
"question_id": "mmlu_pro_4581",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"The far-reaching, the boundless future will be the era of American greatness. In its magnificent domain of space and time, the nation of many nations is destined to manifest to mankind the excellence of divine principles; to establish on earth the noblest temple ever dedicated to the worship of the Most High\u2014the Sacred and the True. Its floor shall be a hemisphere\u2014its roof the firmament of the star-studded heavens, and its congregation a Union of many Republics, comprising hundreds of happy millions, calling, owning no man master, but governed by God's natural and moral law of equality, the law of brotherhood\u2014of 'peace and good will amongst men.'\"\nJohn L. O'Sullivan, \"The Great Nation of Futurity,\" 1839\nBy what means did the United States take possession of the Oregon Territory?",
"correct_answer": "H",
"choices": [
"The United States annexed it after a revolt by American settlers living in the territory.",
"The United States bought it from the Native Americans who lived there.",
"The territory was divided between the United States and Canada after a vote by residents.",
"U.S. settlers were the first to arrive in the region; they claimed it for their country.",
"The United States claimed it after discovering it during an exploration.",
"The United States won it from Mexico in a war.",
"The United States inherited it from Spain as part of the Louisiana Purchase.",
"Great Britain ceded it to the United States as part of a negotiated treaty.",
"The United States was granted the territory in a postwar treaty with France.",
"The territory was gifted to the United States by Russia."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4839": {
"question_id": "mmlu_pro_4839",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nPerhaps, however, I am more conscious of the importance of civil liberties in this particular moment of our history than anyone else, because I travel through the country and meet people and see things that have happened to little people, I realize what it means to democracy to preserve our civil liberties.\nAll through the years we have had to fight for civil liberty, and we know that there are times when the light grows rather dim, and every time that happens democracy is in danger. Now, largely because of the troubled state of the world as a whole, civil liberties have disappeared in many other countries.\nIt is impossible, of course, to be at war and keep freedom of the press and freedom of speech and freedom of assembly. They disappear automatically. And so in many countries where ordinarily they were safe, today they have gone. In other countries, even before war came, not only freedom of the press and freedom of assembly, and freedom of speech disappeared, but freedom of religion disappeared.\nAnd so we know here in this country, we have a grave responsibility. We are at peace. We have no reason for the fears which govern so many other peoples throughout the world; therefore, we have to guard the freedoms of democracy.\n\u2014Eleanor Roosevelt, Address to the American Civil Liberties Union, Chicago, Illinois, March 14, 1940\nIn her speech, Eleanor Roosevelt alluded to the earlier threat to civil liberties created by which of the following?",
"correct_answer": "B",
"choices": [
"The Great Depression",
"World War I",
"The War of 1812",
"The Cold War",
"The American Revolutionary War",
"The New Deal",
"The Korean War",
"The Civil Rights Movement",
"The Vietnam War",
"The Spanish-American War"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4753": {
"question_id": "mmlu_pro_4753",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nRead the following governmental regulation.\nPress Law\nSo long as this decree shall remain in force no publication which appears in the form of daily issues, or as a serial not exceeding twenty sheets of printed matter, shall go to press in any state of the union without the previous knowledge and approval of the state officials. Writings which do not belong to one of the above-mentioned classes shall be treated according to the laws now in force, or which may be enacted, in the individual states of the union. . . . Each state of the union is responsible, not only to the state against which the offense is directly committed, but to the whole Confederation, for every publication appearing under its supervision in which the honor or security of other states is infringed or their constitution or administration attacked. . . .\n\u2014Carlsbad Resolutions adopted by the Germanic States, 1819\nWhich of the following nineteenth-century figures would have been the LEAST likely to oppose those liberals described in the above decrees?",
"correct_answer": "H",
"choices": [
"John Stuart Mill",
"Napoleon Bonaparte",
"Simon Bolivar",
"Queen Victoria",
"William Frederick I",
"Pope Pius IX",
"Otto von Bismarck",
"Giuseppe Mazzini",
"Karl Marx",
"Klemens Von Metternich"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4553": {
"question_id": "mmlu_pro_4553",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"Those whose condition is such that their function is the use of their bodies and nothing better can be expected of them, those, I say, are slaves of nature. It is better for them to be ruled thus.\"\nJuan de Sepulveda, Politics, 1522\n\"When Latin American nations gained independence in the 19th century, those two strains converged, and merged with an older, more universalist, natural law tradition. The result was a distinctively Latin American form of rights discourse. Paolo Carozza traces the roots of that discourse to a distinctive application, and extension, of Thomistic moral philosophy to the injustices of Spanish conquests in the New World. The key figure in that development seems to have been Bartolom\u00e9 de Las Casas, a 16th-century Spanish bishop who condemned slavery and championed the cause of Indians on the basis of a natural right to liberty grounded in their membership in a single common humanity. 'All the peoples of the world are humans,' Las Casas wrote, and 'all the races of humankind are one.' According to Brian Tierney, Las Casas and other Spanish Dominican philosophers laid the groundwork for a doctrine of natural rights that was independent of religious revelation 'by drawing on a juridical tradition that derived natural rights and natural law from human rationality and free will, and by appealing to Aristotelian philosophy.'\"\nMary Ann Glendon, \"The Forgotten Crucible: The Latin American Influence on the Universal Human Rights Idea,\u201d 2003\nWhich of the following presidents was most involved in Latin American politics in the twentieth century?",
"correct_answer": "A",
"choices": [
"Theodore Roosevelt",
"Franklin D. Roosevelt",
"Harry S. Truman",
"Chester Arthur",
"Dwight Eisenhower",
"Woodrow Wilson",
"James K. Polk",
"James Monroe",
"John F. Kennedy",
"Richard Nixon"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4552": {
"question_id": "mmlu_pro_4552",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nNow, we have organized a society, and we call it \"Share Our Wealth Society,\" a society with the motto \"Every Man a King.\"\u2026\nWe propose to limit the wealth of big men in the country. There is an average of $15,000 in wealth to every family in America. That is right here today.\nWe do not propose to divide it up equally. We do not propose a division of wealth, but we do propose to limit poverty that we will allow to be inflicted on any man's family. We will not say we are going to try to guarantee any equality \u2026 but we do say that one third of the average is low enough for any one family to hold, that there should be a guarantee of a family wealth of around $5,000; enough for a home, an automobile, a radio, and the ordinary conveniences, and the opportunity to educate their children.\u2026\nWe will have to limit fortunes. Our present plan is that we will allow no man to own more than $50,000,000. We think that with that limit we will be able to carry out the balance of the program.\n\u2014Senator Huey P. Long of Louisiana, Radio Address, February 23, 1934\nSenator Long's \"Share the Wealth Society\" attracted many followers in 1934 because",
"correct_answer": "F",
"choices": [
"The society proposed a revolutionary idea of limiting individual fortunes to $50,000,000.",
"There was a growing dissatisfaction with the capitalist system.",
"People were attracted by the idea of every man being a king.",
"The society promised to limit the wealth of the rich and provide a minimum standard of living for everyone.",
"The rise of technology created a demand for wealth distribution.",
"the New Deal had not ended the Great Depression.",
"the Second World War encouraged an egalitarian ethos.",
"There was a surge in immigrant population looking for equal wealth opportunities.",
"a flourishing economy and a baby boom had led people to desire greater incomes.",
"Socialistic ideas were becoming popular in the United States."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4584": {
"question_id": "mmlu_pro_4584",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"We found that not only was it a civil war, an effort by a people who had for years been seeking their liberation from any colonial influence whatsoever, but also we found that the Vietnamese whom we had enthusiastically molded after our own image were hard put to take up the fight against the threat we were supposedly saving them from.\n\"We found most people didn't even know the difference between communism and democracy. They only wanted to work in rice paddies without helicopters strafing them and bombs with napalm burning their villages and tearing their country apart. They wanted everything to do with the war, particularly with this foreign presence of the United States of America, to leave them alone in peace, and they practiced the art of survival by siding with whichever military force was present at a particular time, be it Viet Cong, North Vietnamese or American.\"\nJohn Kerry, 1971\nThe conflict described above is most likely a result of which of the following doctrines?",
"correct_answer": "B",
"choices": [
"Wilsonianism",
"Containment",
"Big-stick diplomacy",
"Monroe Doctrine",
"Isolationism",
"Domino Theory",
"Marshall Plan",
"Truman Doctrine",
"Manifest Destiny",
"Imperialism"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4648": {
"question_id": "mmlu_pro_4648",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "What was concluded in a recent study comparing the genomes of modern people with ancient skeletal remains in North and South America?",
"correct_answer": "C",
"choices": [
"All Native Americans, both ancient and modern, are derived from a single migration of Austral-Melanesians into the Americas by about 13,000 years ago.",
"Native Americans in North and South America have no genetic connection to their ancestors in the far north.",
"The separation between native Siberians and the earliest Native Americans occurred sometime between 20,000 and 23,000 years ago.",
"The separation between native Siberians and the earliest Native Americans occurred sometime between 30,000 and 33,000 years ago.",
"All Native Americans, both ancient and modern, are derived from a single migration of the Inuit, who arrived in the Americas by about 13,000 years ago.",
"Native Americans in South America diverged genetically from their ancestors in North America around 30,000 years ago.",
"The separation between native Siberians and the earliest Native Americans occurred around 10,000 years ago.",
"All Native Americans, both ancient and modern, are derived from multiple migrations of different Asian ethnic groups into the Americas.",
"Native Americans in North America and South America genetically diverged from each other and their ancestors in the far north by about 30,000 years ago.",
"The genomes of modern Native Americans show no significant differences from those of ancient skeletal remains."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4656": {
"question_id": "mmlu_pro_4656",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"The challenge of the next half century is whether we have the wisdom to use wealth to enrich and elevate our national life, and to advance the quality of our American civilization\u2026.The Great Society rests on abundance and liberty for all. It demands an end to poverty and racial injustice, to which we are totally committed in our time. But that is just the beginning. The Great Society is a place where every child can find knowledge to enrich his mind and to enlarge his talents. It is a place where leisure is a welcome chance to build and reflect, not a feared cause of boredom and restlessness. It is a place where the city of man serves not only the needs of the body and the demands of commerce but the desire for beauty and the hunger for community. It is a place where man can renew contact with nature. It is a place which honors creation for its own sake and for what it adds to the understanding of the race. It is a place where men are more concerned with the quality of their goals than the quantity of their goods. But most of all, the Great Society is not a safe harbor, a resting place, a final objective, a finished work. It is a challenge constantly renewed, beckoning us toward a destiny where the meaning of our lives matches the marvelous products of our labor.\"\nLyndon Johnson, Remarks at the University of Michigan, Ann Arbor, 1964\nAlong with his goals of establishing a Great Society, Johnson was also engaged in which of the following initiatives?",
"correct_answer": "J",
"choices": [
"Undermining Communism in Korea with the Korean War",
"Undermining Communism in Russia during the Cuban Missile Crisis",
"Undermining Communism in China with the Cultural Revolution",
"Undermining Communism in Afghanistan with the Soviet-Afghan War",
"Undermining Communism in East Germany with the Berlin Airlift",
"Undermining Communism in the Eastern Block by demanding a removal of the Berlin Wall",
"Undermining Communism in Cuba with the Bay of Pigs Invasion",
"Undermining Communism in Laos with the Laotian Civil War",
"Undermining Communism in Turkey and Greece using economic aid",
"Undermining Communism in Vietnam after the Tet Offensive"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4795": {
"question_id": "mmlu_pro_4795",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "The commoners of the rural Aztec villages of Capilco and Cuexcomate:",
"correct_answer": "C",
"choices": [
"had to pay a high tax in goods and labor to the local governor.",
"lived under constant threat of being sold into slavery.",
"were mostly left alone and did fairly well for themselves.",
"were under the direct control of the high priest, who demanded regular sacrifices.",
"were heavily policed by the king's guards to prevent any form of uprising.",
"were ruled by an iron fist by the king, who had armies stationed there.",
"were forced to send half of all maize they grew to the king as tribute and lived in absolute poverty.",
"were required to send all their children to be educated in the city.",
"had no protection from city states that invaded from the north.",
"were forced to work on large-scale irrigation projects for the king."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4845": {
"question_id": "mmlu_pro_4845",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nThose who came before us made certain that this country rode the first waves of the industrial revolutions, the first waves of modern invention, and the first wave of nuclear power, and this generation does not intend to flounder in the backwash of the coming age of space. We mean to be a part of it\u2014we mean to lead it. For the eyes of the world now look into space, to the moon, and the planets beyond, and we have vowed that we shall not see it governed by a hostile flag of conquest, but by a banner of freedom and peace. We have vowed that we shall not see space filled with weapons of mass destruction, but with instruments of knowledge and understanding.\u2026 We choose to go to the moon. We choose to go to the moon in this decade and do the other things, not because they are easy, but because they are hard, because that goal will serve to organize and measure the best of our energies and skills, because that challenge is one that we are willing to accept, one we are unwilling to postpone, and one which we intend to win, and the others, too.\n\u2014John F. Kennedy, September 12, 1962\nKennedy's statement best reflects which of the following?",
"correct_answer": "D",
"choices": [
"A conservative fear of big government",
"A critique of previous government policies",
"A condemnation of the arms race",
"American confidence in a time of prosperity",
"An expression of fear about the future of space exploration",
"A call for international cooperation in space exploration",
"A liberal concern for social justice",
"A reflection of Kennedy's personal ambitions",
"American unease in a time of troubles",
"A call for increased military spending"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4550": {
"question_id": "mmlu_pro_4550",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nHere is the case of a woman employed in the manufacturing department of a Broadway house. It stands for a hundred like her own. She averages three dollars a week. Pay is $1.50 for her room; for breakfast she has a cup of coffee; lunch she cannot afford. One meal a day is her allowance. This woman is young, she is pretty. She has \"the world before her.\" Is it anything less than a miracle if she is guilty of nothing less than the \"early and improvident marriage,\" against which moralists exclaim as one of the prolific causes of the distresses of the poor? Almost any door might seem to offer a welcome escape from such slavery as this. \"I feel so much healthier since I got three square meals a day,\" said a lodger in one of the Girls' Homes. Two young sewing-girls came in seeking domestic service, so that they might get enough to eat. They had been only half-fed for some time, and starvation had driven them to the one door at which the pride of the American-born girl will not permit her to knock, though poverty be the price of her independence.\n\u2014Jacob Riis, How the Other Half Lives, 1890\nThe situation faced by the young women in the passage above is most directly comparable to which of the following?",
"correct_answer": "B",
"choices": [
"Jewish immigrants in New York in the 1880s",
"Detroit autoworkers in the 1930s",
"Populist farmers in the 1890s",
"Factory workers in the Industrial Revolution",
"British soldiers in the 1800s",
"Coal miners in the 1900s",
"Civil Rights activists in the 1960s",
"Women suffragettes in the early 20th century",
"American revolutionaries in the 1770s",
"Slaves in the antebellum South"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4824": {
"question_id": "mmlu_pro_4824",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "Interestingly, none of the civilizations of ancient South America appear to have developed:",
"correct_answer": "G",
"choices": [
"advanced metallurgy.",
"monumental works.",
"a system of money.",
"maritime navigation technology.",
"complex religious systems.",
"irrigation technology.",
"a written language."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4638": {
"question_id": "mmlu_pro_4638",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nYi Yin sacrificed to the former king, and presented the heir-king reverently before the shrine of his grandfather. . . . Yi Yin then clearly described the virtue of the Meritorious Ancestor for the instruction of the young king.\n\"Oh! of old the former kings of Xia cultivated earnestly their virtue, and then there were no calamities from Heaven. The spirits of the hills and rivers were all in tranquility; and the birds and beasts enjoyed their existence according to their nature. But their descendant did not follow their example, and Heaven sent down calamities, using our ruler, who possessed of its favor. The attack on Xia may be traced to the orgies in Ming Tiao. . . . Our king of Shang brilliantly displayed his sagely prowess; for oppression he substituted his generous gentleness. Now your Majesty is entering on the inheritance of his virtue; all depends on how you commence your reign.\n\"Oh! the former king began with careful attention to the bonds that hold men together. . . . Revere these warnings in your person. . . . The ways of Heaven are not invariable: on the good-doer it sends down all blessings, and on the evil-doer it sends down all miseries. If you not be virtuous, be it in large things or in small, it will bring the ruin of your ancestral temple.\"\n\u2014Excerpted and adapted from the Shu Jing, 6th century BCE, China\nWhich phrase can be seen as a statement of the roots of Daoist beliefs?",
"correct_answer": "C",
"choices": [
"Our king of Shang brilliantly displayed his sagely prowess.",
"The former king began with careful attention to the bonds that hold men together.",
"The birds and beasts . . . enjoyed their existence according to their nature.",
"The ways of Heaven are not invariable: on the good-doer it sends down all blessings."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4903": {
"question_id": "mmlu_pro_4903",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "The pharaoh ruled a population of ________, of which _________ were farmers.",
"correct_answer": "B",
"choices": [
"5,000,000; 100%",
"3,000,000; 75%",
"4,000,000; 80%",
"2,500,000; 90%",
"1,500,000; 70%",
"100,000; 25%",
"500,000; 50%",
"750,000; 30%",
"1,000,000; 50%",
"2,000,000; 60%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4869": {
"question_id": "mmlu_pro_4869",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "Between 1.4 million and 400,000 years ago, hand axe technology:",
"correct_answer": "E",
"choices": [
"steadily worsened.",
"was invented during this period.",
"evolved into a more sophisticated technology.",
"remained static without any changes.",
"changed only slightly.",
"underwent a rapid evolution.",
"was replaced by other forms of technology.",
"was mostly used for hunting purposes.",
"disappeared completely.",
"steadily improved."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4896": {
"question_id": "mmlu_pro_4896",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "Which culture lived a primarily sedentary lifestyle in the Southwest?",
"correct_answer": "A",
"choices": [
"Mogollon",
"Navajo",
"Clovis",
"Paleo-Indians",
"Hopewell",
"Inca",
"Adena",
"Kwakiutl",
"Mississippian",
"Apache"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4751": {
"question_id": "mmlu_pro_4751",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nRead the excerpts below.\nThis corruption is repeatedly designated by Paul by the term sin . . . such as adultery, fornication, theft, hatred, murder, revellings, he terms, in the same way, the fruits of sin, though in various passages of Scripture . . . we are, merely on account of such corruption, deservedly condemned by God, to whom nothing is acceptable but righteousness, innocence, and purity.\n\u2014John Calvin, from The Institutes of Christian Religion, Book 2: Chapter 1, 1545\nThe covenant of life is not preached equally to all, and among those to whom it is preached, does not always meet with the same reception. This diversity displays the unsearchable depth of the divine judgment, and is without doubt subordinate to God's purpose of eternal election. But if it is plainly owing to the mere pleasure of God that salvation is spontaneously offered to some, while others have no access to it, great and difficult questions immediately arise, questions which are inexplicable, when just views are not entertained concerning election and predestination[,] . . . the grace of God being illustrated by the contrast, viz., that he does not adopt all promiscuously to the hope of salvation, but gives to some what he denies to others.\n\u2014John Calvin, from The Institutes of Christian Religion, Book 3: Chapter 21, 1545\nWhich of the following justifications used by Protestant reformers such as Calvin is alluded to above?",
"correct_answer": "E",
"choices": [
"The belief that everyone has direct access to God, without the need for priests or church hierarchy.",
"The belief in the necessity of separation of church and state.",
"They believed in religious tolerance and the acceptance of different faiths.",
"Religion was used to challenge the authority of earthly monarchs.",
"The corruption of the Roman Catholic Church and its leaders meant that reform was needed.",
"They believed that their church should not be subordinate to the state.",
"The notion that salvation is predetermined and not all individuals have access to it.",
"The idea that churches should be self-governed and independent.",
"The idea that religious teachings should be made available in the vernacular rather than in Latin.",
"The concept that salvation comes from faith alone rather than through good works is supported."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4649": {
"question_id": "mmlu_pro_4649",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "In periods of great difficulty and stress, it appears that the Minoans __________ in order to appease the gods.",
"correct_answer": "D",
"choices": [
"stopped all forms of art and music",
"fasted for long periods",
"built monumental structures",
"sacrificed adults and occasionally children",
"burnt their own houses to the ground",
"abstained from sexual relations",
"enacted strict laws and punishments",
"buried their possessions",
"performed complex dance rituals",
"migrated to different regions"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4766": {
"question_id": "mmlu_pro_4766",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "Which of the following sites has monumental earthworks with concentric ridges in a semicircle that date from around 3,500 years ago?",
"correct_answer": "H",
"choices": [
"Caral",
"Chichen Itza",
"The Parthenon",
"Machu Picchu",
"Angkor Wat",
"The Great Pyramids of Giza",
"The Colosseum",
"Poverty Point",
"Gobekli Tepe",
"Stonehenge"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4838": {
"question_id": "mmlu_pro_4838",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\n\"We found that not only was it a civil war, an effort by a people who had for years been seeking their liberation from any colonial influence whatsoever, but also we found that the Vietnamese whom we had enthusiastically molded after our own image were hard put to take up the fight against the threat we were supposedly saving them from.\n\"We found most people didn't even know the difference between communism and democracy. They only wanted to work in rice paddies without helicopters strafing them and bombs with napalm burning their villages and tearing their country apart. They wanted everything to do with the war, particularly with this foreign presence of the United States of America, to leave them alone in peace, and they practiced the art of survival by siding with whichever military force was present at a particular time, be it Viet Cong, North Vietnamese or American.\"\nJohn Kerry, 1971\nThe two political issues that most concerned the Counterculture Movement of the 1960s were",
"correct_answer": "H",
"choices": [
"the civil rights movement and environmentalism",
"the women's rights movement and censorship",
"the civil rights movement and censorship",
"flag burning and the draft",
"U.S. involvement in Vietnam and flag burning",
"U.S. involvement in Vietnam and the women's rights movement",
"U.S. involvement in Vietnam and environmentalism",
"U.S. involvement in Vietnam and the civil rights movement",
"the draft and the environmental movement",
"censorship and the draft"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4679": {
"question_id": "mmlu_pro_4679",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "The Gault site in south-central Texas has produced evidence from 13,000 years ago that Paleoindians chose this location based on the:",
"correct_answer": "E",
"choices": [
"nearness of the site to navigable rivers and the Gulf coast.",
"proximity to other Paleoindian settlements.",
"presence of a large lake for fishing and transportation.",
"nearby location of migratory routes of large mammals such as bison.",
"proximity to a local source of chert for quarrying and making stone tools.",
"abundance of wild game, plants, and fish in a rich and diverse woodland environment.",
"availability of clay for pottery making."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4540": {
"question_id": "mmlu_pro_4540",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "What did the Moche build at the heart of their urban center?",
"correct_answer": "E",
"choices": [
"the Temple of the Feathered Serpent",
"an enormous walled-in precinct of elite residences",
"the Palace of the Painted Walls",
"a complex irrigation system",
"the Pyramid of the Sun",
"the Tower of Moche",
"an extensive library of their written records",
"a central marketplace for trade",
"a vast complex of temples and artisan workshops",
"an amphitheater for gladiatorial combats"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4618": {
"question_id": "mmlu_pro_4618",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "This question refers to the following information.\nThat day the Reverend Xuanzang was chosen from among all the monks. He had been a monk from infancy, and ever since birth he had eaten vegetarian food and observed the prohibitions. His maternal grandfather was an imperial commander, Yin Kaishan. His father Chen Guangrui had come top in the Palace Examination and had been appointed a grand secretary in the Imperial Library. Xuanzang, however, had no interest in honour and glory, and his only joy was to cultivate Nirvana. His virtue was great; of the thousand sutras and ten thousand holy books there was not a single one that he did not know.\n\u2026\nHe looked to the West and prayed, \"I am the believer Chen Xuanzang sent on imperial orders to fetch the scriptures. If I am fated to have a disciple, may I be able to unseal the golden words and release the divine Monkey King to come with me to the Vulture Peak. If I am not fated to have a disciple, and this monkey is an evil monster who has deceived me and will do me no good, then may I be unable to remove the seal.\" When he had prayed he bowed again.\nFrom Wu Chengen, Journey to the West, ca. 1590s\nIn which of the following ways does the excerpt above most complicate historians' understanding of the career of the Chinese monk and traveler Xuanzang?",
"correct_answer": "A",
"choices": [
"It highly fictionalizes the story of Xuanzang's accomplishments.",
"It was written long after the time period in which Xuanzang lived.",
"It casts doubt on the actual existence of Xuanzang.",
"It is widely considered by scholars to be a forgery."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5572": {
"question_id": "mmlu_pro_5572",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Internet traffic is expected to grow by 400% in the next few years. What is predicted to be the main driver of this?",
"correct_answer": "A",
"choices": [
"Video",
"Gaming",
"Email communication",
"Virtual Reality applications",
"Online auctions",
"Social media usage",
"Online consumer goods purchases",
"E-book downloads",
"Online education platforms",
"Music"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5032": {
"question_id": "mmlu_pro_5032",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "What are the tallest trees on Earth?",
"correct_answer": "B",
"choices": [
"Noble Fir",
"Coast Redwood",
"Sitka Spruce",
"Coast Douglas Fir",
"Giant Sequoia",
"Yellow Meranti",
"Bald Cypress",
"Mountain Ash",
"Ponderosa Pine",
"Western Red Cedar"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4943": {
"question_id": "mmlu_pro_4943",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "About what percentage of survey respondents from India report having paid a bribe in the last year to access public services (such as education; judiciary; medical and health; police; registry and permit services; utilities; tax revenue and customs; and land service) as of 2017?",
"correct_answer": "A",
"choices": [
"69%",
"9%",
"49%",
"59%",
"89%",
"29%",
"39%",
"79%",
"99%",
"19%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5677": {
"question_id": "mmlu_pro_5677",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following words cannot be decoded through knowledge of letter-sound relationships?",
"correct_answer": "F",
"choices": [
"Lamp",
"Flight",
"Light",
"Things",
"Fold",
"Said",
"Drain"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5199": {
"question_id": "mmlu_pro_5199",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following dental topics is developmentally appropriate for toddlers?",
"correct_answer": "B",
"choices": [
"Growth of permanent teeth",
"Correct toothbrushing technique",
"The science behind root canal treatment",
"Understanding the need for dentures",
"Teeth whitening procedures",
"Correct use of dental floss",
"Introduction to dental implants",
"The process of tooth extraction",
"Correct use of incisors while eating",
"Evolution of the human tooth"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5168": {
"question_id": "mmlu_pro_5168",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Different less developed countries have different levels of secondary school enrollment. Some countries with high secondary-school enrollment are, as of 2020,",
"correct_answer": "I",
"choices": [
"Peru, Uruguay, Cambodia, and Niger.",
"Pakistan, Niger, Peru, and Bhutan.",
"Bhutan, Cambodia, Uruguay and Costa Rica.",
"Costa Rica, Colombia, Niger, and Timor-Leste.",
"Bhutan, Niger, Cambodia, and Costa Rica.",
"Timor-Leste, Pakistan, Peru, and Bhutan.",
"Niger, Bhutan, Timor-Leste, and Colombia.",
"Cambodia, Timor-Leste, Uruguay, and Pakistan.",
"Colombia, Peru, Uruguay, and Costa Rica.",
"Uruguay, Costa Rica, Timor-Leste, and Pakistan."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5146": {
"question_id": "mmlu_pro_5146",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "The swift adoption of the Bill of Rights in the years following ratification of the Constitution demonstrates the",
"correct_answer": "D",
"choices": [
"Framers' unanimous agreement on all constitutional issues",
"Framers' lack of commitment to individual rights",
"states' indifference towards the national government",
"states' fears of an overpowerful national government",
"Framers' unqualified commitment to individual rights",
"Northern states' support for abolitionism",
"Northern and Southern states' compromise over representation",
"small states' determination to receive equal representation in the legislature",
"Southern states' support for slavery",
"large states' push for more representation in the legislature"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5060": {
"question_id": "mmlu_pro_5060",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which communication model describes the encoding and decoding of messages?",
"correct_answer": "D",
"choices": [
"Uses and Gratifications model",
"Westley and MacLean model",
"Two-step flow model",
"Osgood and Schramm model",
"Gatekeeping model",
"Cultivation Theory model",
"Lasswell model",
"Shannon and Weaver model",
"Maletzke model",
"Hypodermic Needle model"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5672": {
"question_id": "mmlu_pro_5672",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "What kind of car did Burt Reynolds drive in the movie 'Smokey and the Bandit'?",
"correct_answer": "B",
"choices": [
"Lamborghini",
"Transam",
"Rolls Royce",
"Ferrari",
"Mustang",
"Dodge Charger",
"Camaro",
"Porsche",
"Aston Martin",
"Corvette"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5022": {
"question_id": "mmlu_pro_5022",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "During which of the following processes within the hydrologic cycle do water molecules absorb energy?",
"correct_answer": "I",
"choices": [
"Water seeping into groundwater reservoirs",
"Absorption of rainwater by the soil",
"Formation of a cloud from water vapor",
"Formation of dew from water vapor",
"Water flow in rivers and streams",
"Formation of snow from water vapor",
"Formation of ice from water",
"Runoff along the land surface",
"Evaporation from the ocean surface",
"Formation of fog from water vapor"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5610": {
"question_id": "mmlu_pro_5610",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following stages is the first step in the writing cycle?",
"correct_answer": "F",
"choices": [
"Outlining",
"Drafting",
"Editing",
"Researching",
"Formatting",
"Brainstorming",
"Reviewing",
"Publishing",
"Revising",
"Proofreading"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5676": {
"question_id": "mmlu_pro_5676",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "How many water molecules are in a human head?",
"correct_answer": "A",
"choices": [
"8*10^25",
"8*10^23",
"8*10^24",
"8*10^21",
"8*10^30",
"8*10^22",
"8*10^27",
"8*10^26",
"8*10^28",
"8*10^29"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5831": {
"question_id": "mmlu_pro_5831",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "For an individual business owner, which of the following would typically be classified as a capital asset for federal income tax purposes?",
"correct_answer": "A",
"choices": [
"Marketable securities",
"Outstanding loans",
"Machinery and equipment used in a business",
"Real estate property used for the business",
"The business' goodwill",
"Cash on hand",
"Accounts receivable",
"Office furniture and supplies",
"Inventory",
"Intellectual property such as patents"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5476": {
"question_id": "mmlu_pro_5476",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following types of stress dominates at divergent boundaries?",
"correct_answer": "E",
"choices": [
"Thermal",
"Horizontal",
"Right-lateral shear",
"Radial",
"Tensional",
"Shear",
"Vertical",
"Gravitational",
"Left-lateral shear",
"Compressional"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5007": {
"question_id": "mmlu_pro_5007",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "In what ways does the Ideal Policy framework provide an analytical framework for practitioners' success?",
"correct_answer": "B",
"choices": [
"The Ideal Policy framework provides the analytical tools for understanding the contextual factors influencing the use of coercive diplomacy, for example why a policymaker takes the decision to, or not to, implement the Ideal Policy.",
"The Ideal Policy framework focuses on the use of coercive diplomacy to counter aggression. The Ideal Policy explains and predicts outcomes with a minimum of success conditions on the basis of the coercer's actions only.",
"The Ideal Policy provides an expansive framework, firstly for the examination of a maximum number of probable causal conditions and secondly to make the conditions applicable to the contextual factors relating to the use of coercive diplomacy.",
"The Ideal Policy framework recognizes that to make non-compliance too costly, the coercer must threaten to defeat the adversary or to deny the targets objectives. The condition for success is pivotal on the coercer's threat of force."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5522": {
"question_id": "mmlu_pro_5522",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "The endometrium grows and thickens during:",
"correct_answer": "B",
"choices": [
"the secretory phase",
"the follicular phase",
"the proliferative phase",
"postmenopause",
"menopause",
"pregnancy",
"menstruation",
"ovulation",
"the luteal phase",
"the menstrual cycle"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5350": {
"question_id": "mmlu_pro_5350",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "According to Esman (1972), which publics help an organization to exist by lending authority or assistance?",
"correct_answer": "B",
"choices": [
"Collaborative",
"Enabling",
"Strategic",
"Operational",
"Normative",
"Regulatory",
"Diffused",
"Functional",
"Financial",
"Administrative"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4950": {
"question_id": "mmlu_pro_4950",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following is a likely outcome of the development and use of genetically modified plants?",
"correct_answer": "B",
"choices": [
"Increased risk of human health issues due to consumption",
"New plant varieties being patented",
"Immediate elimination of all plant diseases",
"Less oversight and fewer regulations than for unmodified crops"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4948": {
"question_id": "mmlu_pro_4948",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "In 1943 how many Americans believed in the Holocaust?",
"correct_answer": "C",
"choices": [
"80%",
"30%",
"50%",
"70%",
"90%",
"100%",
"20%",
"40%",
"10%",
"60%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5013": {
"question_id": "mmlu_pro_5013",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following is not recognised as a level of society?",
"correct_answer": "J",
"choices": [
"the continent",
"the community",
"the city",
"the nation state",
"the church",
"the school",
"the household",
"the global village",
"the region",
"the office"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5141": {
"question_id": "mmlu_pro_5141",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following entities cannot sue for libel?",
"correct_answer": "F",
"choices": [
"A public figure",
"An individual citizen",
"A house of worship",
"A politician",
"A private educational institution",
"A government institution",
"A private corporation",
"A publicly-traded company",
"A celebrity",
"A non-profit organization"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5156": {
"question_id": "mmlu_pro_5156",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following was not a strategy used by the Conservative government of 1979 to reduce the power of the labour movement?",
"correct_answer": "F",
"choices": [
"making secondary action (in support of workers elsewhere) illegal",
"enforcing mandatory arbitration in all labor disputes",
"banning political strikes that were not primarily concerned with the worker's own conditions of work",
"imposing restrictions on picketing",
"implementing a national minimum wage",
"making all strike action illegal",
"outlawing collective bargaining",
"replacing the labour movement with government-controlled worker's councils",
"privatizing all industries"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5435": {
"question_id": "mmlu_pro_5435",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "________ advertising campaigns are focused on gathering support for a particular message or cause.",
"correct_answer": "A",
"choices": [
"Idea-oriented",
"Company-oriented",
"Market-oriented",
"Service-oriented",
"Brand-oriented",
"Product-oriented",
"Person-oriented",
"Consumer-oriented",
"Profit-oriented"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5534": {
"question_id": "mmlu_pro_5534",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which value is the most reasonable estimate of the volume of air an adult breathes in one day?",
"correct_answer": "D",
"choices": [
"200,000 liters",
"100,000 liters",
"5,000 liters",
"10,000 liters",
"1,000 liters",
"500 liters",
"30,000 liters",
"100 liters",
"300 liters",
"50,000 liters"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4945": {
"question_id": "mmlu_pro_4945",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "As of 2019, about what percentage of Americans say it is very important to have free media in our country without government/state censorship?",
"correct_answer": "J",
"choices": [
"50%",
"30%",
"10%",
"90%",
"40%",
"60%",
"20%",
"100%",
"70%",
"80%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5492": {
"question_id": "mmlu_pro_5492",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Which of the following statements is the most significant characteristic in determining the classification of an enterprise fund?",
"correct_answer": "D",
"choices": [
"The activity is financed by grants and contributions that are restricted for specific purposes.",
"The activity's revenue is predominantly generated from investment income.",
"The activity is financed by debt that is secured partially by a pledge of the net revenues from fees and charges of the activity.",
"The pricing policies of the activity establish fees and charges designed to recover its cost.",
"The activity is financed by debt that is secured solely by a pledge of the general revenues from taxes.",
"The predominant customer is the primary government.",
"Laws or regulations require that the activity\u2019s costs of providing services including capital costs be recovered with taxes or similar revenues.",
"Laws or regulations require that the activity's costs of providing services be subsidized by general fund appropriations.",
"The activity is funded primarily through intergovernmental revenues.",
"The activity's costs of providing services are primarily financed through sales of goods or services to the general public."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5121": {
"question_id": "mmlu_pro_5121",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Stock A has a beta of 0.8, while Stock B has a beta of 1.4. The expected return on the market is 10% and the risk-free rate is 7%. Using CAPM and this stock information, what is the required rate of return for Stocks A and B, respectively?",
"correct_answer": "C",
"choices": [
"\u22122.6% and 4.9%",
"5.4% and 9.5%",
"9.4% and 11.2%",
"6.4% and 10.5%",
"4.3% and 7.4%",
"11.4% and 14.2%",
"10.4% and 13.2%",
"6.7% and 11.3%",
"7.2% and 10.1%",
"8.1% and 12.6%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5651": {
"question_id": "mmlu_pro_5651",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "Heterosexual fantasies about sexual activity never involve someone _________, and gay and lesbian fantasies never involve persons of __________",
"correct_answer": "G",
"choices": [
"of the other gender; of the same gender",
"of the opposite gender; of the opposite gender",
"of the same gender; of the same gender",
"of the same gender; of the other gender",
"of the other gender; of the other gender",
"of the same gender; of the opposite gender",
"Both heterosexual and homosexual fantasies may involve persons of the same or other gender",
"Both heterosexual and homosexual fantasies may involve persons of the same gender only",
"Both heterosexual and homosexual fantasies may involve persons of the opposite gender only",
"of the opposite gender; of the same gender"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5583": {
"question_id": "mmlu_pro_5583",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "To what extent and to what effect has the environment been securitized?",
"correct_answer": "F",
"choices": [
"Environmental security has been largely disregarded by policymakers and is not seen as a high-priority issue.",
"Environmental security has led to a 'trading off' of military security for environmental security with the allocation of resources committed to solving environmental problems. A particular emphasis has been placed on environmental change as the cause of violent conflict and addressing environmental threats from other factors than domestic causes.",
"The national security perspective on environmental issues has undermined the utility of the concept of environmental security. The militarization of environmental problems has decreased the ability of policymakers to 'capture' the critical nature of environmental problems and the environmental dimensions to social vulnerability. The reconciliation of environmental security with national security, and global levels of international change cannot be made without trading in the security interests of the state.",
"The securitization of environmental security concerns has raised the profile of environmental security issues among both foreign and domestic security policymakers and agencies. It is now uniformly recognized that environmental change can be considered as an issue of high security politics.",
"The 'securitization' of the environment has led to a significant increase in funding and resources allocated to environmental protection and conservation efforts.",
"The environment has been 'securitized' in the sense that environmental security has been used to make them matters of 'high politics'. This has to some degree raised the profile of critical issues among policymakers and agencies so that the environment can be considered an issue of security. But whilst the issue has been broadened there has been little change in policy and action in terms of the referent object of environmental security.",
"The securitization of environmental issues has led to a reduction in the importance placed on other areas of security.",
"The environment has been 'securitized' to the extent that it has become a central focus of military strategy and planning."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5636": {
"question_id": "mmlu_pro_5636",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "What drives US foreign policy according to the Marxist perspective?",
"correct_answer": "D",
"choices": [
"Religious motivations",
"Nationalist sentiments",
"Humanitarian aid",
"The search for new markets",
"Military expansionism",
"Superstructure",
"Class consciousness",
"Economic protectionism",
"International relations theory",
"Promotion of democracy"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5347": {
"question_id": "mmlu_pro_5347",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "What is the main purpose of licensing public relations practitioners?",
"correct_answer": "G",
"choices": [
"To achieve better wages for licensed practitioners",
"To create an elite of public relations practitioners",
"To regulate access to posts in the field",
"To mandate continued professional education for practitioners",
"To increase government control over public relations",
"To provide a revenue stream for professional organizations",
"To preserve the well-being of society",
"To inflate the cost of public relations services",
"To limit competition in the field"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5143": {
"question_id": "mmlu_pro_5143",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "According to Macnamara's (2005) pyramid of evaluation, at which stage are opinion polls most useful?",
"correct_answer": "D",
"choices": [
"Inputs",
"Pre-evaluation",
"Impact",
"Outtakes",
"Outputs",
"Outcomes",
"Input-Output",
"Evaluation planning",
"Process",
"Feedback"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5377": {
"question_id": "mmlu_pro_5377",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "The two principles that deal with questions of values are:",
"correct_answer": "D",
"choices": [
"faith and science",
"faith and legal",
"religion and science",
"religion and ethics",
"legal and ethics",
"science and legal",
"faith and religion",
"legal and religion",
"ethics and science"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5521": {
"question_id": "mmlu_pro_5521",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "What sexual act is prohibited in most societies?",
"correct_answer": "H",
"choices": [
"sex without consent",
"sex during menstruation",
"sex with an animal",
"sex in public places",
"sex with a minor",
"sex with a dead person",
"sex for money",
"incest",
"masturbation",
"sex with a non-spouse if the person is married."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5202": {
"question_id": "mmlu_pro_5202",
"source_benchmark": "MMLU_Pro",
"domain": "other",
"question_text": "A spouse died on December 31, year 1. The couple had no dependents. What should be the filing status of the surviving spouse in year 2?",
"correct_answer": "C",
"choices": [
"Nonresident alien.",
"Qualifying widow(er) with dependent child.",
"Single.",
"Married filing separately.",
"Married filing jointly.",
"Widow(er) with dependent child.",
"Head of household.",
"Jointly with deceased spouse.",
"Qualifying widow(er).",
"Dependent of another taxpayer."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8813": {
"question_id": "mmlu_pro_8813",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Find the number that makes the statement 2 over 9 = 14 over ? true.",
"correct_answer": "G",
"choices": [
"15",
"50",
"18",
"36",
"28",
"14",
"63",
"16",
"32",
"45"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7732": {
"question_id": "mmlu_pro_7732",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "A correlation of 0.6 indicates that the percentage of variation in y that is explained by the variation in x is how many times the percentage indicated by a correlation of 0.3?",
"correct_answer": "B",
"choices": [
"2",
"4",
"8",
"6",
"2.5",
"3",
"1.5",
"0.5",
"0.2",
"5"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7555": {
"question_id": "mmlu_pro_7555",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Suppose that $f(x)$ is a polynomial that has degree $6$ and $g(x)$ is a polynomial that has degree $3$. If $h(x)$ is also a polynomial such that $f(g(x)) + g(h(x)) + h(f(x))$ is a polynomial of degree $36$, then what is the degree of the polynomial $h$?",
"correct_answer": "C",
"choices": [
"3",
"21",
"6",
"9",
"30",
"72",
"12",
"15",
"24",
"18"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8067": {
"question_id": "mmlu_pro_8067",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Find the mean of the set of data 18, 9, 9, 10, 11, 14, 30, 19.",
"correct_answer": "D",
"choices": [
"11",
"18",
"19",
"15",
"10",
"30",
"9",
"16",
"20",
"14"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8005": {
"question_id": "mmlu_pro_8005",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Using n=8 approximate the value of $\\int_{0}^4 cos(1 + \\sqrt{x}) dx$ using the Simpson's rule.",
"correct_answer": "G",
"choices": [
"2.71828183",
"-1.41421356",
"0.98765432",
"3.14159265",
"1.57079633",
"-3.14159265",
"-2.47160136",
"1.23456789",
"-0.78539816",
"0.69314718"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7961": {
"question_id": "mmlu_pro_7961",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "There are 7 desks arranged in a row in Mr. Thompson\u2019s classroom. Hector sits 2 seats to the right of Kim. Tonya sits 3 seats to the right of Hector. How many seats to the left of Tonya does Kim sit?",
"correct_answer": "I",
"choices": [
"6",
"1",
"7",
"2",
"9",
"12",
"8",
"3",
"5",
"4"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7789": {
"question_id": "mmlu_pro_7789",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "A function f(x) is given by f(0)=3, f(2)=7, f(4)=11, f(6)=9, f(8)=3. Approximate the area under the curve y=f(x) between x=0 and x=8 using Trapezoidal rule with n=4 subintervals.",
"correct_answer": "B",
"choices": [
"85.0",
"60.0",
"65.0",
"45.0",
"55.0",
"90.0",
"80.0",
"50.0",
"75.0",
"70.0"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7713": {
"question_id": "mmlu_pro_7713",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "If A = (1, 2, 3, 4). Let ~= {(1, 2), (1, 3), (4, 2)}. Then ~ is",
"correct_answer": "B",
"choices": [
"partial order relation",
"transitive",
"equivalence relation",
"symmetric",
"not anti-symmetric",
"reflexive"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8620": {
"question_id": "mmlu_pro_8620",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Let x_1 = 1 and x_(n+1) = sqrt(3+2x_n) for all positive integers n. If it is assumed that {x_n} converges, then lim x_n =",
"correct_answer": "C",
"choices": [
"sqrt(5)",
"e",
"3",
"5",
"1",
"0",
"sqrt(2)",
"2",
"sqrt(7)",
"sqrt(3)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7682": {
"question_id": "mmlu_pro_7682",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "In how many ways can 7 people be seated at 5 identical round tables? Each table must have at least 1 person seated.",
"correct_answer": "I",
"choices": [
"200",
"210",
"250",
"150",
"300",
"120",
"135",
"225",
"175",
"160"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8713": {
"question_id": "mmlu_pro_8713",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "The surface area, S, of a right rectangular prism with length l, width w, and height h can be found using the formula S = 2(lw+ wh + hl). What is the surface area, in square inches, of a prism with a length of 12 inches, a width of 9 inches, and a height of 2 inches?",
"correct_answer": "A",
"choices": [
"300",
"92",
"150",
"258",
"276",
"210",
"225",
"268",
"320",
"180"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8368": {
"question_id": "mmlu_pro_8368",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "For the regression line, which of the following statements about residuals is true?",
"correct_answer": "H",
"choices": [
"The sum of the residuals is always one.",
"If the correlation is 0, there will be a distinct pattern in the residual plot.",
"The larger the residuals, the better the linear model.",
"The residuals are always negative.",
"If the correlation is 1, there will be a distinct pattern in the residual plot.",
"The residual plot will always be a straight line.",
"Influential scores have large residuals.",
"The mean of the residuals is always zero.",
"If the linear model is good, the number of positive residuals will be the same as the number of negative residuals.",
"The residuals can never be zero."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7569": {
"question_id": "mmlu_pro_7569",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Alex needs to borrow $\\$10,\\!000$ from the bank. The bank gives him two options. 1. A ten-year loan with an annual interest rate of $10\\%$ compounded quarterly, with the condition that at the end of 5 years, Alex must make a payment equal to half of what he owes. The other half continues to accrue interest, and at the end of the ten years, Alex will pay off the remaining balance. 2. A ten-year loan with a simple annual interest rate of $12\\%$, with just one lump-sum payment at the end of the ten years. Find the positive difference between the total amounts Alex has to pay back under the two schemes. Round your answer to the nearest dollar.",
"correct_answer": "D",
"choices": [
"381",
"383",
"384",
"382",
"385",
"16398",
"16399",
"380",
"16400",
"16401"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7565": {
"question_id": "mmlu_pro_7565",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "If the parabola $y_1 = x^2 + 2x + 7$ and the line $y_2 = 6x + b$ intersect at only one point, what is the value of $b$?",
"correct_answer": "C",
"choices": [
"10",
"5",
"3",
"12",
"4",
"6",
"8",
"7",
"2",
"9"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7695": {
"question_id": "mmlu_pro_7695",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "The dye dilution method is used to measure cardiac output with $6 \\mathrm{mg}$ of dye. The dye concentrations, in $\\mathrm{mg} / \\mathrm{L}$, are modeled by $c(t)=20 t e^{-0.6 t}, 0 \\leqslant t \\leqslant 10$, where $t$ is measured in seconds. Find the cardiac output.",
"correct_answer": "C",
"choices": [
"7.5 $\\mathrm{L}/\\mathrm{min}$",
"4.2 $\\mathrm{L}/\\mathrm{min}$",
" 6.6 $\\mathrm{L}/\\mathrm{min}$",
"3.3 $\\mathrm{L}/\\mathrm{min}$",
"8.0 $\\mathrm{L}/\\mathrm{min}$",
"4.8 $\\mathrm{L}/\\mathrm{min}$",
"5.0 $\\mathrm{L}/\\mathrm{min}$",
"5.4 $\\mathrm{L}/\\mathrm{min}$",
"9.5 $\\mathrm{L}/\\mathrm{min}$",
"7.2 $\\mathrm{L}/\\mathrm{min}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7951": {
"question_id": "mmlu_pro_7951",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Estimate 153 + 44. The sum is between which numbers?",
"correct_answer": "D",
"choices": [
"1,100 and 1,299",
"900 and 1,099",
"50 and 99",
"100 and 299",
"400 and 599",
"500 and 699",
"700 and 899",
"300 and 499",
"200 and 399",
"600 and 799"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7980": {
"question_id": "mmlu_pro_7980",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "A sequence of three real numbers forms an arithmetic progression with a first term of 9. If 2 is added to the second term and 20 is added to the third term, the three resulting numbers form a geometric progression. What is the smallest possible value for the third term of the geometric progression?",
"correct_answer": "I",
"choices": [
"12",
"16",
"2",
"7",
"5",
"3",
"-1",
"0",
"1",
"9"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8538": {
"question_id": "mmlu_pro_8538",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Compute the are of that part of the helicoid z = arctan(y/x) which lies in the first octant between the cylinder $x^2+y^2 = 1^2$ and $x^2+y^2 = 2^2$.",
"correct_answer": "C",
"choices": [
"3.764",
"2.718",
"2.843",
"1.732",
"4.000",
"3.576",
"2.456",
"1.567",
"3.142",
"2.000"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8736": {
"question_id": "mmlu_pro_8736",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "In Canada in 2014, the average wholesale price of soybeans was $0.24 per pound. In 2015, the average wholesale price of soybeans was $0.16 per pound. If a retailer purchased 20,000 pounds of soybeans in 2014 and in 2015, what was the percent change in the retailer\u2019s expenses from 2014 to 2015?",
"correct_answer": "C",
"choices": [
"\u201325%",
"33%",
"\u201333%",
"\u20138%",
"16%",
"8%",
"\u201320%",
"\u201350%",
"25%",
"0%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7558": {
"question_id": "mmlu_pro_7558",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Given that $a$ and $b$ are real numbers such that $-3\\leq a\\leq1$ and $-2\\leq b\\leq 4$, and values for $a$ and $b$ are chosen at random, what is the probability that the product $a\\cdot b$ is positive? Express your answer as a common fraction.",
"correct_answer": "E",
"choices": [
"\\frac{7}{12}",
"\\frac{5}{11}",
"\\frac{5}{10}",
"\\frac{6}{11}",
"\\frac{5}{12}",
"\\frac{7}{11}",
"\\frac{5}{17}",
"\\frac{4}{11}",
"\\frac{6}{12}",
"\\frac{4}{12}"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8653": {
"question_id": "mmlu_pro_8653",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "A group of 7 people is to be divided into 3 committees. Within each committee, people are ranked in a certain order. In how many ways can this be done?",
"correct_answer": "J",
"choices": [
"5000",
"25200",
"10500",
"15000",
"40320",
"21000",
"7200",
"10000",
"30000",
"12600"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7911": {
"question_id": "mmlu_pro_7911",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "A young person with no initial capital invests $k$ dollars per year at an annual rate of return $r$. Assume that investments are made continuously and that the return is compounded continuously.\nIf $r=7.5 \\%$, determine $k$ so that $\\$ 1$ million will be available for retirement in 40 years.",
"correct_answer": "H",
"choices": [
"5000 $",
"3500 $",
"$3000",
"$7000",
"$5500",
"$2500",
"$7500",
" 3930 $",
"$6000",
"4500 $"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8834": {
"question_id": "mmlu_pro_8834",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "For the two linear equations $2 * x + 3 * y + z = 8$ and $4 * x + 4 * y + 4z = 12$ and $x + y + 8z = 10$ with variables x, y and z. Use cramer's rule to solve these three variables.",
"correct_answer": "D",
"choices": [
"[1, 3, -1]",
"[1, 2, 2]",
"[1, -3, -1]",
"[-1, 3, 1]",
"[0, -2, 1]",
"[2, 2, -1]",
"[-1, -3, 1]",
"[-2, 1, 3]",
"[3, -1, -2]",
"[-1, -2, 2]"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8363": {
"question_id": "mmlu_pro_8363",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Suppose you did 10 independent tests of the form H0: \u03bc = 25 versus Ha: \u03bc < 25, each at the \u03b1 = 0.05 significance level. What is the probability of committing a Type I error and incorrectly rejecting a true H0 with at least one of the 10 tests?",
"correct_answer": "J",
"choices": [
"0.60",
"0.50",
"0.30",
"0.05",
"1.00",
"0.70",
"0.10",
"0.20",
"0.80",
"0.40"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7955": {
"question_id": "mmlu_pro_7955",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Which ratio can form a proportion with 3 over 8?",
"correct_answer": "A",
"choices": [
"24 over 64",
"15 over 48",
"18 over 56",
"18 over 40"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8423": {
"question_id": "mmlu_pro_8423",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Find the entropy rate of the Markov chain associated with a random walk of a king on the 3 by 3 chessboard. Use base 2 logarithm and return the entropy rate in bits.",
"correct_answer": "I",
"choices": [
"2.70",
"2.45",
"3.21",
"2.00",
"2.98",
"1.75",
"3.00",
"1.50",
"2.24",
"1.95"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8710": {
"question_id": "mmlu_pro_8710",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "A restaurant used 231 eggs last week. Of these, 46 were brown in color. The remaining eggs were white in color. Which equation can be used to solve for w, the number of white eggs used last week?",
"correct_answer": "D",
"choices": [
"w = 231 * 46",
"w = 231/46",
"231+ 46w = 0",
"46+w= 231",
"231 = 46w",
"231w = 46",
"46 = 231w",
"w = 46 - 231",
"w= 231+ 46"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8073": {
"question_id": "mmlu_pro_8073",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Which two numbers both round to 1,500 when rounded to the nearest hundred?",
"correct_answer": "F",
"choices": [
"1,444 and 1,555",
"1,453 and 1,563",
"1,449 and 1,549",
"1,499 and 1,599",
"1,489 and 1,589",
"1,457 and 1,547",
"1,445 and 1,555",
"1,399 and 1,599"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7517": {
"question_id": "mmlu_pro_7517",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Two dice are rolled simultaneously. If both dice show 6, then the player wins $20; otherwise the player loses the game. It costs $2.00 to play the game. What is the expected gain or loss per game?",
"correct_answer": "A",
"choices": [
"The player will lose about $1.44.",
"The player will gain about $1.00.",
"The player will lose about $0.89.",
"The player will lose about $1.00.",
"The player will gain about $2.00.",
"The player will gain about $1.44.",
"The player will lose about $2.00.",
"The player will gain about $0.89.",
"The player will lose about $0.55.",
"The player will gain about $0.55."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7830": {
"question_id": "mmlu_pro_7830",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "The number of days it takes to build a new house has a variance of 386. A sample of 40 new homes shows an average building time of 83 days. With what confidence can we assert that the average building time for a new house is between 80 and 90 days?",
"correct_answer": "E",
"choices": [
"75.3%",
"90.5%",
"68.9%",
"97.2%",
"82.1%",
"20.0%",
"85.7%",
"17.8%",
"78.4%",
"15.4%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8369": {
"question_id": "mmlu_pro_8369",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "The probability is 0.2 that a value selected at random from a normal distribution with mean 600 and standard deviation 15 will be above what number?",
"correct_answer": "H",
"choices": [
"603.8",
"605.3",
"600.0",
"620.5",
"625.9",
"610.2",
"587.4",
"612.6",
"0.84",
"615.7"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8200": {
"question_id": "mmlu_pro_8200",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "How many diagonals are in a regular octagon?",
"correct_answer": "A",
"choices": [
"20",
"28",
"17",
"12",
"22",
"16",
"15",
"8",
"18",
"24"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7822": {
"question_id": "mmlu_pro_7822",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Find the degree for the given field extension Q(sqrt(2)*sqrt(3)) over Q.",
"correct_answer": "I",
"choices": [
"3",
"8",
"12",
"4",
"10",
"5",
"6",
"1",
"2",
"0"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7944": {
"question_id": "mmlu_pro_7944",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "Order from greatest to least: 3, 3 and 1 over 8,3.8,3.18.",
"correct_answer": "E",
"choices": [
"3.8, 3 and 1 over 8, 3.18, 3",
"3.18, 3.8, 3, 3 and 1 over 8",
"3, 3.8, 3 and 1 over 8, 3.18",
"3.18, 3 and 1 over 8, 3, 3.8",
"3.8, 3.18, 3 and 1 over 8, 3",
"3 and 1 over 8, 3.8, 3.18, 3",
"3,3 and 1 over 8, 3.18, 3",
"3.18, 3 and 1 over 8, 3.8,3",
"3 and 1 over 8, 3, 3.8, 3.18",
"3, 3.18, 3.8, 3 and 1 over 8"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8193": {
"question_id": "mmlu_pro_8193",
"source_benchmark": "MMLU_Pro",
"domain": "math",
"question_text": "If x and y are directly proportional and x = 3 when y = 8, what is the value of x when y = 13?",
"correct_answer": "G",
"choices": [
"2.0",
"3.25",
"34.667",
"7.5",
"6.125",
"15",
"4.875",
"1.875",
"10",
"0.615"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10891": {
"question_id": "mmlu_pro_10891",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "According to Mill, to determine whether one pleasure is more valuable than another, we must _____.",
"correct_answer": "A",
"choices": [
"determine which pleasure most experienced people prefer",
"consult science",
"consult religious leaders",
"determine which one is objectively most pleasurable",
"measure the intensity of each pleasure",
"consider which pleasure is most easily attainable",
"consult our personal preferences",
"determine which pleasure is most universally preferred",
"determine which pleasure lasts the longest",
"consult philosophers of the past"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10621": {
"question_id": "mmlu_pro_10621",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "A valid disjunctive syllogism has a major premise that:",
"correct_answer": "F",
"choices": [
"affirms the antecedent or denies the consequent",
"leads to an invalid conclusion",
"generalizes a specific case",
"includes three or more alternatives",
"contradicts the conclusion",
"includes two or more alternatives",
"leads to a valid conclusion",
"is based on a false premise",
"denies the antecedent or affirms the consequent",
"classifies subjects"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10576": {
"question_id": "mmlu_pro_10576",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " If a global ban would not be effective in prohibiting genetic engineering that goes beyond eliminating obvious defects, then Singer suggests which \"bolder\" strategy?",
"correct_answer": "B",
"choices": [
"none of the above",
"using a lottery system so everyone gets a fair chance at the benefits of genetic engineering",
"using sanctions and trade restrictions against countries that violate the ban",
"allowing genetic engineering only for medical purposes",
"promoting genetic engineering only for the wealthy",
"creating a global consensus on acceptable genetic engineering practices",
"developing a global regulatory body to monitor and control genetic engineering activities",
"implementing a global education program to convince people of the dangers of excessive genetic engineering",
"requiring genetic engineering for every newborn child",
"creating a world military that would enforce the ban with greater strength"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10943": {
"question_id": "mmlu_pro_10943",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " West argues that feminist legal theory has inadvertently led to the under-acknowledgement of the harms of consensual sex because",
"correct_answer": "B",
"choices": [
"it posits that all sex is rape.",
"by suggesting that the harm of rape is that it is nonconsensual, it implies that all consensual sex is good.",
"by suggesting that the harm of rape has nothing to do with the victim's gender, it implies that women are not especially harmed by nonconsensual sex.",
"it supports the idea that consent is the only factor determining the harm of sex.",
"it implies that consensual sex is inherently harmful.",
"by suggesting that the harm of rape is only physical, it implies that emotional trauma in consensual sex is not harmful.",
"by suggesting that rape is only harmful in cases of physical injury, it implies that psychological harm in consensual sex is not important.",
"it suggests that the harm of rape is entirely dependent on the victim's personal perception, thereby downplaying the potential harms of consensual sex.",
"all of the above.",
"it argues that women are not capable of giving consent."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10704": {
"question_id": "mmlu_pro_10704",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Which of the following is not one of the four main excuses for terrorism that Michael Walzer discusses?",
"correct_answer": "A",
"choices": [
"Terrorism is a form of freedom of speech.",
"Terrorism is a necessary evil.",
"Terrorism benefits oppressed groups.",
"Terrorism is driven by economic factors.",
"Terrorism is a legitimate form of self-defense.",
"All political activity is terroristic.",
"Terrorism is a method of political influence.",
"Terrorism is a response to unjust government actions.",
"Terrorism is a last resort.",
"Terrorism is an expression of religious beliefs."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10689": {
"question_id": "mmlu_pro_10689",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Construct a complete truth table for the following argument. Then, using the truth table, determine whether the argument is valid or invalid. If the argument is invalid, choose an option which presents a counterexample. (There may be other counterexamples as well.)\nQ \u2261 R\n~(S \u2228 Q) / R",
"correct_answer": "A",
"choices": [
"Invalid. Counterexample when Q, S, and R are false",
"Invalid. Counterexample when Q is false and S and R are true",
"Invalid. Counterexample when Q is true, S is false, and R is true",
"Valid. No counterexample exists",
"Invalid. Counterexample when Q and S are true and R is false",
"Invalid. Counterexample when S is true and Q and R are false",
"Invalid. Counterexample when R is true and Q and S are false",
"Invalid. Counterexample when Q and R are true and S is false",
"Valid",
"Invalid. Counterexample when Q is true and S and R are false"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10678": {
"question_id": "mmlu_pro_10678",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "According to Moore, the most fundamental question in all of ethics is:",
"correct_answer": "F",
"choices": [
"how \"justice\" is to be defined.",
"whether ethics is a science.",
"whether morality is subjective.",
"whether God exists.",
"whether humans are inherently good or bad.",
"how \u201cgood\u201d is to be defined.",
"whether life is worth living.",
"what is the meaning of life.",
"whether morality is objective.",
"how \"evil\" is to be defined."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10635": {
"question_id": "mmlu_pro_10635",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Nussbaum claims that for Aristotle the reference of each virtue term is fixed by:",
"correct_answer": "G",
"choices": [
"a thick description of the virtue.",
"religious beliefs.",
"personal interpretation.",
"philosophical analysis.",
"a thin description of the virtue.",
"practical experiences.",
"grounding experiences.",
"societal norms.",
"conventional use.",
"tradition."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10941": {
"question_id": "mmlu_pro_10941",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Use indirect truth tables to determine whether each set of propositions is consistent. If the set is consistent, choose an option with a consistent valuation. (There may be other consistent valuations.)\n~N \u2261 (O \u00b7 P)\n~O \u2283 Q\nN \u00b7 ~Q",
"correct_answer": "G",
"choices": [
"Inconsistent. Inconsistent valuation when N, O, P, and Q are all false",
"Inconsistent",
"Consistent. Consistent valuation when N, O, and P are true and Q is false",
"Inconsistent. Inconsistent valuation when N and P are true and O and Q are false",
"Consistent. Consistent valuation when N and P are true and O and Q are false",
"Inconsistent. Inconsistent valuation when N, O, P, and Q are all true",
"Consistent. Consistent valuation when N and O are true and P and Q are false",
"Consistent. Consistent valuation when O, P, and Q are true and N is false"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10616": {
"question_id": "mmlu_pro_10616",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " One way to value a patient's autonomy is by aiming to maximize his or her effective options. When we value a patient's autonomy in this way, what, according to Velleman, do we value?",
"correct_answer": "H",
"choices": [
"the patient's ability to make informed decisions",
"the ability to share reasons with the patient",
"the ability to understand the patient's needs",
"the patient's right to refuse treatment",
"the ability to provide the patient with options",
"the patient's personal preferences",
"the patient's capacity for self-determination",
"the patient's opportunities for self-determination",
"the patient's understanding of their medical condition",
"the ability to communicate effectively with the patient"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10910": {
"question_id": "mmlu_pro_10910",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " In contrast to Garrett Hardin's approach, the Confucian model rejects coercion because",
"correct_answer": "D",
"choices": [
"it views coercion as a violation of human rights",
"it believes in the power of persuasion over coercion",
"the Confucian model believes in voluntary compliance based on mutual respect and understanding.",
"it affirms the autonomy of individuals apart from others.",
"The Confucian model actually accepts coercion.",
"the costs of eliminating a collective action problem are not irrelevant to its solution",
"coercion is seen as a last resort, only to be used in extreme circumstances",
"the Confucian model prioritizes collective action over individual action",
"it cannot lead to real social change, which involves changes of mind as well as action",
"it believes in the intrinsic goodness and perfectibility of human beings"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11020": {
"question_id": "mmlu_pro_11020",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Ghosa, Apala and Lopamurda, all named in the early Vedas, are which of the following?",
"correct_answer": "B",
"choices": [
"Female dancers",
"Female poets",
"Female goddesses",
"Female rulers",
"Female artisans",
"Female physicians",
"Female princesses",
"Female saints",
"Female warriors"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10843": {
"question_id": "mmlu_pro_10843",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Which of the following best describes the fallacy of figure of speech?",
"correct_answer": "J",
"choices": [
"Assuming that a particular figure of speech is universally understood.",
"Using an irrelevant figure of speech to divert attention from the argument.",
"A form of loaded language that uses an emotionally charged restatement of the claim in place of support for the claim.",
"Misinterpreting a figure of speech as a logical argument.",
"Misusing a figure of speech in order to make a claim sound more persuasive.",
"Confusing the meaning of a figure of speech with its literal translation.",
"Treating something that exists in the mind as if it was a real object.",
"Using a figure of speech in place of a factual claim.",
"Using emotionally charged language to create an impression about the subject of a claim, without making an argument that the language fits the subject.",
"Confusing figurative language with literal language."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10608": {
"question_id": "mmlu_pro_10608",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Just war theory's principle of military necessity belongs to",
"correct_answer": "G",
"choices": [
"just war theory's principle of proportionality.",
"none of the above.",
"all of the above",
"moral relativism.",
"moral nihilism.",
"just war theory's principle of right intention.",
"jus in bello.",
"jus post bellum.",
"jus ad bellum.",
"moral absolutism."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10866": {
"question_id": "mmlu_pro_10866",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Why, according to Gill, does Leon Kass think the notion of being \"better off dead\" is incoherent?",
"correct_answer": "E",
"choices": [
"because Kass thinks that death is not a state of being and therefore cannot be evaluated.",
"because Kass believes in the sanctity of life and opposes euthanasia.",
"because Kass maintains that the concept of death is inherently negative and cannot be considered as \"better\".",
"because there has yet to be a satisfactory philosophical explanation of why death is bad.",
"because dead people no longer exist, and thus cannot benefit from choosing to die.",
"because Kass believes suffering in life can lead to personal growth, making death not a better option.",
"because some things (e.g., long-term torture) are worse than death.",
"because Kass believes that one cannot compare being alive and being dead.",
"because death is, by definition, undesirable.",
"because death cannot be considered as a better option when life is valuable."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10780": {
"question_id": "mmlu_pro_10780",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Shapiro claims that surveys have been confirmed by longitudinal studies that indicate that",
"correct_answer": "H",
"choices": [
"the use of illegal drugs is usually a temporary phase in adolescence.",
"most individuals experiment with illegal drugs but do not continue use.",
"excessive use of legal drugs is the norm.",
"an individual's illegal drug use inevitably diminishes over time.",
"those who begin using illegal drugs typically stop before addiction sets in.",
"longitudinal studies have shown no consistent patterns in illegal drug use.",
"illegal drug users typically transition to legal substances over time.",
"moderate and/or controlled use of illegal drugs is the norm.",
"those who begin using illegal drugs typically do not stop until they become addicted.",
"excessive use of illegal drugs is the norm."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10580": {
"question_id": "mmlu_pro_10580",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Ross claims that the essential defect of utilitarianism is that it ignores",
"correct_answer": "I",
"choices": [
"the significance of moral intentions.",
"epistemological responsibility.",
"the role of virtue in morality.",
"the value of pleasure.",
"the importance of individual rights.",
"the potential consequences of actions.",
"the concept of justice.",
"moral obligations.",
"the personal character of duty.",
"autonomy."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10579": {
"question_id": "mmlu_pro_10579",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " The principle Arthur calls \"the greater moral evil rule\"says that",
"correct_answer": "D",
"choices": [
"morality itself is a great evil.",
"people should always strive to commit the lesser evil, even if it results in personal gain.",
"the person who commits the greater evil is the one who is morally responsible for the situation.",
"people are entitled to keep their earnings only if there is no way for them to prevent a greater evil by giving them away.",
"people are morally obligated to share their wealth if it can prevent a greater evil.",
"being rich is a great moral evil, and as such we should all strive to live on a moderate income.",
"the greater moral evil rule is a concept which states that it's always wrong to do evil, regardless of the circumstances.",
"two moral evils do not make a moral good.",
"people are allowed to do a smaller evil to prevent a greater one.",
"the concept of evil is subjective and varies from person to person."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10611": {
"question_id": "mmlu_pro_10611",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " What is an example of \"mutual coercion, mutually agreed upon\"?",
"correct_answer": "A",
"choices": [
"all countries cooperating to change the existing incentive structure by introducing a system of enforceable sanctions to curb climate change.",
"the agreement of more powerful nations to require less powerful nations to curb greenhouse gas emissions for the benefit of all humanity.",
"the agreement of less powerful nations to boycott trade with more powerful nations until the latter agree to curb their greenhouse gas emissions.",
"the agreement of a large number of individual agents to restrict their own pollution."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10675": {
"question_id": "mmlu_pro_10675",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "When someone changes the meaning of a word or phrase from one part of the argument to another that person may have committed which of the following fallacies?",
"correct_answer": "G",
"choices": [
"Hypostatization",
"Appeal to authority",
"Red herring fallacy",
"False cause fallacy",
"Straw man fallacy",
"Special pleading",
"Equivocation",
"Slippery slope fallacy",
"Ad hominem attack",
"Question begging epithets"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10683": {
"question_id": "mmlu_pro_10683",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Mill claims that the principle of utility:",
"correct_answer": "G",
"choices": [
"can only be proven through extensive observation and study.",
"can be proven from principles that are known empirically.",
"can be proven, but only under certain conditions.",
"is inherently flawed and therefore cannot be proven.",
"is a theory that has been widely disproven.",
"can be proven, but this is unique to the principle of utility.",
"cannot be proven, but this is common to all first principles.",
"can be proven, but only by using a complex mathematical model.",
"cannot be proven, and this is a unique problem for the theory.",
"can be proven from self-evident principles."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10822": {
"question_id": "mmlu_pro_10822",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " What assumption does Pence think people who object to SCNT make when they say that SCNT is not best for the child?",
"correct_answer": "D",
"choices": [
"The parents are prioritizing their own needs over the child's.",
"We can't tell what the motives of the parents are.",
"The parents have ulterior motives.",
"The parents have good motives.",
"The parents are incapable of making decisions for the child.",
"none of the above",
"The parents are indifferent to the child's wellbeing.",
"The parents are making an uninformed decision.",
"The parents are exploiting the child for personal gain.",
"The parents have bad motives."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10872": {
"question_id": "mmlu_pro_10872",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Which of the following statements best describes Gill's response to the argument that the Oregon physician-assisted law falsely assumes that doctors are qualified to judge whether a life is worth living?",
"correct_answer": "G",
"choices": [
"Doctors are normally required to make life-and-death decisions, and this is no exception.",
"The law is in line with the doctors' Hippocratic oath of doing no harm.",
"Doctors are as qualified as any other mature adult to decide whether a life is worth living.",
"The law doesn't require doctors to judge the worth of a life, but rather to provide a professional opinion on the patient's condition.",
"The argument is valid because doctors should not be the ones to decide whether a life is worth living.",
"The law is justified as doctors are inherently equipped to make such judgements due to their profession.",
"The law only requires doctors to make medical decisions about the patient's life expectancy and prognosis.",
"The argument is self-defeating because it was proposed by medical doctors.",
"The argument is irrelevant as the law is about patient autonomy and not about doctors' judgement on life's worth.",
"The law is flawed because it puts too much pressure on the doctors to make such decisions."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10577": {
"question_id": "mmlu_pro_10577",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Collins suggests that the moral value of caring actions comes from",
"correct_answer": "D",
"choices": [
"the cost-effectiveness of caring actions in terms of resources expended.",
"the non-instrumental value of caring actions as manifestations of intrinsic goods for person like love and forgiveness.",
"the value assigned to the action by the person performing the caring act.",
"how well any caring actions fulfills the recipient's interests and the strength of one's intention to fulfill the recipient's interest.",
"the balance between the intention and the actual outcome of the caring actions.",
"the intrinsic value of caring attitudes which motivate caring actions.",
"the impact of the actions on the overall well-being of the society.",
"the degree to which those actions align with societal norms and expectations.",
"the extent to which the actions are recognized and appreciated by others.",
"the degree to which those actions produces the greatest amount of happiness in the recipient of care."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10851": {
"question_id": "mmlu_pro_10851",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Feinberg claims that the story about Abraham Lincoln:",
"correct_answer": "A",
"choices": [
"provides some evidence against psychological egoism.",
"supports the theory of psychological altruism.",
"is irrelevant to the discussion of psychological egoism.",
"supports the idea of ethical egoism.",
"proves the concept of psychological egoism.",
"actually involves a confusion between ethical egoism and psychological egoism.",
"provides no evidence for or against psychological egoism.",
"contradicts the concept of ethical egoism.",
"is a misinterpretation of psychological egoism.",
"provides some evidence for psychological egoism."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10665": {
"question_id": "mmlu_pro_10665",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Which branch of Judaism founded by Zacharias Frankel is known for its \"Positive-Historical Judaism\"?",
"correct_answer": "C",
"choices": [
"Orthodox",
"Sephardic",
"Conservative",
"Hasidic",
"Reconstructionism",
"Secular",
"Liberal",
"Haredi",
"Ultra-Orthodox",
"Reformed"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10930": {
"question_id": "mmlu_pro_10930",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Hume claims that a person who is unaffected by images of human happiness or misery will also be indifferent to:",
"correct_answer": "C",
"choices": [
"art and beauty.",
"love and hatred.",
"virtue and vice.",
"success and failure.",
"truth and falsehood.",
"all of the above.",
"material wealth.",
"pain and pleasure.",
"his own well-being.",
"the welfare of others."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10896": {
"question_id": "mmlu_pro_10896",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Hume describes reason as:",
"correct_answer": "G",
"choices": [
"all of the above.",
"synonymous with logic and rationality.",
"the primary cause of human suffering.",
"the source of all moral actions.",
"the root of all evil.",
"the first spring or impulse to desire.",
"cool and disengaged.",
"the driving force behind all human behaviors.",
"the catalyst for emotional reactions.",
"irrelevant to decision making."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10923": {
"question_id": "mmlu_pro_10923",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "Which of the following fallacies happens when someone concludes that a legitimate generalization necessarily applies to a particular case?",
"correct_answer": "C",
"choices": [
"False dilemma",
"Circular reasoning",
"Accident",
"Bifurcation"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10778": {
"question_id": "mmlu_pro_10778",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " The view that moral considerations do not apply to war is known as",
"correct_answer": "E",
"choices": [
"moral absolutism.",
"antiwar pacifism.",
"just war theory.",
"war pragmatism.",
"moral nihilism.",
"jus ad bellum.",
"moral relativism.",
"utilitarianism.",
"consequentialism.",
"war realism."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10676": {
"question_id": "mmlu_pro_10676",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "According to Ross, our prima facie duties:",
"correct_answer": "I",
"choices": [
"are only applicable to certain cultures.",
"are determined by societal norms.",
"are only valid if they align with personal beliefs.",
"have no moral significance.",
"can be altered over time.",
"can be proven.",
"cannot be known.",
"are not objective.",
"are self-evident."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10793": {
"question_id": "mmlu_pro_10793",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "I don't know what the answer is to number 8. But Heather hasn't made the third option into the correct answer for a while, and even then only once, so I'm guessing that this is the fallacy of Composition!",
"correct_answer": "A",
"choices": [
"Gambler's Fallacy",
"Equivocation",
"Fallacy of Composition",
"Appeal to Pity"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10865": {
"question_id": "mmlu_pro_10865",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " What is the \"intergenerational storm\" of climate change?",
"correct_answer": "J",
"choices": [
"the problem of massive world overpopulation over generations.",
"the problem that the pace of climate change is too fast for future generations to adapt.",
"the problem of greenhouse gas emissions increasing over generations due to technological advancements.",
"the problem that current generations are not doing enough to prevent climate change, thus leaving future generations to deal with the consequences.",
"the problem that the compounding effects of greenhouse gas emissions require the current generation to, in effect, cooperate with future generations.",
"the problem of a lack of education among current generations about the long-term effects of climate change.",
"the problem that future generations may not be able to afford the costs of reversing the effects of climate change.",
"the problem that the negative effects of climate change disproportionately affect the younger generation.",
"the problem that the bad effects of current carbon dioxide emissions will fall largely on future generations.",
"the problem that countries are possibly biased toward the interests of the current generation, which largely benefits from carbon dioxide emissions."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10706": {
"question_id": "mmlu_pro_10706",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " Instead of asking why the act of destroying the environment might be immoral, Hill wants to ask",
"correct_answer": "G",
"choices": [
"how the decision to preserve the environment benefits the environment.",
"how the destruction of the environment affects the economy.",
"why people who preserve the environment might be good people.",
"how destroying the environment affects future generations.",
"why the act of destroying nature might be immoral.",
"whether plants have interests.",
"why people who destroy the environment might be bad people.",
"if destroying the environment is legally punishable.",
"whether nature has a right to exist.",
"whether animals have rights."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10978": {
"question_id": "mmlu_pro_10978",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": " According to Lukianoff and Haidt, the recent trend to uncover microaggressions encourages",
"correct_answer": "C",
"choices": [
"students to confabulate reasons.",
"the pursuit of justice by marking out racism, sexism, and classism.",
"labeling, by assigning global negative traits to persons.",
"universities to bear overly burdensome legal obligations."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_654": {
"question_id": "mmlu_pro_654",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Mr. Okada owns a home worth $28,750 in a town where homes are assessed at 38% of their market value. If Mr. Okada pays property taxes at the rate of $8.42 per $100, and 47% of his property taxes is spent on education, how much of his taxes goes toward education?",
"correct_answer": "D",
"choices": [
"$2,187.50",
"$765.23",
"$850.19",
"$432.35",
"$300.58",
"$28,750",
"$10,925",
"$1,150.47",
"$919.89",
"$500.00"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_114": {
"question_id": "mmlu_pro_114",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Mr. Langham decides to take a loan from his bank for $60 at a nominal rate of 6%. If he agrees to repay the loan in equal installments over a period of 15 months, how much will each of the monthly payments be?",
"correct_answer": "B",
"choices": [
"$4.60",
"$4.30",
"$4.70",
"$4.50",
"$3.80",
"$4.00",
"$4.10",
"$64.50",
"$5.00",
"$60"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_25": {
"question_id": "mmlu_pro_25",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Where in the balance sheet does each of the following belong? (A) Taxes payable (B) Capital stock (C) Retailed earnings (D) Administrative expense (E) Prepaid expenses",
"correct_answer": "J",
"choices": [
"(A) Liability section, (B) Asset side, (C) Owner's Equity section, (D) Asset side, (E) Owner's Equity section",
"(A) Owner's Equity section, (B) Asset side, (C) Income Statement, (D) Liability section, (E) Liability section",
"(A) Asset side, (B) Liability section, (C) Income Statement, (D) Owner's Equity section, (E) Income Statement",
"(A) Owner's Equity section, (B) Liability section, (C) Asset side, (D) Asset side, (E) Income Statement",
"(A) Income Statement, (B) Liability section, (C) Asset side, (D) Owner's Equity section, (E) Owner's Equity section",
"(A) Owner's Equity section, (B) Income Statement, (C) Asset side, (D) Asset side, (E) Liability section",
"(A) Liability section, (B) Asset side, (C) Liability section, (D) Income Statement, (E) Income Statement",
"(A) Income Statement, (B) Owner's Equity section, (C) Income Statement, (D) Liability section, (E) Asset side",
"(A) Asset side, (B) Income Statement, (C) Liability section, (D) Owner's Equity section, (E) Liability section",
"(A) Liability section, (B) Owner's Equity section, (C) Owner's Equity section, (D) Income Statement, (E) Asset side"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_759": {
"question_id": "mmlu_pro_759",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "ThomasSidencarries property-damage insurance on his car witha limit of $5,000. He damages Frank Hartman's car in anaccident to the extent of $2,350. How much will his insurancecompany pay?",
"correct_answer": "E",
"choices": [
"$5,000",
"$2,650",
"$4,700 (double the actual damage)",
"$1,750 (a figure less than the actual damage)",
"$2,350",
"$0",
"$1,350",
"$3,000 (more than the actual damage but less than the limit)",
"$2,500 (a rounded figure close to the actual damage)",
"$2,000 (slightly less than the actual damage)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_281": {
"question_id": "mmlu_pro_281",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Mr. Louis is presently considering buying a new boat to give rides to tourists. He has two alternatives: Boat A costs $10,000 and consumes $2,000 in fuel per year. Boat B costs $7,000 and consumes $2,500. Both boats have a zero salvage value at the end of 10 years. If Ur. Louis considers a rate of return of 6% acceptable, (a) which boat should he purchase? (b) how much will he charge each tourist if there are 3 tourists to a ride and Mr. Louis plans to have 125 rides each year?",
"correct_answer": "B",
"choices": [
"Boat B, $9.50 per passenger",
"Boat A, $8.96 per passenger",
"Boat A, $9.50 per passenger",
"Boat A, $12 per passenger",
"Boat A, $7.50 per passenger",
"Boat B, $7.50 per passenger",
"Boat B, $8.96 per passenger",
"Boat B, $10 per passenger",
"Boat B, $12 per passenger",
"Boat A, $10 per passenger"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_250": {
"question_id": "mmlu_pro_250",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Mr. Samuels purchased 50 shares of stock when the round lot price was 146(1/8). The odd-lot differential on the sale is 1/4 of a point. Using the table of Brokerage Rates below, calculate the Commission on the sale. BROKERAGE RATES ON ODD LOTS Amount of Purchase Commission $100 - $799 2.0% plus $4.40 up to $65 $800 - $2,499 1.3% plus $10 up to $65 $2,500 and above .9% plus $20 up to $65",
"correct_answer": "F",
"choices": [
"$85.87",
"$92.50",
"$75.25",
"$7,318.75",
"$49.99",
"$65",
"$20",
"$110",
"$35",
"$57.50"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_228": {
"question_id": "mmlu_pro_228",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Describe a procedure that can be used to determine inventory turnover rate.",
"correct_answer": "H",
"choices": [
"Inventory turnover rate is calculated by dividing total sales by ending inventory.",
"Inventory turnover rate is calculated by dividing total sales by average inventory.",
"Inventory turnover rate is calculated by subtracting the cost of goods sold from the average inventory.",
"Inventory turnover rate is determined by adding the cost of goods sold to the ending inventory.",
"Inventory turnover rate is found by dividing average inventory by the cost of goods sold.",
"Inventory turnover rate is calculated by dividing the beginning inventory by the cost of goods sold.",
"Inventory turnover rate is calculated by dividing the sum of the beginning and ending inventory by the cost of goods sold.",
"The inventory turnover rate can be calculated by dividing the cost of goods sold by the average inventory.",
"Inventory turnover rate is calculated by multiplying the cost of goods sold by the average inventory.",
"The inventory turnover rate is found by subtracting the average inventory from the total sales."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_142": {
"question_id": "mmlu_pro_142",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "A teacher wants to invest $30,000 into an account that compounds annually. The interest rate at this bank is 1.8%. How much money will be in the account after 6 years?",
"correct_answer": "H",
"choices": [
"37000.00",
"36000.00",
"33850.00",
"31000.00",
"34567.89",
"32700.00",
"30180.00",
"33389.35",
"35000.00",
"32000.00"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_754": {
"question_id": "mmlu_pro_754",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "From what does a chain of command extend?",
"correct_answer": "B",
"choices": [
"Bottom to top",
"Top to bottom",
"Horizontally",
"Randomly",
"Inside to outside",
"Diagonally",
"Circularly",
"Laterally",
"Outside to inside",
"Vertically from bottom to top"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_104": {
"question_id": "mmlu_pro_104",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "The owner of a small store plans on purchasing $1,500 worth of goods to be marked up 40% based on the selling price. Of this he will have purchased $200 worth of \"floor goods\", which will sell for $250. If he is to maintain the desired 40% markup on the total purchase, what markup % is needed on the balance of the purchases?",
"correct_answer": "J",
"choices": [
"43.75%",
"41%",
"50%",
"45%",
"39.5%",
"38%",
"47.5%",
"46.5%",
"40%",
"42.22%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_692": {
"question_id": "mmlu_pro_692",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Calculate the Gross Domestic Product using the total expenditure approach:\nConsumption Expenditures | $500 billion\nWages and salaries | $400 billion\n(Gross Private) Investments Expenditures | $80 billion\nGovernment Expenditures | $100 billion\nTaxes | $70 billion\nImports | $50 billion\nExports | $30 billion\nWhat is the GDP (in billions)?",
"correct_answer": "J",
"choices": [
"700",
"730",
"710",
"760",
"640",
"680",
"650",
"720",
"750",
"660"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_758": {
"question_id": "mmlu_pro_758",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Reeves Corporation is going to pay a total dividend of $50,000 for the year.The Corporation has 1,000 shares of 6%, $50 par value preferred stock outstanding, as well as 5,000 shares of common stock outstanding.Find the dividendsper share for each class of stock.",
"correct_answer": "J",
"choices": [
"$4 for preferred stock, $10 for common stock",
"$6 for preferred stock, $7 for common stock",
"$5 for preferred stock, $8.40 for common stock",
"$2 for preferred stock, $9.50 for common stock",
"$4.50 for preferred stock, $8.50 for common stock",
"$3.50 for preferred stock, $9.30 for common stock",
"$3.20 for preferred stock, $9.36 for common stock",
"$2.80 for preferred stock, $9.44 for common stock",
"$2.50 for preferred stock, $9.70 for common stock",
"$3 for preferred stock, $9.40 for common stock"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_558": {
"question_id": "mmlu_pro_558",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "What is not a component of an agile organisation?",
"correct_answer": "I",
"choices": [
"Market focus and position",
"Flexibility and adaptability",
"Frequent software updates",
"Resistance to change",
"Traditional hierarchy structure",
"Human resources: multi-skilling",
"Organisational structure",
"Long-term strategic planning",
"Project-based culture",
"Customer satisfaction metrics"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_89": {
"question_id": "mmlu_pro_89",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "The Last National Bank has just approved a loan at an interest rate of 6% for 90 days. If the interest charge on the loan is $36, how much is the principal of the loan?",
"correct_answer": "J",
"choices": [
"$3000",
"$2600",
"$2200",
"$2800",
"$3200",
"$2000",
"$2100",
"$1800",
"$2500",
"$2400"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_604": {
"question_id": "mmlu_pro_604",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "The Magic-Voice Radio Supply Company is taking inventory at the end of the year. One of the items on stock is their # 84A tube. Records for these tubes show the following: Quantity Cost per Tube Inventory, January 1 800 $.252 Purchase of March 3 2,000 $.247 Purchase of June 5 1,500 $.262 Purchase of October 13 900 $.27 Purchase of November 25 600 $.272 If the quantity of these tubes on hand is 1,700, what would be the value of the inventory using each of the following methods? (a) Average Cost (b) FIFO (c) LIFO",
"correct_answer": "I",
"choices": [
"$440.00, $460.00, $425.00",
"$439.60, $459.60, $424.90",
"$437.00, $457.00, $422.00",
"$441.20, $461.20, $426.30",
"$435.80, $455.80, $421.10",
"$436.40, $456.40, $421.70",
"$438.00, $458.00, $423.00",
"$437.60, $457.60, $422.90",
"$438.60, $458.60, $423.90",
"$439.00, $459.00, $424.00"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_432": {
"question_id": "mmlu_pro_432",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "What does PEST stand for?",
"correct_answer": "I",
"choices": [
"Political, environmental, strategic, social",
"Political, economic, strategic, testing",
"Political, economic, social, testing",
"Political, environmental, social, testing",
"Political, environmental, strategic, testing",
"Political, economic, strategic, technological",
"Political, economic, social, strategic",
"Political, environmental, social, technological",
"Political, economic, social, technological",
"Political, environmental, strategic, technological"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_32": {
"question_id": "mmlu_pro_32",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Find the amount to be paid each month in order to pay off car described below in two years. Price of car: $5,779.00 Transportation charge: $73.00 Factory-installed equipment: Radio 95.50 Metallic paint 59.90 Racing stripes 39.50 Power steering 98.00 Wide radial tires 198.10 Air conditioning 429.00 Dealer-installed equipment: Mirror $8.50 Mats 10.75 Undercoat 35.00 Insurance : Collision ($100-deductible) $505.75 for two years Comprehensive 231.50 for two years Sales tax: 5% Cash deposit: $500.00 Cost of financing: 9(1/2)%per year for two Years Used car allowance: $370.00",
"correct_answer": "J",
"choices": [
"$275.50",
"$6826.25",
"$8371.42",
"$344.42",
"$310.22",
"$288.99",
"$398.65",
"$425.78",
"$462.30",
"$349"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_30": {
"question_id": "mmlu_pro_30",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Steven Moore purchased a new car for $3,462.20, including taxes and all other charges. He wishes to pay for it in 35 months. Find his monthly payments.",
"correct_answer": "E",
"choices": [
"$100.20",
"$102.55",
"$110.35",
"$95.46",
"$98.92",
"$96.06",
"$107.49",
"$105.23",
"$89.78",
"$93.20"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_95": {
"question_id": "mmlu_pro_95",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "TencerInc. has estimated its revenue function to be r(x) = 3x^2, where x is the number of years the company has been in business and r(x) is the total revenue earned up to year x in millions. The profit function is f(x) = 2x^2 - 5x +1, f(x) is the total profit earned up to year x. What is the cost accrued over a three year period? What is the rate of change of cost of production by the end of the third year?",
"correct_answer": "B",
"choices": [
"$25 million, $12 million",
"$23 million, $11 million",
"$27 million, $13 million",
"$19 million, $10 million",
"$24 million, $13 million",
"$21 million, $9 million",
"$26 million, $14 million",
"$20 million, $10 million",
"$22 million, $11 million",
"$22 million, $12 million"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_223": {
"question_id": "mmlu_pro_223",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "A tax of $800 is paid on a property with an assessed value of $20,000. If the tax rate is increased 1%. what will the new tax cost be?",
"correct_answer": "A",
"choices": [
"$1,000",
"$1,200",
"$880",
"$1,160",
"$960",
"$900",
"$1,100",
"$1,080",
"$820",
"$840"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_238": {
"question_id": "mmlu_pro_238",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": " _____________is an important element in the communication process. It recognizes that successful communications are more likely to be achieved if the source and the receiver understand each other.",
"correct_answer": "C",
"choices": [
"The encoding-decoding process.",
"Personal selling.",
"The realm of understanding.",
"The context of the message.",
"Noise.",
"The communication channel.",
"Feedback.",
"The use of jargon.",
"The sender-receiver model.",
"The use of technology."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_517": {
"question_id": "mmlu_pro_517",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "JohnBindelsearns $300 a month. He worked 4 hours overtime last month, for which he was paid time and a half. What were his total earnings for the month?",
"correct_answer": "G",
"choices": [
"$330.00",
"$315.00",
"$305.38",
"$312.50",
"$310.00",
"$320.38",
"$310.38",
"$300.38",
"$335.38",
"$325.38"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_616": {
"question_id": "mmlu_pro_616",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Enhancement of job satisfaction and productivity are key characteristics of which theoretical perspective of work design?",
"correct_answer": "H",
"choices": [
"Human Relations theory",
"Process improvement",
"Job enrichment theory",
"Job enlargement theory",
"Time and Motion study",
"Techno-structural change model",
"Scientific management theory",
"Socio-technical systems",
"Motivational theory",
"Job characteristics model"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_27": {
"question_id": "mmlu_pro_27",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "What is the net cost of a tape recorder whose list price is $32 and on which the discount rate is 30%?",
"correct_answer": "F",
"choices": [
"$20.00",
"$30.00",
"$18.40",
"$24.00",
"$32.00",
"$22.40",
"$9.60",
"$10.00",
"$26.40",
"$28.00"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_574": {
"question_id": "mmlu_pro_574",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Mr. Golden purchased 3 bonds, each with a maturity value of $1,000, from theSuttonsmithCorporation. For each bond, he will receive $15 semiannually for 20 years, after which time he will also receive the full face value of $1,000. The $15 payments will be made regardless of the interest rate. If the interest rate on one bond was 3%; on another, 4%; and on the third, 3.6%, what did Mr. Golden pay for each bond?",
"correct_answer": "B",
"choices": [
"$950.00, $800.23, $850.26",
"$1,000.04, $863.23, $915.26",
"$1,000.04, $860.23, $910.26",
"$1,050.00, $890.23, $940.26",
"$1,000.00, $860.23, $910.26",
"$1,050.04, $813.23, $935.26",
"$995.00, $855.23, $905.26",
"$1,000.04, $865.23, $915.26",
"$1,000.00, $900.23, $950.26",
"$1,000.04, $863.23, $920.26"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_203": {
"question_id": "mmlu_pro_203",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Bill deposits $1,000 for 4 years at 5% interest, compounded annually. What is its accumulated value?",
"correct_answer": "C",
"choices": [
"$1,000",
"$1,102.50",
"$1,215.51",
"$1,200",
"$1,215.50",
"$1,220",
"$1,104.08",
"$1,250",
"$1,331.00",
"$1,300"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_733": {
"question_id": "mmlu_pro_733",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "How much must be invested in $1,000 5% bonds to have an annual income from interest of $3,000 if the bonds sell at 74(7/8)? Assume a brokerage fee of $5 a bond.",
"correct_answer": "C",
"choices": [
"$35,000",
"$55,000",
"$45,225",
"$40,000",
"$65,000",
"$50,000",
"$70,000",
"$75,000",
"$60,000",
"$30,000"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_665": {
"question_id": "mmlu_pro_665",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "You are willing to buy a new machine for $1,000 because it will save you $150 annually for the next 10 years. What is the rate of return on this investment?",
"correct_answer": "C",
"choices": [
"14%",
"9.5%",
"8.0516%",
"10%",
"8.5%",
"12%",
"11.3%",
"5%",
"7.5%",
"6.2%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_718": {
"question_id": "mmlu_pro_718",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "On April 17, Mr. Hicky received a three-month note for $800 bearing interest at 4%. On May 17, his bank dis-counted the note at a 6% interest rate. Find his proceeds.",
"correct_answer": "D",
"choices": [
"$790.92",
"$798",
"$810",
"$799.92",
"$785.92",
"$795",
"$804.08",
"$792.50",
"$808",
"$800"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_429": {
"question_id": "mmlu_pro_429",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "A company sells its product at two different prices in two different locations. Which of the following types of research will be most effective in identifying the optimal price for the product if all other marketing factors are held constant?",
"correct_answer": "C",
"choices": [
"Experimental",
"Predictive",
"Causal",
"Observational",
"Longitudinal",
"Correlational",
"Ethnographic",
"Descriptive",
"Cross-sectional",
"Exploratory"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_225": {
"question_id": "mmlu_pro_225",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Herbie'sService Station bought a water pump from his supplier that has a list price of $40. His trade discount is $16. Find the net price.",
"correct_answer": "E",
"choices": [
"$56",
"$26",
"$16",
"$18",
"$24",
"$40",
"$32",
"$30",
"$34",
"$22"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_459": {
"question_id": "mmlu_pro_459",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "Holmes Bros, accepted a draft dated July 10 for $326.80 at 5%, payable 90 days after date, (a) How much was paid if Homes Bros, paid it when due? (b) If this draft had been discounted at 5(1/2)% 30 days before maturity and a (1/4)% collection fee was charged, what were the proceeds? (Assume a 360 day year.)",
"correct_answer": "I",
"choices": [
"$326.80, $330.89",
"$326.80, $328.54",
"$328.54, $326.80",
"$328.54, $327.20",
"$330.89, $330.89",
"$332.00, $330.50",
"$326.80, $329.00",
"$332.00, $329.65",
"$330.89, $328.54",
"$330.89, $327.20"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_603": {
"question_id": "mmlu_pro_603",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "BaskowToys Wholesalers is offering Albee Stores the new Captain Kirk doll at 25-20% off the retail price. If Albee Stores is buying each doll for $2.10, for what will it sell the dolls, i.e., what is the retail price?",
"correct_answer": "A",
"choices": [
"$3.50",
"$7.00",
"$6.00",
"$4.50",
"$4.00",
"$5.00",
"$2.50",
"$3.00",
"$5.50",
"$2.80"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_284": {
"question_id": "mmlu_pro_284",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "J. S.Bacq& Co. runs a \"penny arcade\" with the following fixed costs: Wages and electricity: $500 per week Upkeep and miscellaneous: $100 per week. As a sales incentive, the Arcade distributes a surprise gift to each customer. Each gift costs the Arcade $2.00. The sole charge in the Arcade is $6.00 for admission. Management wants to make $1,000 profit each week. How many people must visit the Arcade weekly to realize this much profit?",
"correct_answer": "J",
"choices": [
"Nine hundred customers",
"Eight hundred customers",
"Five hundred customers",
"Two hundred and fifty customers",
"Seven hundred customers",
"Six hundred customers",
"One thousand customers",
"Three hundred customers",
"One hundred and fifty customers",
"Four hundred customers"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6": {
"question_id": "mmlu_pro_6",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": " Pine and Gilmore (1999) derive four distinct realms of experience, based on two dimensions. What are these dimensions?",
"correct_answer": "D",
"choices": [
"Customer participation and environmental acquisition.",
"Environmental acquisition and environmental relationship.",
"Customer retention and environmental relationship.",
"Customer participation and environmental relationship.",
"Customer acquisition and customer retention.",
"Customer participation and customer relationship.",
"Customer acquisition and environmental participation.",
"Environmental participation and customer relationship.",
"Customer retention and customer relationship.",
"Customer acquisition and environmental relationship."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3633": {
"question_id": "mmlu_pro_3633",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "An automobile travelling at 10 miles per hour produces 0.33 1b of CO gas per mile. How many moles of CO are produced per mile?",
"correct_answer": "A",
"choices": [
"5.4 moles per mile",
"9.0 moles per mile",
"2.5 moles per mile",
"7.3 moles per mile",
"3.8 moles per mile",
"4.8 moles per mile",
"4.1 moles per mile",
"5.9 moles per mile",
"8.1 moles per mile",
"6.2 moles per mile"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3456": {
"question_id": "mmlu_pro_3456",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "What is the explanation for the following trends inlattice energies ? NaF - 260 Kcal / moleNaCl- 186 Kcal / mole NaCl - 186 Kcal / moleKCl- 169 Kcal / mole NaBr - 177 Kcal / moleCsCl- 156 Kcal / mole",
"correct_answer": "F",
"choices": [
"The lattice energy is solely determined by the electron configuration of the ions",
"The lattice energy is not affected by the size of the ions",
"The lattice energy is higher for larger ions due to increased attraction between adjacent ions",
"The lattice energy decreases with the increasing polarization of ions",
"The lattice energy is determined by the atomic mass of the ions",
"The size of ions affects the lattice energy. The larger the ion, the lower the lattice energy due to reduced attraction between adjacent ions.",
"The lattice energy increases with the increase in ionic radii due to stronger electrostatic forces",
"The lattice energy is unaffected by the charge on the ions",
"The lattice energy is higher for ions with similar electronegativity values"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3968": {
"question_id": "mmlu_pro_3968",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "When an electron in a certain excited energy level in a one-dimensional box of length $2.00 \u00c5$ makes a transition to the ground state, a photon of wavelength $8.79 \\mathrm{~nm}$ is emitted. Find the quantum number of the initial state.",
"correct_answer": "D",
"choices": [
"10",
"2",
"3",
"4",
"6",
"5",
"8",
"7",
"9",
"1"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3906": {
"question_id": "mmlu_pro_3906",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "AssumingpD= - log [D_3O^+] in analogy to pH, what ispDof pure D_2O? K = 2 \u00d7 10^-15.",
"correct_answer": "I",
"choices": [
"8.00",
"8.15",
"8.35",
"6.35",
"7.00",
"6.85",
"7.45",
"7.25",
"7.35",
"9.35"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3862": {
"question_id": "mmlu_pro_3862",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Which of the following correctly lists the individual intermolecular attractive forces from the strongest to the weakest?",
"correct_answer": "D",
"choices": [
"Dipole-dipole < induced dipole < hydrogen bond",
"Induced dipole < hydrogen bond < dipole-dipole",
"Dipole-dipole < hydrogen bond < induced dipole",
"Hydrogen bond < dipole-dipole < induced dipole",
"Induced dipole < dipole-dipole < hydrogen bond",
"Hydrogen bond < induced dipole < dipole-dipole"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3690": {
"question_id": "mmlu_pro_3690",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Of the following compounds, which has the lowest melting point?",
"correct_answer": "H",
"choices": [
"NaCl",
"LiCl",
"AlCl3",
"KCl",
"FeCl3",
"AgCl",
"CaCl2",
"HCl",
"MgCl2",
"CCl4"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3614": {
"question_id": "mmlu_pro_3614",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Assume all gases are perfect unless stated otherwise. Unless otherwise stated, thermodynamic data are for 298.15 K. Calculate the final temperature of a sample of argon of mass $12.0 \\mathrm{~g}$ that is expanded reversibly and adiabatically from $1.0 \\mathrm{dm}^3$ at $273.15 \\mathrm{~K}$ to $3.0 \\mathrm{dm}^3$.",
"correct_answer": "H",
"choices": [
"162 $\\mathrm{K}$",
"151 $\\mathrm{K}$",
"120 $\\mathrm{K}$",
"145 $\\mathrm{K}$",
"210 $\\mathrm{K}$",
"172 $\\mathrm{K}$",
"189 $\\mathrm{K}$",
"131 $\\mathrm{K}$",
"200 $\\mathrm{K}$",
"158 $\\mathrm{K}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4521": {
"question_id": "mmlu_pro_4521",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": ".0324 Faradays (F) liberated .651g of Calcium. What is the atomic weight of Calcium?",
"correct_answer": "B",
"choices": [
"35.7",
"40.2",
"24.6",
"50.5",
"45.8",
"80.4",
"20.1",
"70.9",
"60.3",
"32.1"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3583": {
"question_id": "mmlu_pro_3583",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "For the cell H_2(1 atm) \\vertHCl\\vertAgCl\\vertAg, E\u00b0 = 0.2220 V at 298 K. If the measured E = 0.396 V, what is the pH of the HCl solution? Cite any approximations made.",
"correct_answer": "B",
"choices": [
"4.47",
"1.47",
"3.47",
"0.74",
"2.74",
"1.75",
"0.47",
"3.74",
"2.47",
"1.74"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4269": {
"question_id": "mmlu_pro_4269",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "A chemist dissolves 10 g of an unknown protein in a liter of water at 25\u00b0C. The osmotic pressure is found to be 9.25 mmHg. What is the protein's molecular weight. Based upon the number of moles in 10 g of protein, what would the freezing point depression and boiling point elevation be? Assume R = Universal Gas Constant = .0821 (liter- atm / mole\u00b0K), k_f = (1.86\u00b0C / m), and k_b = (.52\u00b0C / m).",
"correct_answer": "H",
"choices": [
"Molecular weight: 21,500 (g / mole), \u2206T_f = 8.8 \u00d7 10^-4 \u00b0C, \u2206T_b = 2.34 \u00d7 10^-4 \u00b0C",
"Molecular weight: 18,500 (g / mole), \u2206T_f = 10.3 \u00d7 10^-4 \u00b0C, \u2206T_b = 2.79 \u00d7 10^-4\u00b0C",
"Molecular weight: 15,000 (g / mole), \u2206T_f = 12.6 \u00d7 10^-4 \u00b0C, \u2206T_b = 3.35 \u00d7 10^-4 \u00b0C",
"Molecular weight: 25,000 (g / mole), \u2206T_f = 7.2 \u00d7 10^-4 \u00b0C, \u2206T_b = 1.89 \u00d7 10^-4 \u00b0C",
"Molecular weight: 22,000 (g / mole), \u2206T_f = 8.3 \u00d7 10^-4 \u00b0C, \u2206T_b = 2.09 \u00d7 10^-4\u00b0C",
"Molecular weight: 23,000 (g / mole), \u2206T_f = 6.5 \u00d7 10^-4 \u00b0C, \u2206T_b = 1.73 \u00d7 10^-4 \u00b0C",
"Molecular weight: 17,800 (g / mole), \u2206T_f = 10.9 \u00d7 10^-4 \u00b0C, \u2206T_b = 2.92 \u00d7 10^-4 \u00b0C",
"Molecular weight: 20,100 (g / mole), \u2206T_f = 9.3 \u00d7 10^-4 \u00b0C, \u2206T_b = 2.59 \u00d7 10^-4\u00b0C",
"Molecular weight: 22,500 (g / mole), \u2206T_f = 8.1 \u00d7 10^-4 \u00b0C, \u2206T_b = 2.15 \u00d7 10^-4 \u00b0C",
"Molecular weight: 19,100 (g / mole), \u2206T_f = 1.3 \u00d7 10^-4 \u00b0C, \u2206T_b = 3.59 \u00d7 10^-4\u00b0C"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3470": {
"question_id": "mmlu_pro_3470",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Calculate the mean free path for oxygen at 25\u00b0C at (a) 1atm pressure and (b) 10^-3torr.",
"correct_answer": "D",
"choices": [
"7.02 \u00d7 10^-8 m, 4.3 cm",
"6.02 \u00d7 10^-8 m, 6.3 cm",
"7.02 \u00d7 10^-7 m, 53 cm",
"7.02 \u00d7 10^-8 m, 5.3 cm",
"8.02 \u00d7 10^-8 m, 4.3 cm",
"6.02 \u00d7 10^-8 m, 4.3 mm",
"7.02 \u00d7 10^-9 m, 0.53 cm",
"7.02 \u00d7 10^-8 m, 6.3 cm",
"7.02 \u00d7 10^-8 m, 5.3 mm",
"6.02 \u00d7 10^-8 m, 5.3 cm"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3466": {
"question_id": "mmlu_pro_3466",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Two moles of gaseous NH_3 are introduced into a 1.0-liter vessel and allowed to undergo partial decomposition at high temperature ac-cording to the reaction 2NH_3 (g) \\rightleftarrows N_2 (g) + 3H_2 (g) . At equilibrium, 1.0 mole of NH_3 (g) remains. What is the value of the equilibrium constant?",
"correct_answer": "F",
"choices": [
"0.25",
"4.0",
"1.5",
"2.0",
"3.0",
"1.6875",
"0.5",
"1.0",
"2.5",
"0.75"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3596": {
"question_id": "mmlu_pro_3596",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Substances whose Lewis structures must be drawn with an unpaired electron are called",
"correct_answer": "B",
"choices": [
"hydrogen bonds",
"free radicals",
"allotropes",
"isotopes",
"nonpolar molecules",
"polar molecules",
"covalent compounds",
"resonance structures",
"dipole moments",
"ionic compounds"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3852": {
"question_id": "mmlu_pro_3852",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "At 1308\u00b0 K 1 mole of ruthenium changes from the \\beta (beta) to the \\alpha solid state. \\DeltaH for the transition = - 34 cal mol^-1. Calculate the entropy changes if the surrounding cooling bath is at STP.",
"correct_answer": "C",
"choices": [
"0.34(J / \u00b0K)",
"0.29(J / \u00b0K)",
"0.41(J / \u00b0K)",
"0.25(J / \u00b0K)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3881": {
"question_id": "mmlu_pro_3881",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "The mass density of water vapour at $327.6 \\mathrm{~atm}$ and $776.4 \\mathrm{~K}$ is $133.2 \\mathrm{~kg} \\mathrm{~m}^{-3}$. Given that for water $T_{\\mathrm{c}}=647.4 \\mathrm{~K}, p_{\\mathrm{c}}=218.3 \\mathrm{~atm}, a=5.464 \\mathrm{dm}^6 \\mathrm{~atm} \\mathrm{~mol}^{-2}$, $b=0.03049 \\mathrm{dm}^3 \\mathrm{~mol}^{-1}$, and $M=18.02 \\mathrm{~g} \\mathrm{~mol}^{-1}$, calculate the molar volume.",
"correct_answer": "I",
"choices": [
"0.1486$\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1100 $\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1257 $\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1555 $\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1400 $\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1502$\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1428 $\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1234$\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
" 0.1353$\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$",
"0.1600 $\\mathrm{dm}^3 \\mathrm{~mol}^{-1}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4439": {
"question_id": "mmlu_pro_4439",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Classify each of the following as a member of the methane series, the ethylene series, or the acetylene series: C_12H_26, C_9H_16, C7H_14 , C_26H_54.",
"correct_answer": "I",
"choices": [
"C_12H_26: alkane series, C_9H_16: ethylene series, C_7H14: alkane series, C_26H_54: ethylene series",
"C_12H_26: acetylene series, C_9H_16: ethylene series, C_7H14: alkane series, C_26H_54: alkane series",
"C_12H_26: acetylene series, C_9H_16: ethylene series, C_7H14: alkane series, C_26H_54: ethylene series",
"C_12H_26: ethylene series, C_9H_16: alkane series, C_7H14: acetylene series, C_26H_54: ethylene series",
"C_12H_26: alkane series, C_9H_16: ethylene series, C_7H14: acetylene series, C_26H_54: ethylene series",
"C_12H_26: ethylene series, C_9H_16: acetylene series, C_7H14: alkane series, C_26H_54: acetylene series",
"C_12H_26: alkane series, C_9H_16: ethylene series, C_7H14: acetylene series, C_26H_54: alkane series",
"C_12H_26: ethylene series, C_9H_16: acetylene series, C_7H14: alkane series, C_26H_54: alkane series",
"C_12H_26: alkane series, C_9H_16: acetylene series, C_7H14: ethylene series, C_26H_54: alkane series",
"C_12H_26: acetylene series, C_9H_16: alkane series, C_7H14: ethylene series, C_26H_54: acetylene series"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3459": {
"question_id": "mmlu_pro_3459",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "An ionic bond is established between positive ion A and negative ion B. How would one expect the strength of the bond to be affected by each of the following changes: (a) Doubling the charge on A, (b) Simultaneously doubling the charge on A and B, (c) Doubling the radius of B, and (d) Simultaneously doubling the radius of A and B?",
"correct_answer": "I",
"choices": [
"(a) Bond strength remains the same, (b) Bond strength is halved, (c) Bond strength is quadrupled, (d) Bond strength is doubled",
"(a) Bond strength is doubled, (b) Bond strength remains the same, (c) Bond strength is decreased by a factor of 2, (d) Bond strength is halved",
"(a) Bond strength is halved, (b) Bond strength is doubled, (c) Bond strength is quadrupled, (d) Bond strength remains the same",
"(a) Bond strength is quadrupled, (b) Bond strength is halved, (c) Bond strength is doubled, (d) Bond strength is decreased by a factor of 2",
"(a) Bond strength is decreased by a factor of 2, (b) Bond strength is increased by a factor of 4, (c) Bond strength is tripled, (d) Bond strength is halved",
"(a) Bond strength is quadrupled, (b) Bond strength is octupled, (c) Bond strength is halved, (d) Bond strength remains the same",
"(a) Bond strength is tripled, (b) Bond strength is sextupled, (c) Bond strength remains the same, (d) Bond strength is decreased by a factor of 8",
"(a) Bond strength is halved, (b) Bond strength remains the same, (c) Bond strength is doubled, (d) Bond strength is quadrupled",
"(a) Bond strength is doubled, (b) Bond strength is quadrupled, (c) Bond strength is halved, (d) Bond strength is decreased by a factor of 4",
"(a) Bond strength is unchanged, (b) Bond strength is doubled, (c) Bond strength is decreased by a factor of 4, (d) Bond strength is quadrupled"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3812": {
"question_id": "mmlu_pro_3812",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "A chemist decides to find the vapor pressure of water by the gas saturation method. 100 liters of N_2 gas is passed through 65.44 g of water. After passage of the gas, 63.13 g remained. The temperature of the H_2O (water) is 25\u00b0C.Find the vapor pressure of water at this temperature.",
"correct_answer": "F",
"choices": [
"2.31 mm",
"15.76 mm",
"298 mm",
"18.02 mm",
"12.15 mm",
"23.8 mm",
"5.67 mm",
"30.5 mm",
"9.82 mm",
"35.2 mm"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4264": {
"question_id": "mmlu_pro_4264",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Given that the earth's mean radius = 6.37 \u00d7 10^6 m, normal atmosphere pressure = 1.013 \u00d7 10^5 N/m_2, and the gravitational acceleration = 9.8 m/sec^2, what is the mass of the homosphere?",
"correct_answer": "E",
"choices": [
"8.10 \u00d7 10^18 kg",
"6.35 \u00d7 10^18 kg",
"3.14 \u00d7 10^18 kg",
"7.25 \u00d7 10^18 kg",
"5.27 \u00d7 10^18 kg",
"7.00 \u00d7 10^18 kg",
"6.00 \u00d7 10^18 kg",
"4.98 \u00d7 10^18 kg",
"5.55 \u00d7 10^18 kg",
"4.20 \u00d7 10^18 kg"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3856": {
"question_id": "mmlu_pro_3856",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Without Catalyst(k), sec^-1 With Catalyst(k_c), sec^-1 T_1 = 473\u00b0K 1.76 \u00d7 10^-2 6.10 T_2 = 573\u00b0K 0.804 88.1 The reaction rate of a first-order reaction at a con-stant pressure of 1atmis shown in the table above. Cal-culate the value of H and S\u00b0 with and without catalyst, assuming that they do not change with temperature.",
"correct_answer": "D",
"choices": [
"H = 19.6 kcal/mole, 13.4 kcal/mole; S = 34.9eu, 33.4eu",
"H = 23.5 kcal/mole, 16.2 kcal/mole; S = 37.1eu, 36.5eu",
"H = 18.2 kcal/mole, 12.8 kcal/mole; S = 33.3eu, 32.7eu",
"H = 20.6 kcal/mole, 14.4 kcal/mole; S = 35.5eu, 34.0eu",
"H = 17.8 kcal/mole, 11.9 kcal/mole; S = 32.8eu, 31.2eu",
"H = 25.0 kcal/mole, 18.0 kcal/mole; S = 39.2eu, 38.6eu",
"H = 24.3 kcal/mole, 17.1 kcal/mole; S = 38.0eu, 37.4eu",
"H = 19.2 kcal/mole, 13.7 kcal/mole; S = 33.9eu, 33.3eu",
"H = 22.4 kcal/mole, 15.6 kcal/mole; S = 36.7eu, 35.2eu",
"H = 21.0 kcal/mole, 15.0 kcal/mole; S = 34.0eu, 32.5eu"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4324": {
"question_id": "mmlu_pro_4324",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "ThepK_aof aspartic acid is 2.8 What is the aspartic acid concentration of a 1M aspartic acid solution maintained at pH 2.8? What is theaspartateion concentration if the pH is raised to 3.8?",
"correct_answer": "I",
"choices": [
"0.9 M, 0.1 M",
"0.5 M, 1 M",
"0.6 M, 0.4 M",
"0.1 M, 0.9 M",
"1 M, 0.9 M",
"1 M, 0.1 M",
"0.7 M, 0.3 M",
"0.5 M, 0.8 M",
"0.5 M, 0.9 M",
"0.8 M, 0.2 M"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3974": {
"question_id": "mmlu_pro_3974",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Assume all gases are perfect unless stated otherwise. Unless otherwise stated, thermodynamic data are for 298.15 K. For a van der Waals gas, $\\pi_T=a / V_{\\mathrm{m}}^2$. Calculate $\\Delta U_{\\mathrm{m}}$ for the isothermal expansion of nitrogen gas from an initial volume of $1.00 \\mathrm{dm}^3$ to $24.8 \\mathrm{dm}^3$ at $298 \\mathrm{~K}$.",
"correct_answer": "I",
"choices": [
"150$\\mathrm{J} \\mathrm{mol}^{-1}$",
"90$\\mathrm{J} \\mathrm{mol}^{-1}$",
"170$\\mathrm{J} \\mathrm{mol}^{-1}$",
"100$\\mathrm{J} \\mathrm{mol}^{-1}$",
"160$\\mathrm{J} \\mathrm{mol}^{-1}$",
"140$\\mathrm{J} \\mathrm{mol}^{-1}$",
"110$\\mathrm{J} \\mathrm{mol}^{-1}$",
"120$\\mathrm{J} \\mathrm{mol}^{-1}$",
" 131$\\mathrm{J} \\mathrm{mol}^{-1}$",
"125$\\mathrm{J} \\mathrm{mol}^{-1}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3418": {
"question_id": "mmlu_pro_3418",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "A radioactive isotope, which is used in diagnostic imaging, has a half-life of 6.0 hours. If a quantity of this isotope has an activity of 150 mCi when it is delivered to a hospital, how much activity will remain 24 hours after delivery? (mCi = microcuries)",
"correct_answer": "B",
"choices": [
"150 mCi",
"9.4 mCi",
"4.7 mCi",
"12.5 mCi",
"25 mCi",
"50 mCi",
"19 mCi",
"75 mCi",
"38 mCi",
"2.3 mCi"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3731": {
"question_id": "mmlu_pro_3731",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "An impure sample of aluminum sulfate, Al_2 (SO_4)_3, is analyzed by forming a precipitate of insoluble barium sulfate, BaSO_4, by reacting aluminum sulfate with an excess of BaCl_2 (to insure complete precipitation). After washing and drying, 2.000 g of BaSO_4 was obtained. If the original sample weighed 1.000 g, what was the per cent of aluminum sulfate in the sample?",
"correct_answer": "D",
"choices": [
"75.4 % Al_2(SO_4)_3",
"84.3 % Al_2(SO_4)_3",
"66.7 % Al_2(SO_4)_3",
"97.8 % Al_2 (SO_4)_3",
"2.2 % Al_2 (SO_4)_3",
"10.5 % Al_2(SO_4)_3",
"100 % Al_2 (SO_4)_3",
"56.1 % Al_2(SO_4)_3",
"42.9 % Al_2(SO_4)_3",
"89.5 % Al_2 (SO_4)_3"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4270": {
"question_id": "mmlu_pro_4270",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "There are some 25,000ribosomesin an E. coli cell. If the structucalproteins of theseribosomeswere stretched out end to end as fully extended polypeptide chains, how many times could they encircle the E. coli cell?Assume that theribosomesare 180 \\AA in diameter, with a specific gravity of 1.0, and that they contain 40% protein. Assume that the E. coli cell is a sphere 1\\mu in diameter.",
"correct_answer": "E",
"choices": [
"2.10 \u00d7 10^4 times",
"1.26 \u00d7 10^4 times",
"0.80 \u00d7 10^4 times",
"3.50 \u00d7 10^4 times",
"1.76 \u00d7 10^4 times",
"3.00 \u00d7 10^4 times",
"4.00 \u00d7 10^4 times",
"2.54 \u00d7 10^4 times",
"1.00 \u00d7 10^4 times",
"2.00 \u00d7 10^4 times"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4101": {
"question_id": "mmlu_pro_4101",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "2.000picogram(pg) of ^33P decays by ^0_-1\\beta emission to 0.250 pg in 75.9 days. Find the half-life of ^33P.",
"correct_answer": "I",
"choices": [
"37.95 days",
"50.6 days",
"8 days",
"5.6 days",
"60.4 days",
"75.9 days",
"151.8 days",
"12.65 days",
"25.3 days",
"100.2 days"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3723": {
"question_id": "mmlu_pro_3723",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "How many liters ofphosphine(PH_3) gas at STP could be made from 30 g of calcium by use of the following sequence of reactions: 3Ca + 2P\\rightarrowCa_3 P_2 Ca_3 P_2 + 6HCl\\rightarrow2PH3 + 3CaCl_2 (Molecular weights: Ca = 40, PH_3 = 34.)",
"correct_answer": "A",
"choices": [
"11.2 l",
"18.4 l",
"14.0 l",
"6.8 l",
"22.4 l",
"9.2 l",
"12.8 l",
"15.6 l",
"8.4 l",
"5.6 l"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3845": {
"question_id": "mmlu_pro_3845",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Paper pulp (almost 100 % cellulose) is prepared commercially by digesting wood chips in a hot aqueous solution of calciumbisulfite, Ca(HSO_3)_2. The latter dissolves lignin and resins in the wood, leaving nearly pure cellulose. The sulfite solution is prepared by the following reactions: S + O_2\\ding{217} SO_2 SO_2 + H_2O\\ding{217} H_2SO_3 CaCO_3 + 2H_2SO_3\\ding{217}Ca(HSO_3)_2 + CO_2 + H_2O (a) For every 250 kg of limestone (CaCO_3) used in the process , what weight of sulfur would be required? (b) What weight of Ca(HSO_3)_2 is produced from the limestone and sulfur in part (a)?",
"correct_answer": "F",
"choices": [
"220 kg of sulfur, 350 kg of Ca(HSO_3)_2",
"180 kg of sulfur, 450 kg of Ca(HSO_3)_2",
"200 kg of sulfur, 400 kg of Ca(HSO_3)_2",
"170 kg of sulfur, 520 kg of Ca(HSO_3)_2",
"100 kg of sulfur, 300 kg of Ca(HSO_3)_2",
"160 kg of sulfur, 505 kg of Ca(HSO_3)_2",
"150 kg of sulfur, 600 kg of Ca(HSO_3)_2",
"140 kg of sulfur, 470 kg of Ca(HSO_3)_2",
"120 kg of sulfur, 550 kg of Ca(HSO_3)_2",
"130 kg of sulfur, 480 kg of Ca(HSO_3)_2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4094": {
"question_id": "mmlu_pro_4094",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Using the information in the following table, determine \u2206H\u00b0 for the reactions: a) 3Fe_2O_3 (s) + CO(g) \\rightarrow 2Fe_2O_4 (s) + CO_2 (g) b) Fe_3O_4 (s) + CO(g) \\rightarrow 3FeO(s) + CO_2 (g) c) FeO(s) + CO(g) \\rightarrow Fe(s) + CO_2 (g) Heats of Formation Compound \u2206H\u00b0 (Kcal/mole) CO (g) \\rule{1em}{1pt} 26.4 CO_2 (g) \\rule{1em}{1pt} 94.1 Fe_2O_3(s) \\rule{1em}{1pt} 197 Fe_3O_4 (s) \\rule{1em}{1pt} 267 FeO (s) \\rule{1em}{1pt} 63.7",
"correct_answer": "J",
"choices": [
"10.8 Kcal, 8.4 Kcal, 4.3 Kcal",
"10.6 Kcal, 8.3 Kcal, 4.2 Kcal",
"11.5 Kcal, 7.5 Kcal, 3.8 Kcal",
"9.7 Kcal, 8.1 Kcal, 4.1 Kcal",
"9.0 Kcal, 9.1 Kcal, 5.0 Kcal",
"10.2 Kcal, 7.9 Kcal, 3.9 Kcal",
"12.3 Kcal, 8.5 Kcal, 4.4 Kcal",
"11.0 Kcal, 8.0 Kcal, 4.5 Kcal",
"10.4 Kcal, 7.7 Kcal, 3.7 Kcal",
"10.7 Kcal, 8.2 Kcal, 4.0 Kcal"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3594": {
"question_id": "mmlu_pro_3594",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Which of the following can form hydrogen bonds?",
"correct_answer": "E",
"choices": [
"CH3F",
"CH3OCH2Br",
"CH4",
"CH3CH2CH2CH3",
"CH3NH2",
"CH3CH3",
"C6H6"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4183": {
"question_id": "mmlu_pro_4183",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "If the \u2206H' of water is 40,600 (J / mole at 100\u00b0C, what is the boiling point of water at a pressure of .750 atm.",
"correct_answer": "D",
"choices": [
"75\u00b0C",
"85\u00b0C",
"100\u00b0C",
"92\u00b0C"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3603": {
"question_id": "mmlu_pro_3603",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Which nuclide has an NMR frequency of 115.5 MHz in a 20.0 T magnetic field?",
"correct_answer": "A",
"choices": [
"17O",
"29Si",
"31P",
"19F",
"13C",
"15N",
"27Al",
"33S",
"39K",
"23Na"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4140": {
"question_id": "mmlu_pro_4140",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "The relationship introduced in Problem $1-48$ has been interpreted to mean that a particle of mass $m\\left(E=m c^2\\right)$ can materialize from nothing provided that it returns to nothing within a time $\\Delta t \\leq h / m c^2$. Particles that last for time $\\Delta t$ or more are called real particles; particles that last less than time $\\Delta t$ are called virtual particles. The mass of the charged pion, a subatomic particle, is $2.5 \\times 10^{-28} \\mathrm{~kg}$. What is the minimum lifetime if the pion is to be considered a real particle?",
"correct_answer": "B",
"choices": [
"2.0 x 10^-23 s",
" 2.9$10^{-23} \\mathrm{~s}$ ",
"3.0 x 10^-22 s",
"2.5$10^{-23} \\mathrm{~s}$",
"1.2 x 10^-23 s",
"3.2$10^{-23} \\mathrm{~s}$",
"4.1 x 10^-23 s",
"3.7$10^{-23} \\mathrm{~s}$",
"5.0 x 10^-23 s",
"1.5 x 10^-23 s"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4109": {
"question_id": "mmlu_pro_4109",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "The ionization constant of lactic acid at 25\u00b0C is 1.4 \u00d7 10^-4 . A buffer solution is prepared by adding 1.00 mole of lactic acid and 0.80 mole of sodium lactate to 1 kg of water. Assume that water is at unit activity and that the activity coefficient of each univalent ion is 0.65 throughout this problem. (a) Find the pH (in the activity sense) of this solution at 25\u00b0C. (b) Find the change in the pH of the solution resulting from the addition of 0.50 mole of sodium hydroxide to the quantity of solution containing 1 kg of water. (c) Find the change in pH resulting from the addition of 0.50 mole of sodium hydroxide to 1 kg of pure water at 25\u00b0C.",
"correct_answer": "G",
"choices": [
"3.47, 4.17, 13.412",
"4.17, 4.77, 14.212",
"3.87, 4.47, 13.812",
"2.87, 3.47, 12.912",
"3.07, 3.67, 13.312",
"4.57, 5.07, 14.512",
"3.57, 4.07, 13.512",
"4.07, 4.67, 14.112",
"3.67, 4.27, 13.712",
"2.57, 3.07, 12.512"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3946": {
"question_id": "mmlu_pro_3946",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "What is the molarity of a sodium hydroxide solution that requires 42.6 mL of 0.108 M HCl to neutralize 40.0 mL of the base?",
"correct_answer": "I",
"choices": [
"0.0641 M",
"0.045 M",
"0.250 M",
"0.200 M",
"0.400 mol/L",
"0.108 M",
"0.500 M",
"1.64 M",
"0.115 M",
"0.300 M"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7314": {
"question_id": "mmlu_pro_7314",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If technology makes production less expensive and at the same time exports decrease which of the following will result with certainty?",
"correct_answer": "G",
"choices": [
"Real GDP will decrease.",
"The unemployment rate will increase.",
"The country's trade balance will worsen.",
"The interest rate will decrease.",
"The price level will increase.",
"The interest rate will increase.",
"The price level will decrease.",
"The country's trade balance will improve.",
"The unemployment rate will decrease.",
"Real GDP will increase."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6774": {
"question_id": "mmlu_pro_6774",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "What is the difference between monopoly and monopsony?",
"correct_answer": "J",
"choices": [
"Monopoly and monopsony are the same",
"Monopoly is a market structure with unrestricted competition, monopsony is a market where one entity controls all purchases",
"Monopoly refers to a single buyer dominating a market, monopsony refers to a single seller with no competition",
"Monopoly involves only one buyer setting prices, monopsony involves only one seller setting prices",
"Monopoly is a market with one seller and many buyers, monopsony is a market with many sellers and one buyer",
"Monopoly refers to multiple sellers, monopsony refers to multiple buyers",
"Monopoly is when there is only one buyer, monopsony is when there is only one seller",
"Monopoly means a market with many competitors, monopsony means a market dominated by a single company",
"Monopoly is a market condition with a single supplier of goods, monopsony is a market with a single distributor",
"Monopoly is the market situation with only one seller, monopsony is the market situation with only one buyer"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6685": {
"question_id": "mmlu_pro_6685",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Suppose a commercial bank has deposits of $400,000 and has made loans and investments of $315,000. Assume also that deposits are its only source of reserves. If the required reserve ratio is 20%, how much are its excess reserves?",
"correct_answer": "C",
"choices": [
"$85,000",
"$80,000",
"$5,000",
"$315,000"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7419": {
"question_id": "mmlu_pro_7419",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Monopolistic competitors producelessgoods at a higher average cost than pure competitors. Could a purely competitive market degenerate into a monopolistically competitive market? If so,,why would the lower prices of the pure competitors fail to drive out the higher- priced monopolistic competitor?",
"correct_answer": "B",
"choices": [
"Adoption of homogeneous marketing strategies by all firms",
"Introduction of product differentiation",
"Introduction of price war",
"Mergers of pure competitors into a single entity",
"Increase in production cost",
"Decrease in consumer demand",
"Consumer preference for uniform products",
"Elimination of all competition through collusion",
"Sudden increase in market entry barriers",
"Shifts in government regulations that favor larger firms"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6941": {
"question_id": "mmlu_pro_6941",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "A perfectly competitive employer hires labor up to the point where",
"correct_answer": "F",
"choices": [
"Wage = Total cost of production.",
"Wage = Marginal product of labor.",
"Wage = Marginal cost.",
"Wage = Average product of labor.",
"Wage = Average factor cost.",
"Wage = Marginal revenue product of labor.",
"Wage = Marginal factor cost.",
"Wage = Marginal revenue.",
"Wage = Average revenue.",
"Wage = Total revenue."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6910": {
"question_id": "mmlu_pro_6910",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "What are the major forms of money in use?",
"correct_answer": "I",
"choices": [
"Gold, silver, bronze",
"Cryptocurrencies, gift cards, electronic transfers",
"Paper money, barter systems, digital wallets",
"Bonds, stocks, commodities",
"Digital currencies, credit cards, mobile payments",
"Virtual currency, promissory notes, IOUs",
"Checks, money orders, treasury notes",
"Credit cards, debit cards, cash",
"Fractional currency, paper money, checkbook money",
"Banknotes, coins, electronic bank transfers"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6888": {
"question_id": "mmlu_pro_6888",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "For the perfectly competitive firm, the profit-maximizing decision to shut down is made when the price",
"correct_answer": "G",
"choices": [
"exceeds maximum average total cost.",
"is less than maximum average variable cost.",
"is equal to minimum marginal cost.",
"falls below minimum average total cost.",
"is equal to average revenue.",
"is equal to maximum average total cost.",
"falls below minimum average variable cost.",
"is equal to minimum average total cost.",
"exceeds average total cost but less than average variable cost.",
"is greater than minimum average variable cost, but lower than minimum average total cost."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6802": {
"question_id": "mmlu_pro_6802",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Which of the following could limit the ability of a central bank to conduct expansionary monetary policy?",
"correct_answer": "E",
"choices": [
"Investment demand is nearly perfectly elastic.",
"Households carry very little cash holding their money in checking and saving deposits.",
"Banks make loans with all excess reserves.",
"Households carry a significant amount of cash, holding very little in checking and saving deposits.",
"Money demand is nearly perfectly elastic."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7414": {
"question_id": "mmlu_pro_7414",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Suppose as a result of certain economic developments the priceof cars and the price of steel both increase. What might bethe reasons behind this upsurge in prices, assuming the automobileindustry is fairly competitive and the supply of steelis constant in the short run?",
"correct_answer": "C",
"choices": [
"Increased taxes on automobile sales",
"Decreased demand for cars",
"Increased demand for cars",
"Decreased supply of steel",
"Technological advancements in car manufacturing"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6764": {
"question_id": "mmlu_pro_6764",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Define Gross National Product (GNP).",
"correct_answer": "C",
"choices": [
"The total income earned by a nation's residents in a year",
"The total amount of money in circulation in an economy in a year",
"The total market value of all final goods and services produced in the economy in one year",
"The market value of all goods and services produced abroad by the residents of a nation in a year",
"The total cost of all goods and services purchased in a year",
"The total savings rate of a nation's residents plus the value of imports minus the value of exports in a year",
"The aggregate of all wages paid to employees, plus profits of businesses and taxes, minus any subsidies",
"The sum of all financial transactions within a country's borders in a year",
"The total value of all consumer spending, government spending, investments, and net exports in a year",
"The total value of all goods and services produced by a nation's residents, regardless of the location"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7352": {
"question_id": "mmlu_pro_7352",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "What might happen to the housing construction industry if com-mercial banks are allowed to compete freely with mutual sav-ings banks?",
"correct_answer": "C",
"choices": [
"The quality of housing construction would deteriorate",
"More houses could be bought",
"The housing industry would slump",
"Mutual savings banks would become obsolete",
"Deposits in mutual savings banks would increase",
"Housing prices would become more volatile",
"The housing industry would boom",
"Commercial banks would solely focus on large-scale developments",
"There would be no noticeable change in the housing construction industry"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7418": {
"question_id": "mmlu_pro_7418",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "How might the traditional view of economic efficiency in an oligopolyand the historical facts surrounding many oligopol-isticmanufacturing industries disagree with each other?",
"correct_answer": "A",
"choices": [
"Historical data contradicts theory",
"Oligopolies are less common in practice than in theory",
"There is no contradiction between theory and practice",
"Historical data is insufficient to determine the efficiency of oligopolies",
"Oligopolies typically demonstrate higher innovation rates than predicted",
"Oligopolies always operate inefficiently",
"Theory suggests oligopolies lead to perfect competition in the long run",
"Oligopolies always result in lower prices for consumers",
"Historical data supports theory"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7218": {
"question_id": "mmlu_pro_7218",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Contrast the way in which economic information is utilized anddisseminated in capitalist and socialist economies, respectively.",
"correct_answer": "F",
"choices": [
"In capitalism, information flows through social networks and informal channels, while in socialism, it is strictly regulated by the state.",
"In socialism, information is freely available to all, while in capitalism, it is a commodity bought and sold in the market.",
"In socialism, information is disseminated via market prices, while in capitalism, it is distributed through a network of state-owned enterprises.",
"In capitalism, information is collected centrally, while in socialism, it's disseminated via market prices.",
"In both capitalism and socialism, information is disseminated via market prices.",
"In capitalism, information is disseminated via market prices, while in socialism, information is collected centrally due to the absence of market prices.",
"In capitalism, information is shared publicly by the government, while in socialism, it is kept secret and only used by the state planners.",
"In capitalism, information dissemination is not necessary due to the self-regulating market, while in socialism, information is disseminated through public forums.",
"In both systems, information is primarily disseminated through government planning agencies.",
"In capitalism, information is controlled by the government, while in socialism, it's controlled by entrepreneurs."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6749": {
"question_id": "mmlu_pro_6749",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Which of the following could be used as a test for autocorrelation up to third order?",
"correct_answer": "G",
"choices": [
"The Dickey-Fuller test",
"The Jarque-Bera test",
"White's test",
"The Augmented Dickey-Fuller test",
"The Durbin Watson test",
"The Kolmogorov-Smirnov test",
"The Breusch-Godfrey test",
"The Phillips-Perron test",
"The RESET test"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7264": {
"question_id": "mmlu_pro_7264",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "A competitive firm's demand for labor is determined directly by",
"correct_answer": "A",
"choices": [
"the marginal (physical) product of labor and the output price",
"the opportunity cost of workers\u2019 time",
"the marginal (physical) product of labor and the wage",
"the number of employees",
"profits",
"the average (physical) product of labor and the output price",
"the wage and the average (physical) product of labor",
"the cost of raw materials",
"the cost of production and the sales revenue",
"the market share of the firm"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7092": {
"question_id": "mmlu_pro_7092",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If real GNP is expanding at a steady annual rate of 2 percent andthe nominal money stock at a steady annual rate of 5 per-cent, what is the effect on the average price level if the incomevelocity of circulation of money is unchanged?",
"correct_answer": "I",
"choices": [
"0 percent",
"6 percent increase",
"2 percent",
"2.5 percent decrease",
"1 percent decrease",
"1 percent increase",
"5 percent",
"4 percent increase",
"3 percent",
"7 percent increase"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6692": {
"question_id": "mmlu_pro_6692",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "When the population of a country consumes 90% of its income, and its technology is such that, at the margin, a $100 billion increase in its capital stock increases its output by $25 billion, what would you expect the growth rate of this society to be?",
"correct_answer": "G",
"choices": [
"4% per year",
"7% per year",
"5% per year",
"1% per year",
"3% per year",
"0.5% per year",
"2.5% per year",
"1.5% per year",
"6% per year",
"8% per year"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6690": {
"question_id": "mmlu_pro_6690",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "What is Market Socialism?",
"correct_answer": "A",
"choices": [
"An economic system incorporating features of capitalistic markets and socialist planning, characterized by state ownership of land and capital, state-determined investment, market-set wages, and some income redistribution.",
"A system where the market dictates all aspects of the economy, including social welfare and public goods provision",
"An economic system characterized by complete state ownership of all industries, with prices and production centrally planned",
"A purely capitalistic system",
"A theoretical model where private individuals own resources and engage in voluntary exchanges, but profits are redistributed by the state",
"A mixed economic system with equal distribution of personal wealth regardless of individual contribution or market dynamics",
"An economic system where the government solely determines production, investment, and prices without any market influence",
"A system with no state intervention",
"A system where only the private sector determines investment",
"A form of socialism with worker-managed firms competing in free markets without any government intervention"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6755": {
"question_id": "mmlu_pro_6755",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "List the three main characteristics of monopolistic competition and discuss briefly the implications of these characteristics.",
"correct_answer": "A",
"choices": [
"Large number of competitors, product differentiation, free entry and exit",
"Unlimited competitors, product uniqueness, blocked entry and exit",
"High barriers to entry, perfect information, homogeneous products",
"Few competitors, product similarity, restricted entry"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6883": {
"question_id": "mmlu_pro_6883",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Suppose that aluminum is a key production input in the production of bicycles. If the price of aluminum falls, and all other variables are held constant, we expect",
"correct_answer": "D",
"choices": [
"the supply of aluminum to fall.",
"the price of bicycles to fall.",
"the supply of bicycles to fall.",
"the supply of bicycles to rise.",
"the demand for bicycles to rise.",
"the demand for bicycles to fall.",
"the demand for aluminum to fall.",
"the supply of aluminum to rise.",
"the demand for aluminum to rise.",
"the price of bicycles to rise."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6898": {
"question_id": "mmlu_pro_6898",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If two variables, $x_t$ and $y_t$ are said to be cointegrated, which of the following statements are true?\n\ni) $x_t$ and $y_t$ must both be stationary\n\n\nii) Only one linear combination of $x_t$ and $y_t$ will be stationary\n\n\niii) The cointegrating equation for $x_t$ and $y_t$ describes the short-run relationship\n\nbetween the two series\n\n\niv) The residuals of a regression of $y_t$ on $x_t$ must be stationary",
"correct_answer": "D",
"choices": [
"(i) and (iv) only",
"(iii) and (iv) only",
"(i) and (iii) only",
"(ii) and (iv) only",
"(i), (iii), and (iv) only",
"(i), (ii), and (iii) only",
"(ii) and (iii) only",
"(i) and (ii) only",
"(ii), (iii), and (iv) only",
"(i), (ii), (iii), and (iv)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7177": {
"question_id": "mmlu_pro_7177",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If the Federal Reserve was concerned about the \"crowding-out\" effect they could engage in",
"correct_answer": "A",
"choices": [
"expansionary monetary policy by lowering the discount rate.",
"expansionary monetary policy by selling Treasury securities.",
"contractionary monetary policy by raising the discount rate.",
"contractionary monetary policy by lowering the discount rate."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7276": {
"question_id": "mmlu_pro_7276",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Suppose that John's MPC is constant at 3/4. If his break-even pointoccurs at $7,000, how much will John have to borrow whenhis income is $3,000?",
"correct_answer": "F",
"choices": [
"$4000",
"$2500",
"$6250",
"$1750",
"$500",
"$1,000",
"$7000",
"$5500",
"$3000",
"$0"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6687": {
"question_id": "mmlu_pro_6687",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Suppose the government decides that in order to fight inflation, labor unions will not be allowed any wage increases until further notice. What type of inflation would this be aimed at?",
"correct_answer": "I",
"choices": [
"imported inflation",
"hyperinflation",
"built-in inflation",
"sectoral inflation",
"asset inflation",
"stagflation",
"deflation",
"wage inflation",
"cost-push inflation",
"demand-pull inflation"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7234": {
"question_id": "mmlu_pro_7234",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "The Lincoln Theater had been showing old films for two months. Admission was $2.00 and average daily attendance was 200. Then, the owner decided to present a recent blockbuster film. Admission price was raised to $4.00 to cover the cost of the new film and average daily attendance rose to 600. Does this example cast doubt upon the law of demand?",
"correct_answer": "I",
"choices": [
"The demand curve is upward-sloping",
"The admission price increase caused the demand to rise",
"The law of demand predicts constant attendance regardless of price",
"The demand curve is perfectly inelastic",
"The law of demand is disproved",
"The law of demand is irrelevant in this scenario",
"The law of demand only applies to goods, not services",
"The change in attendance is due to factors other than price",
"The law of demand remains intact",
"The increase in attendance was a coincidence unrelated to the price change"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6863": {
"question_id": "mmlu_pro_6863",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If real GDP per capita grows at a rate of 10 percent a year then we can expect the standard of living to double in",
"correct_answer": "G",
"choices": [
"8 years.",
"9 years.",
"6 years.",
"12 years.",
"5 years.",
"11 years.",
"7 years.",
"13 years.",
"10 years.",
"15 years."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7393": {
"question_id": "mmlu_pro_7393",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "The market demand curve for labor would shift to the left as the result of",
"correct_answer": "A",
"choices": [
"a decrease in the marginal product of labor",
"an increase in the wage rate paid to workers",
"a decrease in the supply of labor",
"an increase in the supply of labor",
"a decrease in the wage rate paid to workers",
"an increase in demand for the good which the labor is producing",
"an increase in the price of the good which the labor is producing",
"an increase in the marginal product of labor"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7325": {
"question_id": "mmlu_pro_7325",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "A student eats 3 slices of pizza while studying for his Economics exam. The marginal utility of the first slice of pizza is 10 utils, the second slice is 7 utils, and the third slice is 3 utils. Which of the statements below holds true with the above data?",
"correct_answer": "C",
"choices": [
"The total utility this student received from eating pizza is less than 20 utils.",
"The student would not eat any more pizza.",
"The total utility this student received from eating pizza is 20 utils.",
"The marginal utility of each slice of pizza is equal.",
"The student should have stopped eating pizza after 2 slices.",
"The marginal utility of the 4th slice of pizza will be 0.",
"The student should have only eaten one slice of pizza.",
"The total utility this student received from eating pizza is 30 utils.",
"The marginal utility of the 4th slice of pizza will be negative.",
"The student should have stopped eating pizza after the first slice."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7378": {
"question_id": "mmlu_pro_7378",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If a bank has $1000 in checking deposits and the bank is required to reserve $250 what is the reserve ratio? How much does the bank have in excess reserves? What is the size of the money multiplier?",
"correct_answer": "G",
"choices": [
"75% $250 M = 4",
"25% $750 M = \u00bc",
"30% $700 M = 3.33",
"40% $600 M = 2.5",
"50% $500 M = 2",
"60% $400 M = 1.67",
"25% $750 M = 4",
"35% $650 M = 2.86",
"20% $800 M = 5",
"75% $750 M = \u00bc"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7089": {
"question_id": "mmlu_pro_7089",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "What is the economic effect of the increase in the number of financial intermediaries?",
"correct_answer": "F",
"choices": [
"Decreases the diversity of financial products available",
"Decreases the number of financial intermediaries",
"Reduces the overall liquidity in the financial system",
"Decreases the velocity of money",
"Leads to a decrease in consumer lending options",
"Increases the velocity of money",
"Has no effect on the velocity of money",
"Enhances the direct investment into the economy",
"Increases the cost of capital for businesses",
"Stabilizes the stock market volatility"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6885": {
"question_id": "mmlu_pro_6885",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "For a competitive firm, what is the most important thing to consider in deciding whether to shut down in the short run?",
"correct_answer": "C",
"choices": [
"Do not produce if the TFC is not covered by revenue.",
"Do not produce if the price is lower than AVC.",
"Compare AVC to MR.",
"Compare AFC to MR.",
"Shut down if the firm is not making a profit.",
"Do not produce if the TVC is not covered by revenue.",
"Compare the price of inputs to the price of the product.",
"Compare TR to TC.",
"Produce the highest quantity demanded regardless of price.",
"Produce as long as the price is higher than AVC."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7119": {
"question_id": "mmlu_pro_7119",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "If the demand for dollars rises while the supply of dollars falls then the",
"correct_answer": "H",
"choices": [
"dollar will remain stable.",
"dollar will lose its status as a global currency.",
"inflation will rise.",
"exchange rates will rise but the value of the dollar will not be affected.",
"value of other currencies will fall.",
"dollar will depreciate.",
"exchange rates will be affected but not the value of the dollar.",
"dollar will appreciate.",
"exchange rate will not be affected."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7263": {
"question_id": "mmlu_pro_7263",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Which characteristic is likely a part of a monopoly market but not of monopolistic competition?",
"correct_answer": "F",
"choices": [
"Deadweight loss exists",
"Perfect competition",
"Price taker",
"Many sellers and buyers",
"High barriers to entry",
"Patents and copyrights",
"Differentiated products",
"Long-term equilibrium",
"Possibility of profit in the short run",
"Firms are price setters"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6944": {
"question_id": "mmlu_pro_6944",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "Which of the following is most likely to decrease the demand for kindergarten teachers?",
"correct_answer": "G",
"choices": [
"An increase in online learning programs for young children",
"Increased demand for pre-school teachers",
"More high school graduates choosing to go straight to work rather than furthering their education.",
"Increased investments in high school education",
"An increase in funding for education",
"A decrease in the population of school-aged children",
"A decrease in the average number of children per household",
"Increased immigration of foreign citizens and their families",
"Subsidies given to college students who major in elementary education",
"More parents choosing to homeschool their children"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_7488": {
"question_id": "mmlu_pro_7488",
"source_benchmark": "MMLU_Pro",
"domain": "economics",
"question_text": "What is economic concentration?",
"correct_answer": "J",
"choices": [
"The total market value of all goods and services produced within a country.",
"Economic concentration describes the rate at which a country adopts new technologies in its industries.",
"It is the distribution of wealth in a country.",
"Economic concentration refers to the number of industries in a country.",
"It is the measure of a country's economic growth.",
"The variation in income levels within different sectors of the economy.",
"The geographic distribution of financial resources across a country.",
"Economic concentration is the total amount of capital investment in a single industry.",
"The degree to which a country's economy is diversified across multiple sectors.",
"Economic concentration measures the control of a particular economic activity by a small number of firms in an industry."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11717": {
"question_id": "mmlu_pro_11717",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A steel cylinder contains liquid at a mean bulk temperature of 80\u00b0F. Steam condensing at 212\u00b0F on the outside surface is used for heating the liquid. The coefficient of heat transfer on the steam side is 1,000 Btu/hr-ft^2-\u00b0F. The liquid is agitated by the stirring action of a turbine impeller. Its diameter is 2 ft., and it moves at an angular velocity of 100 rpm. The cylinder is 6 ft. long, with a diameter of 6 ft. and a wall thickness of 1/8 in. The thermal conductivity of steel may be taken as 9.4 Btu/hr-ft^2-\u00b0F. Properties of the liquid, taken as constant, are: c_p = 0.6 Btu/lbm-\u00b0Fk = 0.1 Btu/hr-ft-\u00b0F \\rho = 60lbm/ft^3 The viscosity at 130\u00b0F is 653.4lbm/ft-hr, and at 212\u00b0F is 113.74lbm/ft-hr. Calculate the time required to raise the mean bulk temperature of the liquid to 180\u00b0F.",
"correct_answer": "B",
"choices": [
"3.5 hr",
"1.92 hr",
"4.0 hr",
"1.75 hr",
"2.0 hr",
"3.0 hr",
"2.5 hr",
"1.5 hr",
"2.75 hr",
"2.15 hr"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11177": {
"question_id": "mmlu_pro_11177",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Find the equivalent sheet current density K' for a uniformly magnetized rod 20cmslong, with 10 cm^2 area of cross-section, and a pole strength of 100 amp-meters. Also find the current I required for a 1,000-turn solenoid of the same size to be magnetically equivalent.",
"correct_answer": "B",
"choices": [
"10^6 amp/meter, 5 amp",
"10^5 amp/meter, 20 amp",
"8 x 10^4 amp/meter, 40 amp",
"10^3 amp/meter, 50 amp",
"10^4 amp/meter, 30 amp",
"10^4 amp/meter, 100 amp",
"10^7 amp/meter, 2 amp",
"10^5 amp/meter, 25 amp",
"5 x 10^5 amp/meter, 15 amp",
"10^6 amp/meter, 10 amp"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11088": {
"question_id": "mmlu_pro_11088",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A 10kVA distribution transformer has a 240V secondary winding. The equivalent internal resistance of this transformer, referred to that winding, is 0.048\\Omega. The core lossof this transformer is 75W. At whatkVAload will this transformeroperate at maximum efficiency?",
"correct_answer": "D",
"choices": [
"110% of rated load",
"90% of rated load",
"100% of rated load",
"95% of rated load",
"50% of rated load",
"105% of rated load",
"85% of rated load",
"70% of rated load",
"60% of rated load",
"80% of rated load"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11822": {
"question_id": "mmlu_pro_11822",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "What is the name of the fluorescent material that gives red colour fluorescence?",
"correct_answer": "I",
"choices": [
"Zinc silicate.",
"Calcium sulphide.",
"Zinc oxide.",
"Zinc sulphide.",
"Calcium silicate.",
"Sodium silicate.",
"Sodium sulphide.",
"Magnesium sulphide.",
"Magnesium silicate.",
"Aluminum silicate."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11344": {
"question_id": "mmlu_pro_11344",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Find the inverse Laplace transform L^-1[1 / {s(s^2 + 1)}], using the convolution.",
"correct_answer": "B",
"choices": [
"sin t - t",
"1 - cos t",
"t sin t",
"e^t - 1",
"1 - e^-t",
"sin t",
"e^t - cos t",
"t - sin t",
"1 + cos t",
"- cos t"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11313": {
"question_id": "mmlu_pro_11313",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "The maximum heat dissipation from a rectangular profile fin, designed for minimum weight, is 500 k cal per hour per meter width of the fin. The fin base temperature and the ambient air temperature are 110\u00b0C and 10\u00b0C, respectively. The heat transfer coefficient on the fin surface is 90 kcal/m^2-hr-\u00b0C and the thermal conductivity of the fin material is 35 k cal/m-hr-\u00b0C. Determine the height and thickness of the fin required. Assume the thermal gradient at the fin base is zero.",
"correct_answer": "D",
"choices": [
"Thickness of the fin = 6.14, Length of the fin = 44.85 mm",
"Thickness of the fin = 4.14, Length of the fin = 40.85 mm",
"Thickness of the fin = 4.14, Length of the fin = 42.85 mm",
"Thickness of the fin = 5.14, Length of the fin = 44.85 mm",
"Thickness of the fin = 7.14, Length of the fin = 48.85 mm",
"Thickness of the fin = 5.14, Length of the fin = 42.85 mm",
"Thickness of the fin = 5.14, Length of the fin = 47.85 mm",
"Thickness of the fin = 4.50, Length of the fin = 43.85 mm",
"Thickness of the fin = 6.14, Length of the fin = 46.85 mm",
"Thickness of the fin = 5.00, Length of the fin = 45.00 mm"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11291": {
"question_id": "mmlu_pro_11291",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Oxygen initially at 13.8MPa, 15.5\u00b0C is throttled in a steady- flow process to a final pressure of 1.38MPa. Assuming the process to be adiabatic, determine the final temperature of oxygen.",
"correct_answer": "E",
"choices": [
"-20\u00b0C",
"-18\u00b0C",
"-30\u00b0C",
"-25\u00b0C",
"-13.71\u00b0C",
"0\u00b0C",
"-8.5\u00b0C",
"-5\u00b0C",
"-10\u00b0C",
"-15\u00b0C"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11205": {
"question_id": "mmlu_pro_11205",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Find the theoretical variance of the random variable with the following probability distribution. x Pr(X = x) 0 (1/4) 1 (1/2) 2 (1/8) 3 (1/8)",
"correct_answer": "I",
"choices": [
"0.625",
"0.683",
"0.350",
"1.125",
"1.000",
"0.500",
"0.764",
"0.942",
"0.859",
"0.725"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11817": {
"question_id": "mmlu_pro_11817",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A polished stainless steel surface is electrically heated from the bottom to boil water at atmospheric pressure. If the surface temperature is 230 F, calculate the heat flux from the surface to the water and compare this value with the critical heat flux of nucleate boiling.",
"correct_answer": "H",
"choices": [
"15000 Btu/hr-ft^2",
"30000 Btu/hr-ft^2",
"22000 Btu/hr-ft^2",
"5000 Btu/hr-ft^2",
"16000 Btu/hr-ft^2",
"12500 Btu/hr-ft^2",
"9500 Btu/hr-ft^2",
"10888.25 Btu/hr-ft^2",
"7000 Btu/hr-ft^2",
"20000 Btu/hr-ft^2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11167": {
"question_id": "mmlu_pro_11167",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Find the current through a 0.01 \\muF capacitor at t = 0 if the voltage across it is (a) 2 sin 2\\pi 10^6t V; (b) - 2e^-(10)7 t V; (c) - 2e-(10)7 tsin 2\\pi 10^6t V.",
"correct_answer": "F",
"choices": [
"0.1256 A, 0.2 A, 0.1256 A",
"0.1256 A, 0.3 A, 0.1256 A",
"0.2345 A, 0.1 A, - 0.4567 A",
"0.2 A, 0.1 A, 0.2 A",
"0 A, 0.1 A, 0 A",
"0.1256 A, 0.2 A, - 0.1256 A",
"0.1412 A, 0.25 A, -0.1412 A",
"0.1256 A, 0 A, -0.1256 A",
"0.314 A, 0.2 A, -0.314 A",
"0.0628 A, 0.15 A, -0.0628 A"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11755": {
"question_id": "mmlu_pro_11755",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "An ideal gas is enclosed inside a chamber with a volume of 0.1 ft^3 at 115\u00b0C, 690kPa. It then expandsisentropicallyto a final pressure of 138kPa. Calculate the work done during the process, assuming that for this gas c_v= 0.7201 kJ/kg-\u00b0K c_p = 1.0048 kJ/kg-\u00b0K",
"correct_answer": "H",
"choices": [
"2.3456 kJ",
"3.2109 kJ",
"2.0000 kJ",
"1.5678 kJ",
"1.4567 kJ",
"2.6789 kJ",
"2.1234 kJ",
"1.8122 kJ",
"1.2345 kJ",
"0.9876 kJ"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11821": {
"question_id": "mmlu_pro_11821",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A wide plate of length 3.5 in. has air at 1 atm. and 180\u00b0F moving parallel to it at a velocity of 190 ft/sec. Determine the average heat flux if the surface of the plate is at a constant temperature of 110\u00b0F.",
"correct_answer": "A",
"choices": [
"1223 Btu/hr-ft^2",
"980 Btu/hr-ft^2",
"75 Btu/hr-ft^2",
"2200 Btu/hr-ft^2",
"500 Btu/hr-ft^2",
"650 Btu/hr-ft^2",
"3050 Btu/hr-ft^2",
"17.47 Btu/hr-ft^2",
"145 Btu/hr-ft^2",
"263050 Btu/hr-ft^2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11976": {
"question_id": "mmlu_pro_11976",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Calculate the heat added when one mole of carbon dioxide is heated at constant volume from 540 to 3540 F.",
"correct_answer": "B",
"choices": [
"28,500 Btu",
"34,650 Btu",
"40,000 Btu",
"25,000 Btu",
"45,000 Btu",
"30,000 Btu",
"55,000 Btu",
"60,000 Btu",
"20,000 Btu",
"50,000 Btu"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11621": {
"question_id": "mmlu_pro_11621",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Find the capacitance of a single, isolated conducting sphere of radius a.",
"correct_answer": "I",
"choices": [
"C = \u03c0\u03b5\u2080a",
"C = 10\u03c0\u03b5\u2080a",
"C = 2\u03c0\u03b5\u2080a",
"C = 12\u03c0\u03b5\u2080a",
"C = 6\u03c0\u03b5\u2080a",
"C = 3\u03c0\u03b5\u2080a",
"C = 7\u03c0\u03b5\u2080a",
"C = 8\u03c0\u03b5\u2080a",
"C = 4\u03c0\u03b5\u2080a",
"C = 5\u03c0\u03b5\u2080a"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11152": {
"question_id": "mmlu_pro_11152",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "0.1m^3 of a ferromagnetic substance is kept at a constant temperatureof 4\u00b0K while the magnetic field is increased from0 to (4\\pi / 10) toweber/m^2. Obtain an expression for the workrequired if the substance follows the Curie equation of state. Assume C = 4\\pi \u00d7 10^9weber- \u00b0K/amp-m.",
"correct_answer": "I",
"choices": [
"(16 \u00d7 10^6)\u03c0N-m",
"(12 \u00d7 10^6)\u03c0N-m",
"(4 \u00d7 10^5)\u03c0N-m",
"(3 \u00d7 10^6)\u03c0N-m",
"(6 \u00d7 10^6)\u03c0N-m",
"(2 \u00d7 10^6)\u03c0N-m",
"(1 \u00d7 10^7)\u03c0N-m",
"(4 \u00d7 10^6)\u03c0N-m",
"(8 \u00d7 10^6)\u03c0N-m",
"(8 \u00d7 10^5)\u03c0N-m"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11667": {
"question_id": "mmlu_pro_11667",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Lowest critical frequency is due to zero and it may be present at the origin or nearer to the origin, then the type of network is",
"correct_answer": "A",
"choices": [
"RL Circuit.",
"RC circuit.",
"CL circuit.",
"CR circuit.",
"RCL circuit.",
"LRC circuit.",
"LCR circuit.",
"RLC circuit.",
"LC circuit.",
"CLR circuit."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11495": {
"question_id": "mmlu_pro_11495",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Determine the initial rate of heat transfer from a vertically hung 2-ft high flat copper plate to the air at 70\u00b0F, 14.7psia. The copper plate was taken out of an annealing furnace at a temperature of 220\u00b0F.",
"correct_answer": "G",
"choices": [
"76.8 Btu/hr-ft^2",
"134.2 Btu/hr-ft^2",
"150 Btu/hr-ft^2",
"0.85 Btu/hr-ft^2",
"85 Btu/hr-ft^2",
"110.5 Btu/hr-ft^2",
"127.5 Btu/hr-ft^2",
"100.08 Btu/hr-ft^2",
"65.3 Btu/hr-ft^2",
"95.7 Btu/hr-ft^2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11095": {
"question_id": "mmlu_pro_11095",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Use partial fraction expansion to find the inverse z-transform of F (z) = [{2z^2 - 2z} / {(z - 3) (z - 5)^2}].",
"correct_answer": "D",
"choices": [
"f [n] = 4^n u[n] + [(5/6)n - 1] 6^n u[n]",
"f [n] = 6^n u[n] + [(2/3)n - 1] 3^n u[n]",
"f [n] = 3^(n+1)u[n] - 5^n u[n]",
"f [n] = 3^n u[n] + [(4/5)n - 1] 5^n u[n]",
"f [n] = [(2/3)n + 1] (3/2)^n u[n] - 2^(n-1) u[n]",
"f [n] = 3^n u[n] + [(5/4)n - 1] (4/5)^n u[n]",
"f [n] = 4^n u[n] + [(7/8)n - 2] 8^n u[n]",
"f [n] = 2^n u[n] + [(3/5)n - 1] 4^n u[n]",
"f [n] = 5^n u[n] + [(6/7)n - 1] 7^n u[n]",
"f [n] = 2^n u[n] + [(3/4)n + 1] (4/3)^n u[n]"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11093": {
"question_id": "mmlu_pro_11093",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Find the attenuation per meter along a wave guide for an applied wave length \\lambda_0 = 2 meters, if the cutoff wave length of the guide is \\lambda_oc = 20 cm.",
"correct_answer": "B",
"choices": [
"400 db/meter",
"545 db/meter",
"600 db/meter",
"325 db/meter",
"250 db/meter",
"1000 db/meter",
"650 db/meter",
"700 db/meter",
"800 db/meter",
"475 db/meter"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11158": {
"question_id": "mmlu_pro_11158",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Five resistances of 10, 15, 20, 25, and 30 ohms are connected in parallel. Calculate the joint resistance.",
"correct_answer": "A",
"choices": [
"3.46 ohms",
"4.56 ohms",
"8.23 ohms",
"4.12 ohms",
"6.78 ohms",
"7.89 ohms",
"1.92 ohms",
"2.34 ohms",
"2.21 ohms",
"5.67 ohms"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11286": {
"question_id": "mmlu_pro_11286",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Calculate the change in specific entropy of 1 lbm of air when it is compressed from 14 psia, 60\u00b0F to 84 psia and 460\u00b0F.",
"correct_answer": "H",
"choices": [
"1.467 Btu/\u00b0R",
"0.052 Btu/\u00b0R",
"0.0032 Btu/\u00b0R",
"0.0248 Btu/\u00b0R",
"0.200 Btu/\u00b0R",
"0.1467 Btu/\u00b0R",
"0.075 Btu/\u00b0R",
"0.01416 Btu/\u00b0R",
"0.158 Btu/\u00b0R",
"0.235 Btu/\u00b0R"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11301": {
"question_id": "mmlu_pro_11301",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Using the definition of the bilateral z transform F (z) = \\sum_{n=-\\infty}^{\\infty} f[n] z^-n, find the z - transform of f[n] = 3\\delta [n+2] + 4\\delta [n-1] + 2\\delta [n-3].",
"correct_answer": "B",
"choices": [
"3z^-2 + 4z^-1 + 2z^3",
"3z^2+ 4z^-1 + 2z^-3",
"2z^3 + 4z^-2 + 3z^-1",
"4z^2+ 3z^-1 + 2z^-3",
"4z^-3 + 3z^-2 + 2z^-1",
"3z^2+ 2z^-1 + 4z^-3",
"2z^2+ 4z^-1 + 3z^-3",
"3z^3 + 4z + 2z^-2",
"4z^3 + 2z + 3z^-2",
"2z^-1 + 3z^-3 + 4z^2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11580": {
"question_id": "mmlu_pro_11580",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "The directivity of an antenna is 50 and the antenna operates at a wavelength of 4 meters. What is its maximum effective aperture?",
"correct_answer": "H",
"choices": [
"100 meter^2",
"50 meter^2",
"12.7 meter^2",
"200 meter^2",
"25 meter^2",
"4 meter^2",
"80 meter^2",
"63.6 meter^2",
"16 meter^2",
"31.8 meter^2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11679": {
"question_id": "mmlu_pro_11679",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A gas obeys the vanderWaals equation, with P_c = 30atm andT_c= 200\u00b0C. The compressibility factor (PV / RT) will be more than one (at P = 50atm, T = 250\u00b0C; at P = 1atm, T = 100\u00b0C; P = 500atm, T = 500\u00b0C; none of these). Calculate the vanderWaals constant b for this gas.",
"correct_answer": "E",
"choices": [
"0.202 liter/mole",
"0.4848 liter/mole",
"0.054 liter/mole",
"0.081 liter/mole",
"0.162 liter/mole",
"0.365 liter/mole",
"0.324 liter/mole",
"0.097 liter/mole",
"0.648 liter/mole",
"0.243 liter/mole"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11090": {
"question_id": "mmlu_pro_11090",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "(i) (a) A triplex lap-wound armature is used in a 14-pole machine with fourteen brush sets, each spanning three commutator bars. Calculate the number of paths in the armature. (b) Repeat (a) for a triplex wave-wound armature having two such brush sets and 14 poles. (ii) Calculate the generated emf in each of the above problems if the flux per pole is 4.2 \u00d7 10^6 lines, the generator speed is 60 rpm, and there are 420 coils on the armature, each coil having 20 turns.",
"correct_answer": "F",
"choices": [
"35 paths; 5 paths; 200 V; 1500 V",
"50 paths; 3 paths; 245 V; 1750 V",
"44 paths; 12 paths; 210 V; 1550 V",
"38 paths; 10 paths; 225 V; 1650 V",
"48 paths; 4 paths; 260 V; 1900 V",
"42 paths; 6 paths; 235.2 V; 1646.4 V",
"36 paths; 9 paths; 220 V; 1600 V",
"34 paths; 11 paths; 230 V; 1625 V",
"40 paths; 7 paths; 240 V; 1700 V",
"45 paths; 8 paths; 250 V; 1800 V"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11637": {
"question_id": "mmlu_pro_11637",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A pulley 15 inches in diameter rotates at 1200 rpm and transmits 15 hp. If a (1/4) in. thick single leather belt is used, calculate the width of the belt if the ratio of tensions on the tight to slack side is 2:1 and design stress is 200 psi.",
"correct_answer": "H",
"choices": [
"2.5 in.",
"4.8 in.",
"4.5 in.",
"5.6 in.",
"5.2 in.",
"3.8 in.",
"3.2 in.",
"4.2 in.",
"6.0 in.",
"2.8 in."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11266": {
"question_id": "mmlu_pro_11266",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Water at a temperature of 350\u00b0K moves at 2.5 \u00d7 10^-4m^3/sec through a (1/2) in. schedule 80 pipe. Calculate the heat transfer coefficient within the pipe.",
"correct_answer": "D",
"choices": [
"11,000 W/m^2-K",
"9,450 W/m^2-K",
"10,300 W/m^2-K",
"10,060 W/m^2-K",
"9,900 W/m^2-K",
"10,500 W/m^2-K",
"9,750 W/m^2-K",
"9,600 W/m^2-K",
"10,150 W/m^2-K",
"10,200 W/m^2-K"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11796": {
"question_id": "mmlu_pro_11796",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A hollow shaft, 5 in. OD is used instead of a solid shaft of 3.8 in. diameter having the same torsional strength. Calculate the inside diameter of the hollow shaft and the percentage by weight of the material saved.",
"correct_answer": "J",
"choices": [
"4.4 in, 58%",
"4.5 in, 60%",
"3.9 in, 50%",
"4.7 in, 61%",
"4.0 in, 56%",
"4.2 in, 55%",
"4.1 in, 59%",
"4.25 in, 62%",
"4.6 in, 53%",
"4.33 in, 57%"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11728": {
"question_id": "mmlu_pro_11728",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Consider an air stream flowing over a smooth solid naphthalene plate with a velocity of 116 ft/sec at points far away from the surface of the plate. The air stream pressure is 1 atm. and the temperature of the system is 115\u00b0F. Assume the influence of non-unity Schmidt number is negligible. Assuming turbulent boundary layer from the leading edge, determine the following: (a) the average coefficient of mass transfer over the first 1.5 ft from the leading edge of the plate. (b) the average mass transfer rate per unit width over the first 1.5 ft from the leading edge of the plate. (c) the local coefficient of mass transfer at 1.5 ft from the leading edge of the plate. (d) the local concentration gradient of naphthalene at the surface at 1.5 ft from the leading edge.",
"correct_answer": "F",
"choices": [
"375 ft/hr, 0.126 lb/hr, 308 ft/hr, -0.2530 lb/ft^4",
"410 ft/hr, 0.129 lb/hr, 300 ft/hr, -0.2680 lb/ft^4",
"400 ft/hr, 0.140 lb/hr, 315 ft/hr, -0.2600 lb/ft^4",
"385 ft/hr, 0.128 lb/hr, 318 ft/hr, -0.2550 lb/ft^4",
"410 ft/hr, 0.135 lb/hr, 320 ft/hr, -0.2650 lb/ft^4",
"393 ft/hr, 0.132 lb/hr, 314.4 ft/hr, -0.2582 lb/ft^4",
"395 ft/hr, 0.133 lb/hr, 312 ft/hr, -0.2625 lb/ft^4",
"405 ft/hr, 0.138 lb/hr, 320 ft/hr, -0.2650 lb/ft^4",
"380 ft/hr, 0.125 lb/hr, 310 ft/hr, -0.2500 lb/ft^4",
"420 ft/hr, 0.130 lb/hr, 305 ft/hr, -0.2700 lb/ft^4"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11781": {
"question_id": "mmlu_pro_11781",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A single turn loop is situated in air, with a uniform magnetic field normal to its plane. The area of the loop is 5 meter^2. What is the emf appearing at the terminals of the loop, if the rate of change of flux density is 2 webers/meter^2/sec?",
"correct_answer": "B",
"choices": [
"30 volts",
"10 volts",
"12 volts",
"0 volts",
"25 volts",
"7.5 volts",
"20 volts",
"2.5 volts",
"15 volts",
"5 volts"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11492": {
"question_id": "mmlu_pro_11492",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A cylindrical pressure vessel, mean radius 15 in., is filled with a gas of light molecular weight causing an internal pressure of 3500 psi. The efficiency of welded connections in the vessel is 78%. Assuming the material of the vessel to have a yield strength \\sigma_y = 12 \u00d7 10^4 psi, calculate for this strength the thickness of the vessel assuming a factor of safety of 1.5.",
"correct_answer": "D",
"choices": [
"0.65 in",
"0.90 in",
"0.95 in",
"0.84 in",
"1.25 in",
"1.10 in",
"0.70 in",
"0.55 in",
"0.75 in",
"1.00 in"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11288": {
"question_id": "mmlu_pro_11288",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Air at an initial pressure and temperature of 1 bar and 17\u00b0C respectively is contained inside a cylinder. The air is then compressed polytropically, along a path for which n = 1.30, until the final pressure inside the cylinder is 5 bars. What is the heat transferred per kg of air for this process?",
"correct_answer": "A",
"choices": [
"- 31.33 kJ/kg",
"62.15 kJ/kg",
"-93.45 kJ/kg",
"155.67 kJ/kg",
"124.78 kJ/kg",
"93.45 kJ/kg",
"0 kJ/kg (no heat transfer)",
"31.33 kJ/kg",
"-62.15 kJ/kg",
"-124.78 kJ/kg"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11522": {
"question_id": "mmlu_pro_11522",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "In a continuously fed catalytic ammonia converter 1 mol of nitrogen reacts with 3 mol of hydrogen according to the following chemical reaction: N_2 + 3H_2 = 2NH_3 The converter is operated at a pressure of 100 atm and a temperature of 350\u00b0C. The equilibrium constant for this reaction K is equal to 14.4 \u00d7 10^-5 at 500\u00b0C. The enthalpy of reaction \\DeltaH^0 is - 25,800 cal/g-mol at the desired temperatures. All standard states are assumed to be pure components at 1 atm pressure. Assume ideal-gas and Lewis-Randall behavior. Calculate the equilibrium conversion of nitrogen to ammonia.",
"correct_answer": "J",
"choices": [
"70% conversion",
"40% conversion",
"35% conversion",
"60% conversion",
"65% conversion",
"50% conversion",
"30% conversion",
"45% conversion",
"75% conversion",
"54% conversion"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11666": {
"question_id": "mmlu_pro_11666",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "A long wire composed of a smooth round conductor runs above and parallel to the ground (assumed to be a large conducting plane). A high voltage exists between the conductor and the ground. The maximum electric stress occurs at",
"correct_answer": "B",
"choices": [
"the point farthest from the ground on the conductor's surface.",
"lower surface of the conductor.",
"upper surface of the conductor.",
"the midpoint of the conductor's surface.",
"the point where the conductor and ground surface meet.",
"the ground surface.",
"midway between the conductor and ground.",
"the point of the conductor farthest from the ground surface.",
"the point closest to the ground on the conductor's surface."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11347": {
"question_id": "mmlu_pro_11347",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Two thousand signals are sampled at Nyquist rate by the sampling function S(t) = I \\sum_{n=-\\infty}^{\\infty} [t - (1 /f_s) n] where f_s is the sampling rate. The signals are then time-division multiplexed. If each signal has a bandwidth of 5 kHz, calculate the sampling time T_s and determine \\tau_max for pulses of duration \\tau (i.e., stretched impulses).",
"correct_answer": "E",
"choices": [
"T_s = 2 \u00d7 10^-3 sec, \\tau_max = 1.0 \u00d7 10^-6 sec",
"T_s = 1 \u00d7 10^-2 sec, \\tau_max = 0.5 \u00d7 10^-5 sec",
"T_s = 2 \u00d7 10^-4 sec, \\tau_max = 1.0 \u00d7 10^-7 sec",
"T_s = 1.5 \u00d7 10^-4 sec, \\tau_max = 0.75 \u00d7 10^-7 sec",
"T_s = 1 \u00d7 10^-4 sec, \\tau_max = 0.5 \u00d7 10^-7 sec",
"T_s = 1 \u00d7 10^-5 sec, \\tau_max = 0.5 \u00d7 10^-8 sec",
"T_s = 3 \u00d7 10^-4 sec, \\tau_max = 1.5 \u00d7 10^-7 sec",
"T_s = 5 \u00d7 10^-5 sec, \\tau_max = 2.5 \u00d7 10^-8 sec",
"T_s = 1 \u00d7 10^-3 sec, \\tau_max = 0.5 \u00d7 10^-6 sec",
"T_s = 8 \u00d7 10^-5 sec, \\tau_max = 4.0 \u00d7 10^-8 sec"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6496": {
"question_id": "mmlu_pro_6496",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A baby born with pulmonary hypoplasia secondary to oligohydramnios caused by renal agenesis would be classified as having:",
"correct_answer": "F",
"choices": [
"an association.",
"a disruption.",
"a syndrome.",
"a deformation.",
"a dysplasia.",
"a sequence.",
"a phenotype.",
"a spectrum.",
"a malformation.",
"a complex."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5956": {
"question_id": "mmlu_pro_5956",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 42-year-old woman is brought to the emergency department 10 minutes after being involved in a high-speed motor vehicle collision in which she was a restrained passenger. On arrival, she has shortness of breath and abdominal pain. Her pulse is 135/min, respirations are 30/min, and blood pressure is 80/40 mm Hg. Breath sounds are decreased at the left lung base. An x-ray of the chest shows opacification of the left lower lung field with loss of the diaphragmatic shadow. Placement of a chest tube yields a small amount of air followed by greenish fluid. Which of the following is the most appropriate next step in management?",
"correct_answer": "F",
"choices": [
"Immediate intubation",
"Administration of supplemental oxygen",
"Thoracotomy",
"Intravenous administration of broad-spectrum antibiotics",
"CT scan of the chest",
"Laparotomy",
"Placement of a second chest tube",
"CT scan of the abdomen",
"Thoracoscopy",
"Repeat chest x-ray"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5867": {
"question_id": "mmlu_pro_5867",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "The muscles of the soft palate are innervated by branches of the",
"correct_answer": "J",
"choices": [
"facial and vagus nerves.",
"trigeminal and hypoglossal nerves.",
"facial and glossopharyngeal nerves.",
"trigeminal and glossopharyngeal nerves.",
"optic and oculomotor nerves.",
"olfactory and optic nerves.",
"facial and hypoglossal nerves.",
"glossopharyngeal and vagus nerves.",
"trigeminal and facial nerves.",
"trigeminal and vagus nerves."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6601": {
"question_id": "mmlu_pro_6601",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following is a function of folate?\n",
"correct_answer": "E",
"choices": [
"Synthesis of fatty acids from glucose",
"Conversion of oxaloacetate to phosphoenolpyruvate",
"Carboxylation of pyruvate to oxaloacetate",
"Conversion of fructose to glucose",
"Synthesis of TMP (thymidine monophosphate)",
"Decarboxylation of amino acids to form amine neurotransmitters",
"Synthesis of methionine from homocysteine",
"Oxidation of pyruvate to acetyl CoA",
"Conversion of glucose to pyruvate in glycolysis"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6123": {
"question_id": "mmlu_pro_6123",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following is recommended for cleaning the mouth?",
"correct_answer": "C",
"choices": [
"Baking soda.",
"Hydrogen peroxide.",
"Water.",
"Bleach.",
"Alcohol swabs.",
"Normal saline.",
"Vinegar.",
"Mouthwash containing alcohol.",
"Lemon and glycerine swabs.",
"Dish soap."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6092": {
"question_id": "mmlu_pro_6092",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 30-year-old woman comes to the physician because of a 2-day history of abdominal pain. She has a history of recurrent upper respiratory tract infections, sinusitis, and pancreatitis. She has thick nasal secretions. She says that her sweat is salty and crystallizes on her skin. Her vital signs are within normal limits. Physical examination shows epigastric tenderness. Genetic testing for the 36 most common mutations shows a detectable mutation (G551D) in one allele of the CFTR gene. Which of the following best explains this patient's clinical phenotype?",
"correct_answer": "G",
"choices": [
"Both CFTR alleles must be mutated to produce symptoms",
"Only one G551D allele is needed in CFTR",
"The patient is a CFTR obligate carrier",
"The patient's symptoms are coincidental and not related to the CFTR mutation",
"The G551D mutation in CFTR does not cause symptoms",
"The patient's CFTR mutation is unrelated to her clinical phenotype",
"The second CFTR mutation was not detected by the testing obtained",
"The patient's clinical phenotype is due to a mutation in a different gene",
"The patient has a rare form of CFTR mutation that requires two alleles",
"The CFTR mutation is associated with her clinical phenotype but not the cause"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6070": {
"question_id": "mmlu_pro_6070",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 40-year-old woman comes to the physician because of a 6-month history of increased facial hair growth. Her last menstrual period was 4 months ago. She is 165 cm (5 ft 5 in) tall and weighs 70 kg (154 lb); BMI is 26 kg/m2 . Her pulse is 80/min, and blood pressure is 130/82 mm Hg. Physical examination shows temporal balding and coarse dark hair on the upper lip and chin. Pelvic examination shows clitoral enlargement. Her serum testosterone concentration is increased. Serum concentrations of androstenedione, dehydroepiandrosterone, and urinary 17-ketosteroids are within the reference ranges. Ultrasonography of the pelvis shows a 12-cm ovarian mass. Which of the following best describes this mass? ",
"correct_answer": "C",
"choices": [
"Endometrioid tumor",
"Fibroma",
"Sertoli-Leydig tumor",
"Mucinous cystadenoma",
"Clear cell carcinoma",
"Brenner tumor",
"Ovarian carcinoid",
"Teratoma",
"Serous cystadenoma",
"Granulosa tumor"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5984": {
"question_id": "mmlu_pro_5984",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "In the discharge checklist recommended by the Department of Health (2004), who should be involved in completing the form?",
"correct_answer": "G",
"choices": [
"The ward clerk, GP, and patient.",
"The nurse, ward clerk, and consultant.",
"The GP, patient, and ward clerk.",
"The GP, consultant, and named nurse.",
"The consultant, patient, and named nurse.",
"The consultant, named nurse, and GP.",
"The nurse, patient, and ward clerk.",
"The nurse, GP, and ward clerk.",
"The nurse, consultant, and GP.",
"The ward clerk, consultant, and named nurse."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6596": {
"question_id": "mmlu_pro_6596",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A new severe respiratory illness caused by a newly identified virus is discovered. Which of the following properties of a killed vaccine relative to a live vaccine is the most appropriate rationale for developing a killed vaccine for this illness?",
"correct_answer": "I",
"choices": [
"Is more likely to provide sterilizing immunity",
"Can be administered in higher doses",
"Develops more rapid protective immunity",
"Is less likely to require subsequent boosters for lifelong immunity",
"Is less likely to cause an allergic reaction",
"Is more effective for immunocompromised individuals",
"Is most likely to generate mucosal immunity",
"Is less likely to cause local adverse reactions at the site of injection",
"Avoids the concern for reversion to virulence",
"Is less likely to be affected by preexisting antibodies"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5946": {
"question_id": "mmlu_pro_5946",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "During a study of renal glomeruli, a healthy animal kidney is kept in a vascular bath preparation at a constant afferent arterial pressure of 100 mm Hg. If the efferent arteriole is constricted with a vascular clamp, which of the following Starling forces is most likely to change in the glomeruli?",
"correct_answer": "B",
"choices": [
"Increased filtration coefficient (Kf)",
"Increased hydrostatic pressure",
"Increased blood flow",
"Increased oncotic pressure",
"Decreased hydrostatic pressure",
"No change in Starling forces",
"Decreased oncotic pressure",
"Decreased filtration coefficient (Kf)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6534": {
"question_id": "mmlu_pro_6534",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "It is correct to state that for the treatment of Bulimia Nervosa:\n ",
"correct_answer": "I",
"choices": [
"Treatment should focus solely on physical symptoms",
"Treatments should target exclusively on purging practices",
"It is not useful to discuss weight with patients",
"Therapies should primarily focus on food avoidance tactics",
"The first approach is to discuss body image",
"Use of antipsychotic medication is first-line treatment",
"Treatment should prioritize on patients' social relationships",
"Treatment should ignore the psychological aspects of the disorder",
"Fluoxetine is considered of benefit",
"The main treatment approach is diet control"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6600": {
"question_id": "mmlu_pro_6600",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following is not a function of vitamin A\n",
"correct_answer": "A",
"choices": [
"Synthesis of the blood clotting proteins",
"Prevention of bone loss",
"Regulation of gene expression and cell differentiation",
"Boosting immune system function"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6400": {
"question_id": "mmlu_pro_6400",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "In which one of the following circumstances will the prevalence of a disease in the population increase, all else being constant?\n",
"correct_answer": "D",
"choices": [
"If recovery of the disease is faster.",
"If the incidence rate of the disease falls.",
"If the population in which the disease is measured increases.",
"If survival time with the disease increases.",
"If the disease becomes less contagious.",
"If the disease's incubation period decreases.",
"If effective treatment for the disease becomes widely available.",
"If vaccination against the disease becomes more common.",
"If the disease mutates to a less harmful form.",
"If the population's overall health improves."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5931": {
"question_id": "mmlu_pro_5931",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "With respect to job training, younger workers",
"correct_answer": "G",
"choices": [
"Have less experience and therefore need more training",
"Do not benefit from training as much as older workers do",
"Are less open to feedback than older workers",
"Prefer to have classes by themselves",
"Always learn faster than older workers",
"Always perform worse on the job than trained older workers",
"Receive more opportunities than older workers",
"Are not interested in job training",
"Perform better on the job than trained older workers",
"Prefer online training over traditional classroom learning"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6446": {
"question_id": "mmlu_pro_6446",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 46-year-old woman, gravida 1, para 1, comes to the office because of a 2-week history of black discharge from her right nipple. The patient had mammography and ultrasonography of the breasts 2 months ago for evaluation of increased glandularity, palpated in the upper outer quadrant of the right breast, noted at her most recent annual examination. The studies showed likely benign findings with recommended follow-up in 6 months. Medical history is otherwise unremarkable and she takes no medications. BMI is 30 kg/m2 . Vital signs are normal. Palpation of the right breast discloses glandularity in the upper outer quadrant but no other masses. There is scant, black discharge from the right nipple. Which of the following is the most appropriate next step in diagnosis?",
"correct_answer": "E",
"choices": [
"Core needle biopsy of the right breast",
"Ductography",
"Start on antifungal treatment",
"Excisional biopsy of glandular tissue",
"MRI of the right breast",
"Fine needle aspiration of the right nipple discharge",
"Mammography of the left breast",
"Repeat ultrasonography of the right breast",
"Histopathological study of the discharge",
"Repeat mammography"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6274": {
"question_id": "mmlu_pro_6274",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 16-year-old male is brought to the emergency department with sudden-onset swelling of the right knee. He denies any trauma. Past medical history reveals cystic fibrosis. Examination of the knee reveals fluctuance on palpation of the joint but no increased warmth; mild restriction in range of motion due to the swelling is also noted. Numerous bruises are noted in various stages of resolution. Further history reveals easy bruising and bleeding from the gums over the past month. The most likely explanation for these findings is",
"correct_answer": "D",
"choices": [
"acquired factor VIII deficiency",
"diffuse intravascular coagulation secondary to infection",
"hypocomplementemia",
"malabsorption of vitamins A, D, E, and K"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5874": {
"question_id": "mmlu_pro_5874",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 23-year-old woman with bone marrow failure is treated with a large dose of rabbit antithymocyte globulin. Ten days later, she develops fever, lymphadenopathy, arthralgias, and erythema on her hands and feet. Which of the following is the most likely cause of these symptoms?",
"correct_answer": "F",
"choices": [
"Activation of complement system",
"Overproduction of monocytes",
"Cytokine secretion by natural killer cells",
"Hyperactive B-lymphocyte response",
"Allergic reaction to rabbit proteins",
"Immune complex deposition in tissues",
"Polyclonal T-lymphocyte activation",
"Eosinophil degranulation",
"Increased production of interferon-gamma",
"Acute graft-versus-host disease"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5872": {
"question_id": "mmlu_pro_5872",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A patient with damage to their cervical sympathetic chain will show the following signs and symptoms.",
"correct_answer": "A",
"choices": [
"Pupillary constriction and vasodilation of facial vessels",
"Pupillary dilation",
"Pupillary constriction, vasoconstriction of facial vessels and increased facial sweating",
"Pupillary dilation, vasoconstriction of facial vessels, decreased facial sweating and increased lacrimation",
"Pupillary constriction, vasodilation of facial vessels, increased facial sweating and decreased lacrimation",
"Pupillary constriction, vasodilation of facial vessels and increased facial sweating",
"Pupillary constriction and vasodilation of facial vessels, decreased facial sweating and increased lacrimation",
"Pupillary constriction",
"Pupillary dilation and vasoconstriction of facial vessels",
"Pupillary dilation, vasoconstriction of facial vessels and decreased facial sweating"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5937": {
"question_id": "mmlu_pro_5937",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Simple tandem repeat polymorphisms in humans are most useful for",
"correct_answer": "C",
"choices": [
"studying the effects of radiation exposure",
"identifying the presence of bacterial infections",
"solving criminal and paternity cases",
"determining blood type",
"transferring disease resistance factors into bone marrow cells",
"estimating relationships of humans and Neanderthals",
"accelerating the process of aging",
"predicting the risk of genetic disorders",
"reconstructing the relationships of humans and chimps.",
"determining the sex of an unborn child"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6065": {
"question_id": "mmlu_pro_6065",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Examination of a patient indicates that they have a medially directed strabismus (squint). This could be due to damage to the ",
"correct_answer": "F",
"choices": [
"oculomotor nerve.",
"ophthalmic trigeminal nerve.",
"glossopharyngeal nerve.",
"accessory nerve.",
"trochlear nerve.",
"abducens nerve.",
"vestibulocochlear nerve.",
"hypoglossal nerve.",
"facial nerve.",
"optic nerve."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6080": {
"question_id": "mmlu_pro_6080",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 31-year-old man with a 5-year history of HIV infection comes to the office because of anal pain, particularly on defecation, for the past 4 months. He says he has seen spots of blood on the toilet tissue but has not had any other noticeable bleeding. He reports no change in bowel habits and has not had recent fever, chills, or rectal drainage. He says he and his partner engage in anal-receptive intercourse. His most recent CD4+ T-lymphocyte count 2 months ago was 350/mm3 ; HIV viral load at that time was undetectable. He currently is being treated with antiretroviral therapy. He has had no opportunistic infections. Medical history is also significant for syphilis and genital herpes treated with penicillin and acyclovir, respectively. He does not smoke cigarettes or drink alcoholic beverages. Vital signs are normal. Physical examination shows small bilateral inguinal lymph nodes, but respiratory, cardiac, and abdominal examinations disclose no abnormalities. There are several tender fleshy lesions around the perianal area. Rectal examination produces tenderness, but there is no rectal discharge. Test of the stool for occult blood is trace positive. Which of the following is the most appropriate pharmacotherapy at this time?",
"correct_answer": "B",
"choices": [
"Acyclovir",
"Imiquimod",
"Levofloxacin",
"Metronidazole"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6359": {
"question_id": "mmlu_pro_6359",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "The electron transport chain, which is embedded in the mitochondrial membrane, exists primarily to generate new molecules of ATP for use by the cell. This is accomplished by a positive gradient of H+ ions that are formed outside the membrane which then pass back through a specialized channel known as ATP synthase. The energy created from this phosphorylates an ATP to an ATP, known as oxidative phosphorylation. The mechanism by which H+ returns to the inside of the mitochondria is known as what?",
"correct_answer": "G",
"choices": [
"Phagocytosis",
"Exocytosis",
"Osmosis",
"Endocytosis",
"Passive diffusion",
"Facilitated diffusion",
"Passive transport",
"Pinocytosis",
"Simple diffusion",
"Active transport"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6458": {
"question_id": "mmlu_pro_6458",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "What is food security?\n",
"correct_answer": "C",
"choices": [
"It is about ensuring that food prices are controlled and affordable for everyone.",
"It relates to efforts to prevent terrorists from poisoning food supplies.",
"Its component elements include availability, utilisation, and stability, as well as access.",
"Food security involves the development of genetically modified crops to increase yield.",
"It is an initiative to protect food supplies from natural disasters.",
"It is about ensuring everyone's access to food.",
"Food security refers to the preservation of food to prevent spoilage.",
"Food security focuses primarily on ending micronutrient malnutrition.",
"It is the system of producing enough food to feed a country's population.",
"Food security refers to the protection of agricultural land."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5869": {
"question_id": "mmlu_pro_5869",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "The coronary arteries",
"correct_answer": "I",
"choices": [
"arise from the ascending aorta and do not fill during either systole or diastole.",
"arise from the arch of the aorta and do not fill during either systole or diastole.",
"arise from the ascending aorta and fill during systole.",
"arise from the arch of the aorta and fill during systole.",
"arise from the pulmonary artery and fill during systole.",
"arise from the pulmonary artery and fill during diastole.",
"arise from the descending aorta and fill during systole.",
"arise from the descending aorta and fill during diastole.",
"arise from the ascending aorta and fill during diastole.",
"arise from the arch of the aorta and fill during diastole."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6416": {
"question_id": "mmlu_pro_6416",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following cannot be a substrate for gluconeogenesis?\n",
"correct_answer": "A",
"choices": [
"Palmitic acid",
"Pyruvate",
"Galactose",
"Glycerol",
"Propionic acid",
"Lactic acid",
"Aspartic acid",
"Alanine",
"Glutamic acid",
"Oxaloacetate"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6045": {
"question_id": "mmlu_pro_6045",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "The normal respiratory rate for an adult male is:",
"correct_answer": "D",
"choices": [
"20-22 breaths per minute.",
"24-26 breaths per minute.",
"26-28 breaths per minute.",
"14-16 breaths per minute.",
"12-14 breaths per minute.",
"10-12 breaths per minute.",
"22-24 breaths per minute.",
"8-10 breaths per minute.",
"16-18 breaths per minute.",
"18-20 breaths per minute."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6575": {
"question_id": "mmlu_pro_6575",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following structures is derived from ectomesenchyme?",
"correct_answer": "E",
"choices": [
"Cardiac muscle",
"Liver cells",
"Blood vessels",
"Adipose tissue",
"Melanocytes",
"Pancreatic cells",
"Skeletal muscles",
"Motor neurons"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6507": {
"question_id": "mmlu_pro_6507",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "An investigator is studying the incidence of the common cold among medical students at various time points during the school year. Results show an increased incidence of upper respiratory tract infections among these students during finals week. It is hypothesized that the stress of studying for examinations adversely affects the immune system, making the students more susceptible to infection. Which of the following laboratory findings in these students during examination week is most likely to support this hypothesis?",
"correct_answer": "D",
"choices": [
"Decreased lymphocyte count",
"Decreased basophil count",
"Decreased macrophage activity",
"Increased AM serum cortisol concentration",
"Increased macrophage activity",
"Decreased neutrophil count",
"Decreased AM serum cortisol concentration",
"Increased eosinophil count",
"Increased lymphocyte count",
"Increased basophil count"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6560": {
"question_id": "mmlu_pro_6560",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Who has legal responsibility for a patient's care during hospital admission, stay, and discharge?",
"correct_answer": "B",
"choices": [
"The GP.",
"The named consultant.",
"The named nurse.",
"The hospital pharmacist.",
"The hospital's legal department.",
"The ward manager.",
"The head nurse of the hospital.",
"The head of the hospital board.",
"The patient's family doctor.",
"The hospital CEO."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6271": {
"question_id": "mmlu_pro_6271",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A sexually active 20-year-old woman has had fever, chills, malaise, and pain of the vulva for 2 days. Examination shows a vulvar pustule that has ulcerated and formed multiple satellite lesions. Nodes are palpated in the inguinal and femoral areas. A smear of fluid from the lesions establishes the diagnosis. Which of the following is the most likely causal organism?",
"correct_answer": "G",
"choices": [
"Mycoplasma genitalium",
"Herpes simplex virus",
"Candida albicans",
"Chlamydia trachomatis",
"Streptococcus pyogenes (group A)",
"Human papillomavirus",
"Haemophilus ducreyi",
"Neisseria gonorrhoeae",
"Trichomonas vaginalis",
"Treponema pallidum"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6067": {
"question_id": "mmlu_pro_6067",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "The phrenic nerves innervate the",
"correct_answer": "E",
"choices": [
"diaphragm.",
"diaphragm and pericardium.",
"diaphragm, parietal pleura, pericardium and abdominal muscles.",
"diaphragm, parietal pleura, pericardium and lungs.",
"diaphragm, parietal pleura and pericardium.",
"diaphragm and parietal pleura.",
"diaphragm, parietal pleura, pericardium and heart.",
"diaphragm and intercostal muscles.",
"diaphragm, parietal pleura, pericardium and esophagus.",
"diaphragm, parietal pleura, pericardium and intercostals muscles."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6301": {
"question_id": "mmlu_pro_6301",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Insensible loss does not come from:",
"correct_answer": "C",
"choices": [
"burns.",
"frequent urination.",
"excessive weight loss.",
"consuming spicy foods.",
"diarrhoea.",
"using unhumidified oxygen.",
"pyrexia.",
"vomiting.",
"rapid breathing.",
"heavy sweating."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6445": {
"question_id": "mmlu_pro_6445",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "A 3-year-old white girl is brought to the office by her parents for a follow-up visit 48 hours after receiving a 5-TU PPD skin test. The test was done as part of a routine screening for enrollment in a homeless shelter. Physical examination shows 10 mm of induration at the puncture site; the examination is otherwise normal. The parents tell you they are shocked by this finding since both of their skin tests were nonreactive. They say they were born in this country and tell you that their daughter has always been in good health. She has not had much medical care in the past 2 years but she has been healthy. Until moving into this shelter they had been \"squatters\" in vacant buildings. Which of the following is the most appropriate step at this time?",
"correct_answer": "B",
"choices": [
"Call her previous physician to obtain more history",
"Order a chest x-ray",
"Order a test for HIV antibody",
"Repeat the PPD skin test"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_6126": {
"question_id": "mmlu_pro_6126",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following could very well be a cohort difference rather than a change that occurs with age?",
"correct_answer": "C",
"choices": [
"Loss of muscle mass",
"Shrinkage of the thymus gland",
"Differences in weight",
"Changes in metabolism rate",
"Changes in eyesight",
"Enlargement of the heart",
"Increase in wrinkles",
"Loss of hair color",
"Decreasing bone density",
"Increasing size of nose and ears"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_5848": {
"question_id": "mmlu_pro_5848",
"source_benchmark": "MMLU_Pro",
"domain": "health",
"question_text": "Which of the following releases most energy when completely oxidised in the body?",
"correct_answer": "D",
"choices": [
"One gram of glycerol",
"One gram of lactose",
"One gram of fructose",
"One gram of palmitic acid",
"One gram of cellulose",
"One gram of glucose",
"One gram of alcohol",
"One gram of leucine"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3342": {
"question_id": "mmlu_pro_3342",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "The checkered pattern of pigeons is controlled by a dominant gene C; plain color is determined by the recessive allele c. Red color is controlled by a dominant gene B, and brown color by the recessive allele b. Complete aPunnett square for adihybridcross involving a homozygous checkered red bird and a plain brown bird. For this cross, show the expected phenotypes, genotypes, genotypic frequencies and phenotypic ratios for the F_2 generation. P: CCBB \u00d7 ccbb Gametes: (CB) (cb) F_1 \u00d7 F_1 CcBb \u00d7 CcBb Gametes (CB) (Cb) (cB) (cb) (CB) CCBB CCBb CcBB CcBb (Cb) CCBb CCbb CcBb Ccbb (cB) CcBB CcBb ccBB ccBb (cb) CcBb Ccbb ccBb Ccbb",
"correct_answer": "C",
"choices": [
"Phenotypic ratio: checkered red 6, checkered brown 2, plain red 6, plain brown 2",
"Phenotypic ratio: checkered red 8, checkered brown 4, plain red 0, plain brown 4",
"Phenotypic ratio: checkered red 9, checkered brown 3, plain red 3, plain brown 1",
"Phenotypic ratio: checkered red 1, checkered brown 3, plain red 9, plain brown 3",
"Phenotypic ratio: checkered red 3, checkered brown 9, plain red 1, plain brown 3",
"Phenotypic ratio: checkered red 4, checkered brown 4, plain red 4, plain brown 4",
"Phenotypic ratio: checkered red 12, checkered brown 2, plain red 2, plain brown 0",
"Phenotypic ratio: checkered red 5, checkered brown 3, plain red 2, plain brown 6",
"Phenotypic ratio: checkered red 7, checkered brown 5, plain red 1, plain brown 3",
"Phenotypic ratio: checkered red 2, checkered brown 6, plain red 3, plain brown 1"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2802": {
"question_id": "mmlu_pro_2802",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Which of the following would most likely describe the fate of a vesicle formed as a result of phagocytosis?",
"correct_answer": "F",
"choices": [
"The vesicle remains intact and floats freely in the cytoplasm.",
"The vesicle merges with a Golgi apparatus.",
"The vesicle merges with a mitochondrion.",
"The vesicle fuses with the cell membrane and its contents are expelled out of the cell.",
"The vesicle breaks down on its own without merging with any other organelle.",
"The vesicle merges with a lysosome.",
"The vesicle releases its contents to the cytoplasm to be digested.",
"The vesicle is absorbed by the endoplasmic reticulum.",
"The vesicle is shuttled to the nucleus, and its contents become part of the nucleolus.",
"The vesicle merges with a peroxisome."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2713": {
"question_id": "mmlu_pro_2713",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "A male bacterium conjugates with a female bacterium. After conjugation, the female becomes a male. Account for this \"sexchange\".",
"correct_answer": "B",
"choices": [
"The female bacterium becomes a male as a result of nutrient deficiency",
"The female becomes a male by receiving one copy of the F factor",
"The female bacterium becomes a male through the process of osmosis",
"The female bacterium becomes a male through gene mutation",
"The female bacterium becomes a male after receiving multiple copies of non-F factor genes",
"The female bacterium becomes a male through binary fission",
"The female bacterium becomes a male by incorporating a plasmid unrelated to the F factor",
"The female bacterium becomes a male after exposure to antibiotics",
"The female bacterium becomes a male due to a viral infection",
"The female bacterium becomes a male due to environmental factors"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2969": {
"question_id": "mmlu_pro_2969",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Contrast the meanings of the terms \"gene pool\" and \"genotype\\textquotedblright.",
"correct_answer": "I",
"choices": [
"A gene pool is the genetic makeup present in the environment, while a genotype is the genetic makeup that an organism inherits from its parents.",
"A gene pool is the set of all genes within a single organism, while a genotype refers to the genetic variations among different populations.",
"A gene pool is the genetic constitution of an individual, while a genotype is the total genetic information in a population.",
"A gene pool is the collection of genes that predict an organism's phenotype, while a genotype is the combination of alleles that an organism possesses for a single trait.",
"A genotype is the unique genetic code found within a cell, while a gene pool encompasses the various genotypes that interact within an ecosystem.",
"Genotype refers to the breeding potential of a population, while a gene pool is the set of all possible genes that can be passed down through reproduction.",
"A gene pool and genotype both refer to the genetic constitution of an individual.",
"A gene pool and genotype both refer to the total genetic information in a population.",
"A gene pool is the total genetic information in a population, while a genotype is the genetic constitution of an individual.",
"A gene pool refers to a collection of physical traits in a population, while a genotype is the specific genetic makeup responsible for one particular trait."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2938": {
"question_id": "mmlu_pro_2938",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "A scientist carries out a cross between two guinea pigs, both of which have black coats. Black hair coat is dominant over white hair coat. Three quarters of the offspring have black coats, and one quarter have white coats. The genotypes of the parents were most likely",
"correct_answer": "A",
"choices": [
"Bb \u00d7 Bb",
"BB \u00d7 BB and Bb \u00d7 bb",
"bb \u00d7 Bb",
"bb \u00d7 bb",
"bb \u00d7 BB",
"Bb \u00d7 bb",
"Bb \u00d7 BB",
"BB \u00d7 Bb",
"BB \u00d7 bb",
"BB \u00d7 BB"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2916": {
"question_id": "mmlu_pro_2916",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Why does milk \\textquotedblleftspoil\\textquotedblright when kept in a refrigerator?",
"correct_answer": "G",
"choices": [
"Enzymatic reactions with oxygen",
"Lactose crystallization",
"Loss of refrigeration power over time",
"mesophiles",
"Natural separation of milk components without bacterial influence",
"Acidification from exposure to light",
"psychrophilic bacteria",
"thermophiles",
"chemical reactions",
"Contamination from other food items"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2830": {
"question_id": "mmlu_pro_2830",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "What is differential reproduction? What factors bring about differential reproduction?",
"correct_answer": "I",
"choices": [
"Differential reproduction is the result of equal survival rates for all genotypes within a population",
"Differential reproduction is driven exclusively by the presence of predators in an ecosystem",
"Differential reproduction occurs when certain traits prevent any mating from happening",
"Differential reproduction is when genetic mutations are the only factor influencing which offspring survive",
"Differential reproduction is purely based on random mating",
"Differential reproduction is determined solely by the environmental temperature during breeding seasons",
"Differential reproduction is related to the physical strength of the species",
"Differential reproduction is when all offsprings have the same genotype",
"Differential reproduction occurs when the percentage of viable offspring carrying a certain genotype cannot be accounted for by purely random mating. It results from nonrandom mating, differential fecundity, or differences in zygote viability or offspring fertility.",
"Differential reproduction occurs when all offspring inherit traits randomly, regardless of parental genotypes"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2792": {
"question_id": "mmlu_pro_2792",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "How can radioactive isotopes be used in biological research such as tracing the pathways of compounds synthesized in the living cell?",
"correct_answer": "A",
"choices": [
"Labeling substance with radioactive isotopes is a powerful method for studying specific metabolic pathways, as well as being applicable to many other areas of biological research.",
"Radioactive isotopes are used for improving the flavor and texture of food products.",
"Radioactive isotopes are used to stimulate the immune system in organisms.",
"Radioactive isotopes are used to increase the oxygen-carrying capacity of blood.",
"Radioactive isotopes are used to enhance the growth of agricultural crops.",
"Radioactive isotopes are used as a direct nutrient supplement in animal diets.",
"Radioactive isotopes are primarily used to create glow-in-the-dark materials for scientific equipment.",
"Radioactive isotopes are used to provide energy for spacecrafts.",
"Radioactive isotopes are used to kill cancer cells.",
"Radioactive isotopes are used for dating archaeological samples."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3380": {
"question_id": "mmlu_pro_3380",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "What is implied by the theory ofuniformitarianism?",
"correct_answer": "G",
"choices": [
"Uniformitarianism indicates that human activity is the primary driver of geological change.",
"Uniformitarianism posits that the earth is static and unchanging over time.",
"Uniformitarianism implies rapid geological changes.",
"Uniformitarianism suggests that the same geological processes observed today did not operate in the past.",
"Uniformitarianism infers that geological processes occur exclusively during catastrophic events.",
"Uniformitarianism asserts that the earth's geological features were formed in a single, divine creation event.",
"Uniformitarianism implies that animals and plants continually undergo a process of organic evolution and that the earth must be much older than a few thousand years.",
"Uniformitarianism assumes that the rate of geological change is unpredictable and chaotic.",
"Uniformitarianism states that geological forces have changed over time.",
"Uniformitarianism suggests the earth is only a few thousand years old."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3246": {
"question_id": "mmlu_pro_3246",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "An inexperienced nurse measures the blood pressure in the artery of the upper arm of a man. She then measures the blood pressure in the artery of the man's leg. The nurse obtains a different value. Why?",
"correct_answer": "C",
"choices": [
"The man's leg has a different heart rate",
"The arm and leg measurements differ because of a temporary blockage in the leg artery",
"The blood pressure decreases as the blood moves further away from the heart",
"The different readings are due to the man's varying stress levels",
"The man was standing during the leg measurement, affecting the result",
"The leg artery was constricted, leading to a higher reading",
"The blood pressure is always the same in all parts of the body",
"The nurse made a measurement error",
"The blood pressure is higher in the leg due to increased muscle activity",
"The leg artery naturally has higher pressure due to gravity"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2777": {
"question_id": "mmlu_pro_2777",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Consider the ABO blood group locus in humans, which has six (6) alleles reported for it, of which three are most prominent, I^A, I^B andi. The I^A and I^B alleles are both dominant toiandcodominantto each other. A woman of blood group O marries a man who has blood group B. There are four children in the family: a son and daughter of blood group O; a daughter who has A blood type; and a son who has B blood type. One of the children is adopted. Which child is the adopted child?",
"correct_answer": "D",
"choices": [
"the daughter of blood group O",
"The son with A blood type",
"the son of blood group O",
"the daughter who has blood group A",
"None of the children are adopted; a genetic mutation occurred",
"The daughter with AB blood type",
"the son who has B blood type",
"The son with AB blood type",
"The daughter with B blood type",
"The child with a rare blood type not listed"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3292": {
"question_id": "mmlu_pro_3292",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Describe the function of the lateral-line system in fishes.",
"correct_answer": "G",
"choices": [
"The lateral-line system in fishes helps with the digestion of food.",
"The lateral-line system in fishes is used for excreting waste materials.",
"The lateral-line system in fishes is involved in the secretion of hormones.",
"The lateral-line system in fishes is used for breathing.",
"The lateral-line system in fishes is used for vision.",
"The lateral-line system in fishes functions to absorb oxygen directly from the water.",
"The lateral-line system in fishes functions in the detection of sound and acceleration, enables the fish to detect localized as well as distant water disturbances, and functions as an organ of equilibrium.",
"The lateral-line system in fishes is primarily for attracting mates during the breeding season.",
"The lateral-line system in fishes is used for reproduction.",
"The lateral-line system in fishes is used for thermoregulation."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3120": {
"question_id": "mmlu_pro_3120",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "The leaf colors resulting from crosses between different branches on variegated Mirabilisjalapa(\\textquotedblleftfour o'clock\\textquotedblright) plants are shown below: Branch of Origin of the Male parent Branch of Origin of the Female parent Progeny Green Green Pale Variegated Green Pale Green, pale, variegated Pale Green Pale Variegated Green Pale Green, pale, variegated Variegated Green Pale Variegated Green Pale Green, pale, variegated What is the most likely mode of inheritance?",
"correct_answer": "H",
"choices": [
"Environmental influence inheritance",
"Random segregation inheritance",
"Epigenetic inheritance",
"Genetic inheritance",
"Bilateral inheritance",
"Pollen inheritance",
"Mendelian dominant-recessive inheritance",
"Cytoplasmic (maternal) inheritance",
"Paternal inheritance",
"Multifactorial inheritance"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2720": {
"question_id": "mmlu_pro_2720",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "A cross between two yellow-haired mice results in an F_1 ratio of: 2 yellow: 1nonyellow: 1 yellow (dies). Diagram a cross to explainthese results. P_1: yellow \u00d7 yellow Y^ly^L Y^ly^L G_1: (1/2)Y^l, (1/2)y^L (1/2)Y^l, (1/2)y^L F_1: (1/4)Y^lY^l (1/2)Y^ly^L (1/4)y^Ly^L dies yellow nonyellow",
"correct_answer": "A",
"choices": [
"The surviving yellow mice can be represented genotypically Y^ly^L and the nonyellow mice can be represented as y^Ly^L.",
"The surviving yellow mice can be represented genotypically Y^lY^l and the nonyellow mice can be represented as y^Ly^L.",
"The surviving yellow mice can be represented genotypically Y^LY^L and the nonyellow mice can be represented as Y^lY^l.",
"The surviving yellow mice can be represented genotypically y^Ly^L and the nonyellow mice can be represented as Y^Ly^L.",
"The surviving yellow mice can be represented genotypically Y^Ly^L and the nonyellow mice can be represented as Y^ly^L.",
"The surviving yellow mice can be represented genotypically y^Ly^L and the nonyellow mice can be represented as Y^lY^l.",
"The surviving yellow mice can be represented genotypically y^Ly^L and the nonyellow mice can be represented as Y^ly^L.",
"The surviving yellow mice can be represented genotypically Y^lY^l and the nonyellow mice can be represented as Y^Ly^L.",
"The surviving yellow mice can be represented genotypically y^ly^l and the nonyellow mice can be represented as Y^lY^l.",
"The surviving yellow mice can be represented genotypically Y^ly^L and the nonyellow mice can be represented as Y^Ly^L."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2718": {
"question_id": "mmlu_pro_2718",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "T_4phage that carrymutations in the genomic region calledrII cancause thelysisof strain B of E. coli but not of E. coli strainK. A plate of E. coli strain B is infected by two types of mutantphage simultaneously. The resultinglysateis divided inhalf. Half is diluted 1:10^7 and plated onto a lawn of E. coli strainB. The other half is diluted 1:10^5 and plated on strain K. Four plaques are found on K and 40 are found on B. Determine the amount of recombination between the two mutationsin this region of the T_4 genome.",
"correct_answer": "E",
"choices": [
"1 percent",
"2 percent",
"1.5 percent",
"0.1 percent",
"0.2 percent",
"0.3 percent",
"0.5 percent",
"5 percent",
"0.02 percent",
"0.05 percent"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2783": {
"question_id": "mmlu_pro_2783",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "What are the major forces of evolution?",
"correct_answer": "J",
"choices": [
"reproduction, migration, mutation, natural selection",
"mutation, genetic drift, migration, genetic hitchhiking",
"mutation, genetic drift, migration, random mating",
"mutation, gene flow, migration, natural selection",
"adaptation, genetic drift, migration, natural selection",
"mutation, genetic drift, movement, natural selection",
"mutation, genetic drift, migration, sexual selection",
"mutation, genetic drift, natural selection, artificial selection",
"mutation, genetic shift, migration, natural selection",
"mutation, genetic drift, migration, natural selection"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2911": {
"question_id": "mmlu_pro_2911",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "What do the words 'gymnosperm' and 'angiosperm' mean? What characteristics distinguish conifers from flowering plants?",
"correct_answer": "H",
"choices": [
"Gymnosperm means 'naked seeds' and angiosperm means 'enclosed seeds'. Conifers are distinguished from flowering plants by their ability to grow in colder climates and their distinct cone-like fruits.",
"Gymnosperm means 'naked seeds' and angiosperm means 'enclosed seeds', but conifers are not distinguished from flowering plants as both include species with cones and flowers.",
"Conifers are distinguished from flowering plants by their broad, flat leaves.",
"Both gymnosperms and angiosperms mean 'enclosed seeds'; however, conifers have leaves that change color and drop in the autumn, unlike flowering plants.",
"Angiosperms mean 'naked seeds' and gymnosperms mean 'enclosed seeds'. Conifers are distinguished from flowering plants by their soft, fleshy cones and brightly colored flowers.",
"Gymnosperm means 'enclosed seeds' and angiosperm means 'naked seeds'.",
"Angiosperms produce cones while conifers produce flowers and fruits.",
"Gymnosperm means 'naked seeds' and angiosperm means 'enclosed seeds'. Conifers are distinguished from flowering plants by their needle-like leaves, xylem consisting almost entirely of tracheids, and their reproductive structures (cones).",
"Gymnosperms are plants with flowers and fruits, while angiosperms are plants with cones and needle-like leaves.",
"Gymnosperm means 'enclosed seeds' and angiosperm means 'naked seeds'. Conifers are distinguished from flowering plants by their evergreen foliage and their production of fruits instead of cones."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2926": {
"question_id": "mmlu_pro_2926",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Black-headed gulls remove broken eggshells from their nests immediately after the young have hatched. Explain the biological significance of this behavior.",
"correct_answer": "I",
"choices": [
"It attracts predators away from the nestlings.",
"It aids in temperature regulation of the nest by reducing insulation.",
"It assists in the hatching of other eggs.",
"It is a ritualistic behavior without any particular significance.",
"It signals to other gulls that the nesting site is occupied.",
"It helps to keep the nest clean.",
"It provides extra nutrition to the parents, who eat the eggshells.",
"It is a way of preparing the nest for potential future clutches of eggs.",
"It reduces the chances of a nest being robbed, thus enhancing the survival of offspring."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3205": {
"question_id": "mmlu_pro_3205",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Which statement about variation is true?",
"correct_answer": "D",
"choices": [
"All phenotypic variation is the result of genotypic variation.",
"All genetic variation produces phenotypic variation.",
"All nucleotide variability results in neutral variation.",
"All new alleles are the result of nucleotide variability."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3304": {
"question_id": "mmlu_pro_3304",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Suppose you discovered a new species of bioluminescent worm . How could you prove that it was the worm itself and not some contaminating bacteria that was producing the light ?",
"correct_answer": "C",
"choices": [
"Isolate the light-producing organ and test it in isolation from the worm",
"Use spectroscopy to analyze the light's wavelength signature and compare it to known bioluminescent organisms",
"Place the light-producing substance in complete growth media or physically examine the light-emitting substance under a microscope.",
"Treat the worm with antibiotics to see if the light production stops",
"Conduct genetic modification to knock out suspected bioluminescent genes and observe if light production ceases",
"Perform DNA sequencing on the worm",
"Look for the presence of bioluminescent proteins in the worm",
"Compare the worm's bioluminescence with known bacterial bioluminescence patterns",
"Observe the worm in a dark environment",
"Measure the intensity of light produced by the worm over time to see if it correlates with the worm's life cycle"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2715": {
"question_id": "mmlu_pro_2715",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "A group of students were invited to taste phenylthiocarbamide (PTC). The ability to taste PTC is inherited by a single pair of genes and tasting (T) is dominant to non-tasting (t). Among 798 students, 60.4 percent were tasters, a) Calculate the allelic frequency of T and t. b) How many of the students were TT? Tt? tt?",
"correct_answer": "A",
"choices": [
"Frequency of T is .371, Frequency of t is .629, Number of students with TT is 109, Tt is 373, tt is 316",
"Frequency of T is .396, Frequency of t is .604, Number of students with TT is 150, Tt is 348, tt is 300",
"Frequency of T is .533, Frequency of t is .467, Number of students with TT is 200, Tt is 398, tt is 200",
"Frequency of T is .440, Frequency of t is .560, Number of students with TT is 220, Tt is 358, tt is 220",
"Frequency of T is .629, Frequency of t is .371, Number of students with TT is 180, Tt is 418, tt is 200",
"Frequency of T is .604, Frequency of t is .396, Number of students with TT is 373, Tt is 316, tt is 109",
"Frequency of T is .467, Frequency of t is .533, Number of students with TT is 250, Tt is 298, tt is 250",
"Frequency of T is .560, Frequency of t is .440, Number of students with TT is 180, Tt is 438, tt is 180",
"Frequency of T is .467, Frequency of t is .533, Number of students with TT is 109, Tt is 373, tt is 316",
"Frequency of T is .629, Frequency of t is .371, Number of students with TT is 316, Tt is 373, tt is 109"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3262": {
"question_id": "mmlu_pro_3262",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "The relatives of a group of pelicans from the same species that separated from each other because of an unsuccessful migration are reunited 150 years later and find that they are unable to produce offspring. This is an example of",
"correct_answer": "J",
"choices": [
"gene flow.",
"temporal isolation.",
"disruptive selection.",
"founder effect.",
"genetic drift.",
"sexual selection.",
"sympatric speciation.",
"habitat fragmentation.",
"bottleneck effect.",
"allopatric speciation."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2891": {
"question_id": "mmlu_pro_2891",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Monoploidlines of many species exist. Plant breeders often convert these to diploids orpolyploidsthroughcolchicine treatment. Can you see any advantages to this?",
"correct_answer": "A",
"choices": [
"Plant breeders can control the euploidy of their nuclei, produce genetically pure organisms, develop disease resistant strains of plants, produce larger fruits and flowers, and convert a sterile species hybrid into a fertile double diploid.",
"The treatment can extend the natural lifespan of plants indefinitely",
"Colchicine treatment can make plants immune to all diseases",
"The treatment results in plants that can survive in zero-gravity environments",
"Plant breeders can create new species",
"The treatment can make plants change color spontaneously",
"Colchicine treatment can eliminate the need for sunlight in plant growth",
"Colchicine treatment can convert diploids to monoploids",
"Colchicine treatment can make plants resistant to all herbicides",
"Colchicine treatment increases the speed of plant growth exponentially"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3353": {
"question_id": "mmlu_pro_3353",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Describe the various land biomes that are usually encounteredby a traveler going from the equator to the arcticpolar ice cap.",
"correct_answer": "J",
"choices": [
"Tropical rain forests, mangrove swamps, coral reefs, and polar ice caps",
"Tropical rain forests, temperate grasslands, and polar ice caps only",
"Tropical rain forests, tundra, and desert regions",
"Tropical rain forests, savannas, deserts, mediterranean shrublands, temperate grasslands, and arctic tundra",
"Savannahs, deserts, temperate broadleaf forests, chaparral, and alpine biomes",
"Grassland biomes, boreal forests, and sclerophylous bushlands",
"Temperate deciduous forests, tropical rain forests, and tundra",
"Tropical rain forests, montane forests, temperate coniferous forests, and permanent ice fields",
"Tropical rain forests, desert regions, temperate deciduous forests, and arctic tundra",
"The various land biomes encountered from the equator to the arctic polar ice cap are tropical rain forests, grassland biomes, temperate deciduous forests, the boreal forest or taiga, the tundra, desert regions, and sclerophylous bushlands."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3117": {
"question_id": "mmlu_pro_3117",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "What is meant by a gene pool, balanced polymorphism, and genetic load?",
"correct_answer": "C",
"choices": [
"Gene pool is the total number of organisms in a population; balanced polymorphism is a genetic imbalance; genetic load is the genetic weight of an organism.",
"Gene pool refers to the physical pool where genes are stored; balanced polymorphism is the imbalance of allele frequencies; genetic load is the amount of genetic information carried by an organism.",
"Gene pool is the sum total of genes in a population; balanced polymorphism is when different forms of a genotype are maintained in equilibrium over time; genetic load is the reduction in average fitness due to presence of suboptimal forms.",
"Gene pool is the total genetic information in the gametes of a population; balanced polymorphism is the consistent occurrence of a single allele; genetic load is the genetic diversity within a species.",
"Gene pool is the collection of genes in a single organism; balanced polymorphism is the variation of a single gene; genetic load is the number of genes in an organism.",
"Gene pool refers to a single gene; balanced polymorphism refers to a dominance of one gene; genetic load refers to the genetic burden of an organism.",
"Gene pool is the total number of alleles in a population; balanced polymorphism is the maintenance of two or more alleles at a locus by natural selection; genetic load is the proportion of individuals in a population with genetic defects.",
"Gene pool is the diversity of genes in a population; balanced polymorphism is the dominance of a single gene; genetic load is the genetic capacity of an organism.",
"Gene pool is the sum of all genetic variations in a closed environment; balanced polymorphism is the equal representation of all alleles in a population; genetic load is the accumulated genetic mutations in a population.",
"Gene pool is the set of all genetic traits in a biological community; balanced polymorphism is when one allele is favored over others; genetic load is the genetic contribution of an organism to its offspring."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2913": {
"question_id": "mmlu_pro_2913",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "The hormone which brings about metamorphosis of the cecropia moth is secreted by a gland in the thorax. However, no metamorphosis takes place if the head is cut off, even though the larva continues to live. Explain.",
"correct_answer": "I",
"choices": [
"The gland in the thorax is the only gland involved, but it requires a signal from the head to start metamorphosis.",
"Only the gland in the head is necessary for metamorphosis.",
"The gland in the head secretes a different hormone necessary for the initiation of the metamorphosis process.",
"The gland in the thorax secretes the hormone, but the head must be present to activate it.",
"Metamorphosis is triggered by environmental factors, not by hormones secreted by glands.",
"Metamorphosis is controlled by a gland in the abdomen, not the thorax or the head.",
"The head contains an inhibitor that prevents the gland in the thorax from inducing metamorphosis.",
"Neither the gland in the head nor the gland in the thorax are necessary for metamorphosis.",
"Both the gland in the head and the gland in the thorax of the cecropia moth are necessary for the process of metamorphosis.",
"Only the gland in the thorax is necessary for metamorphosis."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3147": {
"question_id": "mmlu_pro_3147",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Which of the following is an example of a cline?",
"correct_answer": "H",
"choices": [
"Males of some species have long antlers to fight other males for females.",
"A population of moths in a forest has evolved different color patterns depending on the type of tree they inhabit.",
"A species of fish has developed a unique color pattern in response to changes in water temperature.",
"Certain species of spiders have evolved different web-building behaviors depending on the availability of prey.",
"The hybrid tomato plant is stronger and produces better fruit than the pure genotype.",
"A certain type of beetle exhibits a wide range of body size, with larger individuals found in colder regions.",
"There are two distinct varieties in one population of snail that inhabits an island in the Pacific Ocean.",
"In one species of rabbit, the ones that evolved in the cold, snowy north are white, while the ones that evolved in the south are brown.",
"Some plants produce toxins that deter herbivores.",
"The frequency of a particular gene in a population of birds varies depending on the altitude of their habitat."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3291": {
"question_id": "mmlu_pro_3291",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Mostmarine vertebrates have body fluids with osmotic marine vertebrates have body fluids with osmotic pressure lower than that of their saline environment. How do these organismsosmoregulatein face of aprepetualthreat of dehydration?",
"correct_answer": "J",
"choices": [
"Marine organisms osmoregulate by secreting excess water through their gills.",
"Marine organisms osmoregulate by periodically releasing salt crystals through their digestive tract.",
"Marine organisms osmoregulate by reducing their body fluid concentration",
"Marine organisms osmoregulate by avoiding sea water",
"Marine organisms osmoregulate by retaining urea in their bloodstream to balance the osmotic pressure.",
"Marine organisms osmoregulate by undergoing periodic dehydration and rehydration cycles.",
"Marine organisms osmoregulate by producing large amounts of mucus to trap salt particles.",
"Marine organisms osmoregulate by absorbing salt through their skin to match the sea water's salinity.",
"Marine organisms osmoregulate by consuming more sea water",
"Marine organisms osmoregulate by actively transporting out excess salt from their body fluids, often through specialized glands."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2972": {
"question_id": "mmlu_pro_2972",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Why is a moss plant restricted to a height of less thanabout 15 cm?",
"correct_answer": "C",
"choices": [
"Moss plants are limited in height by the competitive shading of taller plants",
"Moss plants require direct sunlight, which limits their height",
"Moss plants are restricted to a height of less than 15 cm due to the inefficiency of the rhizoids in absorbing water and minerals, absence of True vascular and supporting tissues, and the reproductive process requiring a moist environment.",
"Moss plants are always preyed upon by insects",
"Moss plants have weak seed dispersal mechanisms limiting their height",
"Moss plants cannot photosynthesize effectively",
"Moss plants rely on water absorption through leaves, which limits their height",
"Moss plants have specialized leaves that restrict their growth",
"Moss plants have a rapid growth rate",
"Moss plants are genetically designed to be short"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2694": {
"question_id": "mmlu_pro_2694",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "The theory of evolution is most accurately described as",
"correct_answer": "F",
"choices": [
"a universally accepted fact about the origin and development of all species.",
"a hypothesis in the process of being tested and verified.",
"a disproven theory about the development of species over time.",
"a speculation about possible changes in populations over time.",
"an opinion that some scientists hold about how living things change over time.",
"an overarching explanation, supported by much evidence, for how populations change over time.",
"one possible explanation, among several scientific alternatives, about how species have come into existence.",
"an educated guess about how species originate.",
"an assumption based on observations of a few species.",
"a religious belief about the origin of species."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2851": {
"question_id": "mmlu_pro_2851",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "We know that radiation and chemicals can cause cancer. Can viruses be a cause of cancer?",
"correct_answer": "E",
"choices": [
"Only radiation can cause cancer",
"Only chemicals can cause cancer",
"Only inherited genetics can cause cancer",
"Only lifestyle factors such as diet and exercise can cause cancer",
"Yes, viruses can be a cause of cancer",
"No, viruses cannot cause cancer",
"Viruses can increase the risk but are not a direct cause of cancer",
"Yes, but only bacteria, not viruses, can be a cause of cancer"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3402": {
"question_id": "mmlu_pro_3402",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Why would you not expect conjugation among a group of paramecia that had descended from a single individual through repeated fission?",
"correct_answer": "C",
"choices": [
"conjugation results in no genetic diversity",
"conjugation requires more than two organisms",
"all individuals are identical genetically, and conjugation could not occur",
"repeated fission in paramecia results in sterile offspring that cannot conjugate",
"the group of paramecia lacks the cellular structures necessary for conjugation",
"conjugation is only possible in the presence of a specific chemical signal absent in this group",
"conjugation is prevented by the environmental conditions in which the paramecia live",
"conjugation can occur regardless of genetic similarities",
"paramecia descended from a single individual are incapable of conjugation due to a mutation",
"conjugation only occurs at a specific time of year which has not yet arrived"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3036": {
"question_id": "mmlu_pro_3036",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "The same gene that causes various coat patterns in wild and domesticated cats also causes the cross-eyed condition in these cats, the cross-eyed condition being slightly maladaptive. In a hypothetical environment, the coat pattern that is associated with crossed eyes is highly adaptive, with the result that both the coat pattern and the cross-eyed condition increase in a feline population over time. Which statement is supported by these observations?",
"correct_answer": "G",
"choices": [
"Genetic mutations causing the coat pattern are more likely to occur in this population.",
"Natural selection always results in the elimination of maladaptive traits.",
"Polygenic inheritance is generally maladaptive, and should become less common in future generations.",
"This scenario disproves the theory of evolution.",
"The cross-eyed condition is actually an adaptive trait in this environment.",
"The coat pattern gene and the cross-eyed condition gene are separate and unrelated.",
"Phenotype is often the result of compromise.",
"Evolution is progressive and tends toward a more perfect population.",
"Natural selection reduces the frequency of maladaptive genes in populations over the course of time.",
"The cross-eyed condition will eventually disappear from the population as it is maladaptive."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2847": {
"question_id": "mmlu_pro_2847",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Besides temperature, what other physical conditions must be taken into account for the growth of bacteria?",
"correct_answer": "A",
"choices": [
"Levels of oxygen, acidity or alkalinity of the medium",
"Temperature changes, light exposure",
"Atmospheric pressure, noise levels",
"Nitrogen levels, radiation exposure",
"Soil texture, light wavelength",
"Levels of carbon dioxide, humidity",
"Water activity, presence of specific nutrients",
"Magnetic fields, altitude",
"Barometric pressure, salinity of water",
"Light exposure, humidity"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2908": {
"question_id": "mmlu_pro_2908",
"source_benchmark": "MMLU_Pro",
"domain": "biology",
"question_text": "Hemophilia, a genetic blood disorder, is caused by a recessive sex-linked gene. Aphenotypicallynormal couple had a son with hemophilia. What is the probability that their next child, if a girl, would also have hemophilia?",
"correct_answer": "G",
"choices": [
"5% chance",
"12.5% chance",
"50% chance",
"66% chance",
"10% chance",
"33% chance",
"No chance",
"25% chance",
"100% chance",
"75% chance"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10481": {
"question_id": "mmlu_pro_10481",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Let us say that we have computed the gradient of our cost function and stored it in a vector g. What is the cost of one gradient descent update given the gradient?",
"correct_answer": "I",
"choices": [
"O(DN)",
"O(D^3)",
"O(1)",
"O(N^3)",
"O(ND^2)",
"O(ND)",
"O(N)",
"O(N^2)",
"O(D)",
"O(D^2)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10211": {
"question_id": "mmlu_pro_10211",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Consider the following grammar.\nS \u2192 ( S )\nS \u2192 x\nWhich of the following statements is (are) true?\nI. The grammar is ambiguous.\nII. The grammar is suitable for top-down parsing.\nIII. The grammar is suitable for bottom-up parsing.",
"correct_answer": "G",
"choices": [
"III only and not II",
"II only and not III",
"I only",
"I and II only",
"III only",
"Neither I, II, nor III",
"II and III only",
"II only",
"I and III only",
"I, II, and III"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10166": {
"question_id": "mmlu_pro_10166",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Which among them has the strongest wireless security?",
"correct_answer": "H",
"choices": [
"WPA",
"WPA2-Enterprise",
"WEP",
"WEP+",
"WPA-PSK",
"WEP2",
"WPA2",
"WPA3",
"WPA4",
"WPA2-PSK"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10533": {
"question_id": "mmlu_pro_10533",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Determine the period of the following signal, $$ x_1(t)=\\cos (3 \\pi t)-4 \\cos (5 \\pi t-0.5 \\pi) $$",
"correct_answer": "H",
"choices": [
"3",
"4",
"0.5",
"3.5",
"1.5",
"1",
"5",
"2",
"2.5",
"6"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10294": {
"question_id": "mmlu_pro_10294",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Calculate the Hamming pairwise distances and determine the minimum Hamming distance among the following codewords: 00000,10101,01010",
"correct_answer": "J",
"choices": [
"3",
"6",
"10",
"7",
"8",
"1",
"9",
"4",
"5",
"2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10279": {
"question_id": "mmlu_pro_10279",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Which of the following considerations applies (apply) to choosing the page size in a paging system?\nI. An advantage of larger pages is that they lead to smaller page tables.\nII. An advantage of smaller pages is that they lead to less waste due to internal fragmentation.\nIII. Normally, the dominant factor in disk access time is not dependent on page length, so longer pages can be used advantageously.",
"correct_answer": "G",
"choices": [
"III only",
"II only",
"Only II and I apply, not III",
"I only",
"I and III only",
"I and II only",
"I, II, and III",
"Only III and I apply, not II",
"II and III only",
"None of the above considerations apply"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10268": {
"question_id": "mmlu_pro_10268",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Briefly discuss the major functional unit of the Central Processing Unit (CPU).",
"correct_answer": "I",
"choices": [
"The CPU consists of six major functional units: Control Unit (CU), Arithmetic Control Unit (ALU), Internal Storage Unit (ISU), Cache Memory, Input/Output (I/O) Management, and Data Bus.",
"The CPU consists of two major functional units: Control Unit (CU) and Arithmetic Control Unit (ALU).",
"The CPU consists of four major functional units: Control Unit (CU), Arithmetic Control Unit (ALU), Internal Storage Unit (ISU), and Cache Memory.",
"The CPU consists of three major functional units: Control Unit (CU), Cache Memory, and Internal Storage Unit (ISU).",
"The CPU consists of four major functional units: Control Unit (CU), Arithmetic Control Unit (ALU), Cache Memory, and External Storage Unit (ESU).",
"The CPU consists of one major functional unit: Control Unit (CU).",
"The CPU consists of five major functional units: Control Unit (CU), Arithmetic Control Unit (ALU), Internal Storage Unit (ISU), Cache Memory, and Input/Output (I/O) Management.",
"The CPU consists of seven major functional units: Control Unit (CU), Arithmetic Control Unit (ALU), Internal Storage Unit (ISU), Cache Memory, Input/Output (I/O) Management, Data Bus, and Graphics Processing Unit (GPU).",
"The CPU consists of three major functional units: Control Unit (CU), Arithmetic Control Unit (ALU), and Internal Storage Unit (ISU).",
"The CPU consists of two major functional units: Arithmetic Control Unit (ALU) and Cache Memory."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10225": {
"question_id": "mmlu_pro_10225",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "In a Gigabit Ethernet LAN, the average size of a frame is 1000 bytes. If a noise of 2ms occurs on the LAN, how many frames are destroyed?",
"correct_answer": "J",
"choices": [
"750",
"500",
"1000",
"600",
"150",
"100",
"200",
"400",
"125",
"250"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10531": {
"question_id": "mmlu_pro_10531",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Given 3 Colors whose RGB representations are given as follows: Color 1: (0.5, 0.5, 0.5), Color 2: (0.4, 0.6, 0.5), Color 3: (0.3, 0.7, 0.5), Which Color does not carry chrominance (Color) Information? Answer with 1 or 2 or 3.",
"correct_answer": "F",
"choices": [
"None of the above",
"Both 1 and 2",
"Both 2 and 3",
"All of the above",
"3",
"1",
"Only 1 and 2 carry chrominance information",
"2",
"Only 2 and 3 carry chrominance information",
"Both 1 and 3"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10206": {
"question_id": "mmlu_pro_10206",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "In the procedure Mystery below, the parameter number is a positive integer.\n\n PROCEDURE Mystery (number)\n {\n REPEAT UNTIL (number <= 0)\n {\n number \u2190 number - 2\n }\n IF (number = 0)\n {\n RETURN (true)\n }\n ELSE\n {\n RETURN (false)\n }\n }\n\n Which of the following best describes the result of running the procedure Mystery?",
"correct_answer": "G",
"choices": [
"The procedure returns true when the initial value of number is a multiple of 2 or 3, and it otherwise returns false.",
"The procedure returns false when the initial value of number is greater than 2, and it otherwise returns true.",
"The procedure returns false when the initial value of number is a prime number, and it otherwise returns true.",
"The procedure returns true when the initial value of number is odd, and it otherwise returns false.",
"The procedure returns false when the initial value of number is even, and it otherwise returns true.",
"The procedure returns true when the initial value of number is less than 2, and it otherwise returns false.",
"The procedure returns true when the initial value of number is even, and it otherwise returns false.",
"The procedure returns true when the initial value of number is 2, and it otherwise returns false.",
"The procedure returns true when the initial value of number is a prime number, and it otherwise returns false.",
"The procedure returns true when the initial value of number is greater than 2, and it otherwise returns false."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10500": {
"question_id": "mmlu_pro_10500",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Obtain the 1's and 2's complements of the following binary numbers: a)1010101 b)0111000 c)0000001 d)10000 e)0000",
"correct_answer": "J",
"choices": [
"a) 1's complement: 1010100, 2's complement: 1010101; b) 1's complement: 0111001, 2's complement: 0111010; c) 1's complement: 0000001, 2's complement: 0000010; d) 1's complement: 10001, 2's complement: 10010; e) 1's complement: 0001, 2's complement: 0000",
"a) 1's complement: 0101011, 2's complement: 0101100; b) 1's complement: 1000110, 2's complement: 1000111; c) 1's complement: 1111111, 2's complement: 0000000; d) 1's complement: 01110, 2's complement: 01111; e) 1's complement: 0001, 2's complement: 0010",
"a) 1's complement: 0101010, 2's complement: 0101001; b) 1's complement: 1000111, 2's complement: 1001011; c) 1's complement: 1111101, 2's complement: 1111110; d) 1's complement: 01111, 2's complement: 01110; e) 1's complement: 1110, 2's complement: 1111",
"a) 1's complement: 1010101, 2's complement: 1010100; b) 1's complement: 0111000, 2's complement: 0110111; c) 1's complement: 0000000, 2's complement: 0000001; d) 1's complement: 10000, 2's complement: 01111; e) 1's complement: 0000, 2's complement: 1111",
"a) 1's complement: 1010101, 2's complement: 0101010; b) 1's complement: 0111011, 2's complement: 0111000; c) 1's complement: 0000010, 2's complement: 0000001; d) 1's complement: 11110, 2's complement: 00001; e) 1's complement: 1111, 2's complement: 1110",
"a) 1's complement: 0101010, 2's complement: 1010101; b) 1's complement: 1000111, 2's complement: 0111000; c) 1's complement: 1111110, 2's complement: 0000001; d) 1's complement: 01111, 2's complement: 10000; e) 1's complement: 1111, 2's complement: 0000",
"a) 1's complement: 0101001, 2's complement: 0101010; b) 1's complement: 1000101, 2's complement: 1000110; c) 1's complement: 1111100, 2's complement: 1111101; d) 1's complement: 01111, 2's complement: 01110; e) 1's complement: 1110, 2's complement: 1001",
"a) 1's complement: 1111111, 2's complement: 0000000; b) 1's complement: 1000111, 2's complement: 0111000; c) 1's complement: 1111111, 2's complement: 0000000; d) 1's complement: 11111, 2's complement: 00000; e) 1's complement: 1111, 2's complement: 0000",
"a) 1's complement: 0101011, 2's complement: 0101010; b) 1's complement: 1000100, 2's complement: 1000101; c) 1's complement: 1111110, 2's complement: 1111101; d) 1's complement: 01110, 2's complement: 01111; e) 1's complement: 0000, 2's complement: 0001",
"a) 1's complement: 0101010, 2's complement: 0101011; b) 1's complement: 1000111, 2's complement: 1001000; c) 1's complement: 1111110, 2's complement: 1111111; d) 1's complement: 01111, 2's complement: 10000; e) 1's complement: 1111, 2's complement: 10000"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10433": {
"question_id": "mmlu_pro_10433",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Let a undirected graph G with edges E = {<0,2>,<2,1>,<2,3>,<3,4>,<4,1>}, which <A,B> represent Node A is connected to Node B. What is the shortest path from node 4 to node 0? Represent the path as a list.",
"correct_answer": "I",
"choices": [
"[4, 3, 1, 0]",
"[4, 2, 1, 0]",
"[4, 3, 2, 0]",
"[4, 1, 0]",
"[4, 3, 2, 1, 0]",
"[4, 3, 1, 2, 0]",
"[4, 1, 3, 2, 0]",
"[4, 3, 0]",
"[4, 1, 2, 0]",
"[4, 2, 3, 1, 0]"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10198": {
"question_id": "mmlu_pro_10198",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Develop a FORTRAN subroutine that sorts a list of N elements and arranges them in ascending order.",
"correct_answer": "I",
"choices": [
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = 1, N DO 20 J = 1, N IF (Y (J).LT.Y (I)) THEN TEMP = Y (I) Y (I) = Y (J) Y (J) = TEMP END IF 20 CONTINUE 10 CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = N, 1, -1 DO 20 J = N, I+1, -1 IF (Y (J).LT.Y (I)) THEN TEMP = Y (I) Y (I) = Y (J) Y (J) = TEMP END IF 20 CONTINUE 10 CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = 1, N MIN = I DO 20 J = I+1, N IF (Y (J).LT.Y (MIN)) MIN = J 20 CONTINUE TEMP = Y (I) Y (I) = Y (MIN) Y (MIN) = TEMP 10 CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = 1, N DO 20 J = 1, N-1 IF (Y (J).LE.Y (J+1)) GO TO 20 TEMP = Y (J) Y (J) = Y (J+1) Y (J+1) = TEMP 20 CONTINUE 10 CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (1) CY IS THE ARRAY TO BE SORTED N1 = N - 1 DO 10 I = 1, N1 J = I + 1 DO 20 K = J, N IF (Y (I).LT.Y (K)) GO TO 20 TEMP = Y (I) Y (I) = Y (K) Y (K) = TEMP 20CONTINUE 10CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = 1, N J = I + 1 DO 20 K = J, N IF (Y (I).GT.Y (K)) GO TO 20 Y (I) = Y (K) Y (K) = TEMP 20CONTINUE 10CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (1) DO 10 I = 1, N J = I + 1 DO 20 K = J, N IF (Y (I).GE.Y (K)) GO TO 20 TEMP = Y (I) Y (I) = Y (K) Y (K) = TEMP 20CONTINUE 10CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = 1, N MAX = I DO 20 J = I+1, N IF (Y (J).GT.Y (MAX)) MAX = J 20 CONTINUE TEMP = Y (I) Y (I) = Y (MAX) Y (MAX) = TEMP 10 CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (1) CY IS THE ARRAY TO BE SORTED N1 = N - 1 DO 10 I = 1, N1 J = I + 1 DO 20 K = J, N IF (Y (I).LE.Y (K)) GO TO 20 TEMP = Y (I) Y (I) = Y (K) Y (K) = TEMP 20CONTINUE 10CONTINUE RETURN END",
"SUBROUTINE SORT (Y, N) DIMENSION Y (N) DO 10 I = 1, N-1 DO 20 J = I+1, N IF (Y (J).GT.Y (I)) THEN TEMP = Y (I) Y (I) = Y (J) Y (J) = TEMP END IF 20 CONTINUE 10 CONTINUE RETURN END"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10456": {
"question_id": "mmlu_pro_10456",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Statement 1| We learn a classifier f by boosting weak learners h. The functional form of f\u2019s decision boundary is the same as h\u2019s, but with different parameters. (e.g., if h was a linear classifier, then f is also a linear classifier). Statement 2| Cross validation can be used to select the number of iterations in boosting; this procedure may help reduce overfitting.",
"correct_answer": "H",
"choices": [
"The statement 1 is not clear, True",
"False, False",
"True, Statement 2 is not clear",
"False, Both are not related",
"True, False",
"True, True",
"Both statements are not clear",
"False, True",
"True, Cross validation is not needed",
"False, Cross validation increases overfitting"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10370": {
"question_id": "mmlu_pro_10370",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Calculate the number of parity bits required in order to code aninformation consisting of one binary bit on each input line, intothe Hamming code, if each input information has: a) 8 bits, and, b) 4 bits.",
"correct_answer": "B",
"choices": [
"a) 1 parity bit, b) 2 parity bits",
"a) 4 parity bits, b) 3 parity bits",
"a) 6 parity bits, b) 5 parity bits",
"a) 3 parity bits, b) 2 parity bits",
"a) 7 parity bits, b) 6 parity bits",
"a) 5 parity bits, b) 4 parity bits",
"a) 2 parity bits, b) 3 parity bits",
"a) 2 parity bits, b) 1 parity bit",
"a) 3 parity bits, b) 4 parity bits",
"a) 5 parity bits, b) 2 parity bits"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10170": {
"question_id": "mmlu_pro_10170",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "How many trees are there on 5 unlabeled vertices?",
"correct_answer": "A",
"choices": [
"3",
"6",
"8",
"10",
"11",
"12",
"15",
"9",
"7",
"5"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10169": {
"question_id": "mmlu_pro_10169",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "The bandwidth of an analog signal is 4kHz. An A/D converter is used to convert the signal from analog to digital. What is the minimum sampling rate for eliminating the aliasing problem? (in kHz)",
"correct_answer": "E",
"choices": [
"10",
"7",
"4",
"6",
"8",
"12",
"5",
"20",
"16",
"2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10201": {
"question_id": "mmlu_pro_10201",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "A is any 20 \u00d7 20 array. Write a FUNCTION subprogram to compute PD (A, I, J) = [ A(I-1,J) + A(I+1,J) + A(I,J-1) + A(I,J+1)] / 4 Then use it to compute B_ij = (1-\\alpha)B_ij+ \\alpha [{Bi-1, j+Bi+1, j+ Bi, j-1+ Bi, j+1} / 4].",
"correct_answer": "B",
"choices": [
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I, J-1) + A (I, J+1) + A (I-1, J) + A (I+1, J)) / 5.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I-1, J) + A (I+1, J) + A (I, J-1) + A (I, J+1)) / 4.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I-1, J-1) + A (I+1, J+1) + A (I-1, J+1) + A (I+1, J-1)) / 4.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I, J) + A (I, J+1) + A (I, J-1) + A (I-1, J) + A (I+1, J)) / 5.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I, J) + A (I+1, J) + A (I-1, J) + A (I, J-1) + A (I, J+1)) / 6.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I, J) + A (I-2, J) + A (I+2, J) + A (I, J-2) + A (I, J+2)) / 4.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I-2, J) + A (I+2, J) + A (I, J-2) + A (I, J+2)) / 4.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I-1, J) + A (I, J+1) + A (I+1, J) + A (I, J-1)) / 3.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)].",
"FUNCTION PD (A, I, J) DIMENSION A (20, 20) PD = (A (I-1, J) + A (I+1, J) + A (I, J-1) + A (I, J+1)) / 2.0 RETURN END; B(I, J) = [(1.-ALPHA)*B(I, J) + ALPHA*PD(B, I, J)]."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10265": {
"question_id": "mmlu_pro_10265",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Explain the action of the following procedure which in-cludes asubroutine procedure within itself. VERIFY_TEST_VALUES: PROCEDURE; DCL LENGTH FIXEDDEC(3); CALL GET_AND_TEST_INPUT; \\textbullet \\textbullet \\textbullet \\textbullet \\textbullet CALL GET_AND_TEST_INPUT' \\textbullet \\textbullet \\textbullet \\textbullet \\textbullet GET_AND_TEST_INPUT:PROCEDURE; AGAIN:GETLIST(LENGTH); IF LENGTH = 0 THEN GOTO L; IF LENGTH<0 \\vert LENGTH>90 THEN DO; PUTLIST('ERROR', LENGTH);GOTOAGAIN; END; /\\textasteriskcentered END OF DO GROUP \\textasteriskcentered/ END GET_AND_TEST_INPUT; \\textbullet \\textbullet \\textbullet \\textbullet \\textbullet CALL GET_AND_TEST_INPUT; L:ENDVERIFY_TEST_VALUES;",
"correct_answer": "F",
"choices": [
"The subroutine procedure checks if each length value is between 0 and 100",
"The subroutine procedure returns the length value to the main program",
"The subroutine procedure is called only once in the program",
"The subroutine procedure is called at the beginning and end of the main program to validate the length",
"The subroutine procedure is called multiple times, each time incrementing the length value by 1",
"The subroutine procedure is called from three different points in the program, checks if each length value is between 0 and 90, and returns control to the main program.",
"The subroutine procedure is used to exit the main program if the length is within the specified range",
"The subroutine procedure is called recursively within itself to handle multiple length values",
"The subroutine procedure generates a list of length values from 0 to 90",
"The subroutine procedure is a loop that continues until the length value is outside the range of 0 to 90"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10273": {
"question_id": "mmlu_pro_10273",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "The procedure below is intended to display the index in a list of unique names (nameList) where a particular name (targetName) is found. lf targetName is not found in nameList, the code should display 0.\n PROCEDURE FindName (nameList, targetName)\n {\n index \u2190 0\n FOR EACH name IN nameList\n {\n index \u2190 index + 1\n IF (name = targetName)\n {\n foundIndex \u2190 index\n }\n ELSE\n {\n foundIndex \u2190 0\n }\n }\n DISPLAY (foundIndex)\n }\n Which of the following procedure calls can be used to demonstrate that the procedure does NOT Work as intended?",
"correct_answer": "G",
"choices": [
"FindName ([\"Andrea\", \"Ben\", \"Chris\", \"Diane\", \"Eva\", \"Frank\", \"Grace\", \"Hannah\", \"Igor\"], \"Igor\" )",
"FindName ([\"Andrea\", \"Ben\", \"Chris\", \"Diane\"], \"Diane\" )",
"FindName ([\"Andrea\", \"Ben\", \"Chris\", \"Diane\", \"Eva\", \"Frank\"], \"Frank\" )",
"FindName ([\"Andrea\", \"Ben\"], \"Ben\" )",
"FindName ([\"Andrea\", \"Chris\", \"Diane\"], \"Ben\")",
"FindName ([\"Andrea\", \"Ben\" ], \"Diane\" )",
"FindName ([\"Andrea\", \"Ben\", \"Chris\"], \"Ben\")",
"FindName ([\"Andrea\", \"Ben\", \"Chris\", \"Diane\", \"Eva\"], \"Eva\" )",
"FindName ([\"Andrea\", \"Ben\", \"Chris\", \"Diane\", \"Eva\", \"Frank\", \"Grace\", \"Hannah\"], \"Hannah\" )",
"FindName ([\"Andrea\", \"Ben\", \"Chris\", \"Diane\", \"Eva\", \"Frank\", \"Grace\"], \"Grace\" )"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10412": {
"question_id": "mmlu_pro_10412",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "The access matrix approach to protection has the difficulty that",
"correct_answer": "C",
"choices": [
"it requires an extensive amount of computational power",
"it is not capable of expressing complex protection requirements",
"the matrix, if stored directly, is large and can be clumsy to manage",
"the matrix cannot be modified once it is created",
"it does not allow for the sharing of resources between processes",
"there is no way to express who has rights to change the access matrix itself",
"it does not provide sufficient protection against external threats",
"there is no way to express the hierarchy of access rights within the matrix",
"deciding whether a process has access to a resource is undecidable",
"the matrix, if not stored directly, can cause system failure"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10462": {
"question_id": "mmlu_pro_10462",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "MIT\u2019s Kerberos KDC server has a maximum ticket lifetime of 24 hours (for most user principals). What ensures that an expired Kerberos ticket can no longer be used?",
"correct_answer": "A",
"choices": [
"When a client connects to a server, the server compares the ticket\u2019s expiration time to the server\u2019s current clock, and refuses to authenticate the user if the ticket expiration time is in the past.",
"The server maintains a database of all active and expired tickets, and refuses to authenticate the user if the ticket is found in the expired list.",
"When a client connects to a server, the server sets a 24-hour timer to terminate the connection, which ensures a client cannot remain connected past the ticket\u2019s maximum lifetime.",
"The client's system automatically disconnects from the server once the ticket lifetime is over.",
"The client needs to manually disconnect from the server after the ticket's maximum lifetime is reached.",
"The Kerberos server (KDC) refuses to establish new connections between clients and servers for expired tickets.",
"The Kerberos server (KDC) sends an alert to the client and server to terminate the connection once the ticket is expired."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10167": {
"question_id": "mmlu_pro_10167",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Encryption and decryption provide secrecy, or condentiality, but not",
"correct_answer": "B",
"choices": [
"Privacy",
"Integrity",
"Security",
"Reliability",
"Confidentiality",
"All of the above",
"Authentication",
"Anonymity"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10441": {
"question_id": "mmlu_pro_10441",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Of the following potential benefits, which is LEAST likely to be provided by the upgraded system?",
"correct_answer": "F",
"choices": [
"Customers will receive instant solutions to their issues.",
"Customers are likely to spend less time listening to information not relevant to their issue.",
"Human representatives will not be needed to respond to some inquiries.",
"Customers will be unable to mistakenly select the incorrect department for their particular issue.",
"The system will automatically resolve all customer issues.",
"The company will be able to provide a human representative for any incoming call."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10255": {
"question_id": "mmlu_pro_10255",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Let I = (S,V)I=(S,V) be a MAC. Suppose S(k,m)S(k,m) is always 5 bits long. Can this MAC be secure?",
"correct_answer": "C",
"choices": [
"Yes, the attacker cannot generate a valid tag for any message",
"Yes, the PRG is pseudorandom",
"No, an attacker can simply guess the tag for messages",
"No, the attacker can simply guess the key",
"Yes, the key length is sufficiently long",
"No, the key length is not long enough",
"Yes, the tag length is sufficiently long",
"It depends on the complexity of the PRG",
"It depends on the details of the MAC",
"It depends on the encryption algorithm used in the MAC"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10520": {
"question_id": "mmlu_pro_10520",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Which among the following prevents overfitting when we perform bagging?",
"correct_answer": "D",
"choices": [
"The use of all data without sampling",
"The practice of validation performed on a random subset of classifiers trained",
"The use of underfitting to counterbalance overfitting",
"The use of weak classifiers",
"The use of classification algorithms which are prone to overfitting",
"The use of classification algorithms which are not prone to overfitting",
"The use of sampling with replacement as the sampling technique",
"The practice of validation performed on every classifier trained",
"The use of strong classifiers",
"The use of sampling without replacement as the sampling technique"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10486": {
"question_id": "mmlu_pro_10486",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "What are the port states determined by Nmap?",
"correct_answer": "F",
"choices": [
"Closed, unused, half-closed",
"Open, half-open, closed ",
"Open, blocked, filtered",
"Active, passive, blocked",
"Closed, filtered, unfiltered",
"Open, filtered, unfiltered",
"Active, inactive, standby",
"Open, unused, blocked",
"Active, closed, unused",
"Active, passive, filtered"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10513": {
"question_id": "mmlu_pro_10513",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "TCP protocol is responsible (among other things) for",
"correct_answer": "F",
"choices": [
"Ensuring secure and encrypted communication between machines",
"Enabling communication between different network protocols",
"Routing packets through the network",
"Managing the energy consumption of network devices",
"Dealing with differences among operating system architectures",
"Reliable delivery of large (multi-packet) messages between machines that are not necessarily directly connected",
"Managing the physical layer of network communication",
"Providing a user interface for network communication",
"Reliable delivery of packets between directly connected machines"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10368": {
"question_id": "mmlu_pro_10368",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Consider Convolutional Neural Network D2 which takes input images of size 32x32 with 1 colour channels. The first layer of D2 uses 4 filters of size 5x5, a stride of 2, and zero-padding of width 1. The dimensions of the resulting activation map for each filter in this first layer will be k x k. What is the value of k?",
"correct_answer": "D",
"choices": [
"25",
"14",
"20",
"15",
"16",
"28",
"22",
"24",
"30",
"18"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10266": {
"question_id": "mmlu_pro_10266",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Given the function f(x,y,z) below, write f(x,y,z) as a product ofmaxterms. f(x,y,z) = (z +x) (y +z) (x + y + z) (x+y)",
"correct_answer": "G",
"choices": [
"f(x,y,z) = \u03a0M(0,3,4,5,6)",
"f(x,y,z) = \u03a0M(0,1,2,3,7)",
"f(x,y,z) = \u03a0M(1,4,5,6,7)",
"f(x,y,z) = \u03a0M(0,2,4,6,8)",
"f(x,y,z) = \u03a0M(0,2,3,5,7)",
"f(x,y,z) = \u03a0M(1,2,3,4,5)",
"f(x,y,z) = \u03a0M(0,1,4,5,6,7)",
"f(x,y,z) = \u03a0M(2,3,5,6,7)",
"f(x,y,z) = \u03a0M(2,3,4,5,6)",
"f(x,y,z) = \u03a0M(1,3,5,7)"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10383": {
"question_id": "mmlu_pro_10383",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "The language {ww | w in (0 + 1)*} is",
"correct_answer": "A",
"choices": [
"accepted by some Turing machine, but by no pushdown automaton",
"context-free, but not regular",
"accepted by some finite automaton, but not pushdown automaton",
"accepted by some Turing machine and some pushdown automaton",
"accepted by any context-free grammar, but not regular",
"accepted by some context-free grammar, but not Turing machine",
"regular, but not context-free",
"accepted by some pushdown automaton, but not context-free",
"not accepted by any Turing machine"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10455": {
"question_id": "mmlu_pro_10455",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Statement 1| The ID3 algorithm is guaranteed to find the optimal decision tree. Statement 2| Consider a continuous probability distribution with density f() that is nonzero everywhere. The probability of a value x is equal to f(x).",
"correct_answer": "F",
"choices": [
"True, True for a given data set",
"False, True",
"True, False",
"False, False unless specific conditions are met.",
"False, True only for discrete probability distributions",
"False, False",
"True, False only for specific continuous probability distributions",
"True, True only under certain conditions",
"False, True only when the optimal decision tree is defined in a specific way",
"True, True"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10296": {
"question_id": "mmlu_pro_10296",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Let a undirected graph G with edges E = {<0,4>,<4,1>,<0,3>,<3,4>,<3,2>,<1,3>}, which <A,B> represent Node A is connected to Node B. What is the minimum vertex cover of G? Represent the vertex cover in a list of ascending order.",
"correct_answer": "C",
"choices": [
"[0, 2, 4]",
"[0, 1, 3, 4]",
"[3, 4]",
"[0, 1, 2]",
"[1, 3]",
"[1, 2]",
"[2, 3, 4]",
"[0, 1]",
"[2, 4]",
"[0, 3, 4]"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10157": {
"question_id": "mmlu_pro_10157",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "Which of the following is NOT a property of bitmap graphics?",
"correct_answer": "F",
"choices": [
"They can support millions of colors",
"Realistic lighting and shading can be done.",
"Bitmaps can be made transparent",
"Fast hardware exists to move blocks of pixels efficiently.",
"Bitmap graphics can be created in multiple layers",
"All line segments can be displayed as straight.",
"Polygons can be filled with solid colors and textures."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10542": {
"question_id": "mmlu_pro_10542",
"source_benchmark": "MMLU_Pro",
"domain": "computer science",
"question_text": "If p(x) is the minimal-degree interpolating polynomial for the real-valued function f(x) at the n + 1 distinct real numbers x0, .... xn what is the maximum possible degree of p(x)?",
"correct_answer": "G",
"choices": [
"2n",
"n + 1",
"2n + 1",
"n\u00b3",
"n\u00b2 + 1",
"n/2",
"n",
"n - 1",
"n\u00b2",
"n + 2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9083": {
"question_id": "mmlu_pro_9083",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "The vibrational frequency of $I_2$ is $208 \\mathrm{~cm}^{-1}$. At what temperature will the population in the first excited state be half that of the ground state?",
"correct_answer": "J",
"choices": [
"390 $\\mathrm{~K}$",
"360 $\\mathrm{~K}$",
"212 $\\mathrm{~K}$",
"325 $\\mathrm{~K}$",
"500 $\\mathrm{~K}$",
"275 $\\mathrm{~K}$",
"550 $\\mathrm{~K}$",
"458 $\\mathrm{~K}$",
"600 $\\mathrm{~K}$",
" 432 $\\mathrm{~K}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8906": {
"question_id": "mmlu_pro_8906",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A car starts from rest and reaches a speed of 30 miles per hour in 8 seconds. What is its acceleration?",
"correct_answer": "C",
"choices": [
"6.5 ft per sec per sec",
"6.0 ft per sec per sec",
"5.5 ft per sec per sec",
"4.0 ft per sec per sec",
"2.5 ft per sec per sec",
"4.5 ft per sec per sec",
"8.0 ft per sec per sec",
"3.75 ft per sec per sec",
"7.0 ft per sec per sec",
"5.0 ft per sec per sec"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9418": {
"question_id": "mmlu_pro_9418",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "If a rock sample is found to contain approximately 1 polonium atom ^214 _84 Po for every 8.7 \u00d7 10^20 uranium nuclei ^238 _92 U, what is the half-life of polonium.",
"correct_answer": "A",
"choices": [
"1.6 \u00d7 10^4 seconds",
"4 \u00d7 10^4 seconds",
"1 \u00d7 10^4 seconds",
"5 \u00d7 10^3 seconds",
"2.5 \u00d7 10^4 seconds",
"8 \u00d7 10^3 seconds",
"1.2 \u00d7 10^5 seconds",
"7.5 \u00d7 10^3 seconds",
"2 \u00d7 10^4 seconds",
"3 \u00d7 10^4 seconds"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9356": {
"question_id": "mmlu_pro_9356",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "In a mixture of hydrogen, oxygen, and nitrogen gases at a given temperature, the fastest molecules on average are those of",
"correct_answer": "C",
"choices": [
"Nitrogen and hydrogen have the same average speed, but oxygen is slower",
"nitrogen",
"hydrogen",
"The speed depends on the specific temperature and cannot be determined",
"Nitrogen and oxygen have the same average speed, but hydrogen is slower",
"Oxygen and hydrogen have the same average speed, but nitrogen is slower",
"The speed of the gases is determined by their density, not their type",
"All have same average speed",
"The speed of the gases is independent of their type and depends solely on their volume",
"oxygen"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9312": {
"question_id": "mmlu_pro_9312",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A bell circuit consists of 150 feet of No. 18 wire which has a resistance of 1 ohm, a bell which has a resistance of 5 ohms, and a battery of two dry cells, each having an emf of 1.5 volts. What is the current through the circuit, assuming that the resistance of the battery is negligible?",
"correct_answer": "G",
"choices": [
"0.9 amp",
"0.6 amp",
"0.2 amp",
"0.3 amp",
"1 amp",
"0.4 amp",
"0.5 amp",
"2 amps",
"1.5 amps",
"0.75 amp"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9140": {
"question_id": "mmlu_pro_9140",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "An athlete whirls a discus in a circle of radius 80.0 cm. At a certain instant, the athlete is rotating at 10.0 rad / s and the angular speed is increasing at 50.0 rad / s^2. At this instant, find the magnitude (Unit: m / s^2) of the acceleration. Return the numeric value.",
"correct_answer": "C",
"choices": [
"110.5",
"70.2",
"89.4",
"82.5",
"102.1",
"120.7",
"96.3",
"75.6",
"58.0",
"64.2"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9064": {
"question_id": "mmlu_pro_9064",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "From laboratory measurements we know that a particular spectral line formed by hydrogen appears at a wavelength of 486.1 nanometers (nm). The spectrum of a particular star shows the same hydrogen line appearing at a wavelength of 485.9 nm. What can we conclude?",
"correct_answer": "J",
"choices": [
"The star's brightness is fluctuating.",
"The star's size is increasing.",
"The star's size is decreasing.",
"The star's temperature is constant.",
"The star is getting hotter.",
"The star is not moving.",
"The star is rotating on its axis.",
"The star is getting colder.",
"The star is moving away from us.",
"The star is moving toward us."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9971": {
"question_id": "mmlu_pro_9971",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "The equation E = mc^2 indicates that energy",
"correct_answer": "A",
"choices": [
"and mass are closely related.",
"is equal to the square of the speed of light.",
"equals moving mass.",
"is fundamentally different than mass.",
"equals mass divided by the speed of light squared.",
"and mass are not related at all.",
"equals the sum of mass and the speed of light squared.",
"is inversely proportional to mass.",
"equals mass moving at the speed of light squared.",
"equals mass multiplied by the speed of light."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9033": {
"question_id": "mmlu_pro_9033",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "Given that the spacing between vibrational energy levels of the HCl molecule is 0.36 eV, calculate the effective force constant in N/m.",
"correct_answer": "H",
"choices": [
"450.0",
"550.0",
"300.0",
"520.0",
"600.0",
"200.0",
"350.0",
"490.0",
"575.0",
"420.0"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10064": {
"question_id": "mmlu_pro_10064",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A spring is mounted horizontally, with its left end fixed. A spring balance attached to the free end and pulled toward the right indicates that the stretching force is proportional to the displacement, and a force of 6.0 N causes a displacement of 0.030 m. We replace the spring balance with a 0.50-kg glider, pull it 0.020 m to the right along a frictionless air track, and release it from rest. Find the period T of the resulting oscillation. (Unit: s)",
"correct_answer": "G",
"choices": [
"0.45",
"0.22",
"0.37",
"0.40",
"0.20",
"0.50",
"0.31",
"0.25",
"0.35",
"0.28"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9719": {
"question_id": "mmlu_pro_9719",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A baseball pitcher throws a ball weighing 1/3 pound with an acceleration of 480 feet per second^2. How much force does he apply to the ball?",
"correct_answer": "G",
"choices": [
"7.5 lbs. of force",
"3 lbs. of force",
"4 lbs. of force",
"10 lbs. of force",
"0.5 lbs. of force",
"2 lbs. of force",
"5 lbs. of force",
"6 lbs. of force",
"8 lbs. of force",
"160 lbs. of force"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8920": {
"question_id": "mmlu_pro_8920",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "It is known that a lab cart is moving east at 25 cm/s at time t1 = 0.10 s, and then moving east at 15 cm/s at t2 = 0.20 s. Is this enough information to determine the direction of the net force acting on the cart between t1 and t2?",
"correct_answer": "G",
"choices": [
"No, because we don\u2019t know if there are other forces acting on the cart apart from the net force.",
"No, because we don\u2019t know whether forces such as friction or air resistance might be acting on the cart.",
"No, because we don't know the initial velocity of the cart.",
"Yes, since we know the cart keeps moving to the east, the net force must be in the direction of motion.",
"Yes, because the cart is moving in the east direction, the net force must be acting in the east direction.",
"No, because we don\u2019t know the total distance covered by the cart.",
"Yes, since we know the cart is slowing down, its momentum change is opposite the direction of movement, and the net force is in the direction of momentum change.",
"No, because we don\u2019t know the mass of the cart.",
"Yes, because the cart's velocity is decreasing, the net force must be acting in the west direction.",
"Yes, since the cart is slowing down, the net force must be acting in the opposite direction to the direction of motion."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8916": {
"question_id": "mmlu_pro_8916",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "Each of the following ionized isotopes is projected with the same speed into a uniform magnetic field B such that the isotope's initial velocity is perpendicular to B. Which combination of mass and charge would result in a circular path with the largest radius?",
"correct_answer": "B",
"choices": [
"m = 20 u, q = \u20132 e",
"m = 20 u, q = \u20131 e",
"m = 18 u, q = \u20133 e",
"m = 16 u, q = \u20135 e",
"m = 18 u, q = \u20134 e",
"m = 17 u, q = \u20134 e",
"m = 19 u, q = \u20132 e",
"m = 16 u, q = \u20136 e",
"m = 21 u, q = \u20131 e",
"m = 22 u, q = \u20132 e"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9046": {
"question_id": "mmlu_pro_9046",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "How much work is required to turn an electric dipole $180^{\\circ}$ in a uniform electric field of magnitude $E=46.0 \\mathrm{~N} / \\mathrm{C}$ if the dipole moment has a magnitude of $p=3.02 \\times$ $10^{-25} \\mathrm{C} \\cdot \\mathrm{m}$ and the initial angle is $64^{\\circ} ?$\n",
"correct_answer": "I",
"choices": [
"$2.05 \\times 10^{-23} \\mathrm{~J}$",
"$7.32 \\times 10^{-23} \\mathrm{~J}$",
"$2.44 \\times 10^{-23} \\mathrm{~J}$",
"$5.50 \\times 10^{-23} \\mathrm{~J}$",
"$3.66 \\times 10^{-23} \\mathrm{~J}$",
"$1.83 \\times 10^{-23} \\mathrm{~J}$",
"$0.92 \\times 10^{-23} \\mathrm{~J}$",
"$0.61 \\times 10^{-23} \\mathrm{~J}$",
"$1.22 \\times 10^{-23} \\mathrm{~J}$",
"$4.88 \\times 10^{-23} \\mathrm{~J}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9302": {
"question_id": "mmlu_pro_9302",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "The partial molar volumes of water and ethanol in a solution with $x_{\\mathrm{H}_2 \\mathrm{O}}=0.45$ at $25^{\\circ} \\mathrm{C}$ are 17.0 and $57.5 \\mathrm{~cm}^3 \\mathrm{~mol}^{-1}$, respectively. Calculate the volume change upon mixing sufficient ethanol with $3.75 \\mathrm{~mol}$ of water to give this concentration. The densities of water and ethanol are 0.997 and $0.7893 \\mathrm{~g} \\mathrm{~cm}^{-3}$, respectively, at this temperature.",
"correct_answer": "F",
"choices": [
"-16 $\\mathrm{~cm}^3$",
"-12 $\\mathrm{~cm}^3$",
"-14 $\\mathrm{~cm}^3$",
"-4 $\\mathrm{~cm}^3$",
"4 $\\mathrm{~cm}^3$",
"-8 $\\mathrm{~cm}^3$",
"2 $\\mathrm{~cm}^3$",
"-10 $\\mathrm{~cm}^3$",
"0 $\\mathrm{~cm}^3$",
"-6 $\\mathrm{~cm}^3$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9331": {
"question_id": "mmlu_pro_9331",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "(a) How much heat energy is produced by a 5-kg rock that falls a vertical distance of 10 m before it strikes the surface of the earth? Assume that the rock was initially at rest. (b) How much would the temperature of 1 kg of water be raised by the rock striking the earth's surface? (4.19 \u00d7 10^3 J of heat energy is required to raise the temperature of 1 kg of water 1\u00b0K.)",
"correct_answer": "I",
"choices": [
"5.0 \u00d7 10^2J, 1.2 \u00d7 10^-1 \u00b0K",
"4.6 \u00d7 10^2J, 1.1 \u00d7 10^-1 \u00b0K",
"3.5 \u00d7 10^2J, 0.9 \u00d7 10^-1 \u00b0K",
"7.2 \u00d7 10^2J, 1.7 \u00d7 10^-1 \u00b0K",
"6.0 \u00d7 10^2J, 1.4 \u00d7 10^-1 \u00b0K",
"3.0 \u00d7 10^2J, 0.7 \u00d7 10^-1 \u00b0K",
"5.5 \u00d7 10^2J, 1.3 \u00d7 10^-1 \u00b0K",
"5.2 \u00d7 10^2J, 1.3 \u00d7 10^-1 \u00b0K",
"4.9 \u00d7 10^2J, 1.2 \u00d7 10^-1 \u00b0K",
"4.0 \u00d7 10^2J, 1.0 \u00d7 10^-1 \u00b0K"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9889": {
"question_id": "mmlu_pro_9889",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A distant sodium street lamp seen through a woven nylon curtain appears to be accompanied by a series of \"images\" spaced 0.3\u00b0 apart. What is the spacing of the nylon fibers? (\\lambda_Na= 5893 \\AA)",
"correct_answer": "H",
"choices": [
"0.92 \u00d7 10^-2 cm",
"0.75 \u00d7 10^-2 cm",
"0.99 \u00d7 10^-2 cm",
"0.85 \u00d7 10^-2 cm",
"1.22 \u00d7 10^-2 cm",
"1.03 \u00d7 10^-2 cm",
"1.50 \u00d7 10^-2 cm",
"1.12 \u00d7 10^-2 cm",
"1.05 \u00d7 10^-2 cm",
"1.35 \u00d7 10^-2 cm"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10087": {
"question_id": "mmlu_pro_10087",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "1) What is the short-wave limit of the continuous x-ray spectrum produced by an x-ray tube operating at 30,000 volts? 2) What is the wavelength of the shortest x-rays that could be produced by a Van de Graaff generator developing 5,000,000 volts?",
"correct_answer": "E",
"choices": [
"0.214 \u00c5 and 2.18 x 10^-3 \u00c5",
"0.414 \u00c5 and 3.48 x 10^-3 \u00c5",
"0.514 \u00c5 and 3.48 x 10^-3 \u00c5",
"0.514 \u00c5 and 1.98 x 10^-3 \u00c5",
"0.414 \u00c5 and 2.48 x 10^-3 \u00c5",
"0.214 \u00c5 and 1.28 x 10^-3 \u00c5",
"0.314 \u00c5 and 1.48 x 10^-3 \u00c5",
"0.314 \u00c5 and 2.98 x 10^-3 \u00c5",
"0.614 \u00c5 and 4.48 x 10^-3 \u00c5",
"0.114 \u00c5 and 0.58 x 10^-3 \u00c5"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8909": {
"question_id": "mmlu_pro_8909",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A boat travels directly upstream in a river, moving with constant but unknown speed v with respect to the water. At the start of this trip upstream, a bottle is dropped over the side. After 15 minutes the boat turns around and heads downstream. It catches up with the bottle when the bottle has drifted one mile downstream from the point at which it was dropped into the water. What is the current in the stream?",
"correct_answer": "A",
"choices": [
"2 miles per hour",
"1 mile per hour",
"2.5 miles per hour",
"3 miles per hour",
"5 miles per hour",
"4 miles per hour",
"1.5 miles per hour",
"0.5 miles per hour",
"0.75 miles per hour",
"3.5 miles per hour"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10004": {
"question_id": "mmlu_pro_10004",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "Suppose that a two-stage rocket starts from rest with a mass m_1. At burn-out of the first stage engine, the mass is m_2. If the exhaust velocity is v_0, the rocket velocity after the first stage engines quit is v = v_0ln(m_1 / m_2). Before the second stage engines are ignited, part of the mass m_2 is discarded - the mass of the first stage engines. The mass of the second stage is m_A when the engines start and m_B when the engines shut down. What is the terminal velocity of the second stage?",
"correct_answer": "I",
"choices": [
"v = v_0 ln [(m_1 - m_2) / (m_A * m_B)]",
"v = v_0 ln [(m_1 * m_2)(m_A*m_B)]",
"v = v_0 ln [(m_1^2 / m_2) * (m_A / m_B)]",
"v = v_0 ln [(m_1 * m_2) / (m_A + m_B)]",
"v = v_0 ln [(m_1 / m_2) + (m_A/m_B)]",
"v = v_0 ln [(m_1 / m_2)(m_A+m_B)]",
"v = v_0 ln [(m_1 + m_2) * (m_A - m_B)]",
"v = v_0 ln [m_1 / (m_2 + m_A + m_B)]",
"v = v_0 ln [(m_1 / m_2)(m_A/m_B)]",
"v = v_0 ln [(m_1 + m_2) / (m_A * m_B)]"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9262": {
"question_id": "mmlu_pro_9262",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A steel ball of velocity $5 \\mathrm{~m} / \\mathrm{s}$ strikes a smooth, heavy steel plate at an angle of $30^{\\circ}$ from the normal. If the coefficient of restitution is 0.8 , at what velocity does the steel ball bounce off the plate?",
"correct_answer": "C",
"choices": [
"$4.8 \\mathrm{~m} / \\mathrm{s}$",
"$3.8 \\mathrm{~m} / \\mathrm{s}$",
"$4.3 \\mathrm{~m} / \\mathrm{s}$",
"$4.0 \\mathrm{~m} / \\mathrm{s}$",
"$3.5 \\mathrm{~m} / \\mathrm{s}$",
"$4.6 \\mathrm{~m} / \\mathrm{s}$",
"$2.5 \\mathrm{~m} / \\mathrm{s}$",
"$5.0 \\mathrm{~m} / \\mathrm{s}$",
"$3.2 \\mathrm{~m} / \\mathrm{s}$",
"$2.8 \\mathrm{~m} / \\mathrm{s}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9714": {
"question_id": "mmlu_pro_9714",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "How much energy is required to break up a C^12 nucleus into three \\alpha particles?",
"correct_answer": "J",
"choices": [
"92.15 MeV",
"13.6 eV",
"25.711 MeV",
"0.007 809 AMU",
"0.5 MeV",
"3.6 MeV per \\alpha particle",
"28.30 MeV",
"12.007809 AMU",
"931.481MeV/AMU",
"7.274MeV"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9306": {
"question_id": "mmlu_pro_9306",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "What is the gyrofrequency of an electron in a magnetic field of 10 kilogauss, or 1 \u00d7 10^4 gauss? (A field of 10 to 15 kilogauss is typical of ordinary laboratory iron core electromagnets.)",
"correct_answer": "J",
"choices": [
"2.1 \u00d7 10^10 cps",
"1.2 \u00d7 10^10 cps",
"1.5 \u00d7 10^10 cps",
"1.8 \u00d7 10^10 cps",
"3.5 \u00d7 10^10 cps",
"5.5 \u00d7 10^10 cps",
"3.2 \u00d7 10^10 cps",
"4.0 \u00d7 10^10 cps",
"2.5 \u00d7 10^10 cps",
"2.8 \u00d7 10^10 cps"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9774": {
"question_id": "mmlu_pro_9774",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "An object 5 cm in diameter is 33.3 cm from a converging lens of 8 diopters. Calculate the position of the image from the lens and its size.",
"correct_answer": "C",
"choices": [
"40 cm, 1.5 cm",
"35 cm, 0.5 cm",
"20 cm, 3 cm",
"25 cm, 2 cm",
"12 cm, 6 cm",
"30 cm, 1 cm",
"24 cm, 3.5 cm",
"18 cm, 2.5 cm",
"10 cm, 5 cm",
"15 cm, 4 cm"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10061": {
"question_id": "mmlu_pro_10061",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "While riding a multispeed bicycle, the rider can select the radius of the rear sprocket that is fixed to the rear axle. The front sprocket of a bicycle has radius 12.0 cm. If the angular speed of the front sprocket is 0.6 rev/s, what is the radius (in cm) of the rear sprocket for which the tangential speed of a point on the rim of the rear wheel will be 5 m/s? The rear wheel has radius 0.330 m.",
"correct_answer": "D",
"choices": [
"3.50",
"2.00",
"4.25",
"2.99",
"3.25",
"4.50",
"5.00",
"2.50",
"3.99",
"1.75"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9424": {
"question_id": "mmlu_pro_9424",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "Consider a system of N particles each of which can exist in only two energy levels + \\epsilon and - \\epsilon. If the probability of occupying an energy level at temperature T is given by P_E = e^{-E/KT} where E is the energy of the level, and K is Boltzmann's constant, calculate the internal energy of the system.",
"correct_answer": "C",
"choices": [
"-2N\\epsilon\\cosh(\\epsilon/KT)",
"2N\\epsilon\\sinh(\\epsilon/KT)",
"-N\\epsilon\\tanh(\\epsilon/KT)",
"-N\\epsilon",
"N\\epsilon^2/KT",
"0",
"N\\epsilon(1 - e^{-\\epsilon/KT})",
"N\\epsilon\\tanh(\\epsilon/KT)",
"N\\epsilon\\coth(\\epsilon/KT)",
"N\\epsilon(1 - 2e^{-2\\epsilon/KT})"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_8868": {
"question_id": "mmlu_pro_8868",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "You drive a beat-up pickup truck along a straight road for $8.4 \\mathrm{~km}$ at $70 \\mathrm{~km} / \\mathrm{h}$, at which point the truck runs out of gasoline and stops. Over the next $30 \\mathrm{~min}$, you walk another $2.0 \\mathrm{~km}$ farther along the road to a gasoline station.\nWhat is your overall displacement from the beginning of your drive to your arrival at the station?",
"correct_answer": "D",
"choices": [
"9.4 km",
"10.0 km",
"12.0 km",
"10.4 km",
"11.4 km",
"7.4 km",
"6.4 km",
"12.4 km",
"8.4 km",
"13.4 km"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9181": {
"question_id": "mmlu_pro_9181",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A net force F_A acts on object A, and a net force F_B acts on object B. The mass of object B is twice the mass of object A, and the acceleration of object B is twice that of object A. Which of the following is true of forces F_A and F_B?",
"correct_answer": "C",
"choices": [
"F_B = 6F_A",
"F_B = 3F_A",
"F_B = 4F_A",
"F_B = F_A",
"F_B = 1/2 F_A",
"F_B = 8F_A",
"F_B = 3/2 F_A",
"F_B = 5F_A",
"F_B = 2F_A",
"F_B = 1/4 F_A"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9720": {
"question_id": "mmlu_pro_9720",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A 2.4 kilowatt generator delivers 10 amperes. At what potential difference does the generator operate?",
"correct_answer": "D",
"choices": [
"260 volts",
"280 volts",
"220 volts",
"240 volts",
"320 volts",
"200 volts",
"150 volts",
"300 volts",
"180 volts",
"400 volts"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9551": {
"question_id": "mmlu_pro_9551",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A bungee cord is 30.0 m long and, when stretched a distance x, it exerts a restoring force of magnitude kx. Your father-in-law (mass 95.0 kg) stands on a platform 45.0 m above the ground, and one end of the cord is tied securely to his ankle and the other end to the platform. You have promised him that when he steps off the platform he will fall a maximum distance of only 41.0 m before the cord stops him. You had several bungee cords to select from, and you tested them by stretching them out, tying one end to a tree, and pulling on the other end with a force of 380.0 N. When you do this, what distance (in m) will the bungee cord that you should select have stretched?",
"correct_answer": "B",
"choices": [
"1.0",
"0.602",
"1.8",
"0.45",
"1.2",
"1.5",
"0.8",
"0.3",
"0.95",
"0.75"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9173": {
"question_id": "mmlu_pro_9173",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "The two moons of Mars are called ...",
"correct_answer": "D",
"choices": [
"Deimos and Triton",
"Phobos and Demos",
"Triton and Deimos",
"Phobos and Deimos",
"Tritos and Desmos",
"Phobos and Mars II",
"Phobos and Tritos",
"Phobos and Triton",
"Tritos and Deimos",
"Deimos and Mars I"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9295": {
"question_id": "mmlu_pro_9295",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "Point charges of $+6.0 \\mu \\mathrm{C}$ and $-4.0 \\mu \\mathrm{C}$ are placed on an $x$ axis, at $x=8.0 \\mathrm{~m}$ and $x=16 \\mathrm{~m}$, respectively. What charge must be placed at $x=24 \\mathrm{~m}$ so that any charge placed at the origin would experience no electrostatic force?\n",
"correct_answer": "I",
"choices": [
"$-50$ $\\mu \\mathrm{C}$",
"$-40$ $\\mu \\mathrm{C}$",
"$-30$ $\\mu \\mathrm{C}$",
"$+30$ $\\mu \\mathrm{C}$",
"$+60$ $\\mu \\mathrm{C}$",
"$-60$ $\\mu \\mathrm{C}$",
"$+45$ $\\mu \\mathrm{C}$",
"$+50$ $\\mu \\mathrm{C}$",
"$-45$ $\\mu \\mathrm{C}$",
"$-75$ $\\mu \\mathrm{C}$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9544": {
"question_id": "mmlu_pro_9544",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "When the color yellow is seen on your TV screen the phosphors being activated on the screen are",
"correct_answer": "B",
"choices": [
"blue and yellow.",
"red and green.",
"blue and red.",
"red and blue.",
"all colors equally.",
"mainly green.",
"mainly blue.",
"green and yellow.",
"mainly yellow.",
"blue and green."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9044": {
"question_id": "mmlu_pro_9044",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A particle of charge $+3.00 \\times 10^{-6} \\mathrm{C}$ is $12.0 \\mathrm{~cm}$ distant from a second particle of charge $-1.50 \\times 10^{-6} \\mathrm{C}$. Calculate the magnitude of the electrostatic force between the particles.",
"correct_answer": "H",
"choices": [
"3.42 N",
"1.56 N",
"0.92 N",
"5.00 N",
"2.35 N",
"4.50 N",
"3.67 N",
"2.81 N",
"1.23 N",
"0.75 N"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9633": {
"question_id": "mmlu_pro_9633",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "A 10-N force at an angle 45\u00b0 above the horizontal has a horizontal component of about",
"correct_answer": "I",
"choices": [
"6 N",
"20 N",
"15 N",
"3 N",
"12 N",
"8 N",
"9 N",
"10 N",
"7 N",
"5 N"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_10926": {
"question_id": "mmlu_pro_10926",
"source_benchmark": "MMLU_Pro",
"domain": "philosophy",
"question_text": "According to Hume, justice:",
"correct_answer": "C",
"choices": [
"has no value.",
"has value in part because it is useful to society.",
"has value solely because it is useful to society.",
"has value solely because it is useful to the agent."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1894": {
"question_id": "mmlu_pro_1894",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Which of the following item difficulty (p) levels maximizes the differentiation of examinees into high- and low-performing groups:",
"correct_answer": "B",
"choices": [
"0",
"0.5",
"0.8",
"0.3",
"1.2",
"0.2",
"0.9",
"1.0",
"0.7",
"1.5"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_427": {
"question_id": "mmlu_pro_427",
"source_benchmark": "MMLU_Pro",
"domain": "business",
"question_text": "This is a form of targeted advertising, on websites, with advertisements selected and served by automated systems based on the content displayed to the user.",
"correct_answer": "E",
"choices": [
"Social media marketing.",
"Display advertising.",
"Mobile advertising.",
"Search engine marketing.",
"Contextual advertising.",
"Email advertising.",
"Direct marketing.",
"Affiliate marketing.",
"Interactive marketing.",
"Internet advertising."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4700": {
"question_id": "mmlu_pro_4700",
"source_benchmark": "MMLU_Pro",
"domain": "history",
"question_text": "At its peak, the population of the city of Teotihuac\u00e1n may have reached:",
"correct_answer": "B",
"choices": [
"50,000 people.",
"200,000 people.",
"500,000 people.",
"1,000,000 people."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_4180": {
"question_id": "mmlu_pro_4180",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "It has been found that the following sequence can be used to prepare sodium sulfate, Na_2 SO_4 : S (s) + O_2 (g)\\rightarrow SO_2 (g) 2SO_2 (g) + O_2 (g)\\rightarrow 2SO_3 (g) SO_3 (g) + H_2 O(l)\\rightarrow H_2 SO_4 (l) 2NaOH + H_2 SO_4\\rightarrow Na_2 SO_4 + 2H_2 O If you performed this sequence of reactions, how many moles of Na_2 SO_4 could possibly be produced if you start with 1 mole of sulfur? How many moles are possible with 4.5 g of water?",
"correct_answer": "D",
"choices": [
"2 moles of Na_2 SO_4, 0.5 moles of Na_2 SO_4",
"0.5 moles of Na_2 SO_4, 0.5 moles of Na_2 SO_4",
"1 mole of Na_2 SO_4, 1 mole of Na_2 SO_4",
"1 mole of Na_2 SO_4, 0.25 moles of Na_2 SO_4",
"1.5 moles of Na_2 SO_4, 0.35 moles of Na_2 SO_4",
"1.5 moles of Na_2 SO_4, 0.5 moles of Na_2 SO_4",
"0.75 moles of Na_2 SO_4, 0.20 moles of Na_2 SO_4",
"2.5 moles of Na_2 SO_4, 0.30 moles of Na_2 SO_4",
"0.5 moles of Na_2 SO_4, 0.15 moles of Na_2 SO_4",
"2 moles of Na_2 SO_4, 0.25 moles of Na_2 SO_4"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_3811": {
"question_id": "mmlu_pro_3811",
"source_benchmark": "MMLU_Pro",
"domain": "chemistry",
"question_text": "Glucose-1-phosphate, essential to the metabolism of carbohydrates in humans, has a molecular weight of 260 g/mole and a density of about 1.5 g/cm^3. What is the volume occupied by one molecule of glucose-1-phosphate?",
"correct_answer": "A",
"choices": [
"2.9 \u00d7 10^-22 cm^3",
"8.2 \u00d7 10^-22 cm^3",
"1.5 \u00d7 10^-22 cm^3",
"1.0 \u00d7 10^-21 cm^3",
"5.0 \u00d7 10^-22 cm^3",
"9.0 \u00d7 10^-23 cm^3",
"6.02 \u00d7 10^-23 cm^3",
"3.5 \u00d7 10^-22 cm^3",
"4.3 \u00d7 10^-22 cm^3",
"7.1 \u00d7 10^-23 cm^3"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_2377": {
"question_id": "mmlu_pro_2377",
"source_benchmark": "MMLU_Pro",
"domain": "psychology",
"question_text": "Discuss some physiological changes that can occur as a result of meditation.",
"correct_answer": "B",
"choices": [
"Stabilization of blood sugar levels",
"Increased alpha and theta brain waves, lowered metabolism and heart rate",
"Heightened cortisol levels and stress response",
"Enhanced sympathetic nervous system activity",
"Decrease in blood oxygen levels",
"Accelerated metabolic processes",
"Decreased brain wave activity",
"Increased heart rate and blood pressure",
"Increased respiratory rate",
"Reduction in cognitive function"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_1748": {
"question_id": "mmlu_pro_1748",
"source_benchmark": "MMLU_Pro",
"domain": "law",
"question_text": "A prominent judge lived next door to a father. Recently, the judge had sentenced the father's son to six months in prison on a narcotics charge. One afternoon while the judge was mowing his lawn, the father decided to avenge his son's conviction. The father set up his water sprinkler behind some shrubbery separating their adjoining properties. As the judge was mowing his lawn and came within reach of the water sprinkler, the father turned on the sprinkling device. The judge did not see the water coming toward him. The water hit the judge in the back and drenched him. The judge would be able to recover against the father for which of the following tort or torts?",
"correct_answer": "G",
"choices": [
"Trespass to land and intentional infliction of emotional distress.",
"Assault and intentional infliction of emotional distress.",
"Negligence and intentional infliction of emotional distress.",
"Trespass to chattels and battery.",
"Negligence and battery.",
"Assault and trespass to chattels.",
"Battery and trespass to land.",
"Negligence and assault.",
"Battery, assault and trespass to land."
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_11562": {
"question_id": "mmlu_pro_11562",
"source_benchmark": "MMLU_Pro",
"domain": "engineering",
"question_text": "Saturated steam, at 100\u00b0F, condenses on the outer surface of a vertical pipe, 1 ft long and 6 in OD, maintained at 80\u00b0F. Calculate a) the average coefficient of heat transfer, b) the rate of heat transfer, and c) the mass flow rate of the condensate, assuming laminar flow.",
"correct_answer": "C",
"choices": [
"a) 1,250 Btu/hr-ft^2-\u00b0F, b) 41,000 Btu/hr, c) 42lbm/hr",
"a) 1,500 Btu/hr-ft^2-\u00b0F, b) 45,000 Btu/hr, c) 45lbm/hr",
"a) 1,357.2 Btu/hr-ft^2-\u00b0F, b) 42,638 Btu/hr, c) 41lbm/hr",
"a) 1,200 Btu/hr-ft^2-\u00b0F, b) 40,000 Btu/hr, c) 40lbm/hr"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
},
"mmlu_pro_9300": {
"question_id": "mmlu_pro_9300",
"source_benchmark": "MMLU_Pro",
"domain": "physics",
"question_text": "Approximately how many oxygen molecules arrive each second at the mitochondrion of an active person with a mass of $84 \\mathrm{~kg}$ ? The following data are available: Oxygen consumption is about $40 . \\mathrm{mL}$ of $\\mathrm{O}_2$ per minute per kilogram of body weight, measured at $T=300 . \\mathrm{K}$ and $P=1.00 \\mathrm{~atm}$. In an adult there are about $1.6 \\times 10^{10}$ cells per kg body mass. Each cell contains about 800 . mitochondria.",
"correct_answer": "C",
"choices": [
"$0.8 \\times 10^6$",
"$0.97 \\times 10^6$",
"$1.27 \\times 10^6$",
"$0.5 \\times 10^6$",
"$1.4 \\times 10^6$",
"$1.77 \\times 10^6$",
"$2.2 \\times 10^6$",
"$2.5 \\times 10^6$",
"$1.57 \\times 10^6$",
"$1.0 \\times 10^7$"
],
"model_results": {},
"success_rate": null,
"num_models": 0,
"difficulty_tier": null,
"difficulty_label": null
}
}
}