{"benchmark":null,"runs":[{"runId":"20260507T213757_gpt-realtime-2_e916af24","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.62,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:56+00:00","createdAt":"2026-05-07T23:02:04.707876+00:00"},{"runId":"20260507T213745_gpt-realtime-2_74fa002e","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.17,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:54+00:00","createdAt":"2026-05-07T23:02:03.025108+00:00"},{"runId":"20260507T213756_gpt-realtime-2_18c5eeda","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.08,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:35+00:00","createdAt":"2026-05-07T23:02:04.186351+00:00"},{"runId":"20260507T213746_gpt-realtime-2_47ac5f21","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.09,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:26+00:00","createdAt":"2026-05-07T23:02:03.27609+00:00"},{"runId":"20260507T213810_gpt-realtime-2_dc4ed617","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.77,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:02+00:00","createdAt":"2026-05-07T23:02:06.051774+00:00"},{"runId":"20260507T213740_gpt-realtime-2_3de3c631","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.39,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:54:54+00:00","createdAt":"2026-05-07T23:02:02.139932+00:00"},{"runId":"20260507T213731_gpt-realtime-2_19d179ec","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:54:26+00:00","createdAt":"2026-05-07T23:02:00.559346+00:00"},{"runId":"20260507T213719_gpt-realtime-2_68995e96","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.17,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:54:16+00:00","createdAt":"2026-05-07T23:01:59.377312+00:00"},{"runId":"20260507T213733_gpt-realtime-2_d4d8a941","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":67.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:54:03+00:00","createdAt":"2026-05-07T23:02:01.614343+00:00"},{"runId":"20260507T213732_gpt-realtime-2_d51a1030","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:53:54+00:00","createdAt":"2026-05-07T23:02:01.277953+00:00"},{"runId":"20260507T213717_gpt-realtime-2_f5fe90a0","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":75.05,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:53:25+00:00","createdAt":"2026-05-07T23:01:59.022652+00:00"},{"runId":"20260507T213807_gpt-realtime-2_8c57a4df","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":50.06,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:51:52+00:00","createdAt":"2026-05-07T23:02:05.781267+00:00"},{"runId":"20260507T213754_gpt-realtime-2_e665aa17","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.41,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:51:44+00:00","createdAt":"2026-05-07T23:02:03.522527+00:00"},{"runId":"20260507T213817_gpt-realtime-2_eb69ce8b","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.12,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:51:18+00:00","createdAt":"2026-05-07T23:02:06.287369+00:00"},{"runId":"20260507T213802_gpt-realtime-2_f327df73","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":20},"passRate":56,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:51:04+00:00","createdAt":"2026-05-07T23:02:05.30229+00:00"},{"runId":"20260507T213759_gpt-realtime-2_dd65ef9c","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.62,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:50:30+00:00","createdAt":"2026-05-07T23:02:05.056082+00:00"},{"runId":"20260507T213731_gpt-realtime-2_2d5f3c02","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":47.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:50:29+00:00","createdAt":"2026-05-07T23:02:00.949261+00:00"},{"runId":"20260507T213742_gpt-realtime-2_68e1e557","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":52.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:50:21+00:00","createdAt":"2026-05-07T23:02:02.716414+00:00"},{"runId":"20260507T213722_gpt-realtime-2_08eb497d","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":54.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:49:57+00:00","createdAt":"2026-05-07T23:01:59.874918+00:00"},{"runId":"20260507T213716_gpt-realtime-2_a5f7132d","benchmark":"appointment_bench","model":"gpt-realtime-2","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":7,"kbGrounding":8,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":35.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:49:48+00:00","createdAt":"2026-05-07T23:01:58.151948+00:00"},{"runId":"20260507T061247_grok-voice-think-fast-1.0_cdd4ee1f","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:30+00:00","createdAt":"2026-05-07T20:57:55.998607+00:00"},{"runId":"20260507T060852_grok-voice-think-fast-1.0_c0e50623","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":92.19,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:30:26+00:00","createdAt":"2026-05-07T20:57:55.224014+00:00"},{"runId":"20260507T060850_grok-voice-think-fast-1.0_6378972b","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":86.64,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:29:23+00:00","createdAt":"2026-05-07T20:57:54.727593+00:00"},{"runId":"20260507T060836_grok-voice-think-fast-1.0_ab2e25de","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":94.05,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:28:54+00:00","createdAt":"2026-05-07T20:57:54.005132+00:00"},{"runId":"20260507T060830_grok-voice-think-fast-1.0_6fd54741","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":86.38,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:28:40+00:00","createdAt":"2026-05-07T20:57:53.762539+00:00"},{"runId":"20260507T060851_grok-voice-think-fast-1.0_a7aae41d","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":53.91,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:28:18+00:00","createdAt":"2026-05-07T20:57:54.973394+00:00"},{"runId":"20260507T060847_grok-voice-think-fast-1.0_0b7019be","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:28:08+00:00","createdAt":"2026-05-07T20:57:54.480113+00:00"},{"runId":"20260507T060817_grok-voice-think-fast-1.0_9a616026","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":92.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:37+00:00","createdAt":"2026-05-07T20:57:52.223097+00:00"},{"runId":"20260507T060852_grok-voice-think-fast-1.0_fb8893ae","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":57.17,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:22+00:00","createdAt":"2026-05-07T20:57:55.468887+00:00"},{"runId":"20260507T060828_grok-voice-think-fast-1.0_8e59262e","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:22+00:00","createdAt":"2026-05-07T20:57:53.25565+00:00"},{"runId":"20260507T060828_grok-voice-think-fast-1.0_c7e89649","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":60.62,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:19+00:00","createdAt":"2026-05-07T20:57:53.507317+00:00"},{"runId":"20260507T060857_grok-voice-think-fast-1.0_11e10dca","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":78.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:05+00:00","createdAt":"2026-05-07T20:57:55.757867+00:00"},{"runId":"20260507T060824_grok-voice-think-fast-1.0_d82f9b4c","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":55.12,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:05+00:00","createdAt":"2026-05-07T20:57:53.002087+00:00"},{"runId":"20260507T060801_grok-voice-think-fast-1.0_00081af2","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":84.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:27:03+00:00","createdAt":"2026-05-07T20:57:51.199518+00:00"},{"runId":"20260507T060810_grok-voice-think-fast-1.0_6c1621f1","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":78.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:26:29+00:00","createdAt":"2026-05-07T20:57:51.73065+00:00"},{"runId":"20260507T060823_grok-voice-think-fast-1.0_33e57f14","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":20},"passRate":66.6,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:26:08+00:00","createdAt":"2026-05-07T20:57:52.606921+00:00"},{"runId":"20260507T060836_grok-voice-think-fast-1.0_f904be1f","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":84.57,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:25:48+00:00","createdAt":"2026-05-07T20:57:54.239435+00:00"},{"runId":"20260507T060808_grok-voice-think-fast-1.0_5bea49b4","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:25:43+00:00","createdAt":"2026-05-07T20:57:51.469018+00:00"},{"runId":"20260507T060816_grok-voice-think-fast-1.0_c4adb252","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:24:08+00:00","createdAt":"2026-05-07T20:57:51.974613+00:00"},{"runId":"20260507T055129_grok-voice-think-fast-1.0_4931b861","benchmark":"appointment_bench","model":"grok-voice-think-fast-1.0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:12:55+00:00","createdAt":"2026-05-07T20:57:50.71885+00:00"},{"runId":"20260414T152031_ultravox-v0.7_0c93179b","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":92.99,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:39:50+00:00","createdAt":"2026-04-17T18:05:37.134916+00:00"},{"runId":"20260414T152031_ultravox-v0.7_110469e0","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:38:40+00:00","createdAt":"2026-04-17T18:05:38.326153+00:00"},{"runId":"20260414T152031_ultravox-v0.7_019afd82","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:36:31+00:00","createdAt":"2026-04-17T18:05:36.528849+00:00"},{"runId":"20260414T152031_gpt-realtime_a44742e3","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":78.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:35:14+00:00","createdAt":"2026-04-17T18:05:35.959265+00:00"},{"runId":"20260411T191757_amazon.nova-2-sonic-v1_0_cd67e09f","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":37.92,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:33:48+00:00","createdAt":"2026-04-17T18:03:31.905155+00:00"},{"runId":"20260414T152031_gemini-3.1-flash-live-preview_3877ac1a","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.23,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:33:47+00:00","createdAt":"2026-04-17T18:05:35.335437+00:00"},{"runId":"20260411T203310_ultravox-v0.7_9acf2650","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":57.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:33:27+00:00","createdAt":"2026-04-17T18:05:02.689006+00:00"},{"runId":"20260411T200351_grok-realtime_2ebbb49f","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:33:17+00:00","createdAt":"2026-04-17T18:04:21.677917+00:00"},{"runId":"20260411T202826_grok-realtime_6a4acc7c","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.47,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:32:09+00:00","createdAt":"2026-04-17T18:04:55.616809+00:00"},{"runId":"20260411T200724_amazon.nova-2-sonic-v1_0_702e1ffc","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:31:36+00:00","createdAt":"2026-04-17T18:04:26.020226+00:00"},{"runId":"20260411T202743_gemini-3.1-flash-live-preview_527ab15e","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:31:22+00:00","createdAt":"2026-04-17T18:04:53.944391+00:00"},{"runId":"20260411T202643_gpt-realtime_c07a1627","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:30:17+00:00","createdAt":"2026-04-17T18:04:52.833029+00:00"},{"runId":"20260414T152031_ultravox-v0.7_0de362a7","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":11,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:29:59+00:00","createdAt":"2026-04-17T18:05:37.756957+00:00"},{"runId":"20260411T200646_ultravox-v0.7_a00e1d32","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":84.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:29:55+00:00","createdAt":"2026-04-17T18:04:24.899659+00:00"},{"runId":"20260411T200501_gemini-2.5-flash-native-audio-preview-12-2025_6049ea10","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:29:52+00:00","createdAt":"2026-04-17T18:04:23.526885+00:00"},{"runId":"20260411T194456_grok-realtime_d3738b4e","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:28:13+00:00","createdAt":"2026-04-17T18:04:00.087771+00:00"},{"runId":"20260411T195723_gpt-realtime_2f58033b","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.46,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:27:38+00:00","createdAt":"2026-04-17T18:04:15.192193+00:00"},{"runId":"20260411T195044_gemini-2.5-flash-native-audio-preview-12-2025_ebbe9b89","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.94,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:26:13+00:00","createdAt":"2026-04-17T18:04:05.298453+00:00"},{"runId":"20260411T191756_gpt-realtime_bc5c2ee2","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":65.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:25:26+00:00","createdAt":"2026-04-17T18:03:27.868617+00:00"},{"runId":"20260411T015952_gpt-realtime_a30b93f8","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":61.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:25:18+00:00","createdAt":"2026-04-17T18:03:17.307746+00:00"},{"runId":"20260411T191757_gemini-2.5-flash-native-audio-preview-12-2025_4c4ab79a","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":56.22,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:25:09+00:00","createdAt":"2026-04-17T18:03:32.448189+00:00"},{"runId":"20260411T000628_gemini-2.5-flash-native-audio-preview-12-2025_df450d8f","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:24:04+00:00","createdAt":"2026-04-17T18:02:04.245118+00:00"},{"runId":"20260411T015220_gemini-2.5-flash-native-audio-preview-12-2025_1b61d08e","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":54.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:24:03+00:00","createdAt":"2026-04-17T18:03:14.978794+00:00"},{"runId":"20260411T013338_gpt-realtime_6f6473b0","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":65.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:23:39+00:00","createdAt":"2026-04-17T18:03:03.443232+00:00"},{"runId":"20260411T011940_gemini-2.5-flash-native-audio-preview-12-2025_68e112bf","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":11,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":52.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:23:20+00:00","createdAt":"2026-04-17T18:02:51.504159+00:00"},{"runId":"20260411T003637_gemini-3.1-flash-live-preview_3db5ec7e","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":57.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:22:33+00:00","createdAt":"2026-04-17T18:02:24.748698+00:00"},{"runId":"20260411T000628_gpt-realtime_490c2bb5","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":74.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:22:25+00:00","createdAt":"2026-04-17T18:02:05.292532+00:00"},{"runId":"20260411T004830_gpt-realtime_40b12494","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:22:09+00:00","createdAt":"2026-04-17T18:02:30.733192+00:00"},{"runId":"20260411T013139_gemini-3.1-flash-live-preview_93ec63ea","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":1,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":1.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:21:56+00:00","createdAt":"2026-04-17T18:03:00.902114+00:00"},{"runId":"20260414T005313_glm-realtime-flash_b1436d2b","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T08:50:41+00:00","createdAt":"2026-04-17T18:05:34.770211+00:00"},{"runId":"20260414T005236_glm-realtime-flash_de71fc20","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T08:50:04+00:00","createdAt":"2026-04-17T18:05:34.163103+00:00"},{"runId":"20260414T005155_glm-realtime-flash_2f1e0945","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T08:49:21+00:00","createdAt":"2026-04-17T18:05:33.585207+00:00"},{"runId":"20260413T235719_glm-realtime-flash_198d29b4","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:54:59+00:00","createdAt":"2026-04-17T18:05:33.005774+00:00"},{"runId":"20260413T235701_glm-realtime-flash_a168d1a0","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:54:41+00:00","createdAt":"2026-04-17T18:05:32.465784+00:00"},{"runId":"20260413T235606_glm-realtime-flash_de9b6d4a","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:53:32+00:00","createdAt":"2026-04-17T18:05:31.333788+00:00"},{"runId":"20260413T235531_glm-realtime-flash_a4a11c22","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:53:12+00:00","createdAt":"2026-04-17T18:05:30.787215+00:00"},{"runId":"20260413T235612_glm-realtime-flash_553f030c","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:52:35+00:00","createdAt":"2026-04-17T18:05:31.898031+00:00"},{"runId":"20260413T235524_glm-realtime-flash_60ac01b3","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:51:54+00:00","createdAt":"2026-04-17T18:05:30.24125+00:00"},{"runId":"20260413T225947_glm-realtime-flash_88f2bd3b","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:57:17+00:00","createdAt":"2026-04-17T18:05:29.6825+00:00"},{"runId":"20260413T225936_glm-realtime-flash_dbe0413f","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:57:00+00:00","createdAt":"2026-04-17T18:05:29.136499+00:00"},{"runId":"20260413T225846_glm-realtime-flash_3d8e9a38","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:56:11+00:00","createdAt":"2026-04-17T18:05:28.580443+00:00"},{"runId":"20260413T225842_glm-realtime-flash_ce687eaf","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:56:05+00:00","createdAt":"2026-04-17T18:05:28.020645+00:00"},{"runId":"20260413T225800_glm-realtime-flash_87cc5985","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:55:30+00:00","createdAt":"2026-04-17T18:05:26.954334+00:00"},{"runId":"20260413T225811_glm-realtime-flash_0ce26b00","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:55:23+00:00","createdAt":"2026-04-17T18:05:27.48456+00:00"},{"runId":"20260413T220050_glm-realtime-flash_b4442471","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T05:59:46+00:00","createdAt":"2026-04-17T18:05:24.597681+00:00"},{"runId":"20260413T220050_glm-realtime-flash_1540b3f9","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T05:59:35+00:00","createdAt":"2026-04-17T18:05:23.412054+00:00"},{"runId":"20260413T220050_glm-realtime-flash_e112524f","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T05:58:45+00:00","createdAt":"2026-04-17T18:05:26.399114+00:00"},{"runId":"20260413T220050_glm-realtime-flash_c1493ebc","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T05:58:40+00:00","createdAt":"2026-04-17T18:05:25.729222+00:00"},{"runId":"20260413T220050_glm-realtime-flash_a78c615e","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T05:58:10+00:00","createdAt":"2026-04-17T18:05:23.987971+00:00"},{"runId":"20260413T172526_glm-realtime-flash_613b9f6f","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T01:23:37+00:00","createdAt":"2026-04-17T18:05:15.556335+00:00"},{"runId":"20260413T172318_glm-realtime-flash_15466775","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T01:22:01+00:00","createdAt":"2026-04-17T18:05:15.011893+00:00"},{"runId":"20260413T162527_glm-realtime-flash_2eca9733","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T00:23:20+00:00","createdAt":"2026-04-17T18:05:14.471228+00:00"},{"runId":"20260413T162526_glm-realtime-flash_d1405d98","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T00:23:17+00:00","createdAt":"2026-04-17T18:05:13.921966+00:00"},{"runId":"20260413T162526_glm-realtime-flash_3f5100d7","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T00:23:17+00:00","createdAt":"2026-04-17T18:05:13.388215+00:00"},{"runId":"20260413T162526_glm-realtime-flash_02d72df9","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T00:23:16+00:00","createdAt":"2026-04-17T18:05:12.840705+00:00"},{"runId":"20260411T013336_amazon.nova-2-sonic-v1_0_26cccba2","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T22:27:53+00:00","createdAt":"2026-04-17T18:03:02.619877+00:00"},{"runId":"20260413T135034_glm-realtime-flash_feb5b81e","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T21:48:26+00:00","createdAt":"2026-04-17T18:05:12.110662+00:00"},{"runId":"20260413T135034_glm-realtime-flash_4d3f2702","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T21:48:22+00:00","createdAt":"2026-04-17T18:05:11.555458+00:00"},{"runId":"20260413T134821_glm-realtime-flash_cf0bfb5b","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T21:46:25+00:00","createdAt":"2026-04-17T18:05:10.41263+00:00"},{"runId":"20260413T134824_glm-realtime-flash_841c45e9","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T21:46:18+00:00","createdAt":"2026-04-17T18:05:10.970584+00:00"},{"runId":"20260413T125035_glm-realtime-flash_2eced878","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T20:48:29+00:00","createdAt":"2026-04-17T18:05:08.669302+00:00"},{"runId":"20260413T125035_glm-realtime-flash_93d185bc","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T20:48:23+00:00","createdAt":"2026-04-17T18:05:09.222886+00:00"},{"runId":"20260413T125035_glm-realtime-flash_e86837c8","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T20:48:19+00:00","createdAt":"2026-04-17T18:05:09.797847+00:00"},{"runId":"20260413T125035_glm-realtime-flash_278cdc7a","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T20:48:19+00:00","createdAt":"2026-04-17T18:05:08.131642+00:00"},{"runId":"20260411T203431_amazon.nova-2-sonic-v1_0_3741e751","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:09:46+00:00","createdAt":"2026-04-17T18:05:03.931765+00:00"},{"runId":"20260411T202519_grok-realtime_e07c47d6","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":67.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:03:17+00:00","createdAt":"2026-04-17T18:04:51.731379+00:00"},{"runId":"20260411T203622_ultravox-v0.7_55c8493e","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":88.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:55:26+00:00","createdAt":"2026-04-17T18:05:06.375718+00:00"},{"runId":"20260411T203633_amazon.nova-2-sonic-v1_0_6f771d90","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":15,"ambiguityHandling":0,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":33.46,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:54:24+00:00","createdAt":"2026-04-17T18:05:07.543818+00:00"},{"runId":"20260411T203630_grok-realtime_2803ab2d","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.52,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:52:48+00:00","createdAt":"2026-04-17T18:05:06.960786+00:00"},{"runId":"20260411T203344_grok-realtime_196eab9b","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":75.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:49:52+00:00","createdAt":"2026-04-17T18:05:03.343978+00:00"},{"runId":"20260411T203014_ultravox-v0.7_5555004f","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":80.73,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:49:38+00:00","createdAt":"2026-04-17T18:04:59.043253+00:00"},{"runId":"20260411T202530_amazon.nova-2-sonic-v1_0_231cba20","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":17,"ambiguityHandling":0,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":34.52,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:48:28+00:00","createdAt":"2026-04-17T18:04:52.287506+00:00"},{"runId":"20260411T202909_gemini-2.5-flash-native-audio-preview-12-2025_77afca5e","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:47:49+00:00","createdAt":"2026-04-17T18:04:57.38995+00:00"},{"runId":"20260411T203545_gemini-3.1-flash-live-preview_127df050","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":80.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:47:07+00:00","createdAt":"2026-04-17T18:05:05.65257+00:00"},{"runId":"20260411T202704_gemini-2.5-flash-native-audio-preview-12-2025_9803e3c8","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:47:00+00:00","createdAt":"2026-04-17T18:04:53.366215+00:00"},{"runId":"20260411T203014_grok-realtime_95063555","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":54.81,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:46:55+00:00","createdAt":"2026-04-17T18:04:58.456883+00:00"},{"runId":"20260411T203050_amazon.nova-2-sonic-v1_0_4ea1e518","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":63.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:46:35+00:00","createdAt":"2026-04-17T18:04:59.747919+00:00"},{"runId":"20260411T203509_gemini-2.5-flash-native-audio-preview-12-2025_3a655987","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":80.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:46:30+00:00","createdAt":"2026-04-17T18:05:05.108395+00:00"},{"runId":"20260411T203102_gpt-realtime_08660a8e","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":84.85,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:46:09+00:00","createdAt":"2026-04-17T18:05:00.52876+00:00"},{"runId":"20260411T203131_gemini-3.1-flash-live-preview_e76d1873","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":84.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:45:32+00:00","createdAt":"2026-04-17T18:05:02.097156+00:00"},{"runId":"20260411T202405_ultravox-v0.7_504ac42c","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":78.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:45:12+00:00","createdAt":"2026-04-17T18:04:51.121505+00:00"},{"runId":"20260411T202832_amazon.nova-2-sonic-v1_0_8fabf736","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":7,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":53.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:45:11+00:00","createdAt":"2026-04-17T18:04:56.319404+00:00"},{"runId":"20260411T202835_gpt-realtime_394e9ba4","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:43:39+00:00","createdAt":"2026-04-17T18:04:56.832637+00:00"},{"runId":"20260411T203445_gpt-realtime_df58fe9a","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.68,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:43:27+00:00","createdAt":"2026-04-17T18:05:04.497198+00:00"},{"runId":"20260411T203131_gemini-2.5-flash-native-audio-preview-12-2025_1ef01ac6","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":78.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:43:25+00:00","createdAt":"2026-04-17T18:05:01.200894+00:00"},{"runId":"20260411T202931_gemini-3.1-flash-live-preview_b800f615","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":92.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:43:22+00:00","createdAt":"2026-04-17T18:04:57.934248+00:00"},{"runId":"20260411T202342_gemini-2.5-flash-native-audio-preview-12-2025_6c634ba7","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":62.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:38:52+00:00","createdAt":"2026-04-17T18:04:50.022982+00:00"},{"runId":"20260411T201808_amazon.nova-2-sonic-v1_0_88382398","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":48.56,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:38:41+00:00","createdAt":"2026-04-17T18:04:45.42111+00:00"},{"runId":"20260411T201616_amazon.nova-2-sonic-v1_0_6b8453e7","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":44.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:38:23+00:00","createdAt":"2026-04-17T18:04:39.433934+00:00"},{"runId":"20260411T202755_ultravox-v0.7_f4c8e750","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:38:22+00:00","createdAt":"2026-04-17T18:04:54.72554+00:00"},{"runId":"20260411T202317_gpt-realtime_de6bbd3b","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.88,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:37:47+00:00","createdAt":"2026-04-17T18:04:49.247199+00:00"},{"runId":"20260411T202351_gemini-3.1-flash-live-preview_129eb228","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":63.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:37:26+00:00","createdAt":"2026-04-17T18:04:50.565421+00:00"},{"runId":"20260411T202143_amazon.nova-2-sonic-v1_0_662e523d","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":21,"ambiguityHandling":0,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:36:32+00:00","createdAt":"2026-04-17T18:04:48.358194+00:00"},{"runId":"20260411T201230_amazon.nova-2-sonic-v1_0_4cccc3bb","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":63.37,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:36:21+00:00","createdAt":"2026-04-17T18:04:34.916508+00:00"},{"runId":"20260411T202113_grok-realtime_b4df59c9","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":56.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:35:44+00:00","createdAt":"2026-04-17T18:04:47.55884+00:00"},{"runId":"20260411T201734_ultravox-v0.7_3d896c6c","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.03,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:35:08+00:00","createdAt":"2026-04-17T18:04:42.871606+00:00"},{"runId":"20260411T201737_grok-realtime_d006b4cb","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:34:43+00:00","createdAt":"2026-04-17T18:04:43.615589+00:00"},{"runId":"20260411T201832_gpt-realtime_37353da1","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":80.28,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:34:30+00:00","createdAt":"2026-04-17T18:04:46.118075+00:00"},{"runId":"20260411T201652_gemini-2.5-flash-native-audio-preview-12-2025_44411272","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":83.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:33:42+00:00","createdAt":"2026-04-17T18:04:41.069275+00:00"},{"runId":"20260411T201846_gemini-2.5-flash-native-audio-preview-12-2025_d3cfefb1","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":10,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":47.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:33:08+00:00","createdAt":"2026-04-17T18:04:46.744522+00:00"},{"runId":"20260411T201516_ultravox-v0.7_70e82785","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":61.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:31:30+00:00","createdAt":"2026-04-17T18:04:37.752835+00:00"},{"runId":"20260411T201041_amazon.nova-2-sonic-v1_0_c3868712","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":45.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:31:01+00:00","createdAt":"2026-04-17T18:04:30.361873+00:00"},{"runId":"20260411T201551_grok-realtime_92a773ae","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:30:49+00:00","createdAt":"2026-04-17T18:04:38.626487+00:00"},{"runId":"20260411T201645_gpt-realtime_ddd24763","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:30:13+00:00","createdAt":"2026-04-17T18:04:40.213473+00:00"},{"runId":"20260411T201026_grok-realtime_bff55039","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:30:12+00:00","createdAt":"2026-04-17T18:04:29.507795+00:00"},{"runId":"20260411T201223_ultravox-v0.7_be6b90bb","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":83.87,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:29:29+00:00","createdAt":"2026-04-17T18:04:33.720284+00:00"},{"runId":"20260411T201224_grok-realtime_d8d95cdf","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":61.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:29:07+00:00","createdAt":"2026-04-17T18:04:34.40585+00:00"},{"runId":"20260411T201726_gemini-3.1-flash-live-preview_502c81f1","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":81.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:28:34+00:00","createdAt":"2026-04-17T18:04:42.092108+00:00"},{"runId":"20260411T201020_ultravox-v0.7_df5d61b8","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":19,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":98.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:28:31+00:00","createdAt":"2026-04-17T18:04:28.597098+00:00"},{"runId":"20260411T201320_gpt-realtime_293df4b6","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":56.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:28:24+00:00","createdAt":"2026-04-17T18:04:35.500112+00:00"},{"runId":"20260411T201347_gemini-2.5-flash-native-audio-preview-12-2025_7fdf9479","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":41.77,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:27:42+00:00","createdAt":"2026-04-17T18:04:36.24377+00:00"},{"runId":"20260411T201129_gemini-3.1-flash-live-preview_37238227","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:27:02+00:00","createdAt":"2026-04-17T18:04:32.543125+00:00"},{"runId":"20260411T201104_gemini-2.5-flash-native-audio-preview-12-2025_e4524035","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.28,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:26:42+00:00","createdAt":"2026-04-17T18:04:31.718675+00:00"},{"runId":"20260411T201417_gemini-3.1-flash-live-preview_3a05ae46","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":94.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:25:29+00:00","createdAt":"2026-04-17T18:04:37.034015+00:00"},{"runId":"20260411T201058_gpt-realtime_bd421c31","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:24:04+00:00","createdAt":"2026-04-17T18:04:31.184564+00:00"},{"runId":"20260411T200713_grok-realtime_e006c5cb","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:23:50+00:00","createdAt":"2026-04-17T18:04:25.435139+00:00"},{"runId":"20260411T200739_gpt-realtime_39b8a95f","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.29,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:23:41+00:00","createdAt":"2026-04-17T18:04:26.583586+00:00"},{"runId":"20260411T200826_gemini-3.1-flash-live-preview_f342d178","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":89.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:21:42+00:00","createdAt":"2026-04-17T18:04:27.802278+00:00"},{"runId":"20260411T200411_amazon.nova-2-sonic-v1_0_8983c942","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:21:01+00:00","createdAt":"2026-04-17T18:04:22.304764+00:00"},{"runId":"20260411T200805_gemini-2.5-flash-native-audio-preview-12-2025_24962a53","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:20:48+00:00","createdAt":"2026-04-17T18:04:27.115755+00:00"},{"runId":"20260411T195943_amazon.nova-2-sonic-v1_0_d09795e5","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":57.47,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:18:45+00:00","createdAt":"2026-04-17T18:04:18.586221+00:00"},{"runId":"20260411T200556_gemini-3.1-flash-live-preview_397b83da","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":94.15,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:18:30+00:00","createdAt":"2026-04-17T18:04:24.394598+00:00"},{"runId":"20260411T200443_gpt-realtime_1ce7c734","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:18:07+00:00","createdAt":"2026-04-17T18:04:22.997558+00:00"},{"runId":"20260411T195521_amazon.nova-2-sonic-v1_0_28f65f82","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":65.13,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:17:36+00:00","createdAt":"2026-04-17T18:04:14.628617+00:00"},{"runId":"20260411T200216_gemini-2.5-flash-native-audio-preview-12-2025_50b991ca","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":62.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:17:32+00:00","createdAt":"2026-04-17T18:04:19.703852+00:00"},{"runId":"20260411T195942_grok-realtime_2734b43a","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":65.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:17:24+00:00","createdAt":"2026-04-17T18:04:17.863636+00:00"},{"runId":"20260411T200311_ultravox-v0.7_24846264","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.13,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:16:51+00:00","createdAt":"2026-04-17T18:04:20.874365+00:00"},{"runId":"20260411T200238_gemini-3.1-flash-live-preview_265776ae","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":89.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:16:44+00:00","createdAt":"2026-04-17T18:04:20.275839+00:00"},{"runId":"20260411T195344_ultravox-v0.7_0c28ba4e","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":85.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:16:15+00:00","createdAt":"2026-04-17T18:04:10.056704+00:00"},{"runId":"20260411T191757_gpt-realtime_1f06990d","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:15:49+00:00","createdAt":"2026-04-17T18:03:36.422082+00:00"},{"runId":"20260411T195841_ultravox-v0.7_9a977cd2","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":23,"ambiguityHandling":0,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.03,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:15:14+00:00","createdAt":"2026-04-17T18:04:17.11376+00:00"},{"runId":"20260411T194457_amazon.nova-2-sonic-v1_0_de87d8d5","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":60.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:13:46+00:00","createdAt":"2026-04-17T18:04:00.659512+00:00"},{"runId":"20260411T194901_amazon.nova-2-sonic-v1_0_a8c7c3fe","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":13,"ambiguityHandling":0,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":32.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:13:18+00:00","createdAt":"2026-04-17T18:04:04.185132+00:00"},{"runId":"20260411T200132_gpt-realtime_cfb4bf3e","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":63.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:12:29+00:00","createdAt":"2026-04-17T18:04:19.173112+00:00"},{"runId":"20260411T195414_amazon.nova-2-sonic-v1_0_b54e3424","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:12:23+00:00","createdAt":"2026-04-17T18:04:11.319255+00:00"},{"runId":"20260411T195359_grok-realtime_e60dead0","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.04,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:12:22+00:00","createdAt":"2026-04-17T18:04:10.747368+00:00"},{"runId":"20260411T195840_gemini-2.5-flash-native-audio-preview-12-2025_e48a10e5","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:11:28+00:00","createdAt":"2026-04-17T18:04:15.781118+00:00"},{"runId":"20260411T195457_grok-realtime_16e61d09","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:10:57+00:00","createdAt":"2026-04-17T18:04:13.915423+00:00"},{"runId":"20260411T195150_grok-realtime_d0fc1016","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:10:39+00:00","createdAt":"2026-04-17T18:04:06.99172+00:00"},{"runId":"20260411T195841_gemini-3.1-flash-live-preview_f73a3d53","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:10:25+00:00","createdAt":"2026-04-17T18:04:16.542654+00:00"},{"runId":"20260411T195416_gpt-realtime_2dc97c4b","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":85.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:10:19+00:00","createdAt":"2026-04-17T18:04:11.933486+00:00"},{"runId":"20260411T194820_ultravox-v0.7_9ffc72db","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":67.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:08:24+00:00","createdAt":"2026-04-17T18:04:03.032647+00:00"},{"runId":"20260411T195440_gemini-3.1-flash-live-preview_aafe04f6","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":56.82,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:08:04+00:00","createdAt":"2026-04-17T18:04:13.342341+00:00"},{"runId":"20260411T195058_ultravox-v0.7_ef8166b4","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:07:38+00:00","createdAt":"2026-04-17T18:04:06.425956+00:00"},{"runId":"20260411T195154_amazon.nova-2-sonic-v1_0_80c125d8","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:07:22+00:00","createdAt":"2026-04-17T18:04:07.578693+00:00"},{"runId":"20260411T195341_gemini-3.1-flash-live-preview_fd15ba6f","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":79.68,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:07:12+00:00","createdAt":"2026-04-17T18:04:09.423315+00:00"},{"runId":"20260411T194113_amazon.nova-2-sonic-v1_0_fd783bf7","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:06:45+00:00","createdAt":"2026-04-17T18:03:57.147556+00:00"},{"runId":"20260411T195429_gemini-2.5-flash-native-audio-preview-12-2025_0de4b330","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":85.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:04:59+00:00","createdAt":"2026-04-17T18:04:12.733253+00:00"},{"runId":"20260411T194832_grok-realtime_ae168cba","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:04:42+00:00","createdAt":"2026-04-17T18:04:03.620716+00:00"},{"runId":"20260411T194932_gpt-realtime_4cb179c1","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":75.99,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:04:09+00:00","createdAt":"2026-04-17T18:04:04.755084+00:00"},{"runId":"20260411T195239_gemini-2.5-flash-native-audio-preview-12-2025_0dd5c32e","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.63,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:03:10+00:00","createdAt":"2026-04-17T18:04:08.867579+00:00"},{"runId":"20260411T194406_ultravox-v0.7_e96e475f","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":91.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:02:36+00:00","createdAt":"2026-04-17T18:03:59.469112+00:00"},{"runId":"20260411T195051_gemini-3.1-flash-live-preview_f8beb009","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":86.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:02:15+00:00","createdAt":"2026-04-17T18:04:05.836011+00:00"},{"runId":"20260411T194804_gemini-3.1-flash-live-preview_abd265a0","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":92.99,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:59:42+00:00","createdAt":"2026-04-17T18:04:02.457257+00:00"},{"runId":"20260411T194653_gemini-2.5-flash-native-audio-preview-12-2025_bd0e97a6","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:58:40+00:00","createdAt":"2026-04-17T18:04:01.287746+00:00"},{"runId":"20260411T194305_gemini-2.5-flash-native-audio-preview-12-2025_ef7e7329","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":43.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:58:40+00:00","createdAt":"2026-04-17T18:03:58.356699+00:00"},{"runId":"20260411T194350_gemini-3.1-flash-live-preview_8e55c8dd","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:57:22+00:00","createdAt":"2026-04-17T18:03:58.937388+00:00"},{"runId":"20260411T193112_grok-realtime_6b7f35b2","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:54:27+00:00","createdAt":"2026-04-17T18:03:44.243203+00:00"},{"runId":"20260411T194011_grok-realtime_4fa9b0b5","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":7,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":37.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:53:39+00:00","createdAt":"2026-04-17T18:03:56.602355+00:00"},{"runId":"20260411T193932_gemini-3.1-flash-live-preview_55d279fe","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.87,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:51:49+00:00","createdAt":"2026-04-17T18:03:55.300423+00:00"},{"runId":"20260411T193615_gemini-3.1-flash-live-preview_dc777258","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":44.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T02:50:50+00:00","createdAt":"2026-04-17T18:03:51.967005+00:00"},{"runId":"20260411T012909_glm-realtime-flash_8d576206","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T09:27:49+00:00","createdAt":"2026-04-17T18:02:58.127119+00:00"},{"runId":"20260411T012454_glm-realtime-flash_a7db56a0","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T09:23:22+00:00","createdAt":"2026-04-17T18:02:53.674028+00:00"},{"runId":"20260411T011445_glm-realtime-flash_ecbfe538","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":0,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T09:13:01+00:00","createdAt":"2026-04-17T18:02:48.33259+00:00"},{"runId":"20260411T013006_amazon.nova-2-sonic-v1_0_e1e655b2","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":65.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T09:02:14+00:00","createdAt":"2026-04-17T18:02:58.863377+00:00"},{"runId":"20260411T013156_ultravox-v0.7_9e40b177","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":90.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:52:18+00:00","createdAt":"2026-04-17T18:03:01.42129+00:00"},{"runId":"20260411T013305_grok-realtime_9dac41ea","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":55.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:50:01+00:00","createdAt":"2026-04-17T18:03:01.981843+00:00"},{"runId":"20260411T013053_gemini-2.5-flash-native-audio-preview-12-2025_a04419d6","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:49:45+00:00","createdAt":"2026-04-17T18:03:00.339806+00:00"},{"runId":"20260411T012803_gemini-2.5-flash-native-audio-preview-12-2025_48972285","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":62.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:48:54+00:00","createdAt":"2026-04-17T18:02:55.492378+00:00"},{"runId":"20260411T012836_ultravox-v0.7_bc9e6cbe","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":19,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":93.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:47:36+00:00","createdAt":"2026-04-17T18:02:56.922565+00:00"},{"runId":"20260411T012858_grok-realtime_39d6624c","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":88.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:46:32+00:00","createdAt":"2026-04-17T18:02:57.501894+00:00"},{"runId":"20260411T013012_gpt-realtime_3922a3bc","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:44:51+00:00","createdAt":"2026-04-17T18:02:59.76858+00:00"},{"runId":"20260411T012804_gemini-3.1-flash-live-preview_a8a8ae39","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":50.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:43:12+00:00","createdAt":"2026-04-17T18:02:56.263928+00:00"},{"runId":"20260411T012557_amazon.nova-2-sonic-v1_0_311196ed","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.17,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:42:19+00:00","createdAt":"2026-04-17T18:02:54.231741+00:00"},{"runId":"20260411T012127_ultravox-v0.7_8954e9bb","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":90.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:41:22+00:00","createdAt":"2026-04-17T18:02:52.55748+00:00"},{"runId":"20260411T012240_grok-realtime_cc28530c","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":79.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:40:10+00:00","createdAt":"2026-04-17T18:02:53.135998+00:00"},{"runId":"20260411T011421_grok-realtime_bcd8d747","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":85.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:39:47+00:00","createdAt":"2026-04-17T18:02:46.15026+00:00"},{"runId":"20260411T012606_gpt-realtime_867cbfd9","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":54.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:37:59+00:00","createdAt":"2026-04-17T18:02:54.842415+00:00"},{"runId":"20260411T011617_amazon.nova-2-sonic-v1_0_97d4d679","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:37:44+00:00","createdAt":"2026-04-17T18:02:50.397688+00:00"},{"runId":"20260411T005614_gemini-2.5-flash-native-audio-preview-12-2025_8223e19d","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":60.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:37:07+00:00","createdAt":"2026-04-17T18:02:35.110352+00:00"},{"runId":"20260411T011445_gemini-2.5-flash-native-audio-preview-12-2025_fe82807b","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":67.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:36:36+00:00","createdAt":"2026-04-17T18:02:46.740571+00:00"},{"runId":"20260411T011420_amazon.nova-2-sonic-v1_0_278b93f4","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":47.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:36:29+00:00","createdAt":"2026-04-17T18:02:38.493493+00:00"},{"runId":"20260411T011940_gemini-3.1-flash-live-preview_7c38b192","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":43.41,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:36:16+00:00","createdAt":"2026-04-17T18:02:52.038134+00:00"},{"runId":"20260411T011750_gpt-realtime_d5ca74ad","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:34:44+00:00","createdAt":"2026-04-17T18:02:50.97035+00:00"},{"runId":"20260411T011445_grok-realtime_c866d816","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:34:26+00:00","createdAt":"2026-04-17T18:02:48.913871+00:00"},{"runId":"20260411T011445_ultravox-v0.7_108cc347","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":85.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:33:35+00:00","createdAt":"2026-04-17T18:02:49.450573+00:00"},{"runId":"20260411T011420_grok-realtime_b5477eff","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:33:28+00:00","createdAt":"2026-04-17T18:02:41.870963+00:00"},{"runId":"20260411T011419_amazon.nova-2-sonic-v1_0_990ad59a","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":75.92,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:33:16+00:00","createdAt":"2026-04-17T18:02:37.718684+00:00"},{"runId":"20260411T011420_ultravox-v0.7_5fafed2b","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":94.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:33:03+00:00","createdAt":"2026-04-17T18:02:42.965595+00:00"},{"runId":"20260411T011253_grok-realtime_d15f1431","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":81.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:31:54+00:00","createdAt":"2026-04-17T18:02:37.013057+00:00"},{"runId":"20260411T011420_ultravox-v0.7_3cd6060e","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:31:37+00:00","createdAt":"2026-04-17T18:02:42.407732+00:00"},{"runId":"20260411T011421_amazon.nova-2-sonic-v1_0_aa8128b4","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":57.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:30:51+00:00","createdAt":"2026-04-17T18:02:43.507187+00:00"},{"runId":"20260411T004607_amazon.nova-2-sonic-v1_0_9e586fd2","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:30:11+00:00","createdAt":"2026-04-17T18:02:30.133559+00:00"},{"runId":"20260411T011445_gemini-3.1-flash-live-preview_6e32ba18","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":55.81,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:30:04+00:00","createdAt":"2026-04-17T18:02:47.341989+00:00"},{"runId":"20260411T011420_gpt-realtime_e5825517","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:29:23+00:00","createdAt":"2026-04-17T18:02:41.340402+00:00"},{"runId":"20260411T011420_gemini-3.1-flash-live-preview_27c36211","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:29:07+00:00","createdAt":"2026-04-17T18:02:39.947375+00:00"},{"runId":"20260411T005332_amazon.nova-2-sonic-v1_0_26c05a44","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":61.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:28:57+00:00","createdAt":"2026-04-17T18:02:33.930371+00:00"},{"runId":"20260411T011420_gemini-2.5-flash-native-audio-preview-12-2025_d7231cbb","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.14,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:28:34+00:00","createdAt":"2026-04-17T18:02:39.267609+00:00"},{"runId":"20260411T011421_gemini-3.1-flash-live-preview_0eeca47b","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:28:03+00:00","createdAt":"2026-04-17T18:02:44.586984+00:00"},{"runId":"20260411T005626_gemini-3.1-flash-live-preview_62a6eed9","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:28:02+00:00","createdAt":"2026-04-17T18:02:35.970205+00:00"},{"runId":"20260411T011420_gpt-realtime_08aeb824","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:26:05+00:00","createdAt":"2026-04-17T18:02:40.488166+00:00"},{"runId":"20260411T011421_gemini-2.5-flash-native-audio-preview-12-2025_04e62aa6","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:25:55+00:00","createdAt":"2026-04-17T18:02:44.014954+00:00"},{"runId":"20260411T011421_gpt-realtime_e52191ed","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":53.41,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:25:13+00:00","createdAt":"2026-04-17T18:02:45.510784+00:00"},{"runId":"20260411T004845_gemini-2.5-flash-native-audio-preview-12-2025_c0fb9191","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:24:52+00:00","createdAt":"2026-04-17T18:02:31.584571+00:00"},{"runId":"20260411T005200_grok-realtime_62080adb","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":79.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:22:39+00:00","createdAt":"2026-04-17T18:02:33.243764+00:00"},{"runId":"20260411T005410_gpt-realtime_7de07bfb","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:21:25+00:00","createdAt":"2026-04-17T18:02:34.520111+00:00"},{"runId":"20260411T005114_ultravox-v0.7_1711e444","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":62.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:19:38+00:00","createdAt":"2026-04-17T18:02:32.692862+00:00"},{"runId":"20260411T005102_gemini-3.1-flash-live-preview_2fe6536d","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":43.41,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:19:38+00:00","createdAt":"2026-04-17T18:02:32.130556+00:00"},{"runId":"20260411T004227_grok-realtime_24959ff6","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":86.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:17:49+00:00","createdAt":"2026-04-17T18:02:29.581943+00:00"},{"runId":"20260411T005729_ultravox-v0.7_8e998088","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":1,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":4.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T08:12:51+00:00","createdAt":"2026-04-17T18:02:36.574599+00:00"},{"runId":"20260411T004155_ultravox-v0.7_7c1bf55b","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":79.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:57:27+00:00","createdAt":"2026-04-17T18:02:28.901187+00:00"},{"runId":"20260411T004148_gemini-3.1-flash-live-preview_8ad71649","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":96.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:56:25+00:00","createdAt":"2026-04-17T18:02:28.297553+00:00"},{"runId":"20260411T004100_gemini-2.5-flash-native-audio-preview-12-2025_1dd07b10","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:56:13+00:00","createdAt":"2026-04-17T18:02:27.740656+00:00"},{"runId":"20260411T003652_ultravox-v0.7_42911563","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":88.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:54:08+00:00","createdAt":"2026-04-17T18:02:25.313759+00:00"},{"runId":"20260411T003731_grok-realtime_13462f10","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":56.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:53:30+00:00","createdAt":"2026-04-17T18:02:25.953117+00:00"},{"runId":"20260411T003955_amazon.nova-2-sonic-v1_0_d7f40195","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":68.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:53:07+00:00","createdAt":"2026-04-17T18:02:26.512987+00:00"},{"runId":"20260411T002128_amazon.nova-2-sonic-v1_0_d14fe9ec","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":74.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:52:33+00:00","createdAt":"2026-04-17T18:02:15.838039+00:00"},{"runId":"20260411T003307_amazon.nova-2-sonic-v1_0_58959918","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":75.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:51:12+00:00","createdAt":"2026-04-17T18:02:23.028565+00:00"},{"runId":"20260411T004047_gpt-realtime_26de9247","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":47.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:51:01+00:00","createdAt":"2026-04-17T18:02:27.076462+00:00"},{"runId":"20260411T003417_gpt-realtime_5fe1ae53","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:48:44+00:00","createdAt":"2026-04-17T18:02:23.579512+00:00"},{"runId":"20260411T003500_gemini-2.5-flash-native-audio-preview-12-2025_fd91e552","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":57.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:48:29+00:00","createdAt":"2026-04-17T18:02:24.137749+00:00"},{"runId":"20260411T002552_ultravox-v0.7_c1304d39","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:46:05+00:00","createdAt":"2026-04-17T18:02:21.843658+00:00"},{"runId":"20260411T002445_amazon.nova-2-sonic-v1_0_f1010443","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":8,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":55.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:44:37+00:00","createdAt":"2026-04-17T18:02:19.331582+00:00"},{"runId":"20260411T002723_grok-realtime_b655ec91","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:43:54+00:00","createdAt":"2026-04-17T18:02:22.423016+00:00"},{"runId":"20260411T002131_gemini-2.5-flash-native-audio-preview-12-2025_a3168353","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:42:25+00:00","createdAt":"2026-04-17T18:02:17.056951+00:00"},{"runId":"20260411T002328_ultravox-v0.7_2c00e6df","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:41:54+00:00","createdAt":"2026-04-17T18:02:18.209387+00:00"},{"runId":"20260411T001923_gemini-2.5-flash-native-audio-preview-12-2025_2a0d7235","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:41:47+00:00","createdAt":"2026-04-17T18:02:13.451179+00:00"},{"runId":"20260411T002340_grok-realtime_e8980294","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":74.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:40:58+00:00","createdAt":"2026-04-17T18:02:18.787128+00:00"},{"runId":"20260411T002536_gemini-3.1-flash-live-preview_898b3ac4","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":49.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:40:45+00:00","createdAt":"2026-04-17T18:02:21.304509+00:00"},{"runId":"20260411T002452_gpt-realtime_334eeb95","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:39:54+00:00","createdAt":"2026-04-17T18:02:19.888488+00:00"},{"runId":"20260411T002530_gemini-2.5-flash-native-audio-preview-12-2025_11a4f53c","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:39:44+00:00","createdAt":"2026-04-17T18:02:20.4902+00:00"},{"runId":"20260411T001942_ultravox-v0.7_ed6e25c5","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":19,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":98.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:38:16+00:00","createdAt":"2026-04-17T18:02:14.594823+00:00"},{"runId":"20260411T000629_amazon.nova-2-sonic-v1_0_fee38984","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":50.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:37:29+00:00","createdAt":"2026-04-17T18:02:06.960395+00:00"},{"runId":"20260411T001943_grok-realtime_6835653a","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":71.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:36:50+00:00","createdAt":"2026-04-17T18:02:15.218243+00:00"},{"runId":"20260411T002216_gemini-3.1-flash-live-preview_784ae753","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":80.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:36:35+00:00","createdAt":"2026-04-17T18:02:17.65265+00:00"},{"runId":"20260411T002130_gpt-realtime_35528321","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:34:58+00:00","createdAt":"2026-04-17T18:02:16.421108+00:00"},{"runId":"20260411T001755_gpt-realtime_0c029d1c","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":81.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:34:15+00:00","createdAt":"2026-04-17T18:02:12.862309+00:00"},{"runId":"20260411T001925_gemini-3.1-flash-live-preview_5d30e82f","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:33:05+00:00","createdAt":"2026-04-17T18:02:14.034666+00:00"},{"runId":"20260411T000627_amazon.nova-2-sonic-v1_0_5bd68b5a","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:31:44+00:00","createdAt":"2026-04-17T18:01:59.875095+00:00"},{"runId":"20260411T000630_grok-realtime_07f6c989","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":25,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:27:46+00:00","createdAt":"2026-04-17T18:02:12.15083+00:00"},{"runId":"20260411T000627_amazon.nova-2-sonic-v1_0_924646f1","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":9,"kbGrounding":11,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":49.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:27:21+00:00","createdAt":"2026-04-17T18:02:00.458842+00:00"},{"runId":"20260411T000628_grok-realtime_f8c1093b","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":69.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:25:50+00:00","createdAt":"2026-04-17T18:02:05.809826+00:00"},{"runId":"20260411T000630_amazon.nova-2-sonic-v1_0_04895253","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":66.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:25:33+00:00","createdAt":"2026-04-17T18:02:10.331434+00:00"},{"runId":"20260411T000627_ultravox-v0.7_a8c3beea","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:25:27+00:00","createdAt":"2026-04-17T18:02:03.676137+00:00"},{"runId":"20260411T000629_grok-realtime_b2870db3","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":49.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:24:51+00:00","createdAt":"2026-04-17T18:02:09.178768+00:00"},{"runId":"20260411T000628_ultravox-v0.7_67ee5f2d","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":90.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:24:43+00:00","createdAt":"2026-04-17T18:02:06.364475+00:00"},{"runId":"20260411T000626_gpt-realtime_5cbcfd75","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":78.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:24:21+00:00","createdAt":"2026-04-17T18:01:59.091822+00:00"},{"runId":"20260411T000629_grok-realtime_41a6bad7","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":65.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:23:52+00:00","createdAt":"2026-04-17T18:02:08.60965+00:00"},{"runId":"20260411T000627_ultravox-v0.7_a37bfc10","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:23:38+00:00","createdAt":"2026-04-17T18:02:03.11611+00:00"},{"runId":"20260411T000629_ultravox-v0.7_516f064a","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":88.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:23:26+00:00","createdAt":"2026-04-17T18:02:09.731669+00:00"},{"runId":"20260411T000630_gpt-realtime_44551d6d","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:22:14+00:00","createdAt":"2026-04-17T18:02:11.516374+00:00"},{"runId":"20260411T000629_gemini-3.1-flash-live-preview_2086cf71","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":10,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":50.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:21:29+00:00","createdAt":"2026-04-17T18:02:07.503224+00:00"},{"runId":"20260411T000628_gemini-2.5-flash-native-audio-preview-12-2025_ff087b7d","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:21:29+00:00","createdAt":"2026-04-17T18:02:04.766943+00:00"},{"runId":"20260411T000629_gemini-3.1-flash-live-preview_3c945e5f","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:21:26+00:00","createdAt":"2026-04-17T18:02:08.044993+00:00"},{"runId":"20260411T000627_gemini-3.1-flash-live-preview_a2dc90f9","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":48.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:20:56+00:00","createdAt":"2026-04-17T18:02:01.938761+00:00"},{"runId":"20260411T000627_gemini-3.1-flash-live-preview_9b6149bd","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":47.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:19:44+00:00","createdAt":"2026-04-17T18:02:01.40327+00:00"},{"runId":"20260411T000627_gpt-realtime_a6b51314","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":20,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":70.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:19:42+00:00","createdAt":"2026-04-17T18:02:02.473892+00:00"},{"runId":"20260411T000627_gemini-2.5-flash-native-audio-preview-12-2025_0eedbc00","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":11,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:19:24+00:00","createdAt":"2026-04-17T18:02:00.879779+00:00"},{"runId":"20260411T000630_gemini-2.5-flash-native-audio-preview-12-2025_51e82bd0","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":14,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":60.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-11T07:19:21+00:00","createdAt":"2026-04-17T18:02:10.956417+00:00"},{"runId":"20260403T123021_glm-realtime-flash_f5371b11","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":12,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":10.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:48:35+00:00","createdAt":"2026-04-05T07:08:02.536084+00:00"},{"runId":"20260403T132313_glm-realtime-flash_ce8839b2","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":2,"toolUseDenom":8,"instructionFollowing":2,"kbGrounding":9,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":14.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:48:17+00:00","createdAt":"2026-04-05T07:08:10.949891+00:00"},{"runId":"20260403T123025_glm-realtime-flash_697cdd35","benchmark":"appointment_bench","model":"glm-realtime-flash","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":9,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:47:56+00:00","createdAt":"2026-04-05T07:08:03.831323+00:00"},{"runId":"20260330T191631_glm-realtime-air_de08dd4e","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":1,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":21.6,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-03T00:43:51+00:00","createdAt":"2026-04-03T00:44:51.086951+00:00"},{"runId":"20260329T224939_glm-realtime-air_d0e54c4d","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":1,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":1.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-03T00:43:35+00:00","createdAt":"2026-04-03T00:44:50.559123+00:00"},{"runId":"20260330T191929_glm-realtime-air_78a7d546","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":3,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":47.1,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:18:41+00:00","createdAt":"2026-04-01T07:15:19.553602+00:00"},{"runId":"20260330T191820_glm-realtime-air_b39e2d3b","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":1,"kbGrounding":8,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":7.2,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:17:59+00:00","createdAt":"2026-04-01T07:15:18.433718+00:00"},{"runId":"20260330T191644_glm-realtime-air_e1ca1559","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":2,"kbGrounding":2,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":3.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:17:36+00:00","createdAt":"2026-04-01T07:15:17.900388+00:00"},{"runId":"20260330T185159_glm-realtime-air_5c6fa6f9","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":3,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":2.4,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:12:08+00:00","createdAt":"2026-04-01T07:15:14.240825+00:00"},{"runId":"20260330T185159_glm-realtime-air_64538a19","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":4,"kbGrounding":8,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":10.65,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:11:39+00:00","createdAt":"2026-04-01T07:15:14.765895+00:00"},{"runId":"20260330T221213_gemini-3.1-flash-live-preview_81cad894","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":58.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:11:27+00:00","createdAt":"2026-04-01T05:18:21.835246+00:00"},{"runId":"20260330T185159_glm-realtime-air_1f206b53","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":2,"kbGrounding":13,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":14.5,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:10:49+00:00","createdAt":"2026-04-01T07:15:13.714417+00:00"},{"runId":"20260330T185157_glm-realtime-air_61f665e8","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":0,"kbGrounding":4,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":3.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:07:50+00:00","createdAt":"2026-04-01T07:15:11.581471+00:00"},{"runId":"20260330T185157_glm-realtime-air_bba93959","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":2,"kbGrounding":6,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":26.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:07:42+00:00","createdAt":"2026-04-01T07:15:12.123365+00:00"},{"runId":"20260330T185157_glm-realtime-air_1ae971cf","benchmark":"appointment_bench","model":"glm-realtime-air","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":6,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":35.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:07:40+00:00","createdAt":"2026-04-01T07:15:10.911583+00:00"},{"runId":"20260330T163841_gemini-3.1-flash-live-preview_95d2be0e","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":0,"toolUseDenom":8,"instructionFollowing":15,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":51.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:03:42+00:00","createdAt":"2026-03-31T04:45:31.068191+00:00"},{"runId":"20260330T163841_gemini-3.1-flash-live-preview_8cc4f287","benchmark":"appointment_bench","model":"gemini-3.1-flash-live-preview","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":11,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:00:27+00:00","createdAt":"2026-03-31T04:45:30.458785+00:00"},{"runId":"20260327T214647_gpt-realtime_7fe02dc0","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":13,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":67.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T05:26:40+00:00","createdAt":"2026-03-30T05:27:55.455846+00:00"},{"runId":"20260327T212540_gpt-realtime_d0f7bd62","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":73.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T05:26:30+00:00","createdAt":"2026-03-30T05:27:52.789437+00:00"},{"runId":"20260327T213314_gpt-realtime_45f9863e","benchmark":"appointment_bench","model":"gpt-realtime","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T05:59:48+00:00","createdAt":"2026-03-28T07:07:33.29144+00:00"},{"runId":"20260323T180612_ultravox-v0.7_cbe89e91","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":5,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":19,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":87.7,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:20:17+00:00","createdAt":"2026-03-24T02:15:13.30464+00:00"},{"runId":"20260323T180613_ultravox-v0.7_12fd8d96","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":8,"toolUseDenom":8,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":19,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":96.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:19:40+00:00","createdAt":"2026-03-24T02:15:14.485607+00:00"},{"runId":"20260323T180612_ultravox-v0.7_1909c2fd","benchmark":"appointment_bench","model":"ultravox-v0.7","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":79.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:19:26+00:00","createdAt":"2026-03-24T02:15:12.755695+00:00"},{"runId":"20260323T114047_grok-realtime_92f33750","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":76.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:14:38+00:00","createdAt":"2026-03-23T22:44:54.055287+00:00"},{"runId":"20260323T180612_grok-realtime_0e78ec50","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":6,"toolUseDenom":8,"instructionFollowing":24,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":82.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:11:34+00:00","createdAt":"2026-03-24T02:15:12.189961+00:00"},{"runId":"20260323T182049_grok-realtime_4aec7fb6","benchmark":"appointment_bench","model":"grok-realtime","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":22,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:10:29+00:00","createdAt":"2026-03-24T02:15:17.476047+00:00"},{"runId":"20260323T002631_gemini-2.5-flash-native-audio-preview-12-2025_5a1e2147","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":3,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":64.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:01:08+00:00","createdAt":"2026-03-23T15:29:55.212106+00:00"},{"runId":"20260323T180612_gemini-2.5-flash-native-audio-preview-12-2025_87b81d1e","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":19,"kbGrounding":11,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":63.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:00:35+00:00","createdAt":"2026-03-24T02:15:10.131668+00:00"},{"runId":"20260323T180612_gemini-2.5-flash-native-audio-preview-12-2025_78710276","benchmark":"appointment_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":25,"scores":{"toolUse":1,"toolUseDenom":8,"instructionFollowing":17,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":63.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:59:31+00:00","createdAt":"2026-03-24T02:15:09.596446+00:00"},{"runId":"20260323T180612_amazon.nova-2-sonic-v1_0_13f740ca","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":12,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":59.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:54:08+00:00","createdAt":"2026-03-24T02:15:08.537093+00:00"},{"runId":"20260323T182101_amazon.nova-2-sonic-v1_0_b281c094","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":4,"toolUseDenom":8,"instructionFollowing":16,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":72.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:32:59+00:00","createdAt":"2026-03-24T02:15:18.024771+00:00"},{"runId":"20260323T180613_amazon.nova-2-sonic-v1_0_b1332cdc","benchmark":"appointment_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":25,"scores":{"toolUse":7,"toolUseDenom":8,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":1,"stateTrackingDenom":19},"passRate":77.92,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:32:39+00:00","createdAt":"2026-03-24T02:15:13.936684+00:00"},{"runId":"20260507T213917_gpt-realtime-2_e31662be","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":85.54,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:05:20+00:00","createdAt":"2026-05-07T23:02:11.903407+00:00"},{"runId":"20260507T213852_gpt-realtime-2_cffc2d80","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:05:08+00:00","createdAt":"2026-05-07T23:02:10.014456+00:00"},{"runId":"20260507T213849_gpt-realtime-2_49a73963","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.42,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:04:10+00:00","createdAt":"2026-05-07T23:02:09.556974+00:00"},{"runId":"20260507T213829_gpt-realtime-2_ce1525a5","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:01:40+00:00","createdAt":"2026-05-07T23:02:07.17554+00:00"},{"runId":"20260507T213930_gpt-realtime-2_19860922","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.58,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:01:17+00:00","createdAt":"2026-05-07T23:02:12.145511+00:00"},{"runId":"20260507T213905_gpt-realtime-2_7ac0c433","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.65,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:49+00:00","createdAt":"2026-05-07T23:02:11.666518+00:00"},{"runId":"20260507T213844_gpt-realtime-2_41894e2d","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":7,"ambiguityDenom":4,"stateTrackingDenom":24},"passRate":29.56,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:19+00:00","createdAt":"2026-05-07T23:02:09.046381+00:00"},{"runId":"20260507T213849_gpt-realtime-2_f9138f5e","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":45.97,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:18+00:00","createdAt":"2026-05-07T23:02:09.78692+00:00"},{"runId":"20260507T213902_gpt-realtime-2_634e3527","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:57:53+00:00","createdAt":"2026-05-07T23:02:11.079445+00:00"},{"runId":"20260507T213854_gpt-realtime-2_164f0417","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":76.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:57:52+00:00","createdAt":"2026-05-07T23:02:10.350594+00:00"},{"runId":"20260507T213903_gpt-realtime-2_9af39143","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.82,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:56:52+00:00","createdAt":"2026-05-07T23:02:11.426204+00:00"},{"runId":"20260507T213857_gpt-realtime-2_a0c15a01","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":11,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":32.45,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:56:34+00:00","createdAt":"2026-05-07T23:02:10.831585+00:00"},{"runId":"20260507T213857_gpt-realtime-2_6f7abf9e","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":0,"stateTracking":17,"ambiguityDenom":4,"stateTrackingDenom":24},"passRate":53.82,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:56:06+00:00","createdAt":"2026-05-07T23:02:10.590424+00:00"},{"runId":"20260507T213848_gpt-realtime-2_3bc32ef9","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":67.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:42+00:00","createdAt":"2026-05-07T23:02:09.293281+00:00"},{"runId":"20260507T213833_gpt-realtime-2_fb265a6c","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:40+00:00","createdAt":"2026-05-07T23:02:08.700219+00:00"},{"runId":"20260507T213815_gpt-realtime-2_ebd53341","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:37+00:00","createdAt":"2026-05-07T23:02:06.788283+00:00"},{"runId":"20260507T213830_gpt-realtime-2_739b770c","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.54,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:20+00:00","createdAt":"2026-05-07T23:02:07.579216+00:00"},{"runId":"20260507T213833_gpt-realtime-2_22dd2c3f","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:55:13+00:00","createdAt":"2026-05-07T23:02:08.371735+00:00"},{"runId":"20260507T213830_gpt-realtime-2_b1845747","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:54:20+00:00","createdAt":"2026-05-07T23:02:08.013476+00:00"},{"runId":"20260507T213809_gpt-realtime-2_72f7ad23","benchmark":"assistant_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":4,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:53:07+00:00","createdAt":"2026-05-07T23:02:06.536803+00:00"},{"runId":"20260507T061107_grok-voice-think-fast-1.0_5e948a23","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.44,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:45:22+00:00","createdAt":"2026-05-07T20:58:00.852398+00:00"},{"runId":"20260507T061253_grok-voice-think-fast-1.0_2e4baf87","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":75.5,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:38:37+00:00","createdAt":"2026-05-07T20:58:01.090618+00:00"},{"runId":"20260507T060900_grok-voice-think-fast-1.0_82281f05","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:38:30+00:00","createdAt":"2026-05-07T20:57:57.015444+00:00"},{"runId":"20260507T060946_grok-voice-think-fast-1.0_f7133d0c","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:37:21+00:00","createdAt":"2026-05-07T20:57:59.270535+00:00"},{"runId":"20260507T061033_grok-voice-think-fast-1.0_814229b2","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.67,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:35:28+00:00","createdAt":"2026-05-07T20:58:00.354892+00:00"},{"runId":"20260507T061043_grok-voice-think-fast-1.0_50eae0c4","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.98,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:34:41+00:00","createdAt":"2026-05-07T20:58:00.606881+00:00"},{"runId":"20260507T061009_grok-voice-think-fast-1.0_000d6119","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":91.72,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:33:56+00:00","createdAt":"2026-05-07T20:57:59.763034+00:00"},{"runId":"20260507T060914_grok-voice-think-fast-1.0_6c0701bd","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.55,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:33:44+00:00","createdAt":"2026-05-07T20:57:58.056207+00:00"},{"runId":"20260507T060928_grok-voice-think-fast-1.0_733be078","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":83.77,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:33:31+00:00","createdAt":"2026-05-07T20:57:58.785518+00:00"},{"runId":"20260507T060902_grok-voice-think-fast-1.0_226ab5a2","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:33:27+00:00","createdAt":"2026-05-07T20:57:57.26979+00:00"},{"runId":"20260507T060925_grok-voice-think-fast-1.0_c617ee4c","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:33:18+00:00","createdAt":"2026-05-07T20:57:58.542881+00:00"},{"runId":"20260507T061014_grok-voice-think-fast-1.0_c914a963","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:33:09+00:00","createdAt":"2026-05-07T20:58:00.096676+00:00"},{"runId":"20260507T060902_grok-voice-think-fast-1.0_91cb307a","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:42+00:00","createdAt":"2026-05-07T20:57:57.51948+00:00"},{"runId":"20260507T060856_grok-voice-think-fast-1.0_6659c688","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":88.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:40+00:00","createdAt":"2026-05-07T20:57:56.533563+00:00"},{"runId":"20260507T060943_grok-voice-think-fast-1.0_9187ad2c","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:38+00:00","createdAt":"2026-05-07T20:57:59.03309+00:00"},{"runId":"20260507T060953_grok-voice-think-fast-1.0_2d7d6515","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":83.56,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:35+00:00","createdAt":"2026-05-07T20:57:59.523525+00:00"},{"runId":"20260507T060919_grok-voice-think-fast-1.0_0f9fe6a8","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:24+00:00","createdAt":"2026-05-07T20:57:58.294341+00:00"},{"runId":"20260507T060907_grok-voice-think-fast-1.0_692cf760","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:31:09+00:00","createdAt":"2026-05-07T20:57:57.766768+00:00"},{"runId":"20260507T060857_grok-voice-think-fast-1.0_753d45e1","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":74.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:30:31+00:00","createdAt":"2026-05-07T20:57:56.775896+00:00"},{"runId":"20260507T055235_grok-voice-think-fast-1.0_a51cd41e","benchmark":"assistant_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:22:37+00:00","createdAt":"2026-05-07T20:57:56.245192+00:00"},{"runId":"20260411T231808_amazon.nova-2-sonic-v1_0_0086c433","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":15,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":47.52,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:06:57+00:00","createdAt":"2026-04-17T18:17:55.714288+00:00"},{"runId":"20260411T225532_grok-realtime_36dad076","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":88.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:01:50+00:00","createdAt":"2026-04-17T18:17:38.663204+00:00"},{"runId":"20260411T232252_gemini-2.5-flash-native-audio-preview-12-2025_184f0031","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.23,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:54:52+00:00","createdAt":"2026-04-17T18:18:00.13669+00:00"},{"runId":"20260411T222154_amazon.nova-2-sonic-v1_0_4b19f99f","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":17,"ambiguityHandling":2,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":55.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:54:51+00:00","createdAt":"2026-04-17T18:17:09.835594+00:00"},{"runId":"20260411T223719_amazon.nova-2-sonic-v1_0_34f86bbd","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":18,"kbGrounding":25,"ambiguityHandling":3,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:53:08+00:00","createdAt":"2026-04-17T18:17:23.934264+00:00"},{"runId":"20260411T230620_gemini-3.1-flash-live-preview_4eaa5260","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:51:46+00:00","createdAt":"2026-04-17T18:17:47.490814+00:00"},{"runId":"20260411T224109_gpt-realtime_fb61b642","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":3,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:50:42+00:00","createdAt":"2026-04-17T18:17:27.196299+00:00"},{"runId":"20260411T223410_gpt-realtime_19e17cb8","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":64.21,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:48:55+00:00","createdAt":"2026-04-17T18:17:21.1784+00:00"},{"runId":"20260411T215343_gemini-3.1-flash-live-preview_33706e88","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":13,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:48:45+00:00","createdAt":"2026-04-17T18:16:46.447136+00:00"},{"runId":"20260411T224650_gemini-2.5-flash-native-audio-preview-12-2025_c716c7f0","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":87.23,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:47:35+00:00","createdAt":"2026-04-17T18:17:31.040374+00:00"},{"runId":"20260411T221853_gpt-realtime_07b351af","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":75.02,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:46:54+00:00","createdAt":"2026-04-17T18:17:06.978851+00:00"},{"runId":"20260411T211250_grok-realtime_f968fea9","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:45:23+00:00","createdAt":"2026-04-17T18:16:03.722619+00:00"},{"runId":"20260411T220639_gemini-2.5-flash-native-audio-preview-12-2025_64ae3eff","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:45:13+00:00","createdAt":"2026-04-17T18:16:57.41964+00:00"},{"runId":"20260411T204636_gpt-realtime_46bec6cc","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":57.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:42:38+00:00","createdAt":"2026-04-17T18:15:38.501475+00:00"},{"runId":"20260411T213824_gemini-2.5-flash-native-audio-preview-12-2025_f4a10302","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":22,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:40:07+00:00","createdAt":"2026-04-17T18:16:29.644705+00:00"},{"runId":"20260411T210427_gemini-3.1-flash-live-preview_9505a537","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:37:51+00:00","createdAt":"2026-04-17T18:15:55.25249+00:00"},{"runId":"20260411T211132_amazon.nova-2-sonic-v1_0_c1052c4d","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":21,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:35:45+00:00","createdAt":"2026-04-17T18:16:00.227412+00:00"},{"runId":"20260411T211528_gpt-realtime_6bc2ee19","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":5,"kbGrounding":5,"ambiguityHandling":2,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":19.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:34:32+00:00","createdAt":"2026-04-17T18:16:05.177949+00:00"},{"runId":"20260411T213944_grok-realtime_33325ef2","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T22:53:40+00:00","createdAt":"2026-04-17T18:16:31.125637+00:00"},{"runId":"20260411T210755_grok-realtime_f91a567b","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":83.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T22:50:10+00:00","createdAt":"2026-04-17T18:15:56.731352+00:00"},{"runId":"20260411T215144_gemini-3.1-flash-live-preview_af853185","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T22:50:09+00:00","createdAt":"2026-04-17T18:16:42.710638+00:00"},{"runId":"20260411T215115_gemini-2.5-flash-native-audio-preview-12-2025_dde29cd1","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.82,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T22:46:22+00:00","createdAt":"2026-04-17T18:16:42.162332+00:00"},{"runId":"20260411T235637_amazon.nova-2-sonic-v1_0_764c1c9e","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":65.56,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:23:12+00:00","createdAt":"2026-04-17T18:18:29.39974+00:00"},{"runId":"20260411T235338_amazon.nova-2-sonic-v1_0_0c474ed8","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":18,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.75,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:22:51+00:00","createdAt":"2026-04-17T18:18:25.84765+00:00"},{"runId":"20260411T235606_ultravox-v0.7_82f92e08","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.59,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:22:24+00:00","createdAt":"2026-04-17T18:18:28.245592+00:00"},{"runId":"20260411T235610_grok-realtime_52754240","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:22:01+00:00","createdAt":"2026-04-17T18:18:28.831505+00:00"},{"runId":"20260411T235259_grok-realtime_a05b65c8","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.74,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:19:02+00:00","createdAt":"2026-04-17T18:18:25.2489+00:00"},{"runId":"20260411T235217_ultravox-v0.7_d62687cf","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":18,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":50.82,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:17:46+00:00","createdAt":"2026-04-17T18:18:24.640393+00:00"},{"runId":"20260411T234929_ultravox-v0.7_e3ff4a28","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:17:22+00:00","createdAt":"2026-04-17T18:18:21.291895+00:00"},{"runId":"20260411T235514_gemini-3.1-flash-live-preview_52646194","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":61.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:15:37+00:00","createdAt":"2026-04-17T18:18:27.582549+00:00"},{"runId":"20260411T235003_grok-realtime_6832bbe0","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:14:34+00:00","createdAt":"2026-04-17T18:18:21.880488+00:00"},{"runId":"20260411T234546_grok-realtime_8f4ac75d","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:14:08+00:00","createdAt":"2026-04-17T18:18:19.096757+00:00"},{"runId":"20260411T235058_amazon.nova-2-sonic-v1_0_cc01b814","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":12,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":36.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:14:06+00:00","createdAt":"2026-04-17T18:18:22.419029+00:00"},{"runId":"20260411T235215_gemini-3.1-flash-live-preview_f70a1d4e","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:12:20+00:00","createdAt":"2026-04-17T18:18:24.072224+00:00"},{"runId":"20260411T234546_amazon.nova-2-sonic-v1_0_ba981a84","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":51.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:11:03+00:00","createdAt":"2026-04-17T18:18:18.563371+00:00"},{"runId":"20260411T234541_ultravox-v0.7_ac18a562","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":5,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":66.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:11:02+00:00","createdAt":"2026-04-17T18:18:17.991081+00:00"},{"runId":"20260411T235411_gpt-realtime_f6754091","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":13,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.65,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:10:39+00:00","createdAt":"2026-04-17T18:18:26.487745+00:00"},{"runId":"20260411T235431_gemini-2.5-flash-native-audio-preview-12-2025_6381d3ea","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:09:23+00:00","createdAt":"2026-04-17T18:18:27.036622+00:00"},{"runId":"20260411T234806_gemini-3.1-flash-live-preview_d4a994f0","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.49,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:07:53+00:00","createdAt":"2026-04-17T18:18:20.691162+00:00"},{"runId":"20260411T235114_gpt-realtime_bfe38ab0","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:07:38+00:00","createdAt":"2026-04-17T18:18:22.969348+00:00"},{"runId":"20260411T234208_amazon.nova-2-sonic-v1_0_ffffc909","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":20,"ambiguityHandling":3,"stateTracking":10,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":51.25,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:06:02+00:00","createdAt":"2026-04-17T18:18:15.720608+00:00"},{"runId":"20260411T234142_grok-realtime_3d03b6d9","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:05:46+00:00","createdAt":"2026-04-17T18:18:15.154246+00:00"},{"runId":"20260411T235142_gemini-2.5-flash-native-audio-preview-12-2025_9d3b7d3f","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":75.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:05:16+00:00","createdAt":"2026-04-17T18:18:23.532657+00:00"},{"runId":"20260411T232755_amazon.nova-2-sonic-v1_0_7335933b","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":9,"kbGrounding":13,"ambiguityHandling":2,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":30.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:05:14+00:00","createdAt":"2026-04-17T18:18:05.497564+00:00"},{"runId":"20260411T233634_grok-realtime_516712f6","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:04:35+00:00","createdAt":"2026-04-17T18:18:11.778119+00:00"},{"runId":"20260411T234600_gemini-2.5-flash-native-audio-preview-12-2025_8f223cc7","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.44,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:04:03+00:00","createdAt":"2026-04-17T18:18:20.13527+00:00"},{"runId":"20260411T234107_ultravox-v0.7_fa35b38e","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:03:53+00:00","createdAt":"2026-04-17T18:18:14.598947+00:00"},{"runId":"20260411T234356_gemini-3.1-flash-live-preview_8c5926f2","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":74.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:02:44+00:00","createdAt":"2026-04-17T18:18:17.462126+00:00"},{"runId":"20260411T234238_gpt-realtime_0d9eeb8e","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":92.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:02:33+00:00","createdAt":"2026-04-17T18:18:16.289568+00:00"},{"runId":"20260411T232524_amazon.nova-2-sonic-v1_0_83b39319","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":40.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:01:31+00:00","createdAt":"2026-04-17T18:18:01.804903+00:00"},{"runId":"20260411T233709_amazon.nova-2-sonic-v1_0_f2b8afd0","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":12,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":39.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:00:52+00:00","createdAt":"2026-04-17T18:18:12.360979+00:00"},{"runId":"20260411T233912_gpt-realtime_9b0da8a9","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.73,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:58:14+00:00","createdAt":"2026-04-17T18:18:12.907175+00:00"},{"runId":"20260411T233041_grok-realtime_747875f7","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":93.2,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:57:44+00:00","createdAt":"2026-04-17T18:18:08.375131+00:00"},{"runId":"20260411T234313_gemini-2.5-flash-native-audio-preview-12-2025_89154a02","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":30,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:57:30+00:00","createdAt":"2026-04-17T18:18:16.831091+00:00"},{"runId":"20260411T232702_grok-realtime_9769abaa","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:56:36+00:00","createdAt":"2026-04-17T18:18:04.886057+00:00"},{"runId":"20260411T234041_gemini-2.5-flash-native-audio-preview-12-2025_4943d8a7","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":76.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:56:08+00:00","createdAt":"2026-04-17T18:18:13.494562+00:00"},{"runId":"20260411T233348_ultravox-v0.7_77dba379","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.03,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:56:04+00:00","createdAt":"2026-04-17T18:18:11.196862+00:00"},{"runId":"20260411T232648_ultravox-v0.7_7cfa0636","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:55:12+00:00","createdAt":"2026-04-17T18:18:04.28517+00:00"},{"runId":"20260411T232631_gemini-3.1-flash-live-preview_7e76e8c1","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:54:29+00:00","createdAt":"2026-04-17T18:18:03.704151+00:00"},{"runId":"20260411T234041_gemini-3.1-flash-live-preview_949b603d","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:54:10+00:00","createdAt":"2026-04-17T18:18:14.051009+00:00"},{"runId":"20260411T233059_amazon.nova-2-sonic-v1_0_a4e61fba","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.2,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:53:36+00:00","createdAt":"2026-04-17T18:18:08.910404+00:00"},{"runId":"20260411T233219_gemini-2.5-flash-native-audio-preview-12-2025_5f74b261","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:52:58+00:00","createdAt":"2026-04-17T18:18:10.055951+00:00"},{"runId":"20260411T232239_amazon.nova-2-sonic-v1_0_ab032de9","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":22,"ambiguityHandling":3,"stateTracking":12,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":64.48,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:52:16+00:00","createdAt":"2026-04-17T18:17:58.980989+00:00"},{"runId":"20260411T232358_grok-realtime_0983fcfd","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:52:13+00:00","createdAt":"2026-04-17T18:18:01.254171+00:00"},{"runId":"20260411T233034_ultravox-v0.7_f4876a51","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.1,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:51:40+00:00","createdAt":"2026-04-17T18:18:07.817022+00:00"},{"runId":"20260411T231710_grok-realtime_6cccde9c","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:51:13+00:00","createdAt":"2026-04-17T18:17:55.188346+00:00"},{"runId":"20260411T233150_gpt-realtime_22b34ccb","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":87.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:50:57+00:00","createdAt":"2026-04-17T18:18:09.485825+00:00"},{"runId":"20260411T234552_gpt-realtime_fda2dc5c","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":6,"kbGrounding":7,"ambiguityHandling":2,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":21.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:50:02+00:00","createdAt":"2026-04-17T18:18:19.62774+00:00"},{"runId":"20260411T233319_gemini-3.1-flash-live-preview_d11205ad","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.66,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:49:28+00:00","createdAt":"2026-04-17T18:18:10.643915+00:00"},{"runId":"20260411T231618_ultravox-v0.7_273b7133","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":58.3,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:48:04+00:00","createdAt":"2026-04-17T18:17:54.604901+00:00"},{"runId":"20260411T232830_gpt-realtime_1bac365c","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:45:51+00:00","createdAt":"2026-04-17T18:18:06.133407+00:00"},{"runId":"20260411T232927_gemini-3.1-flash-live-preview_887837c4","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:45:45+00:00","createdAt":"2026-04-17T18:18:07.283109+00:00"},{"runId":"20260411T232908_gemini-2.5-flash-native-audio-preview-12-2025_66803f82","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:45:45+00:00","createdAt":"2026-04-17T18:18:06.703503+00:00"},{"runId":"20260411T232132_grok-realtime_628f2dd5","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:45:39+00:00","createdAt":"2026-04-17T18:17:58.454017+00:00"},{"runId":"20260411T232531_gpt-realtime_76efb7ed","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":85.85,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:43:55+00:00","createdAt":"2026-04-17T18:18:02.418847+00:00"},{"runId":"20260411T232331_gemini-3.1-flash-live-preview_ac397e59","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":87.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:42:36+00:00","createdAt":"2026-04-17T18:18:00.702144+00:00"},{"runId":"20260411T230722_amazon.nova-2-sonic-v1_0_2ed9eed4","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":16,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":56.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:42:07+00:00","createdAt":"2026-04-17T18:17:49.133955+00:00"},{"runId":"20260411T232034_ultravox-v0.7_2ca857a3","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:41:40+00:00","createdAt":"2026-04-17T18:17:57.842205+00:00"},{"runId":"20260411T231043_grok-realtime_26d16329","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:41:05+00:00","createdAt":"2026-04-17T18:17:51.863112+00:00"},{"runId":"20260411T232032_gemini-3.1-flash-live-preview_b6d8c281","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:40:40+00:00","createdAt":"2026-04-17T18:17:57.315393+00:00"},{"runId":"20260411T231833_gpt-realtime_6276e8b3","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.99,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:40:40+00:00","createdAt":"2026-04-17T18:17:56.261968+00:00"},{"runId":"20260411T232244_gpt-realtime_c7ca221a","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":88.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:39:11+00:00","createdAt":"2026-04-17T18:17:59.535197+00:00"},{"runId":"20260411T232618_gemini-2.5-flash-native-audio-preview-12-2025_29cdf8e0","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":22,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:37:08+00:00","createdAt":"2026-04-17T18:18:03.017344+00:00"},{"runId":"20260411T231949_gemini-2.5-flash-native-audio-preview-12-2025_9a4deff3","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:36:32+00:00","createdAt":"2026-04-17T18:17:56.786431+00:00"},{"runId":"20260411T230903_ultravox-v0.7_ddd505c8","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:33:46+00:00","createdAt":"2026-04-17T18:17:51.294951+00:00"},{"runId":"20260411T230630_ultravox-v0.7_82ce22d2","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":85.04,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:33:18+00:00","createdAt":"2026-04-17T18:17:48.001368+00:00"},{"runId":"20260411T230524_grok-realtime_107f3275","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":88.65,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:32:17+00:00","createdAt":"2026-04-17T18:17:44.943108+00:00"},{"runId":"20260411T230708_grok-realtime_4bd2fa49","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.65,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:31:48+00:00","createdAt":"2026-04-17T18:17:48.603608+00:00"},{"runId":"20260411T231354_gemini-3.1-flash-live-preview_43c9fa12","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:30:58+00:00","createdAt":"2026-04-17T18:17:54.059574+00:00"},{"runId":"20260411T231312_gpt-realtime_e112dbd1","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":95.05,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:30:39+00:00","createdAt":"2026-04-17T18:17:52.935799+00:00"},{"runId":"20260411T230507_ultravox-v0.7_609cd62f","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":61.65,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:30:32+00:00","createdAt":"2026-04-17T18:17:44.421964+00:00"},{"runId":"20260411T231347_gemini-2.5-flash-native-audio-preview-12-2025_28fbf2e0","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.9,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:29:25+00:00","createdAt":"2026-04-17T18:17:53.501812+00:00"},{"runId":"20260411T230525_amazon.nova-2-sonic-v1_0_618e7f41","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":15,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":47.11,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:29:06+00:00","createdAt":"2026-04-17T18:17:45.83984+00:00"},{"runId":"20260411T230528_gpt-realtime_a4439161","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:28:28+00:00","createdAt":"2026-04-17T18:17:46.38227+00:00"},{"runId":"20260411T230308_amazon.nova-2-sonic-v1_0_e0890b0a","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":9,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":31.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:27:00+00:00","createdAt":"2026-04-17T18:17:42.319984+00:00"},{"runId":"20260411T230758_gpt-realtime_bbe8aefa","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":66.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:26:30+00:00","createdAt":"2026-04-17T18:17:49.679744+00:00"},{"runId":"20260411T230247_grok-realtime_b629e4a7","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.57,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:26:16+00:00","createdAt":"2026-04-17T18:17:41.781376+00:00"},{"runId":"20260411T230437_gemini-3.1-flash-live-preview_d85de3e0","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:25:30+00:00","createdAt":"2026-04-17T18:17:43.869915+00:00"},{"runId":"20260411T225033_amazon.nova-2-sonic-v1_0_aeb38776","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":49.81,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:25:22+00:00","createdAt":"2026-04-17T18:17:35.971975+00:00"},{"runId":"20260411T230529_gemini-2.5-flash-native-audio-preview-12-2025_a645697b","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:23:47+00:00","createdAt":"2026-04-17T18:17:46.961318+00:00"},{"runId":"20260411T230130_ultravox-v0.7_6deec087","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":64.59,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:23:29+00:00","createdAt":"2026-04-17T18:17:41.251857+00:00"},{"runId":"20260411T230126_gemini-3.1-flash-live-preview_8c410662","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.13,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:22:51+00:00","createdAt":"2026-04-17T18:17:40.728526+00:00"},{"runId":"20260411T230837_gemini-2.5-flash-native-audio-preview-12-2025_4504f00b","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":60.05,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:22:43+00:00","createdAt":"2026-04-17T18:17:50.238772+00:00"},{"runId":"20260411T224747_amazon.nova-2-sonic-v1_0_392aa0d0","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":17,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.52,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:22:37+00:00","createdAt":"2026-04-17T18:17:33.168547+00:00"},{"runId":"20260411T225507_ultravox-v0.7_c5493b9d","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":5,"stateTracking":12,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":60.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:21:31+00:00","createdAt":"2026-04-17T18:17:38.121091+00:00"},{"runId":"20260411T230345_gpt-realtime_d02fc6a1","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":88.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:20:33+00:00","createdAt":"2026-04-17T18:17:42.862238+00:00"},{"runId":"20260411T230840_gemini-3.1-flash-live-preview_067c07e5","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":31,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.92,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:20:31+00:00","createdAt":"2026-04-17T18:17:50.764488+00:00"},{"runId":"20260411T224455_amazon.nova-2-sonic-v1_0_aed04c0d","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":58.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:19:48+00:00","createdAt":"2026-04-17T18:17:29.940099+00:00"},{"runId":"20260411T231219_amazon.nova-2-sonic-v1_0_d9538f54","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":5,"kbGrounding":6,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":15.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:18:32+00:00","createdAt":"2026-04-17T18:17:52.420257+00:00"},{"runId":"20260411T230355_gemini-2.5-flash-native-audio-preview-12-2025_775d9388","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":74.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:18:07+00:00","createdAt":"2026-04-17T18:17:43.371266+00:00"},{"runId":"20260411T225831_gemini-2.5-flash-native-audio-preview-12-2025_ffc57feb","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":76.15,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:17:09+00:00","createdAt":"2026-04-17T18:17:40.206714+00:00"},{"runId":"20260411T225449_gemini-3.1-flash-live-preview_fb78ca7f","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.49,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:16:17+00:00","createdAt":"2026-04-17T18:17:37.576741+00:00"},{"runId":"20260411T224846_gemini-3.1-flash-live-preview_1eb23cbd","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:13:53+00:00","createdAt":"2026-04-17T18:17:34.775254+00:00"},{"runId":"20260411T225035_gpt-realtime_e025b194","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:13:46+00:00","createdAt":"2026-04-17T18:17:36.507953+00:00"},{"runId":"20260411T224740_grok-realtime_445d3f78","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":76.51,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:13:10+00:00","createdAt":"2026-04-17T18:17:32.637906+00:00"},{"runId":"20260411T224920_grok-realtime_728459b8","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.52,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:12:17+00:00","createdAt":"2026-04-17T18:17:35.317614+00:00"},{"runId":"20260411T224336_ultravox-v0.7_4e1ff83a","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":18,"ambiguityHandling":3,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":50.37,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:09:02+00:00","createdAt":"2026-04-17T18:17:28.824086+00:00"},{"runId":"20260411T225329_gemini-2.5-flash-native-audio-preview-12-2025_5c6726da","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":5,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:08:39+00:00","createdAt":"2026-04-17T18:17:37.051772+00:00"},{"runId":"20260411T224737_ultravox-v0.7_e0d7cbf2","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:08:36+00:00","createdAt":"2026-04-17T18:17:32.111553+00:00"},{"runId":"20260411T224351_grok-realtime_9b4107b4","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:07:06+00:00","createdAt":"2026-04-17T18:17:29.373204+00:00"},{"runId":"20260411T224714_gemini-3.1-flash-live-preview_0b0a0eed","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":76.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:06:18+00:00","createdAt":"2026-04-17T18:17:31.557666+00:00"},{"runId":"20260411T223923_grok-realtime_4f33bf7f","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:05:26+00:00","createdAt":"2026-04-17T18:17:26.102995+00:00"},{"runId":"20260411T223729_gemini-2.5-flash-native-audio-preview-12-2025_704a69e5","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.3,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:05:22+00:00","createdAt":"2026-04-17T18:17:25.014211+00:00"},{"runId":"20260411T224813_gpt-realtime_ca8c3b34","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":89.57,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:05:06+00:00","createdAt":"2026-04-17T18:17:33.736243+00:00"},{"runId":"20260411T225739_gpt-realtime_64b1d846","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":14,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":53.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:04:35+00:00","createdAt":"2026-04-17T18:17:39.706521+00:00"},{"runId":"20260411T223628_ultravox-v0.7_07931865","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T06:01:25+00:00","createdAt":"2026-04-17T18:17:22.784961+00:00"},{"runId":"20260411T223055_ultravox-v0.7_41ceb26b","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.15,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:55:31+00:00","createdAt":"2026-04-17T18:17:19.441898+00:00"},{"runId":"20260411T222837_ultravox-v0.7_daf981e6","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.75,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:54:47+00:00","createdAt":"2026-04-17T18:17:15.989082+00:00"},{"runId":"20260411T214022_amazon.nova-2-sonic-v1_0_6a1c16b9","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":15,"kbGrounding":20,"ambiguityHandling":3,"stateTracking":12,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":58.43,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:09:25+00:00","createdAt":"2026-04-17T18:16:31.826789+00:00"},{"runId":"20260411T213050_amazon.nova-2-sonic-v1_0_4a042713","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":58.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:03:50+00:00","createdAt":"2026-04-17T18:16:20.770316+00:00"},{"runId":"20260411T213722_grok-realtime_62a60d25","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:02:35+00:00","createdAt":"2026-04-17T18:16:27.91766+00:00"},{"runId":"20260411T213330_amazon.nova-2-sonic-v1_0_3e1489ca","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":17,"kbGrounding":19,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":46.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:01:15+00:00","createdAt":"2026-04-17T18:16:24.543199+00:00"},{"runId":"20260411T213329_grok-realtime_8d268526","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T05:01:05+00:00","createdAt":"2026-04-17T18:16:24.006131+00:00"},{"runId":"20260411T213619_ultravox-v0.7_572797d8","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":75.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:58:40+00:00","createdAt":"2026-04-17T18:16:27.310605+00:00"},{"runId":"20260411T213908_gemini-3.1-flash-live-preview_3818c3cb","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":23,"ambiguityHandling":3,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:56:14+00:00","createdAt":"2026-04-17T18:16:30.363521+00:00"},{"runId":"20260411T213724_amazon.nova-2-sonic-v1_0_d279fbf4","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":8,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":28.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:55:50+00:00","createdAt":"2026-04-17T18:16:28.517842+00:00"},{"runId":"20260411T213750_gpt-realtime_312b9382","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:53:43+00:00","createdAt":"2026-04-17T18:16:28.91043+00:00"},{"runId":"20260411T213010_grok-realtime_28aa8907","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:53:42+00:00","createdAt":"2026-04-17T18:16:20.03169+00:00"},{"runId":"20260411T213455_gemini-2.5-flash-native-audio-preview-12-2025_e5276e46","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.3,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:53:37+00:00","createdAt":"2026-04-17T18:16:25.758663+00:00"},{"runId":"20260411T212828_ultravox-v0.7_2dda000e","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:52:50+00:00","createdAt":"2026-04-17T18:16:19.107429+00:00"},{"runId":"20260411T213149_gpt-realtime_f7b36d1b","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:52:33+00:00","createdAt":"2026-04-17T18:16:21.606667+00:00"},{"runId":"20260411T212339_ultravox-v0.7_b37620f1","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":74.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:52:17+00:00","createdAt":"2026-04-17T18:16:14.864765+00:00"},{"runId":"20260411T213508_gemini-3.1-flash-live-preview_01b75f34","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":7,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.14,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:52:10+00:00","createdAt":"2026-04-17T18:16:26.423364+00:00"},{"runId":"20260411T213235_ultravox-v0.7_29e09075","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:51:43+00:00","createdAt":"2026-04-17T18:16:23.441516+00:00"},{"runId":"20260411T212157_amazon.nova-2-sonic-v1_0_fd97417e","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":66.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:51:13+00:00","createdAt":"2026-04-17T18:16:12.263777+00:00"},{"runId":"20260411T212542_grok-realtime_d0096de8","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:50:43+00:00","createdAt":"2026-04-17T18:16:15.605154+00:00"},{"runId":"20260411T213333_gpt-realtime_9985354a","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":87.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:49:22+00:00","createdAt":"2026-04-17T18:16:25.084723+00:00"},{"runId":"20260411T212713_gpt-realtime_ca042649","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":60.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:49:16+00:00","createdAt":"2026-04-17T18:16:17.02018+00:00"},{"runId":"20260411T211448_amazon.nova-2-sonic-v1_0_d84dfd29","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:48:34+00:00","createdAt":"2026-04-17T18:16:04.404352+00:00"},{"runId":"20260411T212050_ultravox-v0.7_9da07ab2","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":67.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:47:51+00:00","createdAt":"2026-04-17T18:16:10.584782+00:00"},{"runId":"20260411T213155_gemini-2.5-flash-native-audio-preview-12-2025_91ad200e","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:47:49+00:00","createdAt":"2026-04-17T18:16:22.324187+00:00"},{"runId":"20260411T211857_amazon.nova-2-sonic-v1_0_b479bde0","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":57.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:47:01+00:00","createdAt":"2026-04-17T18:16:08.26613+00:00"},{"runId":"20260411T213217_gemini-3.1-flash-live-preview_56065a2a","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:46:48+00:00","createdAt":"2026-04-17T18:16:22.910202+00:00"},{"runId":"20260411T212821_gemini-3.1-flash-live-preview_ce77f948","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:46:17+00:00","createdAt":"2026-04-17T18:16:18.578277+00:00"},{"runId":"20260411T211851_grok-realtime_8a4665e2","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:44:42+00:00","createdAt":"2026-04-17T18:16:07.709659+00:00"},{"runId":"20260411T212121_grok-realtime_9cf9b8b8","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:43:19+00:00","createdAt":"2026-04-17T18:16:11.450056+00:00"},{"runId":"20260411T210905_amazon.nova-2-sonic-v1_0_e8cd077e","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":23,"ambiguityHandling":4,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":60.92,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:41:54+00:00","createdAt":"2026-04-17T18:15:57.300908+00:00"},{"runId":"20260411T212328_gemini-3.1-flash-live-preview_2cf5e474","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:41:53+00:00","createdAt":"2026-04-17T18:16:14.202559+00:00"},{"runId":"20260411T212226_gpt-realtime_0f9f9069","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":18,"kbGrounding":23,"ambiguityHandling":3,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":60.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:40:20+00:00","createdAt":"2026-04-17T18:16:13.010721+00:00"},{"runId":"20260411T212738_gemini-2.5-flash-native-audio-preview-12-2025_4f44694e","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":22,"ambiguityHandling":4,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":66.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:39:32+00:00","createdAt":"2026-04-17T18:16:17.850635+00:00"},{"runId":"20260411T211707_ultravox-v0.7_4eb24823","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:37:48+00:00","createdAt":"2026-04-17T18:16:07.159809+00:00"},{"runId":"20260411T211925_gpt-realtime_19f6c4b6","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":18,"kbGrounding":23,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":56.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:37:23+00:00","createdAt":"2026-04-17T18:16:08.927508+00:00"},{"runId":"20260411T212315_gemini-2.5-flash-native-audio-preview-12-2025_083d71e9","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:37:20+00:00","createdAt":"2026-04-17T18:16:13.596506+00:00"},{"runId":"20260411T210125_amazon.nova-2-sonic-v1_0_45dd520b","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":14,"kbGrounding":19,"ambiguityHandling":3,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":51.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:36:18+00:00","createdAt":"2026-04-17T18:15:49.301036+00:00"},{"runId":"20260411T211113_ultravox-v0.7_137b6cec","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":87.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:35:06+00:00","createdAt":"2026-04-17T18:15:59.589902+00:00"},{"runId":"20260411T212037_gemini-3.1-flash-live-preview_7bb1444a","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":3,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":66.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:34:53+00:00","createdAt":"2026-04-17T18:16:10.021802+00:00"},{"runId":"20260411T212706_amazon.nova-2-sonic-v1_0_2213aceb","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":3,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":31.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:33:32+00:00","createdAt":"2026-04-17T18:16:16.318297+00:00"},{"runId":"20260411T210947_gemini-2.5-flash-native-audio-preview-12-2025_340ff694","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":3,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:33:29+00:00","createdAt":"2026-04-17T18:15:58.406352+00:00"},{"runId":"20260411T211132_grok-realtime_8cfbf30b","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:33:27+00:00","createdAt":"2026-04-17T18:16:01.014274+00:00"},{"runId":"20260411T212003_gemini-2.5-flash-native-audio-preview-12-2025_55c5e7b2","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":23,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:32:15+00:00","createdAt":"2026-04-17T18:16:09.460614+00:00"},{"runId":"20260411T211012_gemini-3.1-flash-live-preview_3b8ae08e","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":82.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:31:54+00:00","createdAt":"2026-04-17T18:15:58.928837+00:00"},{"runId":"20260411T211650_gemini-3.1-flash-live-preview_c8f473b3","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:31:47+00:00","createdAt":"2026-04-17T18:16:06.568252+00:00"},{"runId":"20260411T211530_gemini-2.5-flash-native-audio-preview-12-2025_9324bab2","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:30:48+00:00","createdAt":"2026-04-17T18:16:05.724123+00:00"},{"runId":"20260411T210750_ultravox-v0.7_8b4132d5","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":89.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:30:09+00:00","createdAt":"2026-04-17T18:15:55.97861+00:00"},{"runId":"20260411T210318_amazon.nova-2-sonic-v1_0_d40a2aa3","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":15,"kbGrounding":18,"ambiguityHandling":4,"stateTracking":12,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":53.44,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:28:27+00:00","createdAt":"2026-04-17T18:15:53.118034+00:00"},{"runId":"20260411T211155_gpt-realtime_c8326cd1","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":21,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":75.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:28:20+00:00","createdAt":"2026-04-17T18:16:01.526163+00:00"},{"runId":"20260411T210014_grok-realtime_d8fdced9","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":5,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:27:37+00:00","createdAt":"2026-04-17T18:15:48.562603+00:00"},{"runId":"20260411T211235_gemini-3.1-flash-live-preview_a97839c1","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:27:12+00:00","createdAt":"2026-04-17T18:16:02.944888+00:00"},{"runId":"20260411T211159_gemini-2.5-flash-native-audio-preview-12-2025_555ac59f","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":90.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:27:04+00:00","createdAt":"2026-04-17T18:16:02.187207+00:00"},{"runId":"20260411T210930_gpt-realtime_cf2d69bf","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:25:41+00:00","createdAt":"2026-04-17T18:15:57.795863+00:00"},{"runId":"20260411T205940_ultravox-v0.7_3cf7103c","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:23:38+00:00","createdAt":"2026-04-17T18:15:47.969378+00:00"},{"runId":"20260411T210314_grok-realtime_15665100","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:23:27+00:00","createdAt":"2026-04-17T18:15:52.41829+00:00"},{"runId":"20260411T210321_gemini-2.5-flash-native-audio-preview-12-2025_c340d02d","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:23:14+00:00","createdAt":"2026-04-17T18:15:54.472314+00:00"},{"runId":"20260411T205425_amazon.nova-2-sonic-v1_0_6538b882","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":17,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":44.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:22:25+00:00","createdAt":"2026-04-17T18:15:45.04083+00:00"},{"runId":"20260411T210251_ultravox-v0.7_f858302a","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:21:56+00:00","createdAt":"2026-04-17T18:15:51.789404+00:00"},{"runId":"20260411T204829_amazon.nova-2-sonic-v1_0_ee5d5a00","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":22,"ambiguityHandling":3,"stateTracking":10,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":58.7,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:21:19+00:00","createdAt":"2026-04-17T18:15:41.945855+00:00"},{"runId":"20260411T210213_gpt-realtime_d2c65b79","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":62.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:20:35+00:00","createdAt":"2026-04-17T18:15:49.984567+00:00"},{"runId":"20260411T210238_gemini-3.1-flash-live-preview_4c7a328c","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:18:56+00:00","createdAt":"2026-04-17T18:15:51.262911+00:00"},{"runId":"20260411T210320_gpt-realtime_cbca3986","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":91.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:18:50+00:00","createdAt":"2026-04-17T18:15:53.776265+00:00"},{"runId":"20260411T205425_grok-realtime_4d28f8aa","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":83.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:17:05+00:00","createdAt":"2026-04-17T18:15:45.602804+00:00"},{"runId":"20260411T205249_ultravox-v0.7_aedbc4e4","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":9,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":60.13,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:16:48+00:00","createdAt":"2026-04-17T18:15:44.46782+00:00"},{"runId":"20260411T204750_grok-realtime_eb3dcb6b","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:15:29+00:00","createdAt":"2026-04-17T18:15:41.191856+00:00"},{"runId":"20260411T210222_gemini-2.5-flash-native-audio-preview-12-2025_d5eaa808","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":84.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:15:27+00:00","createdAt":"2026-04-17T18:15:50.739445+00:00"},{"runId":"20260411T204631_amazon.nova-2-sonic-v1_0_1836f4f7","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":18,"kbGrounding":19,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":54.05,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:14:46+00:00","createdAt":"2026-04-17T18:15:37.748928+00:00"},{"runId":"20260411T205723_gemini-3.1-flash-live-preview_368944da","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.54,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:12:39+00:00","createdAt":"2026-04-17T18:15:47.435414+00:00"},{"runId":"20260411T205527_gpt-realtime_467e24b0","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:12:33+00:00","createdAt":"2026-04-17T18:15:46.201031+00:00"},{"runId":"20260411T204629_grok-realtime_c5d184ce","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:11:58+00:00","createdAt":"2026-04-17T18:15:37.144437+00:00"},{"runId":"20260411T205711_gemini-2.5-flash-native-audio-preview-12-2025_0924246e","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:11:54+00:00","createdAt":"2026-04-17T18:15:46.773762+00:00"},{"runId":"20260411T204708_ultravox-v0.7_d6519650","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":74.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:11:30+00:00","createdAt":"2026-04-17T18:15:40.570166+00:00"},{"runId":"20260411T204512_amazon.nova-2-sonic-v1_0_9336e797","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:11:30+00:00","createdAt":"2026-04-17T18:15:33.73749+00:00"},{"runId":"20260411T203853_amazon.nova-2-sonic-v1_0_1cfba5ab","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":11,"kbGrounding":16,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":41.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:11:11+00:00","createdAt":"2026-04-17T18:15:29.865537+00:00"},{"runId":"20260411T204340_grok-realtime_9bdf6543","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":81.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:10:10+00:00","createdAt":"2026-04-17T18:15:33.215341+00:00"},{"runId":"20260411T205224_gemini-3.1-flash-live-preview_d1e20b4a","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":66.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:09:29+00:00","createdAt":"2026-04-17T18:15:43.767454+00:00"},{"runId":"20260411T204610_ultravox-v0.7_9f299826","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:09:04+00:00","createdAt":"2026-04-17T18:15:36.545553+00:00"},{"runId":"20260411T204328_ultravox-v0.7_ac1b45c7","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:07:54+00:00","createdAt":"2026-04-17T18:15:32.687+00:00"},{"runId":"20260411T204939_gpt-realtime_cc6fb211","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":20,"ambiguityHandling":4,"stateTracking":11,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":56.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:07:49+00:00","createdAt":"2026-04-17T18:15:42.508786+00:00"},{"runId":"20260411T204701_gemini-3.1-flash-live-preview_6b5c7da6","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":50.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:04:26+00:00","createdAt":"2026-04-17T18:15:39.849614+00:00"},{"runId":"20260411T204545_gemini-3.1-flash-live-preview_401f5063","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:03:20+00:00","createdAt":"2026-04-17T18:15:35.814784+00:00"},{"runId":"20260411T203824_ultravox-v0.7_e1473210","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:03:19+00:00","createdAt":"2026-04-17T18:15:28.643959+00:00"},{"runId":"20260411T204953_gemini-2.5-flash-native-audio-preview-12-2025_073262c7","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":73.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:03:13+00:00","createdAt":"2026-04-17T18:15:43.201036+00:00"},{"runId":"20260411T204534_gemini-2.5-flash-native-audio-preview-12-2025_c3f14313","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":5,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":79.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:02:36+00:00","createdAt":"2026-04-17T18:15:34.983541+00:00"},{"runId":"20260411T203842_grok-realtime_8fd38ab8","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":89.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:02:20+00:00","createdAt":"2026-04-17T18:15:29.238304+00:00"},{"runId":"20260411T204657_gemini-2.5-flash-native-audio-preview-12-2025_cebce59c","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":77.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:02:12+00:00","createdAt":"2026-04-17T18:15:39.099126+00:00"},{"runId":"20260411T204513_gpt-realtime_4321209c","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":85.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:01:24+00:00","createdAt":"2026-04-17T18:15:34.321532+00:00"},{"runId":"20260411T204143_gpt-realtime_e6128062","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":96.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T04:00:13+00:00","createdAt":"2026-04-17T18:15:30.396677+00:00"},{"runId":"20260411T204326_gemini-3.1-flash-live-preview_bb3fa3b1","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:59:38+00:00","createdAt":"2026-04-17T18:15:32.096137+00:00"},{"runId":"20260411T204323_gemini-2.5-flash-native-audio-preview-12-2025_695958b8","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":22,"ambiguityHandling":5,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":69.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:57:22+00:00","createdAt":"2026-04-17T18:15:31.06574+00:00"},{"runId":"20260411T203728_gpt-realtime_8951d55c","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":26,"kbGrounding":17,"ambiguityHandling":3,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":59.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:57:09+00:00","createdAt":"2026-04-17T18:15:26.657935+00:00"},{"runId":"20260411T203823_gemini-3.1-flash-live-preview_aa532d4d","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":85.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:54:24+00:00","createdAt":"2026-04-17T18:15:27.929329+00:00"},{"runId":"20260411T203749_gemini-2.5-flash-native-audio-preview-12-2025_7660b53e","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T03:52:22+00:00","createdAt":"2026-04-17T18:15:27.192563+00:00"},{"runId":"20260403T172430_glm-realtime-flash_b9e92d7a","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":7,"kbGrounding":13,"ambiguityHandling":0,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":19.31,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T07:54:51+00:00","createdAt":"2026-04-05T07:08:40.548985+00:00"},{"runId":"20260403T155049_glm-realtime-flash_786a339f","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":11,"kbGrounding":17,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":23.64,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:27:31+00:00","createdAt":"2026-04-05T07:08:36.259779+00:00"},{"runId":"20260403T144443_glm-realtime-flash_666df8b0","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":14,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":35.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:27:19+00:00","createdAt":"2026-04-05T07:08:33.71667+00:00"},{"runId":"20260403T172426_glm-realtime-flash_144a52d3","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":15,"ambiguityHandling":0,"stateTracking":6,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":26.14,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:26:20+00:00","createdAt":"2026-04-05T07:08:39.184826+00:00"},{"runId":"20260403T133910_glm-realtime-flash_4532b671","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":7,"kbGrounding":17,"ambiguityHandling":0,"stateTracking":7,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":24.39,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:23:09+00:00","createdAt":"2026-04-05T07:08:30.783751+00:00"},{"runId":"20260403T170840_glm-realtime-flash_1f38dd8a","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":10,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":7,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":25.44,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:21:37+00:00","createdAt":"2026-04-05T07:08:38.580107+00:00"},{"runId":"20260403T155812_glm-realtime-flash_a279b991","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":14,"kbGrounding":13,"ambiguityHandling":2,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":38.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:20:57+00:00","createdAt":"2026-04-05T07:08:37.134658+00:00"},{"runId":"20260403T170417_glm-realtime-flash_e861f566","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":15,"kbGrounding":22,"ambiguityHandling":0,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":33.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:20:27+00:00","createdAt":"2026-04-05T07:08:37.932737+00:00"},{"runId":"20260403T154740_glm-realtime-flash_144629a4","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":12,"kbGrounding":15,"ambiguityHandling":0,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":27.16,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:20:10+00:00","createdAt":"2026-04-05T07:08:35.327845+00:00"},{"runId":"20260403T140107_glm-realtime-flash_0cc98cbd","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":12,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":33.17,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:19:20+00:00","createdAt":"2026-04-05T07:08:31.67419+00:00"},{"runId":"20260403T141932_glm-realtime-flash_02427276","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":14,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":33.56,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:16:28+00:00","createdAt":"2026-04-05T07:08:32.256993+00:00"},{"runId":"20260403T152015_glm-realtime-flash_148c2250","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":13,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":32.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:16:26+00:00","createdAt":"2026-04-05T07:08:34.474273+00:00"},{"runId":"20260403T142000_glm-realtime-flash_87da1f3c","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":16,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":8,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":32.32,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:14:54+00:00","createdAt":"2026-04-05T07:08:32.840931+00:00"},{"runId":"20260330T214108_glm-realtime-air_3ed6fdc9","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":22,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":17.15,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:17:14+00:00","createdAt":"2026-04-01T07:15:32.424771+00:00"},{"runId":"20260330T213346_glm-realtime-air_87d957ae","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":0,"kbGrounding":4,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":2.58,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:15:50+00:00","createdAt":"2026-04-01T07:15:31.695595+00:00"},{"runId":"20260330T212124_glm-realtime-air_9dc76bf6","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":14.38,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:14:55+00:00","createdAt":"2026-04-01T07:15:29.990932+00:00"},{"runId":"20260330T213041_glm-realtime-flash_6144243a","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":7,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":10.64,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:14:13+00:00","createdAt":"2026-04-01T07:15:31.161239+00:00"},{"runId":"20260330T211515_glm-realtime-flash_f8df8e82","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":8,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":25.59,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:13:36+00:00","createdAt":"2026-04-01T07:15:28.864442+00:00"},{"runId":"20260330T212646_glm-realtime-flash_382a6d80","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":13,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":12.18,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:13:35+00:00","createdAt":"2026-04-01T07:15:30.539624+00:00"},{"runId":"20260330T211627_glm-realtime-air_6957ee0c","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":8,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":8.95,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:11:38+00:00","createdAt":"2026-04-01T07:15:29.440398+00:00"},{"runId":"20260330T211137_glm-realtime-flash_81c112c6","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":4,"kbGrounding":6,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":8.12,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:10:38+00:00","createdAt":"2026-04-01T07:15:27.777443+00:00"},{"runId":"20260330T211113_glm-realtime-flash_64bf6fd8","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":8,"kbGrounding":13,"ambiguityHandling":2,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":24.88,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:09:53+00:00","createdAt":"2026-04-01T07:15:27.236432+00:00"},{"runId":"20260330T210938_glm-realtime-air_bc28b79b","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":8,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":22.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:08:49+00:00","createdAt":"2026-04-01T07:15:26.692906+00:00"},{"runId":"20260330T211239_glm-realtime-air_6f20b37b","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":23,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":16.13,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:08:05+00:00","createdAt":"2026-04-01T07:15:28.326189+00:00"},{"runId":"20260330T210738_glm-realtime-air_ccc06727","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":5,"kbGrounding":20,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":18.63,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:08:05+00:00","createdAt":"2026-04-01T07:15:26.155266+00:00"},{"runId":"20260330T210308_glm-realtime-air_932aea4a","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":15.99,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:04:00+00:00","createdAt":"2026-04-01T07:15:25.523004+00:00"},{"runId":"20260330T205835_glm-realtime-flash_adc48d69","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":0,"kbGrounding":4,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":2.58,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:03:26+00:00","createdAt":"2026-04-01T07:15:25.008762+00:00"},{"runId":"20260330T205404_glm-realtime-air_0c36ea2e","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":4,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":21.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:02:46+00:00","createdAt":"2026-04-01T07:15:23.957347+00:00"},{"runId":"20260330T205825_glm-realtime-air_490cdb15","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":2,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":13.15,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:02:34+00:00","createdAt":"2026-04-01T07:15:24.475732+00:00"},{"runId":"20260330T205202_glm-realtime-flash_a75cfc0e","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":5,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":20.88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:02:18+00:00","createdAt":"2026-04-01T07:15:23.427735+00:00"},{"runId":"20260330T204932_glm-realtime-flash_64163faa","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":3,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":14.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:59:26+00:00","createdAt":"2026-04-01T07:15:22.335534+00:00"},{"runId":"20260330T204430_glm-realtime-flash_67646827","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":13,"instructionFollowing":4,"kbGrounding":11,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":14.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:57:45+00:00","createdAt":"2026-04-01T07:15:21.739108+00:00"},{"runId":"20260330T205149_glm-realtime-air_31b7e3b6","benchmark":"assistant_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":0,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":12.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:57:23+00:00","createdAt":"2026-04-01T07:15:22.901055+00:00"},{"runId":"20260330T203248_glm-realtime-flash_d0163ead","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":3,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":15.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:56:45+00:00","createdAt":"2026-04-01T07:15:20.607831+00:00"},{"runId":"20260330T203404_glm-realtime-flash_17e2f913","benchmark":"assistant_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":13,"instructionFollowing":1,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":12.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:56:23+00:00","createdAt":"2026-04-01T07:15:21.165185+00:00"},{"runId":"20260330T170213_gemini-3.1-flash-live-preview_973ee8c8","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":0,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":56.04,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T03:20:00+00:00","createdAt":"2026-03-31T04:45:35.928883+00:00"},{"runId":"20260330T170017_gemini-3.1-flash-live-preview_c85e7d2e","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":70.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T03:12:10+00:00","createdAt":"2026-03-31T02:24:38.374037+00:00"},{"runId":"20260329T232358_gemini-3.1-flash-live-preview_261bca21","benchmark":"assistant_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":17,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":54.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T07:54:44+00:00","createdAt":"2026-03-30T14:05:36.933634+00:00"},{"runId":"20260322T183512_ultravox-v0.7_694b54fe","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":78.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:23:05+00:00","createdAt":"2026-03-23T07:26:10.070623+00:00"},{"runId":"20260323T140030_ultravox-v0.7_d704ecff","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":5,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":68.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:22:52+00:00","createdAt":"2026-03-23T22:45:27.10363+00:00"},{"runId":"20260322T183515_ultravox-v0.7_604d2275","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":71.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:21:48+00:00","createdAt":"2026-03-23T06:42:44.832548+00:00"},{"runId":"20260323T135957_ultravox-v0.7_3212b2c3","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":31,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:17:14+00:00","createdAt":"2026-03-23T22:45:26.558889+00:00"},{"runId":"20260323T140149_grok-realtime_34cc28ee","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":16,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:12:56+00:00","createdAt":"2026-03-23T22:45:27.61539+00:00"},{"runId":"20260323T125749_grok-realtime_7ba78dfe","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":13,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":3,"stateTracking":22,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":76.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:11:45+00:00","createdAt":"2026-03-23T22:45:13.543996+00:00"},{"runId":"20260323T140151_grok-realtime_604dd089","benchmark":"assistant_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":63.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:11:20+00:00","createdAt":"2026-03-23T22:45:28.1371+00:00"},{"runId":"20260323T134835_gpt-realtime_a3af8a46","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":83.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:03:55+00:00","createdAt":"2026-03-23T22:45:24.846161+00:00"},{"runId":"20260326T103749_gpt-realtime_aec7259d","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":12,"toolUseDenom":13,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":92.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T01:02:03+00:00","createdAt":"2026-03-26T20:35:15.822306+00:00"},{"runId":"20260326T103752_gpt-realtime_07fcd7fc","benchmark":"assistant_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":85.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T00:59:46+00:00","createdAt":"2026-03-26T20:35:16.512644+00:00"},{"runId":"20260323T135103_gemini-2.5-flash-native-audio-preview-12-2025_ff2d38ff","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":13,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":72.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T00:57:14+00:00","createdAt":"2026-03-23T22:45:25.969709+00:00"},{"runId":"20260323T191751_gemini-2.5-flash-native-audio-preview-12-2025_a15d053d","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":13,"instructionFollowing":30,"kbGrounding":25,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":86.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T00:52:50+00:00","createdAt":"2026-03-24T02:41:26.54436+00:00"},{"runId":"20260323T134835_gemini-2.5-flash-native-audio-preview-12-2025_00ae13ef","benchmark":"assistant_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":13,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":80.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T00:51:59+00:00","createdAt":"2026-03-23T22:45:23.136052+00:00"},{"runId":"20260323T142046_amazon.nova-2-sonic-v1_0_8401ebec","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":13,"instructionFollowing":15,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":48.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T00:41:39+00:00","createdAt":"2026-03-23T22:45:29.952289+00:00"},{"runId":"20260323T142931_amazon.nova-2-sonic-v1_0_06271ec1","benchmark":"assistant_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":13,"instructionFollowing":19,"kbGrounding":22,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":52.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T00:41:30+00:00","createdAt":"2026-03-23T23:12:42.160477+00:00"},{"runId":"20260323T010916_ultravox-v0.7_cc0dd3a2","benchmark":"assistant_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":13,"instructionFollowing":20,"kbGrounding":20,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":5,"stateTrackingDenom":24},"passRate":40.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"gpt-5.2","judgedAt":"2026-03-23T18:55:49+00:00","createdAt":"2026-03-23T15:30:04.199302+00:00"},{"runId":"20260507T214322_gpt-realtime-2_cacef141","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":74,"ambiguityHandling":6,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":87.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:53:38+00:00","createdAt":"2026-05-07T23:02:17.886878+00:00"},{"runId":"20260507T214235_gpt-realtime-2_865b6e70","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":62,"kbGrounding":66,"ambiguityHandling":7,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:52:39+00:00","createdAt":"2026-05-07T23:02:13.541745+00:00"},{"runId":"20260507T214337_gpt-realtime-2_3753e67d","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":58,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.8,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:43:34+00:00","createdAt":"2026-05-07T23:02:18.655723+00:00"},{"runId":"20260507T214257_gpt-realtime-2_1fcc17df","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":73,"ambiguityHandling":8,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":34},"passRate":88.99,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:36:24+00:00","createdAt":"2026-05-07T23:02:15.883821+00:00"},{"runId":"20260507T214251_gpt-realtime-2_6d2e8181","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":56,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:34:28+00:00","createdAt":"2026-05-07T23:02:14.648867+00:00"},{"runId":"20260507T214326_gpt-realtime-2_3b84599d","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":74,"ambiguityHandling":8,"stateTracking":12,"ambiguityDenom":8,"stateTrackingDenom":34},"passRate":85.46,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:34:06+00:00","createdAt":"2026-05-07T23:02:18.259483+00:00"},{"runId":"20260507T214339_gpt-realtime-2_eabc4f5f","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":74,"ambiguityHandling":6,"stateTracking":32,"ambiguityDenom":6,"stateTrackingDenom":33},"passRate":95.83,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:33:18+00:00","createdAt":"2026-05-07T23:02:19.045536+00:00"},{"runId":"20260507T214310_gpt-realtime-2_d403a590","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":73,"ambiguityHandling":7,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":87.22,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:31:32+00:00","createdAt":"2026-05-07T23:02:17.126225+00:00"},{"runId":"20260507T214257_gpt-realtime-2_c87ed2dd","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":74,"ambiguityHandling":7,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":88.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:30:50+00:00","createdAt":"2026-05-07T23:02:16.312202+00:00"},{"runId":"20260507T214347_gpt-realtime-2_ebe1c15f","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":61,"kbGrounding":73,"ambiguityHandling":7,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.88,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:30:31+00:00","createdAt":"2026-05-07T23:02:19.350236+00:00"},{"runId":"20260507T214318_gpt-realtime-2_8729dbc1","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":68,"ambiguityHandling":5,"stateTracking":29,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.26,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:29:54+00:00","createdAt":"2026-05-07T23:02:17.496307+00:00"},{"runId":"20260507T214245_gpt-realtime-2_0ddc28cc","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":80.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:29:52+00:00","createdAt":"2026-05-07T23:02:14.456524+00:00"},{"runId":"20260507T214301_gpt-realtime-2_38217c5a","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.07,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:28:29+00:00","createdAt":"2026-05-07T23:02:16.717516+00:00"},{"runId":"20260507T214228_gpt-realtime-2_57dde48e","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":62,"kbGrounding":66,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:28:21+00:00","createdAt":"2026-05-07T23:02:13.161644+00:00"},{"runId":"20260507T214421_gpt-realtime-2_02679ee9","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":56,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.39,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:27:27+00:00","createdAt":"2026-05-07T23:02:19.928523+00:00"},{"runId":"20260507T214226_gpt-realtime-2_fa828dbb","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":55,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.41,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:27:24+00:00","createdAt":"2026-05-07T23:02:12.770474+00:00"},{"runId":"20260507T214255_gpt-realtime-2_186b7903","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":61,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":80.28,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:27:16+00:00","createdAt":"2026-05-07T23:02:15.497927+00:00"},{"runId":"20260507T214253_gpt-realtime-2_abc2a7e4","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":75,"ambiguityHandling":6,"stateTracking":30,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":89.08,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:26:35+00:00","createdAt":"2026-05-07T23:02:15.060285+00:00"},{"runId":"20260507T214220_gpt-realtime-2_6f6656a2","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":84.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:26:21+00:00","createdAt":"2026-05-07T23:02:12.388652+00:00"},{"runId":"20260507T214243_gpt-realtime-2_c543874f","benchmark":"conversation_bench","model":"gpt-realtime-2","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:25:03+00:00","createdAt":"2026-05-07T23:02:13.943837+00:00"},{"runId":"20260507T181531_grok-voice-think-fast-1.0_c467003b","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":58,"kbGrounding":71,"ambiguityHandling":4,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":74.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:19:41+00:00","createdAt":"2026-05-07T20:58:05.806294+00:00"},{"runId":"20260507T181514_grok-voice-think-fast-1.0_3b9b6d57","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":56,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.92,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:16:19+00:00","createdAt":"2026-05-07T20:58:03.428207+00:00"},{"runId":"20260507T181531_grok-voice-think-fast-1.0_62701b5c","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":55,"kbGrounding":68,"ambiguityHandling":5,"stateTracking":16,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":66.25,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:16:03+00:00","createdAt":"2026-05-07T20:58:05.382294+00:00"},{"runId":"20260507T181507_grok-voice-think-fast-1.0_a272a9c9","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":52,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:15:28+00:00","createdAt":"2026-05-07T20:58:03.031111+00:00"},{"runId":"20260507T181703_grok-voice-think-fast-1.0_ba852a60","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":72,"ambiguityHandling":7,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.42,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:14:39+00:00","createdAt":"2026-05-07T20:58:08.807436+00:00"},{"runId":"20260507T181515_grok-voice-think-fast-1.0_d0edcbe8","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":74,"ambiguityHandling":5,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:12:28+00:00","createdAt":"2026-05-07T20:58:03.816536+00:00"},{"runId":"20260507T181447_grok-voice-think-fast-1.0_ca119c23","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":62,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":35},"passRate":74.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:11:40+00:00","createdAt":"2026-05-07T20:58:01.864553+00:00"},{"runId":"20260507T181448_grok-voice-think-fast-1.0_b8cc1af0","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":74.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:10:41+00:00","createdAt":"2026-05-07T20:58:02.262754+00:00"},{"runId":"20260507T181542_grok-voice-think-fast-1.0_f06c5da8","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":62,"kbGrounding":62,"ambiguityHandling":8,"stateTracking":11,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.48,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:10:31+00:00","createdAt":"2026-05-07T20:58:07.38719+00:00"},{"runId":"20260507T181536_grok-voice-think-fast-1.0_ca9b8cd5","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":57,"kbGrounding":73,"ambiguityHandling":7,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":34},"passRate":78.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:10:27+00:00","createdAt":"2026-05-07T20:58:06.983325+00:00"},{"runId":"20260507T181532_grok-voice-think-fast-1.0_887beb00","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:10:06+00:00","createdAt":"2026-05-07T20:58:06.207618+00:00"},{"runId":"20260507T181518_grok-voice-think-fast-1.0_dc82b619","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":59,"kbGrounding":70,"ambiguityHandling":7,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.7,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:09:38+00:00","createdAt":"2026-05-07T20:58:04.588887+00:00"},{"runId":"20260507T181445_grok-voice-think-fast-1.0_d11adc13","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":84.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:04:42+00:00","createdAt":"2026-05-07T20:58:01.351799+00:00"},{"runId":"20260507T181459_grok-voice-think-fast-1.0_58de12ec","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":68,"kbGrounding":72,"ambiguityHandling":5,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:03:04+00:00","createdAt":"2026-05-07T20:58:02.646625+00:00"},{"runId":"20260507T181658_grok-voice-think-fast-1.0_364b6b33","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":73,"ambiguityHandling":8,"stateTracking":6,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.32,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:02:28+00:00","createdAt":"2026-05-07T20:58:08.439814+00:00"},{"runId":"20260507T181543_grok-voice-think-fast-1.0_e30c9b6d","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":49,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.11,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:59:53+00:00","createdAt":"2026-05-07T20:58:07.781048+00:00"},{"runId":"20260507T181532_grok-voice-think-fast-1.0_f453b724","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.54,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:59:36+00:00","createdAt":"2026-05-07T20:58:06.6064+00:00"},{"runId":"20260507T181518_grok-voice-think-fast-1.0_06fd0b6b","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":57,"kbGrounding":69,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":69.43,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:59:07+00:00","createdAt":"2026-05-07T20:58:04.200486+00:00"},{"runId":"20260507T181528_grok-voice-think-fast-1.0_d1ac103d","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":75,"ambiguityHandling":7,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.16,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:58:56+00:00","createdAt":"2026-05-07T20:58:04.992866+00:00"},{"runId":"20260507T181600_grok-voice-think-fast-1.0_5ece7d00","benchmark":"conversation_bench","model":"grok-voice-think-fast-1.0","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":32,"kbGrounding":72,"ambiguityHandling":3,"stateTracking":6,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":47.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:41:55+00:00","createdAt":"2026-05-07T20:58:07.916027+00:00"},{"runId":"20260412T051508_gpt-realtime_62c8844d","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":54,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":70.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:17:16+00:00","createdAt":"2026-04-17T18:23:29.365641+00:00"},{"runId":"20260412T051724_gpt-realtime_ee6b99a0","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":61,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:07:09+00:00","createdAt":"2026-04-17T18:23:33.390393+00:00"},{"runId":"20260412T034150_gpt-realtime_4de3b420","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":64,"ambiguityHandling":6,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.3,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:02:31+00:00","createdAt":"2026-04-17T18:22:40.218295+00:00"},{"runId":"20260412T035257_gpt-realtime_537e29ad","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":58,"kbGrounding":66,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":70.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:59:45+00:00","createdAt":"2026-04-17T18:22:44.045307+00:00"},{"runId":"20260412T040639_grok-realtime_0f535421","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":67,"ambiguityHandling":6,"stateTracking":11,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73.03,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:58:17+00:00","createdAt":"2026-04-17T18:22:51.145078+00:00"},{"runId":"20260412T050057_gemini-3.1-flash-live-preview_93ec13fb","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":62,"kbGrounding":70,"ambiguityHandling":5,"stateTracking":16,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.15,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:56:27+00:00","createdAt":"2026-04-17T18:23:20.094764+00:00"},{"runId":"20260412T033304_gemini-3.1-flash-live-preview_738b30af","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":68,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:50:18+00:00","createdAt":"2026-04-17T18:22:34.711828+00:00"},{"runId":"20260412T014925_gpt-realtime_135e2e68","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":64,"ambiguityHandling":6,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:43:45+00:00","createdAt":"2026-04-17T18:21:40.386262+00:00"},{"runId":"20260412T041153_gemini-3.1-flash-live-preview_4766fea1","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":0,"toolUseDenom":16,"instructionFollowing":0,"kbGrounding":75,"ambiguityHandling":4,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":30,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:38:19+00:00","createdAt":"2026-04-17T18:22:53.988313+00:00"},{"runId":"20260412T022918_gemini-3.1-flash-live-preview_4dabfb0a","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":63,"ambiguityHandling":3,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.1,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:36:55+00:00","createdAt":"2026-04-17T18:21:57.843278+00:00"},{"runId":"20260412T013429_gpt-realtime_1ae7f046","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":61,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:35:17+00:00","createdAt":"2026-04-17T18:21:27.627378+00:00"},{"runId":"20260412T012852_gemini-3.1-flash-live-preview_9cb0181f","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":68,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":86.04,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:32:43+00:00","createdAt":"2026-04-17T18:21:25.72019+00:00"},{"runId":"20260412T014720_grok-realtime_c0da1c1c","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":6,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":69,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:31:23+00:00","createdAt":"2026-04-17T18:21:34.910023+00:00"},{"runId":"20260412T010115_grok-realtime_ed648413","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":75,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:29:09+00:00","createdAt":"2026-04-17T18:21:15.246756+00:00"},{"runId":"20260412T005958_grok-realtime_e3d1264c","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":74,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:26:52+00:00","createdAt":"2026-04-17T18:21:11.536821+00:00"},{"runId":"20260412T005047_gpt-realtime_3dd33818","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":55,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:24:30+00:00","createdAt":"2026-04-17T18:21:02.095382+00:00"},{"runId":"20260412T005143_grok-realtime_a243b695","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":75,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:24:03+00:00","createdAt":"2026-04-17T18:21:04.664667+00:00"},{"runId":"20260412T005237_gpt-realtime_1cdd6824","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":67,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:21:02+00:00","createdAt":"2026-04-17T18:21:05.521329+00:00"},{"runId":"20260412T005239_gemini-2.5-flash-native-audio-preview-12-2025_5d65cf7f","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:16:28+00:00","createdAt":"2026-04-17T18:21:06.436859+00:00"},{"runId":"20260412T005827_gemini-2.5-flash-native-audio-preview-12-2025_ca2922fa","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":84.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:11:33+00:00","createdAt":"2026-04-17T18:21:09.855158+00:00"},{"runId":"20260412T001724_gemini-2.5-flash-native-audio-preview-12-2025_ca83b0c2","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:04:07+00:00","createdAt":"2026-04-17T18:20:52.097761+00:00"},{"runId":"20260414T000111_ultravox-v0.7_11c1a7fb","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":4,"toolUseDenom":16,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":4,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":33.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:32:33+00:00","createdAt":"2026-04-17T18:23:46.324521+00:00"},{"runId":"20260414T000101_ultravox-v0.7_2175ccfe","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":31,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":31.3,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T07:30:11+00:00","createdAt":"2026-04-17T18:23:45.689432+00:00"},{"runId":"20260413T230101_ultravox-v0.7_2277d2ed","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":25.87,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T06:25:03+00:00","createdAt":"2026-04-17T18:23:45.094058+00:00"},{"runId":"20260412T040253_gemini-3.1-flash-live-preview_120cdc06","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":2,"toolUseDenom":16,"instructionFollowing":49,"kbGrounding":59,"ambiguityHandling":5,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":49.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:39:18+00:00","createdAt":"2026-04-17T18:22:50.203694+00:00"},{"runId":"20260412T050111_grok-realtime_0e65f9a0","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":61,"kbGrounding":58,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":63.21,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:37:53+00:00","createdAt":"2026-04-17T18:23:21.086158+00:00"},{"runId":"20260412T042930_grok-realtime_fb6a22ca","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":4,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":63,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":67.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:33:02+00:00","createdAt":"2026-04-17T18:23:05.753272+00:00"},{"runId":"20260412T013702_gemini-3.1-flash-live-preview_b47bdd99","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":64,"ambiguityHandling":5,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.3,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:30:53+00:00","createdAt":"2026-04-17T18:21:29.531558+00:00"},{"runId":"20260412T032425_grok-realtime_eba2ff6b","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":31,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.44,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:27:04+00:00","createdAt":"2026-04-17T18:22:27.589255+00:00"},{"runId":"20260412T024850_grok-realtime_dc62ec21","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":29,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.49,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:23:40+00:00","createdAt":"2026-04-17T18:22:12.944922+00:00"},{"runId":"20260412T025836_grok-realtime_3f946642","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":75,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.2,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:20:41+00:00","createdAt":"2026-04-17T18:22:16.657991+00:00"},{"runId":"20260412T005653_gpt-realtime_bcde19f2","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":29,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:11:38+00:00","createdAt":"2026-04-17T18:21:08.940847+00:00"},{"runId":"20260412T003922_grok-realtime_a0a1514c","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:09:04+00:00","createdAt":"2026-04-17T18:20:57.43266+00:00"},{"runId":"20260412T004405_gemini-2.5-flash-native-audio-preview-12-2025_f79f2d6d","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:04:08+00:00","createdAt":"2026-04-17T18:20:59.55938+00:00"},{"runId":"20260411T235815_gemini-3.1-flash-live-preview_24fd3643","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":68,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:00:20+00:00","createdAt":"2026-04-17T18:20:34.339672+00:00"},{"runId":"20260413T125019_ultravox-v0.7_92bfbed7","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":70,"ambiguityHandling":4,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T20:55:22+00:00","createdAt":"2026-04-17T18:23:44.156385+00:00"},{"runId":"20260412T054204_grok-realtime_26339d1d","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.21,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:36:32+00:00","createdAt":"2026-04-17T18:23:43.289839+00:00"},{"runId":"20260412T052840_gemini-2.5-flash-native-audio-preview-12-2025_8340bc58","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":9,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":66,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:28:16+00:00","createdAt":"2026-04-17T18:23:37.924292+00:00"},{"runId":"20260412T053522_gpt-realtime_23d9d834","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":63,"ambiguityHandling":6,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:27:31+00:00","createdAt":"2026-04-17T18:23:40.620481+00:00"},{"runId":"20260412T053714_gemini-3.1-flash-live-preview_2b7ab7f6","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":46,"kbGrounding":59,"ambiguityHandling":4,"stateTracking":11,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":59.67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:26:23+00:00","createdAt":"2026-04-17T18:23:42.365422+00:00"},{"runId":"20260412T052756_gpt-realtime_96e2bbd0","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":61,"ambiguityHandling":4,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.25,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:23:53+00:00","createdAt":"2026-04-17T18:23:37.040316+00:00"},{"runId":"20260412T053441_grok-realtime_9f2c6738","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":68,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.44,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:23:18+00:00","createdAt":"2026-04-17T18:23:39.694697+00:00"},{"runId":"20260412T052708_grok-realtime_00aeae1b","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":56,"kbGrounding":64,"ambiguityHandling":6,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":62.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:22:41+00:00","createdAt":"2026-04-17T18:23:36.105009+00:00"},{"runId":"20260412T053601_gemini-2.5-flash-native-audio-preview-12-2025_5d1c7bf0","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":57,"kbGrounding":55,"ambiguityHandling":5,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:13:34+00:00","createdAt":"2026-04-17T18:23:41.501395+00:00"},{"runId":"20260412T051725_gemini-2.5-flash-native-audio-preview-12-2025_8698377d","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":68,"kbGrounding":66,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":80.96,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:13:01+00:00","createdAt":"2026-04-17T18:23:34.296376+00:00"},{"runId":"20260412T051703_grok-realtime_173a2eb7","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:10:01+00:00","createdAt":"2026-04-17T18:23:32.383788+00:00"},{"runId":"20260412T051430_grok-realtime_5c2e9075","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":58,"kbGrounding":51,"ambiguityHandling":6,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":57.67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:09:32+00:00","createdAt":"2026-04-17T18:23:28.460414+00:00"},{"runId":"20260412T051558_gemini-3.1-flash-live-preview_20ede26b","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":46,"kbGrounding":61,"ambiguityHandling":3,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":57.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:05:07+00:00","createdAt":"2026-04-17T18:23:31.35392+00:00"},{"runId":"20260412T050944_gpt-realtime_302c4d53","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":67,"ambiguityHandling":3,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":69.74,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:02:39+00:00","createdAt":"2026-04-17T18:23:25.702136+00:00"},{"runId":"20260412T051135_gemini-3.1-flash-live-preview_2d2ed889","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":47,"kbGrounding":55,"ambiguityHandling":4,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":61.22,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:02:31+00:00","createdAt":"2026-04-17T18:23:27.557901+00:00"},{"runId":"20260412T050914_grok-realtime_ccef2af4","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":6,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":70,"ambiguityHandling":5,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:58:17+00:00","createdAt":"2026-04-17T18:23:24.721957+00:00"},{"runId":"20260412T051539_gemini-2.5-flash-native-audio-preview-12-2025_8204bbc5","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:55:16+00:00","createdAt":"2026-04-17T18:23:30.375448+00:00"},{"runId":"20260412T050444_gpt-realtime_8123afe6","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:53:05+00:00","createdAt":"2026-04-17T18:23:22.120743+00:00"},{"runId":"20260412T050951_gemini-2.5-flash-native-audio-preview-12-2025_04bd457c","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":59,"ambiguityHandling":6,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.46,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:52:52+00:00","createdAt":"2026-04-17T18:23:26.644891+00:00"},{"runId":"20260412T045131_gpt-realtime_c3582cc0","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.82,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:50:15+00:00","createdAt":"2026-04-17T18:23:14.157911+00:00"},{"runId":"20260412T045533_gpt-realtime_5bffc114","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":61,"ambiguityHandling":7,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:49:33+00:00","createdAt":"2026-04-17T18:23:18.065379+00:00"},{"runId":"20260412T045439_grok-realtime_44a5d469","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":71,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:49:32+00:00","createdAt":"2026-04-17T18:23:17.100637+00:00"},{"runId":"20260412T050712_gemini-2.5-flash-native-audio-preview-12-2025_554da440","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":72,"ambiguityHandling":7,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:46:52+00:00","createdAt":"2026-04-17T18:23:23.041107+00:00"},{"runId":"20260412T044711_grok-realtime_375ef059","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":62,"ambiguityHandling":5,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":64.25,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:43:06+00:00","createdAt":"2026-04-17T18:23:13.191287+00:00"},{"runId":"20260412T052916_gemini-3.1-flash-live-preview_ccee2619","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":2,"toolUseDenom":16,"instructionFollowing":12,"kbGrounding":75,"ambiguityHandling":2,"stateTracking":1,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":31.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:42:02+00:00","createdAt":"2026-04-17T18:23:38.795258+00:00"},{"runId":"20260412T043737_grok-realtime_515cdd95","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":65,"ambiguityHandling":6,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":64.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:37:44+00:00","createdAt":"2026-04-17T18:23:09.343057+00:00"},{"runId":"20260412T051752_gemini-3.1-flash-live-preview_767b6b55","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":2,"toolUseDenom":16,"instructionFollowing":12,"kbGrounding":75,"ambiguityHandling":3,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":33.2,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:35:59+00:00","createdAt":"2026-04-17T18:23:35.217532+00:00"},{"runId":"20260412T045211_gemini-2.5-flash-native-audio-preview-12-2025_e875a09e","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":57,"kbGrounding":65,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":69.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:35:10+00:00","createdAt":"2026-04-17T18:23:15.145181+00:00"},{"runId":"20260412T045705_gemini-2.5-flash-native-audio-preview-12-2025_26a016b1","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":47,"kbGrounding":70,"ambiguityHandling":4,"stateTracking":5,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":60.48,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:34:39+00:00","createdAt":"2026-04-17T18:23:19.085602+00:00"},{"runId":"20260412T044103_gemini-2.5-flash-native-audio-preview-12-2025_06f4467b","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":72,"ambiguityHandling":4,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.1,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:30:03+00:00","createdAt":"2026-04-17T18:23:11.323986+00:00"},{"runId":"20260412T043405_gemini-3.1-flash-live-preview_122aa3ee","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":46,"kbGrounding":58,"ambiguityHandling":3,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":57.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:28:39+00:00","createdAt":"2026-04-17T18:23:08.463003+00:00"},{"runId":"20260412T043919_gpt-realtime_e2ee334e","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":63,"ambiguityHandling":3,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.25,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:27:06+00:00","createdAt":"2026-04-17T18:23:10.321454+00:00"},{"runId":"20260412T050745_gemini-3.1-flash-live-preview_e719a5ef","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":11,"kbGrounding":74,"ambiguityHandling":3,"stateTracking":12,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":53.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:22:26+00:00","createdAt":"2026-04-17T18:23:23.900192+00:00"},{"runId":"20260412T042443_gemini-3.1-flash-live-preview_d1ec3af3","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":41,"kbGrounding":59,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":50.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:17:24+00:00","createdAt":"2026-04-17T18:23:04.775967+00:00"},{"runId":"20260412T043002_gpt-realtime_3a8dfeea","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":63,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:17:12+00:00","createdAt":"2026-04-17T18:23:06.705027+00:00"},{"runId":"20260412T042051_grok-realtime_1cb35a46","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":64,"ambiguityHandling":7,"stateTracking":17,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:16:51+00:00","createdAt":"2026-04-17T18:23:02.216428+00:00"},{"runId":"20260412T042032_gemini-3.1-flash-live-preview_e36232a0","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":44,"kbGrounding":51,"ambiguityHandling":3,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":48.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:15:56+00:00","createdAt":"2026-04-17T18:23:01.287901+00:00"},{"runId":"20260412T043321_gemini-2.5-flash-native-audio-preview-12-2025_13323658","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:15:38+00:00","createdAt":"2026-04-17T18:23:07.545643+00:00"},{"runId":"20260412T041915_gpt-realtime_65f5f463","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":64,"ambiguityHandling":5,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:14:56+00:00","createdAt":"2026-04-17T18:22:59.561291+00:00"},{"runId":"20260412T042305_gpt-realtime_b9224560","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":68,"kbGrounding":58,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:14:19+00:00","createdAt":"2026-04-17T18:23:03.098563+00:00"},{"runId":"20260412T042346_gemini-2.5-flash-native-audio-preview-12-2025_1f3900e9","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":67,"ambiguityHandling":6,"stateTracking":11,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:11:34+00:00","createdAt":"2026-04-17T18:23:03.951945+00:00"},{"runId":"20260412T041929_gemini-2.5-flash-native-audio-preview-12-2025_e85610a2","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":73,"ambiguityHandling":4,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":69.11,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:09:50+00:00","createdAt":"2026-04-17T18:23:00.45276+00:00"},{"runId":"20260412T041523_gpt-realtime_448dbdbd","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":58,"ambiguityHandling":6,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.74,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:09:32+00:00","createdAt":"2026-04-17T18:22:55.959281+00:00"},{"runId":"20260412T045228_gemini-3.1-flash-live-preview_8a0cfabd","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":1,"toolUseDenom":16,"instructionFollowing":10,"kbGrounding":12,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":7.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:09:02+00:00","createdAt":"2026-04-17T18:23:16.145406+00:00"},{"runId":"20260412T041905_grok-realtime_b057b17d","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:07:44+00:00","createdAt":"2026-04-17T18:22:58.70966+00:00"},{"runId":"20260412T044426_gemini-3.1-flash-live-preview_6c605f46","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":1,"toolUseDenom":16,"instructionFollowing":20,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":15.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:07:11+00:00","createdAt":"2026-04-17T18:23:12.264883+00:00"},{"runId":"20260412T041450_grok-realtime_70fc8a12","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":61,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:04:32+00:00","createdAt":"2026-04-17T18:22:54.907874+00:00"},{"runId":"20260412T041527_gemini-2.5-flash-native-audio-preview-12-2025_91acffc2","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":72,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.51,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:01:00+00:00","createdAt":"2026-04-17T18:22:56.857345+00:00"},{"runId":"20260412T035754_grok-realtime_c53fcc07","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:57:03+00:00","createdAt":"2026-04-17T18:22:47.338592+00:00"},{"runId":"20260412T040659_gpt-realtime_ed2ce2f2","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":64,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:55:22+00:00","createdAt":"2026-04-17T18:22:52.09117+00:00"},{"runId":"20260412T040703_gemini-2.5-flash-native-audio-preview-12-2025_18a8c288","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":70,"ambiguityHandling":4,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.29,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:53:40+00:00","createdAt":"2026-04-17T18:22:53.066466+00:00"},{"runId":"20260412T035318_gemini-2.5-flash-native-audio-preview-12-2025_bde12fd5","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":55,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.13,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:52:09+00:00","createdAt":"2026-04-17T18:22:44.979252+00:00"},{"runId":"20260412T040036_gpt-realtime_5f4c3a5b","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":53,"ambiguityHandling":3,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:51:19+00:00","createdAt":"2026-04-17T18:22:48.39645+00:00"},{"runId":"20260412T035448_gemini-3.1-flash-live-preview_51763981","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:44:31+00:00","createdAt":"2026-04-17T18:22:46.212122+00:00"},{"runId":"20260412T040141_gemini-2.5-flash-native-audio-preview-12-2025_8ddc4797","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":9,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73.77,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:44:24+00:00","createdAt":"2026-04-17T18:22:49.39244+00:00"},{"runId":"20260412T035240_grok-realtime_dbace128","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":64,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:41:01+00:00","createdAt":"2026-04-17T18:22:43.120514+00:00"},{"runId":"20260412T034813_gemini-3.1-flash-live-preview_482c67c4","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":49,"kbGrounding":63,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":65.25,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:37:36+00:00","createdAt":"2026-04-17T18:22:42.173245+00:00"},{"runId":"20260412T034546_gemini-2.5-flash-native-audio-preview-12-2025_8e41e048","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":59,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.56,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:29:29+00:00","createdAt":"2026-04-17T18:22:41.204621+00:00"},{"runId":"20260412T033615_gpt-realtime_5fafcebd","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":59,"kbGrounding":52,"ambiguityHandling":6,"stateTracking":17,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:27:43+00:00","createdAt":"2026-04-17T18:22:36.59809+00:00"},{"runId":"20260412T033908_gemini-3.1-flash-live-preview_5adf4861","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":61,"ambiguityHandling":5,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.74,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:24:41+00:00","createdAt":"2026-04-17T18:22:38.34617+00:00"},{"runId":"20260412T041750_gemini-3.1-flash-live-preview_9eca7c17","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":0,"toolUseDenom":16,"instructionFollowing":0,"kbGrounding":75,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":20,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:20:40+00:00","createdAt":"2026-04-17T18:22:57.737624+00:00"},{"runId":"20260412T031645_gpt-realtime_66d1a007","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":68,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.29,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:19:03+00:00","createdAt":"2026-04-17T18:22:24.637534+00:00"},{"runId":"20260412T033620_gemini-2.5-flash-native-audio-preview-12-2025_9bc7dbdb","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":75,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":17,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:15:11+00:00","createdAt":"2026-04-17T18:22:37.480459+00:00"},{"runId":"20260412T032712_gemini-2.5-flash-native-audio-preview-12-2025_1cc28429","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":71,"ambiguityHandling":4,"stateTracking":16,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:06:27+00:00","createdAt":"2026-04-17T18:22:29.686634+00:00"},{"runId":"20260412T033001_gemini-2.5-flash-native-audio-preview-12-2025_e9bf1419","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":64,"ambiguityHandling":5,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T11:01:39+00:00","createdAt":"2026-04-17T18:22:33.718738+00:00"},{"runId":"20260412T014119_gpt-realtime_042e8810","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":65,"ambiguityHandling":4,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:39:07+00:00","createdAt":"2026-04-17T18:21:31.869352+00:00"},{"runId":"20260412T013354_grok-realtime_37079d41","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:35:21+00:00","createdAt":"2026-04-17T18:21:26.67666+00:00"},{"runId":"20260412T013958_grok-realtime_ca95de87","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":69,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:34:37+00:00","createdAt":"2026-04-17T18:21:30.559835+00:00"},{"runId":"20260412T014501_gemini-3.1-flash-live-preview_a1d247e1","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":67,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:33:03+00:00","createdAt":"2026-04-17T18:21:34.108241+00:00"},{"runId":"20260412T012459_gpt-realtime_3c14b1e9","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":51,"ambiguityHandling":4,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":67.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:26:55+00:00","createdAt":"2026-04-17T18:21:23.710677+00:00"},{"runId":"20260412T014304_gemini-2.5-flash-native-audio-preview-12-2025_b9483aac","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":70,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:22:41+00:00","createdAt":"2026-04-17T18:21:32.960525+00:00"},{"runId":"20260412T013552_gemini-2.5-flash-native-audio-preview-12-2025_0f7562b6","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":61,"ambiguityHandling":5,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:14:40+00:00","createdAt":"2026-04-17T18:21:28.563865+00:00"},{"runId":"20260412T011410_gemini-3.1-flash-live-preview_118c5a6c","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":63,"ambiguityHandling":4,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":74.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:12:13+00:00","createdAt":"2026-04-17T18:21:21.961423+00:00"},{"runId":"20260412T011903_grok-realtime_71d7732e","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":75,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.43,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:07:07+00:00","createdAt":"2026-04-17T18:21:22.846951+00:00"},{"runId":"20260412T012753_gemini-2.5-flash-native-audio-preview-12-2025_ebe4dc08","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":66,"ambiguityHandling":4,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.63,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T09:05:48+00:00","createdAt":"2026-04-17T18:21:24.713186+00:00"},{"runId":"20260412T010841_grok-realtime_c38066a1","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":69,"ambiguityHandling":7,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:58:41+00:00","createdAt":"2026-04-17T18:21:19.276338+00:00"},{"runId":"20260412T010933_gpt-realtime_91f197c7","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:55:21+00:00","createdAt":"2026-04-17T18:21:20.245794+00:00"},{"runId":"20260412T010033_gpt-realtime_75424df3","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":69,"ambiguityHandling":5,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:55:11+00:00","createdAt":"2026-04-17T18:21:12.447193+00:00"},{"runId":"20260412T010101_gemini-3.1-flash-live-preview_cb7255eb","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":84.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:51:48+00:00","createdAt":"2026-04-17T18:21:14.256624+00:00"},{"runId":"20260412T010234_gpt-realtime_6f14f3ac","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":9,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:50:17+00:00","createdAt":"2026-04-17T18:21:16.329736+00:00"},{"runId":"20260412T010429_gemini-2.5-flash-native-audio-preview-12-2025_91194d22","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":72,"ambiguityHandling":7,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":87.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:49:13+00:00","createdAt":"2026-04-17T18:21:17.306109+00:00"},{"runId":"20260412T011041_gemini-2.5-flash-native-audio-preview-12-2025_aa03977d","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":69.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:47:39+00:00","createdAt":"2026-04-17T18:21:21.13767+00:00"},{"runId":"20260412T005418_grok-realtime_ed113225","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:47:38+00:00","createdAt":"2026-04-17T18:21:08.116+00:00"},{"runId":"20260412T010548_gemini-3.1-flash-live-preview_9b331f60","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":65,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:47:20+00:00","createdAt":"2026-04-17T18:21:18.329217+00:00"},{"runId":"20260412T004842_grok-realtime_2191eb7b","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":73,"ambiguityHandling":6,"stateTracking":31,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":86.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:43:02+00:00","createdAt":"2026-04-17T18:21:01.256511+00:00"},{"runId":"20260412T010054_gemini-2.5-flash-native-audio-preview-12-2025_88274311","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":71,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:41:07+00:00","createdAt":"2026-04-17T18:21:13.380366+00:00"},{"runId":"20260412T005909_gemini-3.1-flash-live-preview_3751abec","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":63,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":84.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:39:57+00:00","createdAt":"2026-04-17T18:21:10.674463+00:00"},{"runId":"20260412T005125_gemini-3.1-flash-live-preview_4c8c58a9","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":62,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:38:06+00:00","createdAt":"2026-04-17T18:21:03.784281+00:00"},{"runId":"20260412T005054_gemini-2.5-flash-native-audio-preview-12-2025_8d9e089f","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":71,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:37:00+00:00","createdAt":"2026-04-17T18:21:02.93863+00:00"},{"runId":"20260412T005408_gemini-3.1-flash-live-preview_b73357c0","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":67,"ambiguityHandling":6,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:33:53+00:00","createdAt":"2026-04-17T18:21:07.268397+00:00"},{"runId":"20260412T004656_gemini-3.1-flash-live-preview_f5990cd0","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":71,"ambiguityHandling":5,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:28:50+00:00","createdAt":"2026-04-17T18:21:00.40771+00:00"},{"runId":"20260412T004027_gpt-realtime_22fb127d","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":75,"kbGrounding":69,"ambiguityHandling":7,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":87.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:27:52+00:00","createdAt":"2026-04-17T18:20:58.296382+00:00"},{"runId":"20260412T002203_grok-realtime_8ac4fce4","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":68,"ambiguityHandling":7,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:16:33+00:00","createdAt":"2026-04-17T18:20:53.942642+00:00"},{"runId":"20260412T002236_gpt-realtime_28eead8d","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":9,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":74.04,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:09:20+00:00","createdAt":"2026-04-17T18:20:54.844015+00:00"},{"runId":"20260412T001550_gpt-realtime_0f8fd3f4","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":62,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:08:39+00:00","createdAt":"2026-04-17T18:20:51.225434+00:00"},{"runId":"20260412T001436_grok-realtime_1695603c","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":72,"ambiguityHandling":5,"stateTracking":33,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:08:13+00:00","createdAt":"2026-04-17T18:20:50.380276+00:00"},{"runId":"20260412T001103_grok-realtime_cb812acf","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":6,"toolUseDenom":16,"instructionFollowing":75,"kbGrounding":74,"ambiguityHandling":7,"stateTracking":28,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.7,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:04:28+00:00","createdAt":"2026-04-17T18:20:46.860239+00:00"},{"runId":"20260412T000415_grok-realtime_5d4ba6b1","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":27,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:02:22+00:00","createdAt":"2026-04-17T18:20:39.405714+00:00"},{"runId":"20260412T001116_gpt-realtime_2e1e83de","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":61,"ambiguityHandling":6,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:01:14+00:00","createdAt":"2026-04-17T18:20:47.708325+00:00"},{"runId":"20260412T002314_gemini-3.1-flash-live-preview_1672c57a","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":65,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:01:09+00:00","createdAt":"2026-04-17T18:20:56.549528+00:00"},{"runId":"20260412T000603_grok-realtime_a8714539","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":71,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.63,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:01:00+00:00","createdAt":"2026-04-17T18:20:43.069882+00:00"},{"runId":"20260412T001747_gemini-3.1-flash-live-preview_4a990099","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T08:00:22+00:00","createdAt":"2026-04-17T18:20:53.012768+00:00"},{"runId":"20260412T002253_gemini-2.5-flash-native-audio-preview-12-2025_20a39c1b","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":66,"ambiguityHandling":5,"stateTracking":9,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:59:46+00:00","createdAt":"2026-04-17T18:20:55.727205+00:00"},{"runId":"20260412T000447_gpt-realtime_05835269","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":60,"ambiguityHandling":5,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":74.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:59:08+00:00","createdAt":"2026-04-17T18:20:40.28378+00:00"},{"runId":"20260412T000924_gemini-3.1-flash-live-preview_dbc50210","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":80.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:58:26+00:00","createdAt":"2026-04-17T18:20:45.966649+00:00"},{"runId":"20260412T000750_gpt-realtime_d463fca8","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":57,"ambiguityHandling":4,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":70.79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:56:41+00:00","createdAt":"2026-04-17T18:20:44.087146+00:00"},{"runId":"20260412T001408_gemini-3.1-flash-live-preview_84fab47c","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":71,"kbGrounding":64,"ambiguityHandling":5,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:54:07+00:00","createdAt":"2026-04-17T18:20:49.500428+00:00"},{"runId":"20260412T000245_gpt-realtime_7d1d4ab8","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":68,"ambiguityHandling":6,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:54:07+00:00","createdAt":"2026-04-17T18:20:37.660518+00:00"},{"runId":"20260412T000354_gemini-3.1-flash-live-preview_fed23caf","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":70,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:52:36+00:00","createdAt":"2026-04-17T18:20:38.56655+00:00"},{"runId":"20260412T000133_grok-realtime_fced5f0e","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":73,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":23,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":83.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:52:27+00:00","createdAt":"2026-04-17T18:20:35.437701+00:00"},{"runId":"20260412T001222_gemini-2.5-flash-native-audio-preview-12-2025_33f674a1","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":71,"ambiguityHandling":7,"stateTracking":15,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":82.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:51:23+00:00","createdAt":"2026-04-17T18:20:48.593021+00:00"},{"runId":"20260411T235731_gpt-realtime_4508df82","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":66,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":80.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:50:46+00:00","createdAt":"2026-04-17T18:20:31.795705+00:00"},{"runId":"20260412T000245_gemini-2.5-flash-native-audio-preview-12-2025_7a573282","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":74,"kbGrounding":66,"ambiguityHandling":5,"stateTracking":17,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:50:42+00:00","createdAt":"2026-04-17T18:20:36.609897+00:00"},{"runId":"20260412T000754_gemini-2.5-flash-native-audio-preview-12-2025_192b933b","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":72,"ambiguityHandling":6,"stateTracking":9,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:48:31+00:00","createdAt":"2026-04-17T18:20:45.022585+00:00"},{"runId":"20260412T000517_gemini-3.1-flash-live-preview_180c6ef2","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":68,"kbGrounding":70,"ambiguityHandling":7,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.17,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:46:55+00:00","createdAt":"2026-04-17T18:20:42.171642+00:00"},{"runId":"20260412T000516_gemini-2.5-flash-native-audio-preview-12-2025_4cad7845","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":72,"kbGrounding":71,"ambiguityHandling":5,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:44:04+00:00","createdAt":"2026-04-17T18:20:41.244864+00:00"},{"runId":"20260411T235746_gemini-2.5-flash-native-audio-preview-12-2025_91e1fc55","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":75,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T07:39:10+00:00","createdAt":"2026-04-17T18:20:33.171727+00:00"},{"runId":"20260403T175432_glm-realtime-flash_80197c52","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":12,"kbGrounding":47,"ambiguityHandling":2,"stateTracking":1,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":33.84,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T08:11:01+00:00","createdAt":"2026-04-05T07:09:02.009115+00:00"},{"runId":"20260403T162550_glm-realtime-flash_9903bc2f","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":3,"toolUseDenom":16,"instructionFollowing":20,"kbGrounding":51,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":28.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T08:07:23+00:00","createdAt":"2026-04-05T07:09:00.040967+00:00"},{"runId":"20260403T174148_glm-realtime-flash_9e5f3586","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":28,"kbGrounding":35,"ambiguityHandling":3,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":42.37,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T08:01:47+00:00","createdAt":"2026-04-05T07:09:01.047481+00:00"},{"runId":"20260403T161056_glm-realtime-flash_02ac13ea","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":21,"kbGrounding":25,"ambiguityHandling":4,"stateTracking":4,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":34.69,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T08:00:49+00:00","createdAt":"2026-04-05T07:08:58.136234+00:00"},{"runId":"20260403T135539_glm-realtime-flash_77a2f6c1","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":15,"kbGrounding":23,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":24.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:55:52+00:00","createdAt":"2026-04-05T07:08:50.040276+00:00"},{"runId":"20260403T145928_glm-realtime-flash_e48e0551","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":8,"toolUseDenom":16,"instructionFollowing":21,"kbGrounding":37,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":34.1,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:54:57+00:00","createdAt":"2026-04-05T07:08:53.840554+00:00"},{"runId":"20260403T153426_glm-realtime-flash_6c054c55","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":27,"kbGrounding":40,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":31.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:53:15+00:00","createdAt":"2026-04-05T07:08:56.051213+00:00"},{"runId":"20260403T161051_glm-realtime-flash_9574a88a","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":2,"toolUseDenom":16,"instructionFollowing":19,"kbGrounding":41,"ambiguityHandling":4,"stateTracking":2,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":29.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:50:59+00:00","createdAt":"2026-04-05T07:08:57.146018+00:00"},{"runId":"20260403T142307_glm-realtime-flash_8fa6e612","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":20,"kbGrounding":39,"ambiguityHandling":2,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":39.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:44:17+00:00","createdAt":"2026-04-05T07:08:51.208823+00:00"},{"runId":"20260403T144237_glm-realtime-flash_a61e17c2","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":3,"toolUseDenom":16,"instructionFollowing":21,"kbGrounding":40,"ambiguityHandling":3,"stateTracking":13,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":35.4,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:44:11+00:00","createdAt":"2026-04-05T07:08:52.69093+00:00"},{"runId":"20260403T150517_glm-realtime-flash_bb80b72b","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":24,"kbGrounding":42,"ambiguityHandling":2,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":40.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:42:58+00:00","createdAt":"2026-04-05T07:08:54.879634+00:00"},{"runId":"20260403T161859_glm-realtime-flash_2997e4ca","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":7,"toolUseDenom":16,"instructionFollowing":19,"kbGrounding":33,"ambiguityHandling":5,"stateTracking":9,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":40.57,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:42:14+00:00","createdAt":"2026-04-05T07:08:59.105907+00:00"},{"runId":"20260331T234529_glm-realtime-flash_240ad424","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":4,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":28.53,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T19:06:27+00:00","createdAt":"2026-04-01T19:18:56.350379+00:00"},{"runId":"20260331T233530_glm-realtime-flash_722cc9cf","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":12,"kbGrounding":36,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":37.57,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T19:04:05+00:00","createdAt":"2026-04-01T19:18:54.774872+00:00"},{"runId":"20260331T233211_glm-realtime-flash_b2e769c0","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":9,"kbGrounding":35,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":26.73,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:57:06+00:00","createdAt":"2026-04-01T19:18:53.733431+00:00"},{"runId":"20260331T225822_glm-realtime-flash_c7f1aa5d","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":12,"kbGrounding":47,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":40.51,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:56:34+00:00","createdAt":"2026-04-01T19:18:50.887897+00:00"},{"runId":"20260331T230104_glm-realtime-flash_d2c92364","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":26,"kbGrounding":53,"ambiguityHandling":3,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":45.92,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:56:16+00:00","createdAt":"2026-04-01T19:18:52.750815+00:00"},{"runId":"20260331T230018_glm-realtime-flash_a22a2628","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":1,"toolUseDenom":16,"instructionFollowing":14,"kbGrounding":14,"ambiguityHandling":0,"stateTracking":4,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":11.14,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:55:32+00:00","createdAt":"2026-04-01T19:18:51.847524+00:00"},{"runId":"20260331T221832_glm-realtime-flash_1daf77c3","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":10,"kbGrounding":38,"ambiguityHandling":4,"stateTracking":1,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":38.41,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:49:26+00:00","createdAt":"2026-04-01T19:18:45.868813+00:00"},{"runId":"20260331T221832_glm-realtime-flash_442d8ecf","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":16,"kbGrounding":58,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":36.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:48:28+00:00","createdAt":"2026-04-01T19:18:47.066254+00:00"},{"runId":"20260331T224230_glm-realtime-flash_bac235a2","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":7,"kbGrounding":31,"ambiguityHandling":3,"stateTracking":4,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":32.56,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:45:46+00:00","createdAt":"2026-04-01T19:18:49.954477+00:00"},{"runId":"20260331T224230_glm-realtime-flash_2563fdec","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":6,"kbGrounding":32,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":25.1,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:44:09+00:00","createdAt":"2026-04-01T19:18:48.893611+00:00"},{"runId":"20260331T221832_glm-realtime-flash_8861df3c","benchmark":"conversation_bench","model":"glm-realtime-flash","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":10,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":27.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T07:27:06+00:00","createdAt":"2026-04-01T19:18:47.963255+00:00"},{"runId":"20260330T171326_gemini-3.1-flash-live-preview_b78b61f2","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":69,"kbGrounding":66,"ambiguityHandling":5,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.3,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:58:18+00:00","createdAt":"2026-03-31T04:45:43.608123+00:00"},{"runId":"20260330T172050_gemini-3.1-flash-live-preview_5b8183fc","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":64,"ambiguityHandling":5,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:57:39+00:00","createdAt":"2026-03-31T04:45:44.545107+00:00"},{"runId":"20260329T235222_gemini-3.1-flash-live-preview_42bcf232","benchmark":"conversation_bench","model":"gemini-3.1-flash-live-preview","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":55,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T07:56:34+00:00","createdAt":"2026-03-30T14:05:41.404098+00:00"},{"runId":"20260324T231550_gpt-realtime_03dd549b","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":66,"kbGrounding":64,"ambiguityHandling":3,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":70.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-26T20:33:22+00:00","createdAt":"2026-03-26T20:36:29.148681+00:00"},{"runId":"20260326T103752_gpt-realtime_278da144","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":67,"ambiguityHandling":5,"stateTracking":16,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-26T20:23:16+00:00","createdAt":"2026-03-26T20:36:34.085295+00:00"},{"runId":"20260326T105136_gpt-realtime_a957e353","benchmark":"conversation_bench","model":"gpt-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":70,"kbGrounding":65,"ambiguityHandling":6,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":81.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-26T20:21:04+00:00","createdAt":"2026-03-26T20:36:34.954761+00:00"},{"runId":"20260325T032314_ultravox-v0.7_47cbcc5b","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":47,"kbGrounding":57,"ambiguityHandling":7,"stateTracking":16,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T11:31:12+00:00","createdAt":"2026-03-25T11:34:31.981821+00:00"},{"runId":"20260324T233107_gemini-2.5-flash-native-audio-preview-12-2025_210c3ec7","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":67,"kbGrounding":66,"ambiguityHandling":5,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T07:59:21+00:00","createdAt":"2026-03-25T08:11:03.039149+00:00"},{"runId":"20260324T231550_gemini-2.5-flash-native-audio-preview-12-2025_fac98cad","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":55,"kbGrounding":67,"ambiguityHandling":4,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T07:57:29+00:00","createdAt":"2026-03-25T08:11:02.11793+00:00"},{"runId":"20260324T234431_ultravox-v0.7_33d88503","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":56,"kbGrounding":60,"ambiguityHandling":5,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T07:52:08+00:00","createdAt":"2026-03-25T08:11:03.864428+00:00"},{"runId":"20260324T230955_ultravox-v0.7_2350279f","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":56,"ambiguityHandling":6,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":76.58,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T07:14:53+00:00","createdAt":"2026-03-25T07:18:05.438995+00:00"},{"runId":"20260324T213949_ultravox-v0.7_30717720","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":58,"kbGrounding":51,"ambiguityHandling":6,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.72,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T06:04:53+00:00","createdAt":"2026-03-25T06:08:13.15737+00:00"},{"runId":"20260324T203939_ultravox-v0.7_d6720c05","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":52,"kbGrounding":53,"ambiguityHandling":7,"stateTracking":20,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":70.12,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T05:48:32+00:00","createdAt":"2026-03-25T05:53:53.283051+00:00"},{"runId":"20260324T203939_ultravox-v0.7_31b6fea6","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":54,"ambiguityHandling":5,"stateTracking":25,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73.13,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-25T05:46:11+00:00","createdAt":"2026-03-25T05:53:52.376201+00:00"},{"runId":"20260324T060740_ultravox-v0.7_23d7db44","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":11,"toolUseDenom":16,"instructionFollowing":65,"kbGrounding":62,"ambiguityHandling":6,"stateTracking":21,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":75.34,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T14:10:42+00:00","createdAt":"2026-03-24T14:22:19.339588+00:00"},{"runId":"20260324T032143_amazon.nova-2-sonic-v1_0_01a2d21a","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":58,"ambiguityHandling":5,"stateTracking":4,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":61.04,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T14:06:12+00:00","createdAt":"2026-03-24T14:08:52.046514+00:00"},{"runId":"20260324T040740_amazon.nova-2-sonic-v1_0_5ab29e6b","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":39,"kbGrounding":57,"ambiguityHandling":5,"stateTracking":3,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":58.67,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T13:54:01+00:00","createdAt":"2026-03-24T13:56:45.617594+00:00"},{"runId":"20260324T050732_ultravox-v0.7_a8659a0d","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":62,"kbGrounding":65,"ambiguityHandling":7,"stateTracking":31,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":85.15,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T13:29:18+00:00","createdAt":"2026-03-24T13:32:02.878713+00:00"},{"runId":"20260324T045436_ultravox-v0.7_d47b8a47","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":65,"ambiguityHandling":7,"stateTracking":32,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":86.29,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T13:27:53+00:00","createdAt":"2026-03-24T13:32:01.919948+00:00"},{"runId":"20260324T035427_ultravox-v0.7_4dc55ff8","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":59,"kbGrounding":57,"ambiguityHandling":5,"stateTracking":26,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":74.19,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T13:26:32+00:00","createdAt":"2026-03-24T13:31:58.244111+00:00"},{"runId":"20260324T035925_ultravox-v0.7_b4829db8","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":63,"ambiguityHandling":5,"stateTracking":28,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":78.07,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T13:20:17+00:00","createdAt":"2026-03-24T13:31:59.176387+00:00"},{"runId":"20260324T040652_amazon.nova-2-sonic-v1_0_8fa80034","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":27,"kbGrounding":59,"ambiguityHandling":3,"stateTracking":5,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":52.21,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T12:37:35+00:00","createdAt":"2026-03-24T13:32:00.126626+00:00"},{"runId":"20260324T041650_amazon.nova-2-sonic-v1_0_e53ae63e","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":51,"kbGrounding":60,"ambiguityHandling":5,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":63.2,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T12:24:09+00:00","createdAt":"2026-03-24T13:32:01.017857+00:00"},{"runId":"20260324T031816_amazon.nova-2-sonic-v1_0_3a80f191","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":16,"toolUseDenom":16,"instructionFollowing":42,"kbGrounding":43,"ambiguityHandling":5,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":60.02,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T12:16:55+00:00","createdAt":"2026-03-24T12:23:35.095607+00:00"},{"runId":"20260323T221159_ultravox-v0.7_cfa001fe","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":61,"kbGrounding":58,"ambiguityHandling":5,"stateTracking":24,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73.78,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T06:20:31+00:00","createdAt":"2026-03-24T06:22:27.641936+00:00"},{"runId":"20260323T200804_amazon.nova-2-sonic-v1_0_4a28b245","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":45,"kbGrounding":51,"ambiguityHandling":5,"stateTracking":6,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":57.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T03:49:03+00:00","createdAt":"2026-03-24T03:50:46.274167+00:00"},{"runId":"20260323T200802_amazon.nova-2-sonic-v1_0_1d5697f1","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":40,"kbGrounding":54,"ambiguityHandling":3,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":43.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T03:47:42+00:00","createdAt":"2026-03-24T03:50:45.237544+00:00"},{"runId":"20260323T141250_amazon.nova-2-sonic-v1_0_3d911fa2","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":5,"toolUseDenom":16,"instructionFollowing":32,"kbGrounding":68,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":41.55,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T01:53:09+00:00","createdAt":"2026-03-24T01:54:26.64679+00:00"},{"runId":"20260323T141332_amazon.nova-2-sonic-v1_0_41195388","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":30,"kbGrounding":60,"ambiguityHandling":6,"stateTracking":4,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":60.17,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T01:50:53+00:00","createdAt":"2026-03-24T01:54:27.621342+00:00"},{"runId":"20260323T144229_grok-realtime_8ff8e535","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":60,"kbGrounding":64,"ambiguityHandling":7,"stateTracking":7,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T01:44:08+00:00","createdAt":"2026-03-24T01:54:30.433947+00:00"},{"runId":"20260323T145056_amazon.nova-2-sonic-v1_0_08a6d9c1","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":56,"kbGrounding":55,"ambiguityHandling":4,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":63.08,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T01:44:00+00:00","createdAt":"2026-03-24T01:54:31.284923+00:00"},{"runId":"20260323T144148_grok-realtime_0055c044","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":63,"kbGrounding":63,"ambiguityHandling":6,"stateTracking":18,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":77.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T01:43:06+00:00","createdAt":"2026-03-24T01:54:29.527113+00:00"},{"runId":"20260323T135114_amazon.nova-2-sonic-v1_0_8fc3ffdb","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":41,"kbGrounding":52,"ambiguityHandling":6,"stateTracking":5,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":59.08,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-03-24T01:42:11+00:00","createdAt":"2026-03-24T01:54:24.725345+00:00"},{"runId":"20260323T134443_grok-realtime_b499a692","benchmark":"conversation_bench","model":"grok-realtime","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":57,"kbGrounding":71,"ambiguityHandling":6,"stateTracking":10,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":72.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T22:39:24+00:00","createdAt":"2026-03-23T22:45:49.557427+00:00"},{"runId":"20260323T014433_ultravox-v0.7_e1fcdaf4","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":49,"kbGrounding":52,"ambiguityHandling":6,"stateTracking":19,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":68.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T15:29:48+00:00","createdAt":"2026-03-25T03:16:10.431363+00:00"},{"runId":"20260323T012937_ultravox-v0.7_6bd82015","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":59,"kbGrounding":63,"ambiguityHandling":5,"stateTracking":22,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":73.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T15:23:01+00:00","createdAt":"2026-03-25T03:16:08.434998+00:00"},{"runId":"20260323T013157_ultravox-v0.7_69b8b170","benchmark":"conversation_bench","model":"ultravox-v0.7","turnsScored":75,"scores":{"toolUse":12,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":63,"ambiguityHandling":5,"stateTracking":30,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":79.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T15:22:04+00:00","createdAt":"2026-03-23T15:30:13.81808+00:00"},{"runId":"20260322T170910_amazon.nova-2-sonic-v1_0_cf9ab5c3","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":13,"toolUseDenom":16,"instructionFollowing":37,"kbGrounding":48,"ambiguityHandling":3,"stateTracking":7,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":50.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T03:02:40+00:00","createdAt":"2026-03-25T18:07:19.28073+00:00"},{"runId":"20260322T170910_amazon.nova-2-sonic-v1_0_56e2b556","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":14,"toolUseDenom":16,"instructionFollowing":41,"kbGrounding":57,"ambiguityHandling":3,"stateTracking":8,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":55.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T03:01:07+00:00","createdAt":"2026-03-23T06:42:51.292538+00:00"},{"runId":"20260322T170910_gemini-2.5-flash-native-audio-preview-12-2025_59cff399","benchmark":"conversation_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":75,"scores":{"toolUse":10,"toolUseDenom":16,"instructionFollowing":64,"kbGrounding":69,"ambiguityHandling":6,"stateTracking":14,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":71.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T02:42:12+00:00","createdAt":"2026-03-23T02:44:58.899748+00:00"},{"runId":"20260322T145229_amazon.nova-2-sonic-v1_0_230a86cb","benchmark":"conversation_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":75,"scores":{"toolUse":15,"toolUseDenom":16,"instructionFollowing":54,"kbGrounding":57,"ambiguityHandling":3,"stateTracking":12,"ambiguityDenom":8,"stateTrackingDenom":33},"passRate":63.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-22T23:41:21+00:00","createdAt":"2026-03-23T02:41:14.809561+00:00"},{"runId":"20260507T213919_gpt-realtime-2_fac3fd63","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":91.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:03:58+00:00","createdAt":"2026-05-07T23:02:21.165418+00:00"},{"runId":"20260507T214029_gpt-realtime-2_550b6a04","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.06,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:02:59+00:00","createdAt":"2026-05-07T23:02:24.443659+00:00"},{"runId":"20260507T214011_gpt-realtime-2_fc81db64","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":64.29,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:02:51+00:00","createdAt":"2026-05-07T23:02:23.934668+00:00"},{"runId":"20260507T214003_gpt-realtime-2_1edbc5a9","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:02:47+00:00","createdAt":"2026-05-07T23:02:22.96096+00:00"},{"runId":"20260507T213945_gpt-realtime-2_9d04b89d","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.25,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:02:42+00:00","createdAt":"2026-05-07T23:02:22.467743+00:00"},{"runId":"20260507T213916_gpt-realtime-2_a78e3c9b","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:01:16+00:00","createdAt":"2026-05-07T23:02:20.90639+00:00"},{"runId":"20260507T214030_gpt-realtime-2_a7b2bda8","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:56+00:00","createdAt":"2026-05-07T23:02:24.680608+00:00"},{"runId":"20260507T214003_gpt-realtime-2_8e5e0feb","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.51,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:51+00:00","createdAt":"2026-05-07T23:02:23.456221+00:00"},{"runId":"20260507T214017_gpt-realtime-2_c5fa66d5","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":88.72,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:46+00:00","createdAt":"2026-05-07T23:02:24.173659+00:00"},{"runId":"20260507T214103_gpt-realtime-2_97797506","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":12,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:30+00:00","createdAt":"2026-05-07T23:02:24.925361+00:00"},{"runId":"20260507T214003_gpt-realtime-2_a5412507","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":90.78,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:21+00:00","createdAt":"2026-05-07T23:02:23.702512+00:00"},{"runId":"20260507T214001_gpt-realtime-2_1d4a4675","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":16},"passRate":90.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:21+00:00","createdAt":"2026-05-07T23:02:22.722156+00:00"},{"runId":"20260507T213937_gpt-realtime-2_fe006696","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:12+00:00","createdAt":"2026-05-07T23:02:21.959943+00:00"},{"runId":"20260507T213935_gpt-realtime-2_44a1c052","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":90.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:04+00:00","createdAt":"2026-05-07T23:02:21.713862+00:00"},{"runId":"20260507T213920_gpt-realtime-2_8bf804e8","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:01+00:00","createdAt":"2026-05-07T23:02:21.421846+00:00"},{"runId":"20260507T213912_gpt-realtime-2_6fc6a469","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":88.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:14+00:00","createdAt":"2026-05-07T23:02:20.641492+00:00"},{"runId":"20260507T213909_gpt-realtime-2_4f094d66","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:10+00:00","createdAt":"2026-05-07T23:02:20.379392+00:00"},{"runId":"20260507T214117_gpt-realtime-2_3c541a85","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":61.31,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:51:53+00:00","createdAt":"2026-05-07T23:02:25.164705+00:00"},{"runId":"20260507T213943_gpt-realtime-2_15e85a1e","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:49:19+00:00","createdAt":"2026-05-07T23:02:22.2172+00:00"},{"runId":"20260507T214003_gpt-realtime-2_407153e7","benchmark":"event_bench","model":"gpt-realtime-2","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":7,"kbGrounding":7,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":24.16,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:47:02+00:00","createdAt":"2026-05-07T23:02:23.22293+00:00"},{"runId":"20260507T061101_grok-voice-think-fast-1.0_43d5287e","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.98,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:46:07+00:00","createdAt":"2026-05-07T20:58:13.783753+00:00"},{"runId":"20260507T061047_grok-voice-think-fast-1.0_12650d0a","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":12,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":88.82,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:40:27+00:00","createdAt":"2026-05-07T20:58:13.297606+00:00"},{"runId":"20260507T061026_grok-voice-think-fast-1.0_7771ddd7","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.04,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:39:33+00:00","createdAt":"2026-05-07T20:58:12.136228+00:00"},{"runId":"20260507T061034_grok-voice-think-fast-1.0_7bd292c6","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.64,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:39:26+00:00","createdAt":"2026-05-07T20:58:12.834641+00:00"},{"runId":"20260507T061024_grok-voice-think-fast-1.0_47cd8034","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":12,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.27,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:39:14+00:00","createdAt":"2026-05-07T20:58:11.901436+00:00"},{"runId":"20260507T061041_grok-voice-think-fast-1.0_11f685a6","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:38:58+00:00","createdAt":"2026-05-07T20:58:13.065207+00:00"},{"runId":"20260507T061016_grok-voice-think-fast-1.0_147f0f31","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.18,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:38:23+00:00","createdAt":"2026-05-07T20:58:11.187894+00:00"},{"runId":"20260507T061022_grok-voice-think-fast-1.0_39d7fae3","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:38:22+00:00","createdAt":"2026-05-07T20:58:11.667762+00:00"},{"runId":"20260507T061000_grok-voice-think-fast-1.0_3d4e43d6","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:38:21+00:00","createdAt":"2026-05-07T20:58:10.480757+00:00"},{"runId":"20260507T060953_grok-voice-think-fast-1.0_6ceb903f","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:37:33+00:00","createdAt":"2026-05-07T20:58:09.732388+00:00"},{"runId":"20260507T061010_grok-voice-think-fast-1.0_412fdbb6","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.69,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:37:25+00:00","createdAt":"2026-05-07T20:58:10.712831+00:00"},{"runId":"20260507T061000_grok-voice-think-fast-1.0_2d03c36b","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:37:25+00:00","createdAt":"2026-05-07T20:58:10.229596+00:00"},{"runId":"20260507T061021_grok-voice-think-fast-1.0_288520f4","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:36:46+00:00","createdAt":"2026-05-07T20:58:11.425464+00:00"},{"runId":"20260507T061206_grok-voice-think-fast-1.0_c0336b3d","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.24,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:36:43+00:00","createdAt":"2026-05-07T20:58:14.027551+00:00"},{"runId":"20260507T061028_grok-voice-think-fast-1.0_3f401ed4","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":72.13,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:36:34+00:00","createdAt":"2026-05-07T20:58:12.372589+00:00"},{"runId":"20260507T060953_grok-voice-think-fast-1.0_89058ce7","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:36:33+00:00","createdAt":"2026-05-07T20:58:09.986392+00:00"},{"runId":"20260507T061011_grok-voice-think-fast-1.0_5cf2a4d3","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:35:22+00:00","createdAt":"2026-05-07T20:58:10.957107+00:00"},{"runId":"20260507T061033_grok-voice-think-fast-1.0_ea0cc044","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.49,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:34:45+00:00","createdAt":"2026-05-07T20:58:12.605972+00:00"},{"runId":"20260507T061056_grok-voice-think-fast-1.0_c9259626","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:34:40+00:00","createdAt":"2026-05-07T20:58:13.537527+00:00"},{"runId":"20260507T055242_grok-voice-think-fast-1.0_08615b7d","benchmark":"event_bench","model":"grok-voice-think-fast-1.0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T06:21:27+00:00","createdAt":"2026-05-07T20:58:09.344093+00:00"},{"runId":"20260412T065820_amazon.nova-2-sonic-v1_0_cc418ea3","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":17,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":57.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:03:03+00:00","createdAt":"2026-04-17T18:28:18.821514+00:00"},{"runId":"20260414T154236_gemini-2.5-flash-native-audio-preview-12-2025_01c09e14","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:02:04+00:00","createdAt":"2026-04-17T18:30:21.897011+00:00"},{"runId":"20260414T154408_gemini-2.5-flash-native-audio-preview-12-2025_63e1251f","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T22:58:28+00:00","createdAt":"2026-04-17T18:30:22.473702+00:00"},{"runId":"20260412T091953_ultravox-v0.7_9c8b184a","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:56:18+00:00","createdAt":"2026-04-17T18:30:16.786789+00:00"},{"runId":"20260412T092432_grok-realtime_62a022d5","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:49:20+00:00","createdAt":"2026-04-17T18:30:20.706872+00:00"},{"runId":"20260412T092458_amazon.nova-2-sonic-v1_0_fd445bd2","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":5,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":20.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:47:24+00:00","createdAt":"2026-04-17T18:30:21.300566+00:00"},{"runId":"20260412T091631_amazon.nova-2-sonic-v1_0_4b1ce533","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":16,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":58.87,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:45:31+00:00","createdAt":"2026-04-17T18:30:14.235795+00:00"},{"runId":"20260412T092008_grok-realtime_b71de302","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":72.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:44:35+00:00","createdAt":"2026-04-17T18:30:17.561114+00:00"},{"runId":"20260412T091018_grok-realtime_14e6b203","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":62.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:43:08+00:00","createdAt":"2026-04-17T18:30:10.408225+00:00"},{"runId":"20260412T092026_gpt-realtime_23d374de","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.88,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:41:27+00:00","createdAt":"2026-04-17T18:30:18.854119+00:00"},{"runId":"20260412T090601_ultravox-v0.7_0e65c3fe","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.29,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:41:26+00:00","createdAt":"2026-04-17T18:30:06.558723+00:00"},{"runId":"20260412T092322_gemini-3.1-flash-live-preview_9300dd85","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:41:24+00:00","createdAt":"2026-04-17T18:30:20.129233+00:00"},{"runId":"20260412T091016_ultravox-v0.7_919f5b1e","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:40:36+00:00","createdAt":"2026-04-17T18:30:09.678771+00:00"},{"runId":"20260412T092022_amazon.nova-2-sonic-v1_0_b663a1f1","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.91,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:39:27+00:00","createdAt":"2026-04-17T18:30:18.200167+00:00"},{"runId":"20260412T091111_amazon.nova-2-sonic-v1_0_9f00a136","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":9,"kbGrounding":6,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":29.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:39:25+00:00","createdAt":"2026-04-17T18:30:11.025681+00:00"},{"runId":"20260412T091758_gemini-2.5-flash-native-audio-preview-12-2025_322fdb34","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:38:52+00:00","createdAt":"2026-04-17T18:30:15.521744+00:00"},{"runId":"20260412T091637_gpt-realtime_2795fc2e","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.29,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:38:26+00:00","createdAt":"2026-04-17T18:30:14.867955+00:00"},{"runId":"20260412T092210_gemini-2.5-flash-native-audio-preview-12-2025_9523d830","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:37:24+00:00","createdAt":"2026-04-17T18:30:19.510251+00:00"},{"runId":"20260412T091415_grok-realtime_ecb2add7","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:37:11+00:00","createdAt":"2026-04-17T18:30:13.569334+00:00"},{"runId":"20260412T091854_gemini-3.1-flash-live-preview_42024faa","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:34:57+00:00","createdAt":"2026-04-17T18:30:16.147451+00:00"},{"runId":"20260412T090826_gemini-2.5-flash-native-audio-preview-12-2025_9b056212","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":20,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:32:49+00:00","createdAt":"2026-04-17T18:30:08.420229+00:00"},{"runId":"20260412T090252_ultravox-v0.7_09cef8a8","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.51,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:32:41+00:00","createdAt":"2026-04-17T18:30:02.769205+00:00"},{"runId":"20260412T091159_gpt-realtime_b78ef6c0","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:32:19+00:00","createdAt":"2026-04-17T18:30:11.671273+00:00"},{"runId":"20260412T090617_grok-realtime_c37a088b","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:32:18+00:00","createdAt":"2026-04-17T18:30:07.187145+00:00"},{"runId":"20260412T085624_ultravox-v0.7_615ac397","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.57,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:30:40+00:00","createdAt":"2026-04-17T18:29:55.707115+00:00"},{"runId":"20260412T090245_gemini-3.1-flash-live-preview_33effc54","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":67.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:30:34+00:00","createdAt":"2026-04-17T18:30:02.155054+00:00"},{"runId":"20260412T090328_amazon.nova-2-sonic-v1_0_98bb5b60","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":15,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:30:03+00:00","createdAt":"2026-04-17T18:30:04.120962+00:00"},{"runId":"20260412T091314_gemini-3.1-flash-live-preview_e8f7195d","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:29:39+00:00","createdAt":"2026-04-17T18:30:12.920443+00:00"},{"runId":"20260412T091213_gemini-2.5-flash-native-audio-preview-12-2025_415476bf","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:28:48+00:00","createdAt":"2026-04-17T18:30:12.294954+00:00"},{"runId":"20260412T090948_gemini-3.1-flash-live-preview_24ed53ba","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":19,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":65.73,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:28:39+00:00","createdAt":"2026-04-17T18:30:09.057884+00:00"},{"runId":"20260412T090311_grok-realtime_ad091524","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":67.05,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:28:33+00:00","createdAt":"2026-04-17T18:30:03.476736+00:00"},{"runId":"20260412T090512_gpt-realtime_b9e8adbc","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:27:42+00:00","createdAt":"2026-04-17T18:30:04.706606+00:00"},{"runId":"20260412T085849_grok-realtime_cc3194f1","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:26:45+00:00","createdAt":"2026-04-17T18:29:59.507328+00:00"},{"runId":"20260412T090620_amazon.nova-2-sonic-v1_0_7852dbf9","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":10,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":55.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:26:13+00:00","createdAt":"2026-04-17T18:30:07.790044+00:00"},{"runId":"20260412T090129_gpt-realtime_151739d3","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:24:57+00:00","createdAt":"2026-04-17T18:30:00.841653+00:00"},{"runId":"20260412T085642_grok-realtime_8a2b3eda","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":62.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:24:20+00:00","createdAt":"2026-04-17T18:29:56.36413+00:00"},{"runId":"20260412T085102_amazon.nova-2-sonic-v1_0_7d51454b","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:23:20+00:00","createdAt":"2026-04-17T18:29:53.256525+00:00"},{"runId":"20260412T085659_amazon.nova-2-sonic-v1_0_9811d507","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":51.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:22:09+00:00","createdAt":"2026-04-17T18:29:56.996587+00:00"},{"runId":"20260412T085049_grok-realtime_ad5ba0a5","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.47,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:20:25+00:00","createdAt":"2026-04-17T18:29:52.697543+00:00"},{"runId":"20260412T085953_amazon.nova-2-sonic-v1_0_781787e4","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":50.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:20:20+00:00","createdAt":"2026-04-17T18:30:00.184443+00:00"},{"runId":"20260412T090548_gemini-3.1-flash-live-preview_66a6d9f9","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":96.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:20:07+00:00","createdAt":"2026-04-17T18:30:05.980124+00:00"},{"runId":"20260412T090516_gemini-2.5-flash-native-audio-preview-12-2025_1222a373","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.77,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:19:52+00:00","createdAt":"2026-04-17T18:30:05.311053+00:00"},{"runId":"20260412T085026_ultravox-v0.7_8c7c2d4d","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:18:53+00:00","createdAt":"2026-04-17T18:29:51.999173+00:00"},{"runId":"20260412T090137_gemini-2.5-flash-native-audio-preview-12-2025_221af40b","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":19,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":63.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:17:56+00:00","createdAt":"2026-04-17T18:30:01.51315+00:00"},{"runId":"20260412T084639_grok-realtime_6afdc16b","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":63.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:16:35+00:00","createdAt":"2026-04-17T18:29:49.03125+00:00"},{"runId":"20260412T084149_amazon.nova-2-sonic-v1_0_07ef506a","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":15,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":62.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:16:29+00:00","createdAt":"2026-04-17T18:29:43.433661+00:00"},{"runId":"20260412T085745_gemini-3.1-flash-live-preview_3ca914a1","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:14:03+00:00","createdAt":"2026-04-17T18:29:58.856611+00:00"},{"runId":"20260412T084113_ultravox-v0.7_cafdd8c6","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.86,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:13:13+00:00","createdAt":"2026-04-17T18:29:42.05946+00:00"},{"runId":"20260412T085736_gemini-2.5-flash-native-audio-preview-12-2025_a225e01a","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":17,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:12:12+00:00","createdAt":"2026-04-17T18:29:58.241424+00:00"},{"runId":"20260412T085221_gpt-realtime_7f850f35","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:11:58+00:00","createdAt":"2026-04-17T18:29:53.850773+00:00"},{"runId":"20260412T085403_gemini-3.1-flash-live-preview_e92c9dc7","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:11:10+00:00","createdAt":"2026-04-17T18:29:55.049232+00:00"},{"runId":"20260412T084753_amazon.nova-2-sonic-v1_0_e4ac4a58","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":17,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":60.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:10:17+00:00","createdAt":"2026-04-17T18:29:49.650057+00:00"},{"runId":"20260412T085253_gemini-2.5-flash-native-audio-preview-12-2025_b723d514","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":66.41,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:10:15+00:00","createdAt":"2026-04-17T18:29:54.420663+00:00"},{"runId":"20260412T084442_grok-realtime_205fd82c","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":91.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:09:46+00:00","createdAt":"2026-04-17T18:29:46.4659+00:00"},{"runId":"20260412T084123_grok-realtime_343bb074","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.15,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:08:25+00:00","createdAt":"2026-04-17T18:29:42.843638+00:00"},{"runId":"20260412T084202_gpt-realtime_1c351320","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.02,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:06:35+00:00","createdAt":"2026-04-17T18:29:44.093001+00:00"},{"runId":"20260412T085721_gpt-realtime_83fca317","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":10,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":34.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:06:19+00:00","createdAt":"2026-04-17T18:29:57.624169+00:00"},{"runId":"20260412T085021_gemini-3.1-flash-live-preview_3dd5b57d","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":91.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:06:16+00:00","createdAt":"2026-04-17T18:29:51.41325+00:00"},{"runId":"20260412T084601_gemini-2.5-flash-native-audio-preview-12-2025_c612c4f4","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:05:59+00:00","createdAt":"2026-04-17T18:29:47.751485+00:00"},{"runId":"20260412T083639_amazon.nova-2-sonic-v1_0_152977b3","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":14,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:05:46+00:00","createdAt":"2026-04-17T18:29:39.60037+00:00"},{"runId":"20260412T083624_grok-realtime_f75397d0","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":64.47,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:05:15+00:00","createdAt":"2026-04-17T18:29:38.988954+00:00"},{"runId":"20260412T084442_amazon.nova-2-sonic-v1_0_76c4f1c9","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":57.88,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:05:10+00:00","createdAt":"2026-04-17T18:29:45.838773+00:00"},{"runId":"20260412T084907_gemini-2.5-flash-native-audio-preview-12-2025_b7796aea","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:03:26+00:00","createdAt":"2026-04-17T18:29:50.840002+00:00"},{"runId":"20260412T083617_ultravox-v0.7_191cfc57","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:03:10+00:00","createdAt":"2026-04-17T18:29:38.234052+00:00"},{"runId":"20260412T084626_gemini-3.1-flash-live-preview_515fdc85","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":90.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:02:51+00:00","createdAt":"2026-04-17T18:29:48.376017+00:00"},{"runId":"20260412T082601_ultravox-v0.7_28037de1","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:02:44+00:00","createdAt":"2026-04-17T18:29:31.594546+00:00"},{"runId":"20260412T083912_gpt-realtime_f3dd30c3","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.65,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:01:35+00:00","createdAt":"2026-04-17T18:29:40.254355+00:00"},{"runId":"20260412T082520_ultravox-v0.7_399730bc","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:01:27+00:00","createdAt":"2026-04-17T18:29:28.051542+00:00"},{"runId":"20260412T084334_gemini-3.1-flash-live-preview_cc56fc2c","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:59:52+00:00","createdAt":"2026-04-17T18:29:45.26897+00:00"},{"runId":"20260412T083115_grok-realtime_1fb912ad","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:58:37+00:00","createdAt":"2026-04-17T18:29:35.177654+00:00"},{"runId":"20260412T083005_gemini-2.5-flash-native-audio-preview-12-2025_a221ea74","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:57:43+00:00","createdAt":"2026-04-17T18:29:34.025415+00:00"},{"runId":"20260412T084036_gemini-3.1-flash-live-preview_2c6a3096","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:57:34+00:00","createdAt":"2026-04-17T18:29:41.419134+00:00"},{"runId":"20260412T084834_gpt-realtime_5d230790","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":9,"kbGrounding":10,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":32.81,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:57:20+00:00","createdAt":"2026-04-17T18:29:50.272543+00:00"},{"runId":"20260412T084219_gemini-2.5-flash-native-audio-preview-12-2025_c31ec1ac","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:56:57+00:00","createdAt":"2026-04-17T18:29:44.68031+00:00"},{"runId":"20260412T083339_gpt-realtime_cc063549","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:56:40+00:00","createdAt":"2026-04-17T18:29:36.406371+00:00"},{"runId":"20260412T083125_amazon.nova-2-sonic-v1_0_e0b9bde3","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:56:23+00:00","createdAt":"2026-04-17T18:29:35.751775+00:00"},{"runId":"20260412T084450_gpt-realtime_780fe060","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":10,"kbGrounding":10,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":33.5,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:54:02+00:00","createdAt":"2026-04-17T18:29:47.118787+00:00"},{"runId":"20260412T084011_gemini-2.5-flash-native-audio-preview-12-2025_5025ed61","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:52:52+00:00","createdAt":"2026-04-17T18:29:40.854288+00:00"},{"runId":"20260412T083458_gemini-3.1-flash-live-preview_6fe6d83a","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:52:19+00:00","createdAt":"2026-04-17T18:29:37.593675+00:00"},{"runId":"20260412T081518_amazon.nova-2-sonic-v1_0_73460e54","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:51:00+00:00","createdAt":"2026-04-17T18:29:22.864087+00:00"},{"runId":"20260412T082612_grok-realtime_ff708127","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.6,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:50:48+00:00","createdAt":"2026-04-17T18:29:32.253629+00:00"},{"runId":"20260412T083426_gemini-2.5-flash-native-audio-preview-12-2025_2c5a3e59","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.44,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:50:25+00:00","createdAt":"2026-04-17T18:29:37.021396+00:00"},{"runId":"20260412T082751_gpt-realtime_47b211a5","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.66,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:50:19+00:00","createdAt":"2026-04-17T18:29:33.429751+00:00"},{"runId":"20260412T081431_ultravox-v0.7_48fc87ca","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:49:06+00:00","createdAt":"2026-04-17T18:29:21.621599+00:00"},{"runId":"20260412T082535_grok-realtime_57fa702e","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:48:32+00:00","createdAt":"2026-04-17T18:29:28.680555+00:00"},{"runId":"20260412T081816_gemini-2.5-flash-native-audio-preview-12-2025_53385609","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:47:52+00:00","createdAt":"2026-04-17T18:29:24.057312+00:00"},{"runId":"20260412T082644_amazon.nova-2-sonic-v1_0_229ee853","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":6,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":33.9,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:46:28+00:00","createdAt":"2026-04-17T18:29:32.858584+00:00"},{"runId":"20260412T083012_gemini-3.1-flash-live-preview_228aae86","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":19,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:46:25+00:00","createdAt":"2026-04-17T18:29:34.603947+00:00"},{"runId":"20260412T082551_gpt-realtime_f5127381","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:46:00+00:00","createdAt":"2026-04-17T18:29:30.430836+00:00"},{"runId":"20260412T082204_grok-realtime_28297fce","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:44:49+00:00","createdAt":"2026-04-17T18:29:25.194048+00:00"},{"runId":"20260412T081648_gpt-realtime_ab3ef81e","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":65.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:44:40+00:00","createdAt":"2026-04-17T18:29:23.474103+00:00"},{"runId":"20260412T081229_amazon.nova-2-sonic-v1_0_cd347ace","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":14,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52.46,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:44:30+00:00","createdAt":"2026-04-17T18:29:19.126648+00:00"},{"runId":"20260412T082211_amazon.nova-2-sonic-v1_0_2649d232","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:43:32+00:00","createdAt":"2026-04-17T18:29:25.745435+00:00"},{"runId":"20260412T082551_gemini-2.5-flash-native-audio-preview-12-2025_7fdc5e38","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:42:18+00:00","createdAt":"2026-04-17T18:29:29.828224+00:00"},{"runId":"20260412T082552_gemini-3.1-flash-live-preview_d1cb93e6","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:42:01+00:00","createdAt":"2026-04-17T18:29:31.004698+00:00"},{"runId":"20260412T082226_gpt-realtime_4eedf924","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":93.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:41:47+00:00","createdAt":"2026-04-17T18:29:26.409228+00:00"},{"runId":"20260412T082538_amazon.nova-2-sonic-v1_0_ecf7707c","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":9,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":41.49,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:41:21+00:00","createdAt":"2026-04-17T18:29:29.242996+00:00"},{"runId":"20260412T082427_gemini-2.5-flash-native-audio-preview-12-2025_f746a8c9","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.88,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:41:11+00:00","createdAt":"2026-04-17T18:29:26.954153+00:00"},{"runId":"20260412T081439_grok-realtime_8cb43761","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:40:35+00:00","createdAt":"2026-04-17T18:29:22.273679+00:00"},{"runId":"20260412T082512_gemini-3.1-flash-live-preview_0fcc99af","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":94.89,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:40:10+00:00","createdAt":"2026-04-17T18:29:27.504716+00:00"},{"runId":"20260412T081102_ultravox-v0.7_2c1d898d","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:39:10+00:00","createdAt":"2026-04-17T18:29:17.926687+00:00"},{"runId":"20260412T081153_grok-realtime_7a747076","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:36:38+00:00","createdAt":"2026-04-17T18:29:18.545707+00:00"},{"runId":"20260412T080751_amazon.nova-2-sonic-v1_0_20716cbf","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":41.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:36:23+00:00","createdAt":"2026-04-17T18:29:15.517134+00:00"},{"runId":"20260412T081358_gemini-2.5-flash-native-audio-preview-12-2025_8421affc","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:36:16+00:00","createdAt":"2026-04-17T18:29:20.400249+00:00"},{"runId":"20260412T080202_ultravox-v0.7_45aa8b1f","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:34:56+00:00","createdAt":"2026-04-17T18:29:10.805528+00:00"},{"runId":"20260412T081853_gemini-3.1-flash-live-preview_f3909910","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:34:24+00:00","createdAt":"2026-04-17T18:29:24.627224+00:00"},{"runId":"20260412T081340_gpt-realtime_0b6124f1","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.22,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:33:37+00:00","createdAt":"2026-04-17T18:29:19.744111+00:00"},{"runId":"20260412T081428_gemini-3.1-flash-live-preview_dc40153e","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.04,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:31:24+00:00","createdAt":"2026-04-17T18:29:21.006435+00:00"},{"runId":"20260412T080215_grok-realtime_1afeaf93","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":78.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:31:04+00:00","createdAt":"2026-04-17T18:29:11.604883+00:00"},{"runId":"20260412T080925_gpt-realtime_5e2fe390","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":90.29,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:30:10+00:00","createdAt":"2026-04-17T18:29:16.165512+00:00"},{"runId":"20260412T075452_ultravox-v0.7_da4a5e76","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.58,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:27:49+00:00","createdAt":"2026-04-17T18:29:04.055119+00:00"},{"runId":"20260412T081026_gemini-2.5-flash-native-audio-preview-12-2025_bbed6aff","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":78.28,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:26:43+00:00","createdAt":"2026-04-17T18:29:16.77533+00:00"},{"runId":"20260412T075630_gemini-2.5-flash-native-audio-preview-12-2025_a0194af4","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":72.59,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:26:11+00:00","createdAt":"2026-04-17T18:29:06.539908+00:00"},{"runId":"20260412T081051_gemini-3.1-flash-live-preview_5713ddf7","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:25:49+00:00","createdAt":"2026-04-17T18:29:17.351009+00:00"},{"runId":"20260412T075057_ultravox-v0.7_b893ccc2","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:25:49+00:00","createdAt":"2026-04-17T18:29:00.47233+00:00"},{"runId":"20260412T080100_gemini-3.1-flash-live-preview_5beb93f7","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":12,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":91.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:25:33+00:00","createdAt":"2026-04-17T18:29:10.105796+00:00"},{"runId":"20260412T080252_gpt-realtime_27d4eff6","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:24:25+00:00","createdAt":"2026-04-17T18:29:12.914802+00:00"},{"runId":"20260412T075537_gpt-realtime_09906f74","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.13,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:22:03+00:00","createdAt":"2026-04-17T18:29:05.97244+00:00"},{"runId":"20260412T074527_ultravox-v0.7_63cd2085","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":62.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:15:17+00:00","createdAt":"2026-04-17T18:28:56.870422+00:00"},{"runId":"20260412T073947_ultravox-v0.7_f2302b60","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":92.73,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:13:39+00:00","createdAt":"2026-04-17T18:28:52.951192+00:00"},{"runId":"20260412T072952_ultravox-v0.7_c9d0c454","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.96,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T15:04:14+00:00","createdAt":"2026-04-17T18:28:43.067587+00:00"},{"runId":"20260412T072533_ultravox-v0.7_4120c3f5","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:55:23+00:00","createdAt":"2026-04-17T18:28:39.243115+00:00"},{"runId":"20260412T072004_ultravox-v0.7_3783fbcb","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:49:51+00:00","createdAt":"2026-04-17T18:28:34.712203+00:00"},{"runId":"20260412T070408_amazon.nova-2-sonic-v1_0_cfb54c5f","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":20,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":72.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:31:37+00:00","createdAt":"2026-04-17T18:28:21.222863+00:00"},{"runId":"20260412T065426_ultravox-v0.7_216664ec","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:28:57+00:00","createdAt":"2026-04-17T18:28:13.39103+00:00"},{"runId":"20260412T065525_amazon.nova-2-sonic-v1_0_2d94a783","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":15,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":72.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:28:27+00:00","createdAt":"2026-04-17T18:28:14.725824+00:00"},{"runId":"20260412T070334_grok-realtime_b47ffdaf","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:27:39+00:00","createdAt":"2026-04-17T18:28:20.626791+00:00"},{"runId":"20260412T065459_grok-realtime_b6e03994","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":60.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:23:24+00:00","createdAt":"2026-04-17T18:28:14.134493+00:00"},{"runId":"20260412T065706_ultravox-v0.7_b59bc744","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:22:33+00:00","createdAt":"2026-04-17T18:28:17.358037+00:00"},{"runId":"20260412T065028_ultravox-v0.7_1e8f1969","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:21:20+00:00","createdAt":"2026-04-17T18:28:09.67439+00:00"},{"runId":"20260412T065719_grok-realtime_fe0704bf","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":60.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:20:33+00:00","createdAt":"2026-04-17T18:28:18.120842+00:00"},{"runId":"20260412T065153_amazon.nova-2-sonic-v1_0_4484eacf","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:20:02+00:00","createdAt":"2026-04-17T18:28:10.959805+00:00"},{"runId":"20260412T070304_gemini-3.1-flash-live-preview_da702435","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":67.63,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:19:13+00:00","createdAt":"2026-04-17T18:28:20.039877+00:00"},{"runId":"20260412T064639_amazon.nova-2-sonic-v1_0_6054af79","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:19:05+00:00","createdAt":"2026-04-17T18:28:07.124583+00:00"},{"runId":"20260412T065543_gemini-2.5-flash-native-audio-preview-12-2025_d7a63754","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:17:20+00:00","createdAt":"2026-04-17T18:28:16.119613+00:00"},{"runId":"20260412T065835_gpt-realtime_1e1cd9cd","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:16:41+00:00","createdAt":"2026-04-17T18:28:19.459523+00:00"},{"runId":"20260412T064042_grok-realtime_641b1893","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:16:04+00:00","createdAt":"2026-04-17T18:28:03.632651+00:00"},{"runId":"20260412T065426_gemini-3.1-flash-live-preview_79e53a60","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:15:40+00:00","createdAt":"2026-04-17T18:28:12.834299+00:00"},{"runId":"20260412T065320_gemini-2.5-flash-native-audio-preview-12-2025_bd77b19a","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:15:03+00:00","createdAt":"2026-04-17T18:28:12.208442+00:00"},{"runId":"20260412T065527_gpt-realtime_a7fdd883","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:14:57+00:00","createdAt":"2026-04-17T18:28:15.438802+00:00"},{"runId":"20260412T065110_grok-realtime_29a2eaf9","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:14:46+00:00","createdAt":"2026-04-17T18:28:10.359943+00:00"},{"runId":"20260412T065153_gpt-realtime_abf3e013","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:13:28+00:00","createdAt":"2026-04-17T18:28:11.600443+00:00"},{"runId":"20260412T065706_gemini-3.1-flash-live-preview_a61881ee","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:12:57+00:00","createdAt":"2026-04-17T18:28:16.739117+00:00"},{"runId":"20260412T064549_grok-realtime_b94a54c2","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":65.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:12:25+00:00","createdAt":"2026-04-17T18:28:06.534882+00:00"},{"runId":"20260412T064130_amazon.nova-2-sonic-v1_0_3b294109","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:11:36+00:00","createdAt":"2026-04-17T18:28:04.216083+00:00"},{"runId":"20260412T064022_ultravox-v0.7_5f6714f1","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":78.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:09:47+00:00","createdAt":"2026-04-17T18:28:03.018014+00:00"},{"runId":"20260412T063653_ultravox-v0.7_578f4f74","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:09:44+00:00","createdAt":"2026-04-17T18:28:00.129299+00:00"},{"runId":"20260412T064443_gemini-3.1-flash-live-preview_690c0675","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.05,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:08:50+00:00","createdAt":"2026-04-17T18:28:05.965627+00:00"},{"runId":"20260412T064842_gpt-realtime_abb771e4","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:07:52+00:00","createdAt":"2026-04-17T18:28:07.78819+00:00"},{"runId":"20260412T065028_gemini-3.1-flash-live-preview_6ee075fa","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:05:57+00:00","createdAt":"2026-04-17T18:28:09.070211+00:00"},{"runId":"20260412T063313_amazon.nova-2-sonic-v1_0_6d771e66","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":16,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":45.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:05:04+00:00","createdAt":"2026-04-17T18:27:57.81922+00:00"},{"runId":"20260412T064856_gemini-2.5-flash-native-audio-preview-12-2025_20f26c1c","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":21,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:04:09+00:00","createdAt":"2026-04-17T18:28:08.459528+00:00"},{"runId":"20260412T062625_grok-realtime_3f87468c","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:04:06+00:00","createdAt":"2026-04-17T18:27:50.577239+00:00"},{"runId":"20260412T063707_grok-realtime_f3e48379","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:03:22+00:00","createdAt":"2026-04-17T18:28:00.74296+00:00"},{"runId":"20260412T064405_gpt-realtime_f468ca0c","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:03:03+00:00","createdAt":"2026-04-17T18:28:04.777462+00:00"},{"runId":"20260412T063708_amazon.nova-2-sonic-v1_0_1e126457","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69.39,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T14:02:43+00:00","createdAt":"2026-04-17T18:28:01.300898+00:00"},{"runId":"20260412T063218_ultravox-v0.7_605e046e","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:58:33+00:00","createdAt":"2026-04-17T18:27:56.548762+00:00"},{"runId":"20260412T064418_gemini-2.5-flash-native-audio-preview-12-2025_75af1b90","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:58:19+00:00","createdAt":"2026-04-17T18:28:05.349051+00:00"},{"runId":"20260412T063102_gemini-2.5-flash-native-audio-preview-12-2025_4b4c702a","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:57:17+00:00","createdAt":"2026-04-17T18:27:55.416703+00:00"},{"runId":"20260412T063633_gemini-3.1-flash-live-preview_5a4d753b","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:57:05+00:00","createdAt":"2026-04-17T18:27:59.548339+00:00"},{"runId":"20260412T063746_gpt-realtime_054e6119","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:57:04+00:00","createdAt":"2026-04-17T18:28:01.878544+00:00"},{"runId":"20260412T063004_amazon.nova-2-sonic-v1_0_eff1c7c8","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":52.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:55:41+00:00","createdAt":"2026-04-17T18:27:54.239674+00:00"},{"runId":"20260412T063437_gpt-realtime_00580bc4","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:55:26+00:00","createdAt":"2026-04-17T18:27:58.378956+00:00"},{"runId":"20260412T062608_ultravox-v0.7_01261af8","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:55:24+00:00","createdAt":"2026-04-17T18:27:49.741252+00:00"},{"runId":"20260412T063253_grok-realtime_1157656d","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":58.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:54:58+00:00","createdAt":"2026-04-17T18:27:57.262424+00:00"},{"runId":"20260412T063803_gemini-3.1-flash-live-preview_2595c8bf","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:54:25+00:00","createdAt":"2026-04-17T18:28:02.456375+00:00"},{"runId":"20260412T063449_gemini-2.5-flash-native-audio-preview-12-2025_a6e14e39","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":19,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":56.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:54:24+00:00","createdAt":"2026-04-17T18:27:58.955506+00:00"},{"runId":"20260412T062840_grok-realtime_f7d8a7e1","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:53:19+00:00","createdAt":"2026-04-17T18:27:53.675528+00:00"},{"runId":"20260412T063208_gemini-3.1-flash-live-preview_408064e4","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:51:52+00:00","createdAt":"2026-04-17T18:27:55.9901+00:00"},{"runId":"20260412T062242_amazon.nova-2-sonic-v1_0_08abe518","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":18,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":55.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:51:52+00:00","createdAt":"2026-04-17T18:27:47.1525+00:00"},{"runId":"20260412T062138_ultravox-v0.7_5c56ab69","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:51:09+00:00","createdAt":"2026-04-17T18:27:45.756172+00:00"},{"runId":"20260412T063045_gpt-realtime_1720ae5b","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":80.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:50:27+00:00","createdAt":"2026-04-17T18:27:54.798426+00:00"},{"runId":"20260412T062639_amazon.nova-2-sonic-v1_0_320f30bb","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":15,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":59.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:50:26+00:00","createdAt":"2026-04-17T18:27:51.211236+00:00"},{"runId":"20260412T062817_gemini-2.5-flash-native-audio-preview-12-2025_16359eb4","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:48:55+00:00","createdAt":"2026-04-17T18:27:52.512105+00:00"},{"runId":"20260412T062228_grok-realtime_072d7118","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:48:41+00:00","createdAt":"2026-04-17T18:27:46.46644+00:00"},{"runId":"20260412T061740_amazon.nova-2-sonic-v1_0_8b12f8c5","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":18,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":56.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:46:37+00:00","createdAt":"2026-04-17T18:27:43.120481+00:00"},{"runId":"20260412T062733_gpt-realtime_e1a54590","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:45:37+00:00","createdAt":"2026-04-17T18:27:51.813705+00:00"},{"runId":"20260412T062319_gpt-realtime_8d685714","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:44:41+00:00","createdAt":"2026-04-17T18:27:47.766478+00:00"},{"runId":"20260412T062825_gemini-3.1-flash-live-preview_d0adf878","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:44:16+00:00","createdAt":"2026-04-17T18:27:53.084812+00:00"},{"runId":"20260412T061956_gemini-2.5-flash-native-audio-preview-12-2025_51101efd","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":19,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:44:03+00:00","createdAt":"2026-04-17T18:27:44.635904+00:00"},{"runId":"20260412T062354_gemini-2.5-flash-native-audio-preview-12-2025_a50d9c92","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":20,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":69.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:41:28+00:00","createdAt":"2026-04-17T18:27:48.42316+00:00"},{"runId":"20260412T062434_gemini-3.1-flash-live-preview_5a371a8a","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":84.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:40:41+00:00","createdAt":"2026-04-17T18:27:49.070312+00:00"},{"runId":"20260412T061045_grok-realtime_76190c80","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:40:21+00:00","createdAt":"2026-04-17T18:27:38.813713+00:00"},{"runId":"20260412T061302_gpt-realtime_f6062e85","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:38:02+00:00","createdAt":"2026-04-17T18:27:40.037417+00:00"},{"runId":"20260412T061539_grok-realtime_066df951","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":59.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:37:48+00:00","createdAt":"2026-04-17T18:27:42.240456+00:00"},{"runId":"20260412T061752_gpt-realtime_fab9a230","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.44,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:37:07+00:00","createdAt":"2026-04-17T18:27:44.07318+00:00"},{"runId":"20260412T062026_gemini-3.1-flash-live-preview_9aac11b6","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.44,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:37:05+00:00","createdAt":"2026-04-17T18:27:45.201043+00:00"},{"runId":"20260412T060850_amazon.nova-2-sonic-v1_0_d8076f1b","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":17,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":63.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:36:52+00:00","createdAt":"2026-04-17T18:27:36.4331+00:00"},{"runId":"20260412T061117_amazon.nova-2-sonic-v1_0_437a3d44","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":19,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":59.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:34:47+00:00","createdAt":"2026-04-17T18:27:39.451959+00:00"},{"runId":"20260412T060933_gemini-2.5-flash-native-audio-preview-12-2025_36db9ec1","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:34:36+00:00","createdAt":"2026-04-17T18:27:37.623234+00:00"},{"runId":"20260412T060241_ultravox-v0.7_9c125bba","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":78.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:33:11+00:00","createdAt":"2026-04-17T18:27:32.039046+00:00"},{"runId":"20260412T061335_gemini-2.5-flash-native-audio-preview-12-2025_6520dfdb","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:32:52+00:00","createdAt":"2026-04-17T18:27:40.699695+00:00"},{"runId":"20260412T061506_gemini-3.1-flash-live-preview_81ca311d","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":93.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:32:17+00:00","createdAt":"2026-04-17T18:27:41.288999+00:00"},{"runId":"20260412T060508_gemini-2.5-flash-native-audio-preview-12-2025_e7258d54","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":73.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:32:06+00:00","createdAt":"2026-04-17T18:27:34.552534+00:00"},{"runId":"20260412T060848_grok-realtime_ec8719d8","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":67.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:31:01+00:00","createdAt":"2026-04-17T18:27:35.761612+00:00"},{"runId":"20260412T060642_gemini-3.1-flash-live-preview_24355297","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.49,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:30:43+00:00","createdAt":"2026-04-17T18:27:35.164695+00:00"},{"runId":"20260412T060411_amazon.nova-2-sonic-v1_0_4fb94cd6","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":16,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:28:29+00:00","createdAt":"2026-04-17T18:27:33.24401+00:00"},{"runId":"20260412T060902_gpt-realtime_021ecb07","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":81.44,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:28:24+00:00","createdAt":"2026-04-17T18:27:37.036412+00:00"},{"runId":"20260412T061002_gemini-3.1-flash-live-preview_942aac70","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:26:37+00:00","createdAt":"2026-04-17T18:27:38.207172+00:00"},{"runId":"20260412T060312_grok-realtime_4754e539","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":74.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:26:06+00:00","createdAt":"2026-04-17T18:27:32.657964+00:00"},{"runId":"20260412T060421_gpt-realtime_43dba217","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:24:33+00:00","createdAt":"2026-04-17T18:27:33.924599+00:00"},{"runId":"20260412T054934_ultravox-v0.7_fe5799f9","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:21:36+00:00","createdAt":"2026-04-17T18:27:26.351677+00:00"},{"runId":"20260412T055016_amazon.nova-2-sonic-v1_0_a1fa5fdd","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":14,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":50.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:20:25+00:00","createdAt":"2026-04-17T18:27:26.934037+00:00"},{"runId":"20260412T055818_gpt-realtime_fbfd9efa","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":88.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:19:54+00:00","createdAt":"2026-04-17T18:27:30.316366+00:00"},{"runId":"20260412T055351_grok-realtime_cf722738","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":65.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:17:51+00:00","createdAt":"2026-04-17T18:27:29.207923+00:00"},{"runId":"20260412T060233_gemini-3.1-flash-live-preview_5490f2fb","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":89.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:17:38+00:00","createdAt":"2026-04-17T18:27:31.484232+00:00"},{"runId":"20260412T054934_grok-realtime_058f4b3d","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:15:28+00:00","createdAt":"2026-04-17T18:27:25.668874+00:00"},{"runId":"20260412T055859_gemini-2.5-flash-native-audio-preview-12-2025_6d76f924","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":78.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:15:04+00:00","createdAt":"2026-04-17T18:27:30.922731+00:00"},{"runId":"20260412T055252_gpt-realtime_fa269c07","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:11:16+00:00","createdAt":"2026-04-17T18:27:27.492552+00:00"},{"runId":"20260412T054318_gpt-realtime_9dc90b3e","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.54,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:10:34+00:00","createdAt":"2026-04-17T18:27:23.879983+00:00"},{"runId":"20260412T055306_gemini-3.1-flash-live-preview_7c6d4bff","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:09:01+00:00","createdAt":"2026-04-17T18:27:28.634293+00:00"},{"runId":"20260412T055253_gemini-2.5-flash-native-audio-preview-12-2025_ed9897d9","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":89.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:08:49+00:00","createdAt":"2026-04-17T18:27:28.07466+00:00"},{"runId":"20260412T055518_amazon.nova-2-sonic-v1_0_6c252782","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":10,"kbGrounding":11,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":35.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:06:41+00:00","createdAt":"2026-04-17T18:27:29.765818+00:00"},{"runId":"20260412T054654_gemini-3.1-flash-live-preview_150e42cc","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":75.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T13:04:20+00:00","createdAt":"2026-04-17T18:27:25.083711+00:00"},{"runId":"20260412T054433_gemini-2.5-flash-native-audio-preview-12-2025_6b35b390","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":76.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T12:58:58+00:00","createdAt":"2026-04-17T18:27:24.498851+00:00"},{"runId":"20260403T142511_glm-realtime-flash_05cd1d4e","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":17,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":47.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:00:07+00:00","createdAt":"2026-04-05T07:08:18.386916+00:00"},{"runId":"20260403T140107_glm-realtime-flash_c7683136","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":7,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":38.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:58:47+00:00","createdAt":"2026-04-05T07:08:17.825642+00:00"},{"runId":"20260403T133910_glm-realtime-flash_e1bf8e71","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":1,"toolUseDenom":12,"instructionFollowing":14,"kbGrounding":19,"ambiguityHandling":2,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":45.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:57:30+00:00","createdAt":"2026-04-05T07:08:14.274325+00:00"},{"runId":"20260403T162910_glm-realtime-flash_7d4ff717","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":1,"toolUseDenom":12,"instructionFollowing":10,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":34.51,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:56:22+00:00","createdAt":"2026-04-05T07:08:20.12788+00:00"},{"runId":"20260403T140104_glm-realtime-flash_24a9e124","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":1,"toolUseDenom":12,"instructionFollowing":8,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":35.2,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:56:05+00:00","createdAt":"2026-04-05T07:08:16.677917+00:00"},{"runId":"20260403T133911_glm-realtime-flash_7b1c4537","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":1,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":41.48,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:55:30+00:00","createdAt":"2026-04-05T07:08:14.946015+00:00"},{"runId":"20260403T135147_glm-realtime-flash_a2f57d6e","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":10,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":37.15,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:54:45+00:00","createdAt":"2026-04-05T07:08:15.5365+00:00"},{"runId":"20260403T151806_glm-realtime-flash_325b2035","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":11,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":38.53,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:53:35+00:00","createdAt":"2026-04-05T07:08:19.535255+00:00"},{"runId":"20260403T150004_glm-realtime-flash_e731ba5c","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":39.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:52:21+00:00","createdAt":"2026-04-05T07:08:18.963224+00:00"},{"runId":"20260403T140104_glm-realtime-flash_d6525e7f","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":9,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":30.94,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:52:15+00:00","createdAt":"2026-04-05T07:08:17.258932+00:00"},{"runId":"20260403T133301_glm-realtime-flash_096b8897","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":33.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:51:50+00:00","createdAt":"2026-04-05T07:08:13.676774+00:00"},{"runId":"20260403T135619_glm-realtime-flash_88bcaafa","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":49.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:50:29+00:00","createdAt":"2026-04-05T07:08:16.108737+00:00"},{"runId":"20260401T002329_glm-realtime-flash_8a74ef4c","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":8,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":8.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T19:13:09+00:00","createdAt":"2026-04-01T19:19:13.206702+00:00"},{"runId":"20260401T002223_glm-realtime-flash_572501e6","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":8,"kbGrounding":7,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":10.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T18:32:47+00:00","createdAt":"2026-04-01T19:19:12.561782+00:00"},{"runId":"20260330T200508_glm-realtime-air_edf286a8","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":9,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":39.5,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:35:50+00:00","createdAt":"2026-04-01T07:15:42.393219+00:00"},{"runId":"20260330T201408_glm-realtime-air_6e1d69a3","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":1,"toolUseDenom":12,"instructionFollowing":6,"kbGrounding":11,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":13.39,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:34:32+00:00","createdAt":"2026-04-01T07:15:44.120466+00:00"},{"runId":"20260330T201347_glm-realtime-air_71add3cf","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":3,"kbGrounding":15,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":12.41,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:34:18+00:00","createdAt":"2026-04-01T07:15:43.562992+00:00"},{"runId":"20260330T202529_glm-realtime-air_389ea294","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":12,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":30.28,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:33:44+00:00","createdAt":"2026-04-01T07:15:44.618771+00:00"},{"runId":"20260330T200541_glm-realtime-air_1966b7e9","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":24.48,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:32:44+00:00","createdAt":"2026-04-01T07:15:42.9492+00:00"},{"runId":"20260330T200300_glm-realtime-air_ee9557c1","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":6,"kbGrounding":18,"ambiguityHandling":2,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":42.43,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:31:41+00:00","createdAt":"2026-04-01T07:15:41.835938+00:00"},{"runId":"20260330T200224_glm-realtime-flash_a1be4812","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":7,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":18.76,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:31:15+00:00","createdAt":"2026-04-01T07:15:41.30451+00:00"},{"runId":"20260330T200136_glm-realtime-flash_28bd44c1","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":5,"kbGrounding":21,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":20.28,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:30:26+00:00","createdAt":"2026-04-01T07:15:40.753523+00:00"},{"runId":"20260330T195520_glm-realtime-flash_a5554d19","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":8,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":29.2,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:29:05+00:00","createdAt":"2026-04-01T07:15:39.647426+00:00"},{"runId":"20260330T194607_glm-realtime-flash_27483ca7","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":2,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":19.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:28:56+00:00","createdAt":"2026-04-01T07:15:38.971549+00:00"},{"runId":"20260330T195711_glm-realtime-flash_c34012dc","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":8,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":28.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:28:03+00:00","createdAt":"2026-04-01T07:15:40.223506+00:00"},{"runId":"20260330T194340_glm-realtime-air_e5197932","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":37.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:27:20+00:00","createdAt":"2026-04-01T07:15:38.450698+00:00"},{"runId":"20260330T193924_glm-realtime-air_6f61d046","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":18.97,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:26:44+00:00","createdAt":"2026-04-01T07:15:36.763464+00:00"},{"runId":"20260330T194053_glm-realtime-flash_78c5230e","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":6,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":27.04,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:26:12+00:00","createdAt":"2026-04-01T07:15:37.903821+00:00"},{"runId":"20260330T194043_glm-realtime-air_81fbc2c7","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":41.26,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:25:24+00:00","createdAt":"2026-04-01T07:15:37.33385+00:00"},{"runId":"20260330T193917_glm-realtime-flash_397554bd","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":4,"kbGrounding":5,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":16.21,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:25:18+00:00","createdAt":"2026-04-01T07:15:36.170411+00:00"},{"runId":"20260330T193905_glm-realtime-air_f58b60d8","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":2,"toolUseDenom":12,"instructionFollowing":1,"kbGrounding":0,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":14.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:24:12+00:00","createdAt":"2026-04-01T07:15:35.598113+00:00"},{"runId":"20260330T193710_glm-realtime-flash_0abb970c","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":1,"toolUseDenom":12,"instructionFollowing":10,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":36.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:24:05+00:00","createdAt":"2026-04-01T07:15:35.019565+00:00"},{"runId":"20260330T192038_glm-realtime-flash_d4786925","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":0,"toolUseDenom":12,"instructionFollowing":3,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":26.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:21:55+00:00","createdAt":"2026-04-01T07:15:33.03035+00:00"},{"runId":"20260330T192116_glm-realtime-air_aceeab47","benchmark":"event_bench","model":"glm-realtime-air","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":8,"kbGrounding":10,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":34.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:21:37+00:00","createdAt":"2026-04-01T07:15:33.717559+00:00"},{"runId":"20260330T193433_glm-realtime-flash_4a271686","benchmark":"event_bench","model":"glm-realtime-flash","turnsScored":29,"scores":{"toolUse":7,"toolUseDenom":12,"instructionFollowing":6,"kbGrounding":12,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":25.26,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:21:27+00:00","createdAt":"2026-04-01T07:15:34.442841+00:00"},{"runId":"20260330T165010_gemini-3.1-flash-live-preview_c13d3942","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":3,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:13:56+00:00","createdAt":"2026-03-31T04:45:48.150569+00:00"},{"runId":"20260330T163841_gemini-3.1-flash-live-preview_9146ee85","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":11,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:09:55+00:00","createdAt":"2026-03-31T02:03:32.117262+00:00"},{"runId":"20260329T224939_gemini-3.1-flash-live-preview_fac7e49a","benchmark":"event_bench","model":"gemini-3.1-flash-live-preview","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":18,"kbGrounding":19,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":68.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T07:43:55+00:00","createdAt":"2026-03-30T14:05:32.625723+00:00"},{"runId":"20260323T002631_ultravox-v0.7_61de0924","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":17,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:03:04+00:00","createdAt":"2026-03-23T15:30:20.159521+00:00"},{"runId":"20260323T182251_ultravox-v0.7_3844942d","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":77.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:02:47+00:00","createdAt":"2026-03-24T02:16:44.054519+00:00"},{"runId":"20260323T002631_ultravox-v0.7_54b3edb8","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":16,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":79.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:02:45+00:00","createdAt":"2026-03-23T15:30:19.365375+00:00"},{"runId":"20260324T231550_ultravox-v0.7_6df9ad4b","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":25,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":86.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:59:42+00:00","createdAt":"2026-03-25T08:12:02.257937+00:00"},{"runId":"20260323T122018_grok-realtime_9b37dc6b","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:55:11+00:00","createdAt":"2026-03-23T22:46:08.53673+00:00"},{"runId":"20260323T121020_grok-realtime_2c66704a","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":70.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:55:06+00:00","createdAt":"2026-03-23T22:46:06.34278+00:00"},{"runId":"20260323T121020_grok-realtime_00ae8c92","benchmark":"event_bench","model":"grok-realtime","turnsScored":29,"scores":{"toolUse":5,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:54:56+00:00","createdAt":"2026-03-23T22:46:05.401602+00:00"},{"runId":"20260323T182139_gpt-realtime_9e239a7a","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":85.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:49:56+00:00","createdAt":"2026-03-24T02:16:43.510562+00:00"},{"runId":"20260323T181521_gpt-realtime_2cf30d58","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":28,"kbGrounding":21,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":83.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:46:57+00:00","createdAt":"2026-03-24T02:16:42.368225+00:00"},{"runId":"20260323T181842_gpt-realtime_037f3e7d","benchmark":"event_bench","model":"gpt-realtime","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":27,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":87.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:45:06+00:00","createdAt":"2026-03-24T02:16:42.928794+00:00"},{"runId":"20260323T003518_gemini-2.5-flash-native-audio-preview-12-2025_6b347011","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":9,"toolUseDenom":12,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:39:33+00:00","createdAt":"2026-03-23T18:07:05.299436+00:00"},{"runId":"20260323T184028_gemini-2.5-flash-native-audio-preview-12-2025_72928546","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":71.14,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:38:27+00:00","createdAt":"2026-03-24T02:16:47.541471+00:00"},{"runId":"20260323T182824_gemini-2.5-flash-native-audio-preview-12-2025_8b954039","benchmark":"event_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":26,"kbGrounding":23,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":82.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:37:52+00:00","createdAt":"2026-03-24T02:16:45.229246+00:00"},{"runId":"20260323T182823_amazon.nova-2-sonic-v1_0_bebbf306","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":10,"toolUseDenom":12,"instructionFollowing":16,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":67.19,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:30:30+00:00","createdAt":"2026-03-24T02:16:44.669603+00:00"},{"runId":"20260323T185600_amazon.nova-2-sonic-v1_0_ec089280","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":8,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":72.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:29:05+00:00","createdAt":"2026-03-24T02:32:27.117438+00:00"},{"runId":"20260323T184258_amazon.nova-2-sonic-v1_0_42192bb8","benchmark":"event_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":29,"scores":{"toolUse":4,"toolUseDenom":12,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":51.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T02:28:13+00:00","createdAt":"2026-03-24T02:32:25.983271+00:00"},{"runId":"20260323T004155_ultravox-v0.7_8a17e08f","benchmark":"event_bench","model":"ultravox-v0.7","turnsScored":29,"scores":{"toolUse":6,"toolUseDenom":12,"instructionFollowing":24,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":17},"passRate":59.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"gpt-5.2","judgedAt":"2026-03-23T18:58:04+00:00","createdAt":"2026-03-23T15:30:20.845648+00:00"},{"runId":"20260507T214103_gpt-realtime-2_3357d5fd","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.42,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:57+00:00","createdAt":"2026-05-07T23:02:29.23992+00:00"},{"runId":"20260507T214114_gpt-realtime-2_e129e0a9","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:42+00:00","createdAt":"2026-05-07T23:02:29.756719+00:00"},{"runId":"20260507T214055_gpt-realtime-2_3f80341f","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":7,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":30.93,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:00:21+00:00","createdAt":"2026-05-07T23:02:28.29135+00:00"},{"runId":"20260507T214048_gpt-realtime-2_46224628","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":7,"kbGrounding":27,"ambiguityHandling":9,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":43.39,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:48+00:00","createdAt":"2026-05-07T23:02:28.03942+00:00"},{"runId":"20260507T214059_gpt-realtime-2_23cd28ad","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":18,"ambiguityHandling":9,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":49.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:38+00:00","createdAt":"2026-05-07T23:02:28.518906+00:00"},{"runId":"20260507T214134_gpt-realtime-2_77f0d8a1","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":9,"kbGrounding":22,"ambiguityHandling":9,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":40.48,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:29+00:00","createdAt":"2026-05-07T23:02:30.234107+00:00"},{"runId":"20260507T214039_gpt-realtime-2_60d1610b","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":15,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.52,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:29+00:00","createdAt":"2026-05-07T23:02:27.102459+00:00"},{"runId":"20260507T214123_gpt-realtime-2_e180cef8","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":11,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:13+00:00","createdAt":"2026-05-07T23:02:29.99882+00:00"},{"runId":"20260507T214046_gpt-realtime-2_14827bb9","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":9,"kbGrounding":21,"ambiguityHandling":6,"stateTracking":0,"ambiguityDenom":9,"stateTrackingDenom":13},"passRate":37.88,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:59:03+00:00","createdAt":"2026-05-07T23:02:27.798919+00:00"},{"runId":"20260507T214110_gpt-realtime-2_165ad7e7","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":15,"kbGrounding":25,"ambiguityHandling":8,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.1,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:43+00:00","createdAt":"2026-05-07T23:02:29.482798+00:00"},{"runId":"20260507T214031_gpt-realtime-2_b8cbd3f6","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":7,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":14,"ambiguityHandling":5,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":40.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:39+00:00","createdAt":"2026-05-07T23:02:26.63363+00:00"},{"runId":"20260507T214101_gpt-realtime-2_eb79896c","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":23,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":26.06,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:35+00:00","createdAt":"2026-05-07T23:02:28.99838+00:00"},{"runId":"20260507T214059_gpt-realtime-2_c09eb4d2","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":52.69,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:20+00:00","createdAt":"2026-05-07T23:02:28.747728+00:00"},{"runId":"20260507T214022_gpt-realtime-2_48f86ff5","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":10,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:19+00:00","createdAt":"2026-05-07T23:02:26.153747+00:00"},{"runId":"20260507T214031_gpt-realtime-2_7fb422cb","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:58:07+00:00","createdAt":"2026-05-07T23:02:26.396476+00:00"},{"runId":"20260507T214016_gpt-realtime-2_88ebe9ea","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":57.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:57:43+00:00","createdAt":"2026-05-07T23:02:25.909503+00:00"},{"runId":"20260507T214045_gpt-realtime-2_ebf4c36f","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":7,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.93,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:57:24+00:00","createdAt":"2026-05-07T23:02:27.557828+00:00"},{"runId":"20260507T214041_gpt-realtime-2_b9dce133","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":6,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":41.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:56:36+00:00","createdAt":"2026-05-07T23:02:27.233269+00:00"},{"runId":"20260507T214035_gpt-realtime-2_1e841dc5","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":8,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":27.15,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:56:06+00:00","createdAt":"2026-05-07T23:02:26.866343+00:00"},{"runId":"20260507T214013_gpt-realtime-2_d81d7320","benchmark":"grocery_bench","model":"gpt-realtime-2","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":5,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":25.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:54:52+00:00","createdAt":"2026-05-07T23:02:25.686393+00:00"},{"runId":"20260507T202106_grok-voice-think-fast-1.0_1a0057bf","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T20:40:10+00:00","createdAt":"2026-05-07T20:58:18.931997+00:00"},{"runId":"20260507T181301_grok-voice-think-fast-1.0_3bddb875","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":8,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.47,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:40:51+00:00","createdAt":"2026-05-07T20:58:16.439369+00:00"},{"runId":"20260507T181331_grok-voice-think-fast-1.0_1eb644ad","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":0,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":53.31,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:38:46+00:00","createdAt":"2026-05-07T20:58:17.67289+00:00"},{"runId":"20260507T181257_grok-voice-think-fast-1.0_25ee3a1c","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":72.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:38:23+00:00","createdAt":"2026-05-07T20:58:15.717807+00:00"},{"runId":"20260507T181326_grok-voice-think-fast-1.0_b7373f7d","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.67,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:37:47+00:00","createdAt":"2026-05-07T20:58:17.428293+00:00"},{"runId":"20260507T181249_grok-voice-think-fast-1.0_b153b96a","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:37:30+00:00","createdAt":"2026-05-07T20:58:15.482589+00:00"},{"runId":"20260507T181333_grok-voice-think-fast-1.0_0a91c330","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":8,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.22,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:36:47+00:00","createdAt":"2026-05-07T20:58:17.927028+00:00"},{"runId":"20260507T181258_grok-voice-think-fast-1.0_25d66947","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":82.72,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:36:47+00:00","createdAt":"2026-05-07T20:58:16.20341+00:00"},{"runId":"20260507T181352_grok-voice-think-fast-1.0_f3761a69","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":7,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":61.62,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:36:09+00:00","createdAt":"2026-05-07T20:58:18.445914+00:00"},{"runId":"20260507T181313_grok-voice-think-fast-1.0_c2e9fa16","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":14},"passRate":77.96,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:35:34+00:00","createdAt":"2026-05-07T20:58:16.918545+00:00"},{"runId":"20260507T181257_grok-voice-think-fast-1.0_396cfc00","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":77.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:35:34+00:00","createdAt":"2026-05-07T20:58:15.957256+00:00"},{"runId":"20260507T181345_grok-voice-think-fast-1.0_0f94c994","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":6,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":56.6,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:35:12+00:00","createdAt":"2026-05-07T20:58:18.186812+00:00"},{"runId":"20260507T181247_grok-voice-think-fast-1.0_36e1089b","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":13,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":95.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:34:58+00:00","createdAt":"2026-05-07T20:58:15.011295+00:00"},{"runId":"20260507T181406_grok-voice-think-fast-1.0_861c2c5a","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":70.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:33:55+00:00","createdAt":"2026-05-07T20:58:18.68275+00:00"},{"runId":"20260507T181246_grok-voice-think-fast-1.0_e31097cb","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":9,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:33:47+00:00","createdAt":"2026-05-07T20:58:14.741665+00:00"},{"runId":"20260507T181230_grok-voice-think-fast-1.0_1c66ef71","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":23,"ambiguityHandling":10,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":77.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:33:06+00:00","createdAt":"2026-05-07T20:58:14.27376+00:00"},{"runId":"20260507T181316_grok-voice-think-fast-1.0_967495b0","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":15,"kbGrounding":26,"ambiguityHandling":6,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.12,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:31:27+00:00","createdAt":"2026-05-07T20:58:17.189021+00:00"},{"runId":"20260507T181312_grok-voice-think-fast-1.0_ed4614e0","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":8,"kbGrounding":11,"ambiguityHandling":3,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":23.84,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:30:26+00:00","createdAt":"2026-05-07T20:58:16.687491+00:00"},{"runId":"20260507T181247_grok-voice-think-fast-1.0_7624916a","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":7,"kbGrounding":29,"ambiguityHandling":9,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:29:48+00:00","createdAt":"2026-05-07T20:58:15.252652+00:00"},{"runId":"20260507T181244_grok-voice-think-fast-1.0_6b2f65e2","benchmark":"grocery_bench","model":"grok-voice-think-fast-1.0","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":5,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":27.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:29:00+00:00","createdAt":"2026-05-07T20:58:14.510728+00:00"},{"runId":"20260412T124551_grok-realtime_f6fa67b4","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":20,"ambiguityHandling":7,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:16:52+00:00","createdAt":"2026-04-17T18:36:58.940506+00:00"},{"runId":"20260412T115238_gemini-2.5-flash-native-audio-preview-12-2025_80d90008","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":57.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:12:48+00:00","createdAt":"2026-04-17T18:36:14.236491+00:00"},{"runId":"20260412T115112_amazon.nova-2-sonic-v1_0_5f5f2b97","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":10,"ambiguityHandling":8,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":40.47,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:12:31+00:00","createdAt":"2026-04-17T18:36:13.136542+00:00"},{"runId":"20260412T121330_ultravox-v0.7_be8046d3","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":22,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:12:11+00:00","createdAt":"2026-04-17T18:36:31.609585+00:00"},{"runId":"20260412T115451_gemini-3.1-flash-live-preview_c8156391","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":25,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":77.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:11:56+00:00","createdAt":"2026-04-17T18:36:17.716055+00:00"},{"runId":"20260412T103558_gemini-2.5-flash-native-audio-preview-12-2025_0c1fc8f0","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":17,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.2,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:07:51+00:00","createdAt":"2026-04-17T18:35:10.018952+00:00"},{"runId":"20260412T113845_grok-realtime_3033133b","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":20,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":70.51,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:07:13+00:00","createdAt":"2026-04-17T18:36:02.726535+00:00"},{"runId":"20260412T094719_gemini-3.1-flash-live-preview_116a1b3c","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:05:05+00:00","createdAt":"2026-04-17T18:34:28.674256+00:00"},{"runId":"20260412T092744_gemini-3.1-flash-live-preview_2dc3556a","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":76.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:05:04+00:00","createdAt":"2026-04-17T18:34:12.178235+00:00"},{"runId":"20260412T124417_ultravox-v0.7_0f3a78d4","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":86.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:13:52+00:00","createdAt":"2026-04-17T18:36:58.326991+00:00"},{"runId":"20260412T123734_amazon.nova-2-sonic-v1_0_dc662a49","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":16,"ambiguityHandling":5,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":49.67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:12:08+00:00","createdAt":"2026-04-17T18:36:52.366448+00:00"},{"runId":"20260412T123953_ultravox-v0.7_6e4d059b","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.75,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:11:43+00:00","createdAt":"2026-04-17T18:36:54.807824+00:00"},{"runId":"20260412T123943_gemini-3.1-flash-live-preview_b4a60f33","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":83.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:07:30+00:00","createdAt":"2026-04-17T18:36:54.202881+00:00"},{"runId":"20260412T124214_gemini-2.5-flash-native-audio-preview-12-2025_64a3ff42","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":6,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:06:08+00:00","createdAt":"2026-04-17T18:36:57.201664+00:00"},{"runId":"20260412T123313_ultravox-v0.7_0b0cff59","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":78.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:05:28+00:00","createdAt":"2026-04-17T18:36:48.52051+00:00"},{"runId":"20260412T124641_amazon.nova-2-sonic-v1_0_271b4d6c","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":23,"ambiguityHandling":5,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":39.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:05:27+00:00","createdAt":"2026-04-17T18:36:59.526989+00:00"},{"runId":"20260412T123557_grok-realtime_e5c4952f","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":15,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":63.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:04:13+00:00","createdAt":"2026-04-17T18:36:51.802262+00:00"},{"runId":"20260412T124136_amazon.nova-2-sonic-v1_0_fa2c7d67","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":8,"kbGrounding":28,"ambiguityHandling":5,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:02:50+00:00","createdAt":"2026-04-17T18:36:56.101396+00:00"},{"runId":"20260412T124121_grok-realtime_32400d40","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":14,"kbGrounding":24,"ambiguityHandling":7,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.41,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:02:37+00:00","createdAt":"2026-04-17T18:36:55.399763+00:00"},{"runId":"20260412T122804_ultravox-v0.7_3dfbf72d","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":92.2,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:00:52+00:00","createdAt":"2026-04-17T18:36:44.943691+00:00"},{"runId":"20260412T123327_grok-realtime_be2eaec2","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":7,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":50.03,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:00:38+00:00","createdAt":"2026-04-17T18:36:49.073453+00:00"},{"runId":"20260412T124156_gpt-realtime_99a60094","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":4,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":52.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:59:01+00:00","createdAt":"2026-04-17T18:36:56.686977+00:00"},{"runId":"20260412T123750_gpt-realtime_243ebae9","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":27,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.48,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:58:48+00:00","createdAt":"2026-04-17T18:36:52.999968+00:00"},{"runId":"20260412T123359_amazon.nova-2-sonic-v1_0_63615b9c","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":9,"ambiguityHandling":8,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":39.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:58:01+00:00","createdAt":"2026-04-17T18:36:49.635764+00:00"},{"runId":"20260412T124305_gemini-3.1-flash-live-preview_d61a21a6","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":14,"kbGrounding":15,"ambiguityHandling":6,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.59,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:57:48+00:00","createdAt":"2026-04-17T18:36:57.754788+00:00"},{"runId":"20260412T122853_amazon.nova-2-sonic-v1_0_5e769550","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":9,"ambiguityHandling":3,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":29.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:55:54+00:00","createdAt":"2026-04-17T18:36:46.32889+00:00"},{"runId":"20260412T121857_amazon.nova-2-sonic-v1_0_5fbfa396","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":22,"ambiguityHandling":3,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":62.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:55:12+00:00","createdAt":"2026-04-17T18:36:36.273782+00:00"},{"runId":"20260412T122523_ultravox-v0.7_b05be7ed","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":78.37,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:55:07+00:00","createdAt":"2026-04-17T18:36:41.356251+00:00"},{"runId":"20260412T122700_amazon.nova-2-sonic-v1_0_2df5972b","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":15,"kbGrounding":24,"ambiguityHandling":6,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:52:45+00:00","createdAt":"2026-04-17T18:36:42.563542+00:00"},{"runId":"20260412T123428_gpt-realtime_e4808abe","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":65.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:52:34+00:00","createdAt":"2026-04-17T18:36:50.177755+00:00"},{"runId":"20260412T122526_grok-realtime_568ef73e","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":16,"ambiguityHandling":9,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:52:26+00:00","createdAt":"2026-04-17T18:36:42.006639+00:00"},{"runId":"20260412T122846_grok-realtime_373ba93d","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:52:24+00:00","createdAt":"2026-04-17T18:36:45.613971+00:00"},{"runId":"20260412T123837_gemini-2.5-flash-native-audio-preview-12-2025_d1375780","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":29,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:51:56+00:00","createdAt":"2026-04-17T18:36:53.622007+00:00"},{"runId":"20260412T122204_amazon.nova-2-sonic-v1_0_2f9d4bb6","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":14,"kbGrounding":17,"ambiguityHandling":7,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":51.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:49:13+00:00","createdAt":"2026-04-17T18:36:39.10678+00:00"},{"runId":"20260412T123259_gemini-3.1-flash-live-preview_6468c5ef","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":29,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:48:49+00:00","createdAt":"2026-04-17T18:36:47.93531+00:00"},{"runId":"20260412T123219_gemini-2.5-flash-native-audio-preview-12-2025_ee5be3ef","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:48:30+00:00","createdAt":"2026-04-17T18:36:47.405444+00:00"},{"runId":"20260412T123435_gemini-2.5-flash-native-audio-preview-12-2025_3d593821","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":26,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":43.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:48:13+00:00","createdAt":"2026-04-17T18:36:50.718803+00:00"},{"runId":"20260412T122711_gpt-realtime_fc1d4bad","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:48:05+00:00","createdAt":"2026-04-17T18:36:43.142324+00:00"},{"runId":"20260412T122314_gemini-3.1-flash-live-preview_35d1ed27","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":19,"ambiguityHandling":7,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":60.73,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:48:04+00:00","createdAt":"2026-04-17T18:36:40.808898+00:00"},{"runId":"20260412T122855_gpt-realtime_ac72b11c","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":25,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.81,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:46:39+00:00","createdAt":"2026-04-17T18:36:46.882094+00:00"},{"runId":"20260412T123531_gemini-3.1-flash-live-preview_b7a7d65e","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":3,"kbGrounding":3,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":4.91,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:45:50+00:00","createdAt":"2026-04-17T18:36:51.225538+00:00"},{"runId":"20260412T122748_gemini-3.1-flash-live-preview_c11626f2","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":78.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:44:15+00:00","createdAt":"2026-04-17T18:36:44.30975+00:00"},{"runId":"20260412T122712_gemini-2.5-flash-native-audio-preview-12-2025_6284d143","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":10,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":39.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:43:03+00:00","createdAt":"2026-04-17T18:36:43.734491+00:00"},{"runId":"20260412T122154_grok-realtime_d222a7dc","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":16,"ambiguityHandling":7,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":64.48,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:42:13+00:00","createdAt":"2026-04-17T18:36:38.516882+00:00"},{"runId":"20260412T121343_amazon.nova-2-sonic-v1_0_bfb2a827","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:41:54+00:00","createdAt":"2026-04-17T18:36:32.793916+00:00"},{"runId":"20260412T121731_ultravox-v0.7_325029d6","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.18,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:41:34+00:00","createdAt":"2026-04-17T18:36:35.004284+00:00"},{"runId":"20260412T121716_gemini-3.1-flash-live-preview_546bb298","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:41:19+00:00","createdAt":"2026-04-17T18:36:34.404013+00:00"},{"runId":"20260412T122058_gemini-3.1-flash-live-preview_cb135545","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":76.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:39:52+00:00","createdAt":"2026-04-17T18:36:37.947211+00:00"},{"runId":"20260412T121812_grok-realtime_ba9797fd","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":12,"ambiguityHandling":3,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":37.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:39:40+00:00","createdAt":"2026-04-17T18:36:35.641394+00:00"},{"runId":"20260412T122215_gpt-realtime_a1c87c43","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":25,"ambiguityHandling":8,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.02,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:38:35+00:00","createdAt":"2026-04-17T18:36:39.688365+00:00"},{"runId":"20260412T121903_gpt-realtime_46792adb","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":29,"ambiguityHandling":8,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":61.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:37:48+00:00","createdAt":"2026-04-17T18:36:36.825784+00:00"},{"runId":"20260412T122220_gemini-2.5-flash-native-audio-preview-12-2025_f4795cef","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":24,"ambiguityHandling":3,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":50.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:37:32+00:00","createdAt":"2026-04-17T18:36:40.255813+00:00"},{"runId":"20260412T121056_amazon.nova-2-sonic-v1_0_80f3d83b","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":12,"ambiguityHandling":10,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.51,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:35:44+00:00","createdAt":"2026-04-17T18:36:29.334884+00:00"},{"runId":"20260412T115339_amazon.nova-2-sonic-v1_0_c9fc3ae0","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":11,"kbGrounding":8,"ambiguityHandling":5,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":33.3,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:35:29+00:00","createdAt":"2026-04-17T18:36:15.977111+00:00"},{"runId":"20260412T120625_amazon.nova-2-sonic-v1_0_a892fbad","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":18,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":51.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:34:31+00:00","createdAt":"2026-04-17T18:36:25.972514+00:00"},{"runId":"20260412T120853_ultravox-v0.7_251527b6","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":86.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:34:20+00:00","createdAt":"2026-04-17T18:36:28.167833+00:00"},{"runId":"20260412T120632_gpt-realtime_3032e28b","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":19,"ambiguityHandling":5,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.1,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:33:57+00:00","createdAt":"2026-04-17T18:36:26.519438+00:00"},{"runId":"20260412T121335_grok-realtime_1dff0107","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":8,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:33:25+00:00","createdAt":"2026-04-17T18:36:32.248431+00:00"},{"runId":"20260412T121029_grok-realtime_6c029ba8","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":17,"ambiguityHandling":7,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":45.03,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:33:11+00:00","createdAt":"2026-04-17T18:36:28.774426+00:00"},{"runId":"20260412T121911_gemini-2.5-flash-native-audio-preview-12-2025_74f6513e","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":8,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":49.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:32:56+00:00","createdAt":"2026-04-17T18:36:37.399503+00:00"},{"runId":"20260412T121440_gpt-realtime_fca7059a","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:32:15+00:00","createdAt":"2026-04-17T18:36:33.332906+00:00"},{"runId":"20260412T121124_gemini-2.5-flash-native-audio-preview-12-2025_d33d68b5","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:28:47+00:00","createdAt":"2026-04-17T18:36:30.462741+00:00"},{"runId":"20260412T121629_gemini-2.5-flash-native-audio-preview-12-2025_01af9281","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":27,"ambiguityHandling":4,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.21,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:28:46+00:00","createdAt":"2026-04-17T18:36:33.867647+00:00"},{"runId":"20260412T121106_gpt-realtime_e8efc877","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:28:43+00:00","createdAt":"2026-04-17T18:36:29.907758+00:00"},{"runId":"20260412T121319_gemini-3.1-flash-live-preview_c78a8e9e","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":77.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:28:00+00:00","createdAt":"2026-04-17T18:36:31.03389+00:00"},{"runId":"20260412T120507_ultravox-v0.7_72cdc3de","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":84.6,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:27:45+00:00","createdAt":"2026-04-17T18:36:24.720752+00:00"},{"runId":"20260412T120511_grok-realtime_36c174ec","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":22,"ambiguityHandling":7,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.56,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:27:10+00:00","createdAt":"2026-04-17T18:36:25.310285+00:00"},{"runId":"20260412T115649_ultravox-v0.7_11269752","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":90.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:27:09+00:00","createdAt":"2026-04-17T18:36:18.298024+00:00"},{"runId":"20260412T115821_amazon.nova-2-sonic-v1_0_ceb2a2d5","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":17,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":14,"ambiguityHandling":3,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":43.66,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:26:56+00:00","createdAt":"2026-04-17T18:36:19.524185+00:00"},{"runId":"20260412T115713_grok-realtime_8df8ae97","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":21,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.05,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:25:25+00:00","createdAt":"2026-04-17T18:36:18.911681+00:00"},{"runId":"20260412T120056_grok-realtime_343fd068","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:23:12+00:00","createdAt":"2026-04-17T18:36:21.787884+00:00"},{"runId":"20260412T120659_gemini-2.5-flash-native-audio-preview-12-2025_88e7ae77","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":25,"ambiguityHandling":6,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":48.49,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:22:18+00:00","createdAt":"2026-04-17T18:36:27.074438+00:00"},{"runId":"20260412T120308_amazon.nova-2-sonic-v1_0_ac6998ed","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":12,"ambiguityHandling":6,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":38.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:22:14+00:00","createdAt":"2026-04-17T18:36:22.344594+00:00"},{"runId":"20260412T120501_gemini-3.1-flash-live-preview_1f8cbf57","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":66.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:22:02+00:00","createdAt":"2026-04-17T18:36:24.121998+00:00"},{"runId":"20260412T120839_gemini-3.1-flash-live-preview_aef59971","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":25,"ambiguityHandling":9,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":62.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:21:42+00:00","createdAt":"2026-04-17T18:36:27.581008+00:00"},{"runId":"20260412T120326_gpt-realtime_f6c83f0b","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":21,"ambiguityHandling":8,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":61.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:20:56+00:00","createdAt":"2026-04-17T18:36:22.904567+00:00"},{"runId":"20260412T120038_gemini-3.1-flash-live-preview_e0e02b39","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":63.66,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:19:09+00:00","createdAt":"2026-04-17T18:36:21.222741+00:00"},{"runId":"20260412T115830_gpt-realtime_a1c50f03","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":8,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":57.53,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:19:01+00:00","createdAt":"2026-04-17T18:36:20.093735+00:00"},{"runId":"20260412T114108_amazon.nova-2-sonic-v1_0_12acc599","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":12,"ambiguityHandling":8,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.01,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:18:55+00:00","createdAt":"2026-04-17T18:36:06.672888+00:00"},{"runId":"20260412T114454_amazon.nova-2-sonic-v1_0_c3537209","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":14,"kbGrounding":18,"ambiguityHandling":8,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.68,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:18:10+00:00","createdAt":"2026-04-17T18:36:09.952143+00:00"},{"runId":"20260412T115425_gpt-realtime_a3ae09ae","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":9,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":66.49,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:17:29+00:00","createdAt":"2026-04-17T18:36:16.573878+00:00"},{"runId":"20260412T120438_gemini-2.5-flash-native-audio-preview-12-2025_7067aa85","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":28,"ambiguityHandling":3,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.92,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:16:26+00:00","createdAt":"2026-04-17T18:36:23.553494+00:00"},{"runId":"20260412T115856_gemini-2.5-flash-native-audio-preview-12-2025_e0ae5584","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":40.81,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:13:41+00:00","createdAt":"2026-04-17T18:36:20.69225+00:00"},{"runId":"20260412T115431_gemini-2.5-flash-native-audio-preview-12-2025_304e621f","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":24,"ambiguityHandling":4,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:13:34+00:00","createdAt":"2026-04-17T18:36:17.164268+00:00"},{"runId":"20260412T114351_ultravox-v0.7_da38a19b","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.31,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:13:28+00:00","createdAt":"2026-04-17T18:36:08.832329+00:00"},{"runId":"20260412T115325_grok-realtime_4320bba9","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":23,"ambiguityHandling":8,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":52.21,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:13:17+00:00","createdAt":"2026-04-17T18:36:15.417578+00:00"},{"runId":"20260412T114957_ultravox-v0.7_0d1742ab","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.91,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:11:22+00:00","createdAt":"2026-04-17T18:36:12.104726+00:00"},{"runId":"20260412T113940_gpt-realtime_74f12cbb","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:11:04+00:00","createdAt":"2026-04-17T18:36:03.80922+00:00"},{"runId":"20260412T115300_gemini-3.1-flash-live-preview_f83ccd9b","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.9,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:10:54+00:00","createdAt":"2026-04-17T18:36:14.807298+00:00"},{"runId":"20260412T115235_gpt-realtime_c7e35cf4","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":26,"ambiguityHandling":8,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:10:28+00:00","createdAt":"2026-04-17T18:36:13.665439+00:00"},{"runId":"20260412T113354_amazon.nova-2-sonic-v1_0_bb27a265","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":15,"ambiguityHandling":3,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":38.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:08:52+00:00","createdAt":"2026-04-17T18:35:59.905278+00:00"},{"runId":"20260412T114049_ultravox-v0.7_dcf66144","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":91.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:08:36+00:00","createdAt":"2026-04-17T18:36:05.510312+00:00"},{"runId":"20260412T114929_gemini-3.1-flash-live-preview_cca17223","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":26,"ambiguityHandling":7,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:06:57+00:00","createdAt":"2026-04-17T18:36:11.572564+00:00"},{"runId":"20260412T114423_grok-realtime_ffcc0c23","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":27,"ambiguityHandling":8,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.27,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:06:31+00:00","createdAt":"2026-04-17T18:36:09.402814+00:00"},{"runId":"20260412T115045_grok-realtime_bacad8fd","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":7,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.56,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:06:23+00:00","createdAt":"2026-04-17T18:36:12.640459+00:00"},{"runId":"20260412T114508_gpt-realtime_66f69f23","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":25,"ambiguityHandling":7,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":60.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:05:05+00:00","createdAt":"2026-04-17T18:36:10.488227+00:00"},{"runId":"20260412T113338_grok-realtime_74bed4cc","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":21,"ambiguityHandling":7,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":61.9,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:04:59+00:00","createdAt":"2026-04-17T18:35:59.357471+00:00"},{"runId":"20260412T113831_ultravox-v0.7_05f794eb","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":88.95,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:04:36+00:00","createdAt":"2026-04-17T18:36:02.166265+00:00"},{"runId":"20260412T114107_grok-realtime_204b28ad","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":22,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":62.97,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:03:24+00:00","createdAt":"2026-04-17T18:36:06.127789+00:00"},{"runId":"20260412T113928_amazon.nova-2-sonic-v1_0_4d5bdb48","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":11,"ambiguityHandling":8,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":49.96,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:03:06+00:00","createdAt":"2026-04-17T18:36:03.241785+00:00"},{"runId":"20260412T114537_gemini-2.5-flash-native-audio-preview-12-2025_6d149c9c","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":3,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:00:43+00:00","createdAt":"2026-04-17T18:36:11.052594+00:00"},{"runId":"20260412T114127_gpt-realtime_a5ce53cf","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":28,"ambiguityHandling":8,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T19:00:36+00:00","createdAt":"2026-04-17T18:36:07.217798+00:00"},{"runId":"20260412T114316_gemini-3.1-flash-live-preview_ce7cafd6","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":9,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.11,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:58:54+00:00","createdAt":"2026-04-17T18:36:08.2886+00:00"},{"runId":"20260412T112921_ultravox-v0.7_d086eb71","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":9,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:58:28+00:00","createdAt":"2026-04-17T18:35:55.953848+00:00"},{"runId":"20260412T113723_gemini-3.1-flash-live-preview_8540ba45","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.55,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:58:19+00:00","createdAt":"2026-04-17T18:36:01.597056+00:00"},{"runId":"20260412T114129_gemini-2.5-flash-native-audio-preview-12-2025_f867bc5c","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":17,"toolUseDenom":22,"instructionFollowing":10,"kbGrounding":25,"ambiguityHandling":5,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":50.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:57:11+00:00","createdAt":"2026-04-17T18:36:07.75723+00:00"},{"runId":"20260412T114013_gemini-3.1-flash-live-preview_b8e8490e","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":25,"ambiguityHandling":8,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.01,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:56:46+00:00","createdAt":"2026-04-17T18:36:04.978513+00:00"},{"runId":"20260412T113943_gemini-2.5-flash-native-audio-preview-12-2025_ee8d1edd","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":26,"ambiguityHandling":7,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:54:49+00:00","createdAt":"2026-04-17T18:36:04.439747+00:00"},{"runId":"20260412T112728_grok-realtime_69545615","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:54:28+00:00","createdAt":"2026-04-17T18:35:53.037098+00:00"},{"runId":"20260412T112810_gemini-2.5-flash-native-audio-preview-12-2025_9f1016ed","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":8,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.59,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:54:23+00:00","createdAt":"2026-04-17T18:35:54.763202+00:00"},{"runId":"20260412T112724_ultravox-v0.7_c38f7ff4","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:53:37+00:00","createdAt":"2026-04-17T18:35:52.439908+00:00"},{"runId":"20260412T113103_amazon.nova-2-sonic-v1_0_f2eed61c","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":15,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.12,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:53:13+00:00","createdAt":"2026-04-17T18:35:57.177503+00:00"},{"runId":"20260412T112804_gpt-realtime_280c1b63","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:52:37+00:00","createdAt":"2026-04-17T18:35:54.206837+00:00"},{"runId":"20260412T113412_gpt-realtime_52301ebc","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":3,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":48.6,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:51:09+00:00","createdAt":"2026-04-17T18:36:00.486445+00:00"},{"runId":"20260412T113617_gemini-2.5-flash-native-audio-preview-12-2025_27becac7","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.51,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:50:41+00:00","createdAt":"2026-04-17T18:36:01.036033+00:00"},{"runId":"20260412T113105_gpt-realtime_94f60e50","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":25,"ambiguityHandling":7,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":48.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:49:55+00:00","createdAt":"2026-04-17T18:35:57.730159+00:00"},{"runId":"20260412T113233_gemini-3.1-flash-live-preview_16758193","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":8,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:45:05+00:00","createdAt":"2026-04-17T18:35:58.823094+00:00"},{"runId":"20260412T112841_gemini-3.1-flash-live-preview_4cc877f2","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.68,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:43:49+00:00","createdAt":"2026-04-17T18:35:55.351375+00:00"},{"runId":"20260412T111623_ultravox-v0.7_a94378ab","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.66,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:43:15+00:00","createdAt":"2026-04-17T18:35:46.268039+00:00"},{"runId":"20260412T111323_ultravox-v0.7_0c8d4fde","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":29,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":91.77,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:41:05+00:00","createdAt":"2026-04-17T18:35:42.788345+00:00"},{"runId":"20260412T110940_ultravox-v0.7_02839b83","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":84.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:40:47+00:00","createdAt":"2026-04-17T18:35:39.31104+00:00"},{"runId":"20260412T105910_ultravox-v0.7_b2b2ecd5","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":22,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":79.24,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:38:43+00:00","createdAt":"2026-04-17T18:35:31.198519+00:00"},{"runId":"20260412T102833_grok-realtime_0228cad3","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":43.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:26:09+00:00","createdAt":"2026-04-17T18:35:01.781146+00:00"},{"runId":"20260412T102949_grok-realtime_4393f983","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":17,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T18:02:57+00:00","createdAt":"2026-04-17T18:35:04.649132+00:00"},{"runId":"20260412T102930_ultravox-v0.7_f961ec84","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":92.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:56:48+00:00","createdAt":"2026-04-17T18:35:04.06032+00:00"},{"runId":"20260412T102836_gpt-realtime_31475cb4","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":30,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":77.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:56:23+00:00","createdAt":"2026-04-17T18:35:02.32277+00:00"},{"runId":"20260412T103051_amazon.nova-2-sonic-v1_0_48ac09e4","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":16,"ambiguityHandling":5,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":51.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:56:22+00:00","createdAt":"2026-04-17T18:35:05.196418+00:00"},{"runId":"20260412T101849_amazon.nova-2-sonic-v1_0_784adbec","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":20,"ambiguityHandling":9,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":66.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:55:11+00:00","createdAt":"2026-04-17T18:34:55.748753+00:00"},{"runId":"20260412T102833_amazon.nova-2-sonic-v1_0_48f1cf6c","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":7,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":15,"ambiguityHandling":9,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":63.13,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:54:58+00:00","createdAt":"2026-04-17T18:35:01.206624+00:00"},{"runId":"20260412T102800_ultravox-v0.7_f56cda2a","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.63,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:53:08+00:00","createdAt":"2026-04-17T18:35:00.675019+00:00"},{"runId":"20260412T101732_grok-realtime_38833c30","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":76.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:53:03+00:00","createdAt":"2026-04-17T18:34:55.154863+00:00"},{"runId":"20260412T101113_amazon.nova-2-sonic-v1_0_ec674c8b","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":11,"ambiguityHandling":4,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":41.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:51:38+00:00","createdAt":"2026-04-17T18:34:49.944552+00:00"},{"runId":"20260412T102150_grok-realtime_0709209f","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":18,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:47:24+00:00","createdAt":"2026-04-17T18:34:58.092284+00:00"},{"runId":"20260412T102910_gemini-3.1-flash-live-preview_4656ceef","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":89.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:42:55+00:00","createdAt":"2026-04-17T18:35:03.492701+00:00"},{"runId":"20260412T101458_ultravox-v0.7_fb47f3d0","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":29,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:42:34+00:00","createdAt":"2026-04-17T18:34:52.035426+00:00"},{"runId":"20260412T102855_gemini-2.5-flash-native-audio-preview-12-2025_76deb5f9","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":87.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:42:08+00:00","createdAt":"2026-04-17T18:35:02.890534+00:00"},{"runId":"20260412T100826_amazon.nova-2-sonic-v1_0_d6a3f81d","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":12,"ambiguityHandling":5,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":41.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:41:48+00:00","createdAt":"2026-04-17T18:34:46.586024+00:00"},{"runId":"20260412T102204_amazon.nova-2-sonic-v1_0_14e3a461","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":23,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:41:06+00:00","createdAt":"2026-04-17T18:34:58.600586+00:00"},{"runId":"20260412T101859_gpt-realtime_7e827d77","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":6,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":51.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:41:06+00:00","createdAt":"2026-04-17T18:34:56.40519+00:00"},{"runId":"20260412T102551_gemini-3.1-flash-live-preview_2d5514bc","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:40:00+00:00","createdAt":"2026-04-17T18:35:00.131096+00:00"},{"runId":"20260412T102323_gpt-realtime_6c57421d","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:39:58+00:00","createdAt":"2026-04-17T18:34:59.09476+00:00"},{"runId":"20260412T101502_grok-realtime_e45ee5a6","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":12,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":65.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:39:42+00:00","createdAt":"2026-04-17T18:34:52.602538+00:00"},{"runId":"20260412T101940_gemini-3.1-flash-live-preview_87829384","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":84.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:38:48+00:00","createdAt":"2026-04-17T18:34:57.517601+00:00"},{"runId":"20260412T102551_gemini-2.5-flash-native-audio-preview-12-2025_bbb6db9f","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":20,"ambiguityHandling":8,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.43,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:38:29+00:00","createdAt":"2026-04-17T18:34:59.605492+00:00"},{"runId":"20260412T101103_grok-realtime_e2c9fddb","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.51,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:38:27+00:00","createdAt":"2026-04-17T18:34:49.401012+00:00"},{"runId":"20260412T100636_grok-realtime_b52b5b98","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":19,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":65.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:35:58+00:00","createdAt":"2026-04-17T18:34:45.966459+00:00"},{"runId":"20260412T101035_ultravox-v0.7_40bb68db","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":70.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:35:57+00:00","createdAt":"2026-04-17T18:34:48.846431+00:00"},{"runId":"20260412T101908_gemini-2.5-flash-native-audio-preview-12-2025_751972ea","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":4,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:35:05+00:00","createdAt":"2026-04-17T18:34:56.961941+00:00"},{"runId":"20260412T101031_gemini-3.1-flash-live-preview_50fe928e","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:34:55+00:00","createdAt":"2026-04-17T18:34:48.282877+00:00"},{"runId":"20260412T100416_amazon.nova-2-sonic-v1_0_c5c7344d","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":9,"kbGrounding":22,"ambiguityHandling":5,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":37.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:34:43+00:00","createdAt":"2026-04-17T18:34:43.10769+00:00"},{"runId":"20260412T101629_gemini-2.5-flash-native-audio-preview-12-2025_2b9e4621","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":51.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:33:00+00:00","createdAt":"2026-04-17T18:34:54.143711+00:00"},{"runId":"20260412T101604_amazon.nova-2-sonic-v1_0_5fe5647c","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":13,"kbGrounding":26,"ambiguityHandling":5,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":44.88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:31:55+00:00","createdAt":"2026-04-17T18:34:53.134971+00:00"},{"runId":"20260412T100536_ultravox-v0.7_63ee7f61","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":83.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:31:52+00:00","createdAt":"2026-04-17T18:34:45.266058+00:00"},{"runId":"20260412T100357_grok-realtime_3bf71523","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:31:39+00:00","createdAt":"2026-04-17T18:34:42.477363+00:00"},{"runId":"20260412T101627_gpt-realtime_f17f3eaf","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.31,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:30:49+00:00","createdAt":"2026-04-17T18:34:53.641298+00:00"},{"runId":"20260412T101652_gemini-3.1-flash-live-preview_88bb696f","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":83.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:29:48+00:00","createdAt":"2026-04-17T18:34:54.637034+00:00"},{"runId":"20260412T101222_gemini-2.5-flash-native-audio-preview-12-2025_8f4adcb3","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:29:29+00:00","createdAt":"2026-04-17T18:34:50.983132+00:00"},{"runId":"20260412T100840_gpt-realtime_4bedd83a","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:29:08+00:00","createdAt":"2026-04-17T18:34:47.145848+00:00"},{"runId":"20260412T100003_ultravox-v0.7_cc663024","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":91.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:28:53+00:00","createdAt":"2026-04-17T18:34:38.567991+00:00"},{"runId":"20260412T101212_gpt-realtime_7b85727d","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":30,"ambiguityHandling":9,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":84.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:28:34+00:00","createdAt":"2026-04-17T18:34:50.470266+00:00"},{"runId":"20260412T100242_ultravox-v0.7_b693561d","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":70.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:28:31+00:00","createdAt":"2026-04-17T18:34:41.91349+00:00"},{"runId":"20260412T095741_grok-realtime_e953b54d","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":29,"kbGrounding":18,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:28:31+00:00","createdAt":"2026-04-17T18:34:35.618559+00:00"},{"runId":"20260412T100157_amazon.nova-2-sonic-v1_0_3aa4e5bd","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":5,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:27:58+00:00","createdAt":"2026-04-17T18:34:39.857197+00:00"},{"runId":"20260412T100504_gpt-realtime_26277cb7","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":24,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":66.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:25:50+00:00","createdAt":"2026-04-17T18:34:43.650491+00:00"},{"runId":"20260412T101305_gemini-3.1-flash-live-preview_c00130a9","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":83.58,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:25:49+00:00","createdAt":"2026-04-17T18:34:51.488621+00:00"},{"runId":"20260412T094735_grok-realtime_cf977f71","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":23,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.23,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:23:22+00:00","createdAt":"2026-04-17T18:34:29.353581+00:00"},{"runId":"20260412T100206_gpt-realtime_8fc79b28","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:22:02+00:00","createdAt":"2026-04-17T18:34:40.351811+00:00"},{"runId":"20260412T100930_gemini-2.5-flash-native-audio-preview-12-2025_7d329b1d","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":8,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:21:39+00:00","createdAt":"2026-04-17T18:34:47.705882+00:00"},{"runId":"20260412T100230_gemini-3.1-flash-live-preview_093f3b60","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":77.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:19:38+00:00","createdAt":"2026-04-17T18:34:41.377627+00:00"},{"runId":"20260412T100026_grok-realtime_bc83a391","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:19:07+00:00","createdAt":"2026-04-17T18:34:39.336183+00:00"},{"runId":"20260412T095808_gpt-realtime_364b9c72","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":27,"ambiguityHandling":5,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.24,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:18:58+00:00","createdAt":"2026-04-17T18:34:36.79497+00:00"},{"runId":"20260412T100529_gemini-3.1-flash-live-preview_da6a1581","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":82.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:18:47+00:00","createdAt":"2026-04-17T18:34:44.692931+00:00"},{"runId":"20260412T095619_ultravox-v0.7_d82828dd","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":78.2,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:17:20+00:00","createdAt":"2026-04-17T18:34:35.03009+00:00"},{"runId":"20260412T095743_amazon.nova-2-sonic-v1_0_61c88fcd","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":20,"ambiguityHandling":6,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":61.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:16:51+00:00","createdAt":"2026-04-17T18:34:36.22765+00:00"},{"runId":"20260412T100219_gemini-2.5-flash-native-audio-preview-12-2025_a560b586","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":55.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:16:28+00:00","createdAt":"2026-04-17T18:34:40.88104+00:00"},{"runId":"20260412T094946_amazon.nova-2-sonic-v1_0_f6dd56c0","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":16,"ambiguityHandling":9,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":64.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:16:25+00:00","createdAt":"2026-04-17T18:34:32.175848+00:00"},{"runId":"20260412T100515_gemini-2.5-flash-native-audio-preview-12-2025_ac529ab2","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:16:02+00:00","createdAt":"2026-04-17T18:34:44.187123+00:00"},{"runId":"20260412T094357_amazon.nova-2-sonic-v1_0_8d9ad870","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":13,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":56.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:15:01+00:00","createdAt":"2026-04-17T18:34:26.703667+00:00"},{"runId":"20260412T095816_gemini-2.5-flash-native-audio-preview-12-2025_e278af01","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":7,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:14:56+00:00","createdAt":"2026-04-17T18:34:37.385087+00:00"},{"runId":"20260412T094303_ultravox-v0.7_a98c3f56","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:13:03+00:00","createdAt":"2026-04-17T18:34:25.191727+00:00"},{"runId":"20260412T095554_gemini-3.1-flash-live-preview_c8ceab0e","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:12:20+00:00","createdAt":"2026-04-17T18:34:34.504945+00:00"},{"runId":"20260412T095920_gemini-3.1-flash-live-preview_89fb2d49","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":79.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:12:10+00:00","createdAt":"2026-04-17T18:34:37.912683+00:00"},{"runId":"20260412T094309_grok-realtime_c753c044","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":7,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":66.11,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:11:11+00:00","createdAt":"2026-04-17T18:34:25.967582+00:00"},{"runId":"20260412T094746_amazon.nova-2-sonic-v1_0_8c9eb73a","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":7,"toolUseDenom":22,"instructionFollowing":14,"kbGrounding":12,"ambiguityHandling":8,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.77,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:11:01+00:00","createdAt":"2026-04-17T18:34:29.940111+00:00"},{"runId":"20260412T093928_grok-realtime_a502b722","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":92.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:10:34+00:00","createdAt":"2026-04-17T18:34:22.317628+00:00"},{"runId":"20260412T095108_gemini-2.5-flash-native-audio-preview-12-2025_fa1a4a25","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":27,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":68.6,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:10:30+00:00","createdAt":"2026-04-17T18:34:33.706533+00:00"},{"runId":"20260412T095022_gpt-realtime_f454ed68","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:09:29+00:00","createdAt":"2026-04-17T18:34:33.185028+00:00"},{"runId":"20260412T093725_amazon.nova-2-sonic-v1_0_a6cf7b6b","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":16,"ambiguityHandling":7,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":50.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:08:39+00:00","createdAt":"2026-04-17T18:34:19.694449+00:00"},{"runId":"20260412T093459_ultravox-v0.7_00402ffd","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":21,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":72.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:08:24+00:00","createdAt":"2026-04-17T18:34:18.608782+00:00"},{"runId":"20260412T094436_gpt-realtime_9bb7d87c","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:05:34+00:00","createdAt":"2026-04-17T18:34:27.302362+00:00"},{"runId":"20260412T094946_grok-realtime_e207b40d","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":58.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:05:27+00:00","createdAt":"2026-04-17T18:34:32.692822+00:00"},{"runId":"20260412T094812_gpt-realtime_381bfb44","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":52.48,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:05:03+00:00","createdAt":"2026-04-17T18:34:30.485793+00:00"},{"runId":"20260412T094037_amazon.nova-2-sonic-v1_0_6d951619","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":24,"ambiguityHandling":5,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":48.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:04:15+00:00","createdAt":"2026-04-17T18:34:22.886666+00:00"},{"runId":"20260412T094922_gemini-3.1-flash-live-preview_b0e00487","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":87.54,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:03:56+00:00","createdAt":"2026-04-17T18:34:31.630704+00:00"},{"runId":"20260412T093052_grok-realtime_6b376824","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":10,"stateTracking":13,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":90.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:02:40+00:00","createdAt":"2026-04-17T18:34:15.955899+00:00"},{"runId":"20260412T093854_gemini-3.1-flash-live-preview_3a206649","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":16,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":9,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:02:28+00:00","createdAt":"2026-04-17T18:34:21.231603+00:00"},{"runId":"20260412T092834_ultravox-v0.7_b45601de","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":76.79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:02:17+00:00","createdAt":"2026-04-17T18:34:12.709749+00:00"},{"runId":"20260412T093219_amazon.nova-2-sonic-v1_0_b3d91aea","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":20,"ambiguityHandling":7,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.62,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:02:05+00:00","createdAt":"2026-04-17T18:34:16.545929+00:00"},{"runId":"20260412T093713_grok-realtime_47f6618e","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":11,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":18,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:01:56+00:00","createdAt":"2026-04-17T18:34:19.187294+00:00"},{"runId":"20260412T094835_gemini-2.5-flash-native-audio-preview-12-2025_6c5ae6a1","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":29,"ambiguityHandling":8,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":62.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:00:24+00:00","createdAt":"2026-04-17T18:34:31.093489+00:00"},{"runId":"20260412T094125_gpt-realtime_ac4612bc","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":14,"kbGrounding":25,"ambiguityHandling":5,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T17:00:02+00:00","createdAt":"2026-04-17T18:34:23.449773+00:00"},{"runId":"20260412T093927_ultravox-v0.7_dfa25801","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":92.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:59:18+00:00","createdAt":"2026-04-17T18:34:21.756581+00:00"},{"runId":"20260412T093826_gpt-realtime_1398d94d","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":30,"ambiguityHandling":9,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:58:15+00:00","createdAt":"2026-04-17T18:34:20.201019+00:00"},{"runId":"20260412T094128_gemini-3.1-flash-live-preview_e0533d47","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.13,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:57:42+00:00","createdAt":"2026-04-17T18:34:24.589358+00:00"},{"runId":"20260412T094532_gemini-2.5-flash-native-audio-preview-12-2025_dd435fc2","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":8,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":75.66,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:57:40+00:00","createdAt":"2026-04-17T18:34:27.899935+00:00"},{"runId":"20260412T094127_gemini-2.5-flash-native-audio-preview-12-2025_09a3afc4","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":27,"ambiguityHandling":8,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":69.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:55:52+00:00","createdAt":"2026-04-17T18:34:24.006372+00:00"},{"runId":"20260412T093828_gemini-2.5-flash-native-audio-preview-12-2025_af0cc5b6","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:51:07+00:00","createdAt":"2026-04-17T18:34:20.721576+00:00"},{"runId":"20260412T092850_amazon.nova-2-sonic-v1_0_071f1bbf","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":5,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":18,"ambiguityHandling":7,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":51.57,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:50:20+00:00","createdAt":"2026-04-17T18:34:13.770201+00:00"},{"runId":"20260412T092940_gpt-realtime_128891f1","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":65.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:49:44+00:00","createdAt":"2026-04-17T18:34:14.287825+00:00"},{"runId":"20260412T092840_grok-realtime_2f5ccd6c","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":17,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":21,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":70.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:49:35+00:00","createdAt":"2026-04-17T18:34:13.258903+00:00"},{"runId":"20260412T093220_gpt-realtime_754f9571","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":3,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":25,"ambiguityHandling":9,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":59.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:48:33+00:00","createdAt":"2026-04-17T18:34:17.061786+00:00"},{"runId":"20260412T092614_gpt-realtime_73d90a7f","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":23,"ambiguityHandling":10,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":56.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:48:11+00:00","createdAt":"2026-04-17T18:34:10.792252+00:00"},{"runId":"20260412T093251_gemini-3.1-flash-live-preview_310d3102","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":73.59,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:47:44+00:00","createdAt":"2026-04-17T18:34:18.085469+00:00"},{"runId":"20260412T093243_gemini-2.5-flash-native-audio-preview-12-2025_fe0dc475","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":18,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":6,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:47:18+00:00","createdAt":"2026-04-17T18:34:17.558006+00:00"},{"runId":"20260412T093035_gemini-3.1-flash-live-preview_d0089e02","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":9,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":79.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:43:56+00:00","createdAt":"2026-04-17T18:34:15.317026+00:00"},{"runId":"20260412T093004_gemini-2.5-flash-native-audio-preview-12-2025_c7e24b4b","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":86.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:43:01+00:00","createdAt":"2026-04-17T18:34:14.79666+00:00"},{"runId":"20260412T092646_gemini-2.5-flash-native-audio-preview-12-2025_3b2b778a","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":27,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":71.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T16:38:24+00:00","createdAt":"2026-04-17T18:34:11.523038+00:00"},{"runId":"20260403T144127_glm-realtime-flash_31bcb3de","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":27,"ambiguityHandling":9,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":54.65,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:15:10+00:00","createdAt":"2026-04-05T07:08:25.167509+00:00"},{"runId":"20260403T164804_glm-realtime-flash_0151d07d","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":15,"kbGrounding":18,"ambiguityHandling":6,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":40.43,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:12:34+00:00","createdAt":"2026-04-05T07:08:28.834207+00:00"},{"runId":"20260403T140107_glm-realtime-flash_7c07e253","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":22,"kbGrounding":14,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":35.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:12:26+00:00","createdAt":"2026-04-05T07:08:23.87792+00:00"},{"runId":"20260403T135302_glm-realtime-flash_63cf1763","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":10,"kbGrounding":10,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":25.96,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:11:05+00:00","createdAt":"2026-04-05T07:08:21.885311+00:00"},{"runId":"20260403T170001_glm-realtime-flash_41e0bb19","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":11,"ambiguityHandling":7,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":37.52,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:10:28+00:00","createdAt":"2026-04-05T07:08:29.839832+00:00"},{"runId":"20260403T141911_glm-realtime-flash_50da115a","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":5,"ambiguityHandling":5,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":35.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:09:49+00:00","createdAt":"2026-04-05T07:08:24.595148+00:00"},{"runId":"20260403T153607_glm-realtime-flash_c6f8f0b4","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":10,"kbGrounding":10,"ambiguityHandling":4,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":28.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:06:28+00:00","createdAt":"2026-04-05T07:08:27.507931+00:00"},{"runId":"20260403T140103_glm-realtime-flash_bf38f940","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":19,"kbGrounding":26,"ambiguityHandling":3,"stateTracking":4,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:06:01+00:00","createdAt":"2026-04-05T07:08:23.269348+00:00"},{"runId":"20260403T135920_glm-realtime-flash_a42d1baa","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":7,"toolUseDenom":22,"instructionFollowing":12,"kbGrounding":27,"ambiguityHandling":6,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.44,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:05:46+00:00","createdAt":"2026-04-05T07:08:22.500041+00:00"},{"runId":"20260403T154800_glm-realtime-flash_a9b741b7","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":17,"ambiguityHandling":9,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":43.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:04:01+00:00","createdAt":"2026-04-05T07:08:28.133346+00:00"},{"runId":"20260403T153225_glm-realtime-flash_314d5cf6","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":7,"toolUseDenom":22,"instructionFollowing":11,"kbGrounding":15,"ambiguityHandling":5,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":35.24,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:02:25+00:00","createdAt":"2026-04-05T07:08:26.798138+00:00"},{"runId":"20260403T151140_glm-realtime-flash_e2799f49","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":17,"kbGrounding":17,"ambiguityHandling":9,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":42.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:01:52+00:00","createdAt":"2026-04-05T07:08:26.209733+00:00"},{"runId":"20260403T133910_glm-realtime-flash_add34280","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":10,"toolUseDenom":22,"instructionFollowing":15,"kbGrounding":17,"ambiguityHandling":7,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":49.04,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T00:59:08+00:00","createdAt":"2026-04-05T07:08:20.855116+00:00"},{"runId":"20260331T224230_glm-realtime-flash_2788d28b","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":9,"toolUseDenom":22,"instructionFollowing":8,"kbGrounding":21,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":27.52,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T07:26:35+00:00","createdAt":"2026-04-01T19:19:27.717286+00:00"},{"runId":"20260330T205407_glm-realtime-flash_42cd3ccb","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":3,"kbGrounding":17,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":17.32,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:53:17+00:00","createdAt":"2026-04-01T07:15:56.806543+00:00"},{"runId":"20260330T204938_glm-realtime-air_fad92d94","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":3,"kbGrounding":14,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":11.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:53:00+00:00","createdAt":"2026-04-01T07:15:56.28325+00:00"},{"runId":"20260330T205550_glm-realtime-air_347360d9","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":0,"kbGrounding":0,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":0,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:52:44+00:00","createdAt":"2026-04-01T07:15:57.318025+00:00"},{"runId":"20260330T204055_glm-realtime-flash_9330bf6d","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":8,"toolUseDenom":22,"instructionFollowing":5,"kbGrounding":9,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":16.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:52:04+00:00","createdAt":"2026-04-01T07:15:54.613673+00:00"},{"runId":"20260330T204856_glm-realtime-air_4e79d99b","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":3,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":14.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:50:10+00:00","createdAt":"2026-04-01T07:15:55.729213+00:00"},{"runId":"20260330T204507_glm-realtime-air_62d204d3","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":1,"kbGrounding":19,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":13.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:49:35+00:00","createdAt":"2026-04-01T07:15:55.173168+00:00"},{"runId":"20260330T203924_glm-realtime-air_54ab1f67","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":4,"kbGrounding":23,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":18,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:49:28+00:00","createdAt":"2026-04-01T07:15:54.075282+00:00"},{"runId":"20260330T203850_glm-realtime-air_414e20c1","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":1,"kbGrounding":17,"ambiguityHandling":7,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:49:28+00:00","createdAt":"2026-04-01T07:15:53.436424+00:00"},{"runId":"20260330T203846_glm-realtime-flash_899d4c7c","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":1,"kbGrounding":2,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":2,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:46:21+00:00","createdAt":"2026-04-01T07:15:52.842378+00:00"},{"runId":"20260330T203240_glm-realtime-flash_28c20bed","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":7,"toolUseDenom":22,"instructionFollowing":1,"kbGrounding":18,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":23.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:46:06+00:00","createdAt":"2026-04-01T07:15:50.09872+00:00"},{"runId":"20260330T203520_glm-realtime-air_37df0614","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":4,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":20,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:46:02+00:00","createdAt":"2026-04-01T07:15:51.52629+00:00"},{"runId":"20260330T203314_glm-realtime-air_60caaaf6","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":1,"kbGrounding":22,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":15.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:45:49+00:00","createdAt":"2026-04-01T07:15:50.696124+00:00"},{"runId":"20260330T203648_glm-realtime-air_c7d52724","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":2,"kbGrounding":24,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":17.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:45:22+00:00","createdAt":"2026-04-01T07:15:52.253909+00:00"},{"runId":"20260330T202600_glm-realtime-air_c35f3bce","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":2,"kbGrounding":5,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":4.67,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:43:11+00:00","createdAt":"2026-04-01T07:15:49.59366+00:00"},{"runId":"20260330T202135_glm-realtime-flash_c873f618","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":10,"kbGrounding":13,"ambiguityHandling":6,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":27.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:42:12+00:00","createdAt":"2026-04-01T07:15:47.949895+00:00"},{"runId":"20260330T202545_glm-realtime-flash_6defe0f6","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":6,"kbGrounding":21,"ambiguityHandling":4,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":26,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:40:48+00:00","createdAt":"2026-04-01T07:15:49.047967+00:00"},{"runId":"20260330T201956_glm-realtime-flash_14cca63e","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":2,"toolUseDenom":22,"instructionFollowing":6,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":16.48,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:40:04+00:00","createdAt":"2026-04-01T07:15:47.360571+00:00"},{"runId":"20260330T202542_glm-realtime-flash_419e5d99","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":1,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":11.33,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:39:58+00:00","createdAt":"2026-04-01T07:15:48.515671+00:00"},{"runId":"20260330T201829_glm-realtime-air_25965045","benchmark":"grocery_bench","model":"glm-realtime-air","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":4,"kbGrounding":5,"ambiguityHandling":3,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:39:42+00:00","createdAt":"2026-04-01T07:15:46.737938+00:00"},{"runId":"20260330T201410_glm-realtime-flash_8a6eb481","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":0,"toolUseDenom":22,"instructionFollowing":8,"kbGrounding":19,"ambiguityHandling":4,"stateTracking":1,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":27.54,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:37:04+00:00","createdAt":"2026-04-01T07:15:45.971692+00:00"},{"runId":"20260330T200356_glm-realtime-flash_5fbb8d42","benchmark":"grocery_bench","model":"glm-realtime-flash","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":5,"kbGrounding":11,"ambiguityHandling":3,"stateTracking":0,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":20.3,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T03:36:32+00:00","createdAt":"2026-04-01T07:15:45.239159+00:00"},{"runId":"20260330T165225_gemini-3.1-flash-live-preview_18df8f70","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":10,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":86.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:42:07+00:00","createdAt":"2026-03-31T02:14:58.816881+00:00"},{"runId":"20260330T165059_gemini-3.1-flash-live-preview_816a7014","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":10,"stateTracking":8,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":81.7,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T02:37:30+00:00","createdAt":"2026-03-31T04:45:54.762224+00:00"},{"runId":"20260329T232120_gemini-3.1-flash-live-preview_b89c98df","benchmark":"grocery_bench","model":"gemini-3.1-flash-live-preview","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":21,"kbGrounding":26,"ambiguityHandling":10,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":72.38,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T07:42:28+00:00","createdAt":"2026-03-30T14:05:34.46249+00:00"},{"runId":"20260323T005037_ultravox-v0.7_ca1b8a0c","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":16,"kbGrounding":23,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":64.14,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:53:24+00:00","createdAt":"2026-03-23T18:07:11.703526+00:00"},{"runId":"20260323T005508_ultravox-v0.7_416a1017","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":24,"ambiguityHandling":9,"stateTracking":7,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":72.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:53:18+00:00","createdAt":"2026-03-23T18:07:12.294031+00:00"},{"runId":"20260323T145643_ultravox-v0.7_ecfe5c5f","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":29,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":13,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":92.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:52:04+00:00","createdAt":"2026-03-23T23:13:48.938859+00:00"},{"runId":"20260323T145929_ultravox-v0.7_bf64fe54","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":95.1,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:48:33+00:00","createdAt":"2026-03-23T23:13:50.047169+00:00"},{"runId":"20260323T130452_grok-realtime_36a0dbb7","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":14,"toolUseDenom":22,"instructionFollowing":28,"kbGrounding":24,"ambiguityHandling":10,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":85.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:45:01+00:00","createdAt":"2026-03-23T22:46:29.739268+00:00"},{"runId":"20260323T133628_grok-realtime_5c6c02bb","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":1,"toolUseDenom":22,"instructionFollowing":2,"kbGrounding":30,"ambiguityHandling":10,"stateTracking":3,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":46.86,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:42:57+00:00","createdAt":"2026-03-23T22:46:30.941614+00:00"},{"runId":"20260323T150006_grok-realtime_67da0922","benchmark":"grocery_bench","model":"grok-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":30,"kbGrounding":18,"ambiguityHandling":10,"stateTracking":11,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":79.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:42:56+00:00","createdAt":"2026-03-23T22:46:31.569858+00:00"},{"runId":"20260323T144059_gpt-realtime_83148ad8","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":12,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":26,"ambiguityHandling":7,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":65.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:36:04+00:00","createdAt":"2026-03-23T23:13:45.616457+00:00"},{"runId":"20260323T185559_gpt-realtime_e5e9a62d","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":15,"toolUseDenom":22,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":74.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:35:00+00:00","createdAt":"2026-03-24T02:33:06.148522+00:00"},{"runId":"20260323T185556_gpt-realtime_6645ce14","benchmark":"grocery_bench","model":"gpt-realtime","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":63.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:34:04+00:00","createdAt":"2026-03-24T02:33:03.870904+00:00"},{"runId":"20260323T145034_gemini-2.5-flash-native-audio-preview-12-2025_70ad4552","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":21,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":9,"stateTracking":13,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":92.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:25:00+00:00","createdAt":"2026-03-23T23:13:47.498781+00:00"},{"runId":"20260323T190541_gemini-2.5-flash-native-audio-preview-12-2025_ce942055","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":13,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":6,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:22:39+00:00","createdAt":"2026-03-24T02:33:07.225039+00:00"},{"runId":"20260323T145219_gemini-2.5-flash-native-audio-preview-12-2025_288c6d60","benchmark":"grocery_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":30,"scores":{"toolUse":20,"toolUseDenom":22,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":10,"stateTracking":13,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":94.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:22:20+00:00","createdAt":"2026-03-23T23:13:48.262379+00:00"},{"runId":"20260323T154942_amazon.nova-2-sonic-v1_0_ae5c04fe","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":20,"kbGrounding":11,"ambiguityHandling":9,"stateTracking":5,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":50,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:12:20+00:00","createdAt":"2026-03-23T23:13:54.820334+00:00"},{"runId":"20260323T152844_amazon.nova-2-sonic-v1_0_a0531eb1","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":4,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":16,"ambiguityHandling":4,"stateTracking":6,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":47.53,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:11:03+00:00","createdAt":"2026-03-23T23:13:54.294513+00:00"},{"runId":"20260323T214250_amazon.nova-2-sonic-v1_0_10a7ee03","benchmark":"grocery_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":30,"scores":{"toolUse":6,"toolUseDenom":22,"instructionFollowing":25,"kbGrounding":13,"ambiguityHandling":9,"stateTracking":12,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":67.25,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:09:38+00:00","createdAt":"2026-03-24T05:14:35.642077+00:00"},{"runId":"20260323T010823_ultravox-v0.7_7008bc41","benchmark":"grocery_bench","model":"ultravox-v0.7","turnsScored":30,"scores":{"toolUse":19,"toolUseDenom":22,"instructionFollowing":24,"kbGrounding":23,"ambiguityHandling":10,"stateTracking":13,"ambiguityDenom":10,"stateTrackingDenom":13},"passRate":88.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"gpt-5.2","judgedAt":"2026-03-23T19:00:43+00:00","createdAt":"2026-03-23T15:30:26.210963+00:00"},{"runId":"20260507T214330_gpt-realtime-2_a8320082","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":98.06,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:16:34+00:00","createdAt":"2026-05-07T23:02:35.488574+00:00"},{"runId":"20260507T214218_gpt-realtime-2_ef36438c","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":16},"passRate":78.18,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:14:50+00:00","createdAt":"2026-05-07T23:02:34.39958+00:00"},{"runId":"20260507T214106_gpt-realtime-2_e45bb56a","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":89.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:13:11+00:00","createdAt":"2026-05-07T23:02:30.462608+00:00"},{"runId":"20260507T214237_gpt-realtime-2_043daa9a","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":86.37,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:09:37+00:00","createdAt":"2026-05-07T23:02:35.222178+00:00"},{"runId":"20260507T214136_gpt-realtime-2_da1799e6","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:09:09+00:00","createdAt":"2026-05-07T23:02:32.166197+00:00"},{"runId":"20260507T214125_gpt-realtime-2_9c80d921","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:09:06+00:00","createdAt":"2026-05-07T23:02:31.434733+00:00"},{"runId":"20260507T214212_gpt-realtime-2_580f5703","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.51,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:09:05+00:00","createdAt":"2026-05-07T23:02:33.873958+00:00"},{"runId":"20260507T214226_gpt-realtime-2_7a66ad36","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":87.16,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:08:44+00:00","createdAt":"2026-05-07T23:02:34.948684+00:00"},{"runId":"20260507T214157_gpt-realtime-2_ca2aeecc","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.81,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:08:40+00:00","createdAt":"2026-05-07T23:02:33.015703+00:00"},{"runId":"20260507T214124_gpt-realtime-2_d5109e5e","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.08,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:07:49+00:00","createdAt":"2026-05-07T23:02:31.180154+00:00"},{"runId":"20260507T214132_gpt-realtime-2_d38f5cf9","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":86.39,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:07:18+00:00","createdAt":"2026-05-07T23:02:31.68389+00:00"},{"runId":"20260507T214151_gpt-realtime-2_362122fa","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.34,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:07:14+00:00","createdAt":"2026-05-07T23:02:32.632509+00:00"},{"runId":"20260507T214216_gpt-realtime-2_422858f4","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":89.61,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:07:05+00:00","createdAt":"2026-05-07T23:02:34.121563+00:00"},{"runId":"20260507T214204_gpt-realtime-2_34b657b4","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:06:20+00:00","createdAt":"2026-05-07T23:02:33.480666+00:00"},{"runId":"20260507T214152_gpt-realtime-2_59e9c821","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":93.26,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:05:57+00:00","createdAt":"2026-05-07T23:02:32.868654+00:00"},{"runId":"20260507T214135_gpt-realtime-2_687fda4f","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":96.36,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:05:56+00:00","createdAt":"2026-05-07T23:02:31.925588+00:00"},{"runId":"20260507T214147_gpt-realtime-2_e543cb70","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":98.18,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:05:53+00:00","createdAt":"2026-05-07T23:02:32.399263+00:00"},{"runId":"20260507T214124_gpt-realtime-2_d372f059","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":87.68,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:05:23+00:00","createdAt":"2026-05-07T23:02:30.931652+00:00"},{"runId":"20260507T214117_gpt-realtime-2_4ca10fc3","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":79.25,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T22:04:22+00:00","createdAt":"2026-05-07T23:02:30.691962+00:00"},{"runId":"20260507T214220_gpt-realtime-2_8f757eb0","benchmark":"product_bench","model":"gpt-realtime-2","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":13,"kbGrounding":13,"ambiguityHandling":2,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":44.7,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T21:57:56+00:00","createdAt":"2026-05-07T23:02:34.693386+00:00"},{"runId":"20260507T181358_grok-voice-think-fast-1.0_f299445c","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":63.54,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:26:50+00:00","createdAt":"2026-05-07T20:58:20.165877+00:00"},{"runId":"20260507T181444_grok-voice-think-fast-1.0_2b486d82","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.8,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T19:09:19+00:00","createdAt":"2026-05-07T20:58:22.323863+00:00"},{"runId":"20260507T181537_grok-voice-think-fast-1.0_c0f4cfc2","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":69.78,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:53:50+00:00","createdAt":"2026-05-07T20:58:23.587578+00:00"},{"runId":"20260507T181521_grok-voice-think-fast-1.0_aba10981","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.6,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:50:44+00:00","createdAt":"2026-05-07T20:58:23.328435+00:00"},{"runId":"20260507T181510_grok-voice-think-fast-1.0_aeb8c055","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.21,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:48:55+00:00","createdAt":"2026-05-07T20:58:23.071786+00:00"},{"runId":"20260507T181451_grok-voice-think-fast-1.0_36a644ce","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":62.78,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:47:58+00:00","createdAt":"2026-05-07T20:58:22.570631+00:00"},{"runId":"20260507T181543_grok-voice-think-fast-1.0_992fb0a5","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:47:39+00:00","createdAt":"2026-05-07T20:58:23.829624+00:00"},{"runId":"20260507T181358_grok-voice-think-fast-1.0_4d019815","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.05,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:47:08+00:00","createdAt":"2026-05-07T20:58:19.919755+00:00"},{"runId":"20260507T181413_grok-voice-think-fast-1.0_467ca423","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:46:45+00:00","createdAt":"2026-05-07T20:58:20.873817+00:00"},{"runId":"20260507T181413_grok-voice-think-fast-1.0_b8da2300","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":88.06,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:46:27+00:00","createdAt":"2026-05-07T20:58:21.110355+00:00"},{"runId":"20260507T181500_grok-voice-think-fast-1.0_ab732715","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.85,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:46:20+00:00","createdAt":"2026-05-07T20:58:22.822907+00:00"},{"runId":"20260507T181406_grok-voice-think-fast-1.0_66c7c8bb","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.29,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:46:15+00:00","createdAt":"2026-05-07T20:58:20.396332+00:00"},{"runId":"20260507T181426_grok-voice-think-fast-1.0_e5b82c00","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:45:25+00:00","createdAt":"2026-05-07T20:58:21.598851+00:00"},{"runId":"20260507T181434_grok-voice-think-fast-1.0_ae075be3","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:44:58+00:00","createdAt":"2026-05-07T20:58:22.082634+00:00"},{"runId":"20260507T181420_grok-voice-think-fast-1.0_17b8e0b7","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.1,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:44:28+00:00","createdAt":"2026-05-07T20:58:21.361933+00:00"},{"runId":"20260507T181347_grok-voice-think-fast-1.0_935279ce","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:44:01+00:00","createdAt":"2026-05-07T20:58:19.193547+00:00"},{"runId":"20260507T181427_grok-voice-think-fast-1.0_c18bac95","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.99,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:43:57+00:00","createdAt":"2026-05-07T20:58:21.839127+00:00"},{"runId":"20260507T181349_grok-voice-think-fast-1.0_59330d47","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:43:14+00:00","createdAt":"2026-05-07T20:58:19.440539+00:00"},{"runId":"20260507T181411_grok-voice-think-fast-1.0_33e15799","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:42:05+00:00","createdAt":"2026-05-07T20:58:20.633055+00:00"},{"runId":"20260507T181354_grok-voice-think-fast-1.0_6bd0cdd2","benchmark":"product_bench","model":"grok-voice-think-fast-1.0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":66.79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-05-07T18:40:05+00:00","createdAt":"2026-05-07T20:58:19.681253+00:00"},{"runId":"20260412T165824_grok-realtime_0b5e5d58","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:34:59+00:00","createdAt":"2026-04-17T18:52:38.309876+00:00"},{"runId":"20260412T165209_amazon.nova-2-sonic-v1_0_4cd1f8e3","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":15,"kbGrounding":18,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":50.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:33:11+00:00","createdAt":"2026-04-17T18:52:32.904801+00:00"},{"runId":"20260412T163904_grok-realtime_bd1b5add","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:32:56+00:00","createdAt":"2026-04-17T18:52:21.498716+00:00"},{"runId":"20260412T164146_grok-realtime_33260341","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:32:53+00:00","createdAt":"2026-04-17T18:52:25.200478+00:00"},{"runId":"20260412T155145_gemini-3.1-flash-live-preview_c77f2dd5","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.91,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:31:47+00:00","createdAt":"2026-04-17T18:51:50.065011+00:00"},{"runId":"20260412T160816_gemini-2.5-flash-native-audio-preview-12-2025_7bd15e14","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:31:20+00:00","createdAt":"2026-04-17T18:51:58.908482+00:00"},{"runId":"20260412T152012_gemini-2.5-flash-native-audio-preview-12-2025_a5d0e34e","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:30:28+00:00","createdAt":"2026-04-17T18:51:28.16632+00:00"},{"runId":"20260412T154306_gemini-2.5-flash-native-audio-preview-12-2025_04ea015c","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":69.78,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:30:05+00:00","createdAt":"2026-04-17T18:51:42.40517+00:00"},{"runId":"20260412T152219_gemini-3.1-flash-live-preview_5c8aca00","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.67,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:29:56+00:00","createdAt":"2026-04-17T18:51:28.71989+00:00"},{"runId":"20260412T150831_gemini-3.1-flash-live-preview_905f615f","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:29:38+00:00","createdAt":"2026-04-17T18:51:21.56367+00:00"},{"runId":"20260412T143002_amazon.nova-2-sonic-v1_0_f8634460","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":6,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":28.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:29:24+00:00","createdAt":"2026-04-17T18:50:55.247188+00:00"},{"runId":"20260412T144921_gemini-3.1-flash-live-preview_2886e90e","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.74,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:26:42+00:00","createdAt":"2026-04-17T18:51:08.375038+00:00"},{"runId":"20260412T143725_amazon.nova-2-sonic-v1_0_77aa5e32","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":21,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.48,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:24:27+00:00","createdAt":"2026-04-17T18:50:58.793769+00:00"},{"runId":"20260412T143940_gemini-3.1-flash-live-preview_1b606233","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.1,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:22:47+00:00","createdAt":"2026-04-17T18:51:00.829334+00:00"},{"runId":"20260412T135031_gemini-3.1-flash-live-preview_723a611a","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:21:48+00:00","createdAt":"2026-04-17T18:50:28.47194+00:00"},{"runId":"20260412T141319_gemini-3.1-flash-live-preview_8b67ebef","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:19:53+00:00","createdAt":"2026-04-17T18:50:43.509839+00:00"},{"runId":"20260412T142937_grok-realtime_879ccadd","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:19:02+00:00","createdAt":"2026-04-17T18:50:54.65909+00:00"},{"runId":"20260412T133148_grok-realtime_99fbbccf","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:18:33+00:00","createdAt":"2026-04-17T18:50:15.530805+00:00"},{"runId":"20260412T132551_grok-realtime_630f0d2f","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:16:44+00:00","createdAt":"2026-04-17T18:50:12.115986+00:00"},{"runId":"20260412T131808_grok-realtime_b8965e0e","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":68.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-15T00:15:49+00:00","createdAt":"2026-04-17T18:50:05.658611+00:00"},{"runId":"20260414T160440_gemini-2.5-flash-native-audio-preview-12-2025_bd144690","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.14,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T23:22:39+00:00","createdAt":"2026-04-17T18:52:39.397596+00:00"},{"runId":"20260412T155045_amazon.nova-2-sonic-v1_0_fb47ecb5","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":60.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T00:10:46+00:00","createdAt":"2026-04-17T18:51:48.310965+00:00"},{"runId":"20260412T165308_gpt-realtime_cef2be65","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":87.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-14T00:07:36+00:00","createdAt":"2026-04-17T18:52:33.574962+00:00"},{"runId":"20260412T142719_gpt-realtime_5bb80832","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.91,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:56:40+00:00","createdAt":"2026-04-17T18:50:52.228334+00:00"},{"runId":"20260412T125514_gemini-3.1-flash-live-preview_641daaed","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:47:52+00:00","createdAt":"2026-04-17T18:49:50.923107+00:00"},{"runId":"20260412T124842_grok-realtime_b036e5eb","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":80.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T23:46:32+00:00","createdAt":"2026-04-17T18:49:46.091325+00:00"},{"runId":"20260412T143844_gemini-2.5-flash-native-audio-preview-12-2025_ae9f82db","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":64.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T20:23:03+00:00","createdAt":"2026-04-17T18:51:00.170175+00:00"},{"runId":"20260412T170010_amazon.nova-2-sonic-v1_0_42d0f421","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":12,"kbGrounding":14,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":37.94,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:48:48+00:00","createdAt":"2026-04-17T18:52:38.851391+00:00"},{"runId":"20260412T165545_amazon.nova-2-sonic-v1_0_5a72163c","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":21,"kbGrounding":20,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":68.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:41:18+00:00","createdAt":"2026-04-17T18:52:35.978796+00:00"},{"runId":"20260412T165105_ultravox-v0.7_82ab6b1c","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":89.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:28:00+00:00","createdAt":"2026-04-17T18:52:31.602392+00:00"},{"runId":"20260412T163905_amazon.nova-2-sonic-v1_0_bed751a7","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":15,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":53.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:25:48+00:00","createdAt":"2026-04-17T18:52:22.066201+00:00"},{"runId":"20260412T164632_ultravox-v0.7_de436e19","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:22:05+00:00","createdAt":"2026-04-17T18:52:28.064751+00:00"},{"runId":"20260412T165419_grok-realtime_32309fa2","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":79.5,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:21:46+00:00","createdAt":"2026-04-17T18:52:35.406231+00:00"},{"runId":"20260412T165144_grok-realtime_32805cf0","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.9,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:21:21+00:00","createdAt":"2026-04-17T18:52:32.295789+00:00"},{"runId":"20260412T165616_gpt-realtime_bbc47e63","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.88,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:19:02+00:00","createdAt":"2026-04-17T18:52:36.67278+00:00"},{"runId":"20260412T164635_grok-realtime_4b579b53","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:17:11+00:00","createdAt":"2026-04-17T18:52:28.742145+00:00"},{"runId":"20260412T165752_gemini-3.1-flash-live-preview_75889129","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":79.38,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:17:03+00:00","createdAt":"2026-04-17T18:52:37.761011+00:00"},{"runId":"20260412T165625_gemini-2.5-flash-native-audio-preview-12-2025_02537332","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:15:29+00:00","createdAt":"2026-04-17T18:52:37.207344+00:00"},{"runId":"20260412T163849_ultravox-v0.7_2b98e296","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:13:56+00:00","createdAt":"2026-04-17T18:52:20.983586+00:00"},{"runId":"20260412T164127_ultravox-v0.7_1778e2a8","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.23,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:12:50+00:00","createdAt":"2026-04-17T18:52:24.468375+00:00"},{"runId":"20260412T164754_gpt-realtime_3b721834","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":65.11,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:12:45+00:00","createdAt":"2026-04-17T18:52:29.873451+00:00"},{"runId":"20260412T165347_gemini-3.1-flash-live-preview_2b1cb4da","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:12:01+00:00","createdAt":"2026-04-17T18:52:34.771912+00:00"},{"runId":"20260412T165040_gemini-3.1-flash-live-preview_efaefb01","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":63.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:11:03+00:00","createdAt":"2026-04-17T18:52:31.013202+00:00"},{"runId":"20260412T165338_gemini-2.5-flash-native-audio-preview-12-2025_b80946bf","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.96,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:09:58+00:00","createdAt":"2026-04-17T18:52:34.174047+00:00"},{"runId":"20260412T164404_amazon.nova-2-sonic-v1_0_5c7eef65","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":17,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.26,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:08:34+00:00","createdAt":"2026-04-17T18:52:25.799177+00:00"},{"runId":"20260412T165007_gemini-2.5-flash-native-audio-preview-12-2025_1eee5980","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":88.21,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:08:25+00:00","createdAt":"2026-04-17T18:52:30.44075+00:00"},{"runId":"20260412T164424_gpt-realtime_b6414c4b","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.84,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:07:36+00:00","createdAt":"2026-04-17T18:52:26.429757+00:00"},{"runId":"20260412T163234_grok-realtime_07806928","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:05:52+00:00","createdAt":"2026-04-17T18:52:17.911343+00:00"},{"runId":"20260412T164450_gemini-2.5-flash-native-audio-preview-12-2025_9f5b66db","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:05:34+00:00","createdAt":"2026-04-17T18:52:26.97251+00:00"},{"runId":"20260412T164727_amazon.nova-2-sonic-v1_0_1dd61229","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":33.54,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:04:56+00:00","createdAt":"2026-04-17T18:52:29.308319+00:00"},{"runId":"20260412T162735_ultravox-v0.7_bd6949fa","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":84.3,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:03:35+00:00","createdAt":"2026-04-17T18:52:14.30479+00:00"},{"runId":"20260412T164510_gemini-3.1-flash-live-preview_d62c7d77","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.35,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:03:30+00:00","createdAt":"2026-04-17T18:52:27.542843+00:00"},{"runId":"20260412T163909_gpt-realtime_fa687951","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:02:37+00:00","createdAt":"2026-04-17T18:52:22.695224+00:00"},{"runId":"20260412T161444_amazon.nova-2-sonic-v1_0_821cfbf1","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":16,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":47.13,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:01:07+00:00","createdAt":"2026-04-17T18:52:05.052836+00:00"},{"runId":"20260412T164045_gemini-3.1-flash-live-preview_3a57b371","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":91.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-13T00:00:08+00:00","createdAt":"2026-04-17T18:52:23.841119+00:00"},{"runId":"20260412T163433_gpt-realtime_7461fb86","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:58:12+00:00","createdAt":"2026-04-17T18:52:19.169206+00:00"},{"runId":"20260412T163931_gemini-2.5-flash-native-audio-preview-12-2025_cf34cf75","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:57:50+00:00","createdAt":"2026-04-17T18:52:23.267208+00:00"},{"runId":"20260412T162830_grok-realtime_aaa4e75b","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.34,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:56:23+00:00","createdAt":"2026-04-17T18:52:14.981597+00:00"},{"runId":"20260412T163740_gemini-3.1-flash-live-preview_ea20af13","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.4,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:56:14+00:00","createdAt":"2026-04-17T18:52:20.385537+00:00"},{"runId":"20260412T162126_grok-realtime_06bbdef4","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:55:44+00:00","createdAt":"2026-04-17T18:52:11.461795+00:00"},{"runId":"20260412T163414_amazon.nova-2-sonic-v1_0_d2160423","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":12,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":52.96,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:54:06+00:00","createdAt":"2026-04-17T18:52:18.530578+00:00"},{"runId":"20260412T161702_ultravox-v0.7_aa20650e","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":95.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:53:45+00:00","createdAt":"2026-04-17T18:52:07.592316+00:00"},{"runId":"20260412T163538_gemini-2.5-flash-native-audio-preview-12-2025_bbab37d3","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:53:37+00:00","createdAt":"2026-04-17T18:52:19.786137+00:00"},{"runId":"20260412T162215_amazon.nova-2-sonic-v1_0_8174698c","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":69.25,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:53:06+00:00","createdAt":"2026-04-17T18:52:12.062556+00:00"},{"runId":"20260412T163050_gpt-realtime_4902aa54","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":80.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:52:07+00:00","createdAt":"2026-04-17T18:52:16.139295+00:00"},{"runId":"20260412T162833_amazon.nova-2-sonic-v1_0_caf3e945","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":8,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.42,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:51:43+00:00","createdAt":"2026-04-17T18:52:15.54309+00:00"},{"runId":"20260412T161331_ultravox-v0.7_f50114c3","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":86.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:51:04+00:00","createdAt":"2026-04-17T18:52:03.789411+00:00"},{"runId":"20260412T163153_gemini-3.1-flash-live-preview_164311e5","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.32,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:50:38+00:00","createdAt":"2026-04-17T18:52:17.3072+00:00"},{"runId":"20260412T162559_gpt-realtime_ef630029","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:50:05+00:00","createdAt":"2026-04-17T18:52:12.704407+00:00"},{"runId":"20260412T161803_amazon.nova-2-sonic-v1_0_1951ce18","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:47:52+00:00","createdAt":"2026-04-17T18:52:08.337049+00:00"},{"runId":"20260412T161803_grok-realtime_76f5ac52","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.94,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:47:25+00:00","createdAt":"2026-04-17T18:52:09.062264+00:00"},{"runId":"20260412T163127_gemini-2.5-flash-native-audio-preview-12-2025_dfad0e10","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":18,"kbGrounding":25,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":66.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:46:33+00:00","createdAt":"2026-04-17T18:52:16.703294+00:00"},{"runId":"20260412T161335_grok-realtime_3760a6e5","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.52,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:46:30+00:00","createdAt":"2026-04-17T18:52:04.465603+00:00"},{"runId":"20260412T160643_ultravox-v0.7_94723560","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.08,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:45:08+00:00","createdAt":"2026-04-17T18:51:56.361425+00:00"},{"runId":"20260412T162707_gemini-3.1-flash-live-preview_f3d25a4d","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":94.06,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:44:49+00:00","createdAt":"2026-04-17T18:52:13.782579+00:00"},{"runId":"20260412T162627_gemini-2.5-flash-native-audio-preview-12-2025_29ffb363","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.93,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:44:22+00:00","createdAt":"2026-04-17T18:52:13.226514+00:00"},{"runId":"20260412T161828_gpt-realtime_c57bfbfa","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:44:02+00:00","createdAt":"2026-04-17T18:52:09.668312+00:00"},{"runId":"20260412T160905_ultravox-v0.7_9945fb3a","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.09,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:41:44+00:00","createdAt":"2026-04-17T18:52:00.088122+00:00"},{"runId":"20260412T160708_amazon.nova-2-sonic-v1_0_13e9cc45","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":34.83,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:41:25+00:00","createdAt":"2026-04-17T18:51:57.703724+00:00"},{"runId":"20260412T161106_amazon.nova-2-sonic-v1_0_19343b0a","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":15,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":42.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:40:43+00:00","createdAt":"2026-04-17T18:52:01.346126+00:00"},{"runId":"20260412T160049_amazon.nova-2-sonic-v1_0_4607f0ab","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":15,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":52.16,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:39:29+00:00","createdAt":"2026-04-17T18:51:54.057758+00:00"},{"runId":"20260412T162005_gemini-3.1-flash-live-preview_baba7366","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.76,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:39:08+00:00","createdAt":"2026-04-17T18:52:10.824574+00:00"},{"runId":"20260412T161505_gpt-realtime_09e29969","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.6,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:39:04+00:00","createdAt":"2026-04-17T18:52:05.736104+00:00"},{"runId":"20260412T161942_gemini-2.5-flash-native-audio-preview-12-2025_b70417a3","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":84.94,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:39:02+00:00","createdAt":"2026-04-17T18:52:10.239643+00:00"},{"runId":"20260412T160951_grok-realtime_96852e48","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:38:47+00:00","createdAt":"2026-04-17T18:52:00.76724+00:00"},{"runId":"20260412T154239_amazon.nova-2-sonic-v1_0_9c017c25","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":17,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":62.82,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:37:38+00:00","createdAt":"2026-04-17T18:51:41.173019+00:00"},{"runId":"20260412T161621_gemini-3.1-flash-live-preview_35efdaed","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:35:36+00:00","createdAt":"2026-04-17T18:52:06.970194+00:00"},{"runId":"20260412T161106_gpt-realtime_8d882b77","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.87,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:34:32+00:00","createdAt":"2026-04-17T18:52:02.115104+00:00"},{"runId":"20260412T161602_gemini-2.5-flash-native-audio-preview-12-2025_69ce24ac","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":76.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:34:12+00:00","createdAt":"2026-04-17T18:52:06.402198+00:00"},{"runId":"20260412T160706_grok-realtime_b0591263","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.17,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:32:22+00:00","createdAt":"2026-04-17T18:51:57.032486+00:00"},{"runId":"20260412T161116_gemini-2.5-flash-native-audio-preview-12-2025_fc55a493","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.44,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:31:52+00:00","createdAt":"2026-04-17T18:52:02.675609+00:00"},{"runId":"20260412T161153_gemini-3.1-flash-live-preview_dfd3459d","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.61,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:31:25+00:00","createdAt":"2026-04-17T18:52:03.234442+00:00"},{"runId":"20260412T160758_gpt-realtime_53140844","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.14,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:30:48+00:00","createdAt":"2026-04-17T18:51:58.313156+00:00"},{"runId":"20260412T155819_grok-realtime_2f15adbf","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.98,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:28:31+00:00","createdAt":"2026-04-17T18:51:53.50511+00:00"},{"runId":"20260412T160854_gemini-3.1-flash-live-preview_b3af6dec","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":67.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:27:33+00:00","createdAt":"2026-04-17T18:51:59.52543+00:00"},{"runId":"20260412T154939_ultravox-v0.7_93b05f16","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":93.64,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:27:05+00:00","createdAt":"2026-04-17T18:51:47.129496+00:00"},{"runId":"20260412T160418_gemini-3.1-flash-live-preview_30000bb8","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.43,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:26:26+00:00","createdAt":"2026-04-17T18:51:55.811984+00:00"},{"runId":"20260412T160316_gpt-realtime_751bcfbc","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:25:57+00:00","createdAt":"2026-04-17T18:51:54.665021+00:00"},{"runId":"20260412T155416_grok-realtime_bc0d2c80","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:22:14+00:00","createdAt":"2026-04-17T18:51:50.615993+00:00"},{"runId":"20260412T160332_gemini-2.5-flash-native-audio-preview-12-2025_f15abbb6","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.96,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:21:13+00:00","createdAt":"2026-04-17T18:51:55.207424+00:00"},{"runId":"20260412T155039_grok-realtime_0abcb56f","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":89.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:20:03+00:00","createdAt":"2026-04-17T18:51:47.751773+00:00"},{"runId":"20260412T155600_gpt-realtime_3af47dcd","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":94.7,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:18:26+00:00","createdAt":"2026-04-17T18:51:51.77074+00:00"},{"runId":"20260412T154338_ultravox-v0.7_135e12fb","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.45,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:18:01+00:00","createdAt":"2026-04-17T18:51:43.500817+00:00"},{"runId":"20260412T153558_ultravox-v0.7_ec66664f","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":86.36,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:18:01+00:00","createdAt":"2026-04-17T18:51:35.936411+00:00"},{"runId":"20260412T155725_gemini-2.5-flash-native-audio-preview-12-2025_2802fcd7","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":90.79,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:17:01+00:00","createdAt":"2026-04-17T18:51:52.37875+00:00"},{"runId":"20260412T155542_amazon.nova-2-sonic-v1_0_55ca3d06","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":14,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":51.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:16:19+00:00","createdAt":"2026-04-17T18:51:51.216777+00:00"},{"runId":"20260412T155751_gemini-3.1-flash-live-preview_cac9c454","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":95.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:16:00+00:00","createdAt":"2026-04-17T18:51:52.967627+00:00"},{"runId":"20260412T154524_amazon.nova-2-sonic-v1_0_7fdb1116","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":22,"ambiguityHandling":0,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":46.05,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:15:03+00:00","createdAt":"2026-04-17T18:51:44.856254+00:00"},{"runId":"20260412T155130_gpt-realtime_24655906","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.44,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:13:34+00:00","createdAt":"2026-04-17T18:51:48.889302+00:00"},{"runId":"20260412T154810_gpt-realtime_a2c3e6f5","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":79.62,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:13:30+00:00","createdAt":"2026-04-17T18:51:45.501259+00:00"},{"runId":"20260412T154357_grok-realtime_8df46090","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.81,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:11:51+00:00","createdAt":"2026-04-17T18:51:44.310105+00:00"},{"runId":"20260412T153828_ultravox-v0.7_ce5c4237","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.71,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:11:04+00:00","createdAt":"2026-04-17T18:51:39.809926+00:00"},{"runId":"20260412T153712_grok-realtime_7908413c","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.91,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:11:04+00:00","createdAt":"2026-04-17T18:51:36.634037+00:00"},{"runId":"20260412T154825_gemini-2.5-flash-native-audio-preview-12-2025_fa325ac2","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:09:49+00:00","createdAt":"2026-04-17T18:51:46.080045+00:00"},{"runId":"20260412T153950_grok-realtime_32dc194c","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.69,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:08:14+00:00","createdAt":"2026-04-17T18:51:40.626776+00:00"},{"runId":"20260412T155141_gemini-2.5-flash-native-audio-preview-12-2025_1d7f0106","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":64.07,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:07:56+00:00","createdAt":"2026-04-17T18:51:49.474758+00:00"},{"runId":"20260412T154239_gpt-realtime_62a139ad","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":15,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.28,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person2","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:07:04+00:00","createdAt":"2026-04-17T18:51:41.852742+00:00"},{"runId":"20260412T153807_gpt-realtime_00c50b3f","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.47,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:03:30+00:00","createdAt":"2026-04-17T18:51:37.880904+00:00"},{"runId":"20260412T152259_ultravox-v0.7_3a7d790a","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.39,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T23:03:13+00:00","createdAt":"2026-04-17T18:51:29.246367+00:00"},{"runId":"20260412T153809_gemini-2.5-flash-native-audio-preview-12-2025_bb9b0b7e","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":60.04,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:58:06+00:00","createdAt":"2026-04-17T18:51:38.489536+00:00"},{"runId":"20260412T153031_gpt-realtime_b5f5f7dd","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.72,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:55:56+00:00","createdAt":"2026-04-17T18:51:33.929117+00:00"},{"runId":"20260412T151712_ultravox-v0.7_50737040","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.8,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:50:37+00:00","createdAt":"2026-04-17T18:51:25.73458+00:00"},{"runId":"20260412T150950_ultravox-v0.7_d902d0ca","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":64.19,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:43:04+00:00","createdAt":"2026-04-17T18:51:22.162535+00:00"},{"runId":"20260412T150656_ultravox-v0.7_3279ee3f","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.88,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:38:23+00:00","createdAt":"2026-04-17T18:51:18.54457+00:00"},{"runId":"20260412T150034_ultravox-v0.7_ad109bfe","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.33,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:35:56+00:00","createdAt":"2026-04-17T18:51:15.019044+00:00"},{"runId":"20260412T144339_ultravox-v0.7_abd390da","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.63,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:22:56+00:00","createdAt":"2026-04-17T18:51:05.155701+00:00"},{"runId":"20260412T144014_ultravox-v0.7_ac0ca541","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.22,"rehydrated":false,"isReal":true,"audioSource":"real_audio/person1","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T22:19:21+00:00","createdAt":"2026-04-17T18:51:01.467348+00:00"},{"runId":"20260412T141153_amazon.nova-2-sonic-v1_0_13f5ef20","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":63.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:56:17+00:00","createdAt":"2026-04-17T18:50:41.601173+00:00"},{"runId":"20260412T140613_amazon.nova-2-sonic-v1_0_0ba9923b","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":14,"kbGrounding":13,"ambiguityHandling":1,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":50.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:47:23+00:00","createdAt":"2026-04-17T18:50:37.97065+00:00"},{"runId":"20260412T140611_grok-realtime_7a9b9e8c","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:45:48+00:00","createdAt":"2026-04-17T18:50:37.252833+00:00"},{"runId":"20260412T141044_ultravox-v0.7_82d59d67","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:43:33+00:00","createdAt":"2026-04-17T18:50:40.245051+00:00"},{"runId":"20260412T140221_amazon.nova-2-sonic-v1_0_8d3885ff","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:40:12+00:00","createdAt":"2026-04-17T18:50:34.018573+00:00"},{"runId":"20260412T141121_grok-realtime_69040a6a","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:38:43+00:00","createdAt":"2026-04-17T18:50:40.986115+00:00"},{"runId":"20260412T140555_ultravox-v0.7_33791c37","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:38:09+00:00","createdAt":"2026-04-17T18:50:36.518299+00:00"},{"runId":"20260412T135955_ultravox-v0.7_94d40b2c","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:33:56+00:00","createdAt":"2026-04-17T18:50:32.80827+00:00"},{"runId":"20260412T140622_gpt-realtime_533dfb7a","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:32:18+00:00","createdAt":"2026-04-17T18:50:38.539363+00:00"},{"runId":"20260412T134921_amazon.nova-2-sonic-v1_0_33ae619a","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":63.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:31:50+00:00","createdAt":"2026-04-17T18:50:26.736497+00:00"},{"runId":"20260412T141044_gemini-3.1-flash-live-preview_2881f356","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":69.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:30:00+00:00","createdAt":"2026-04-17T18:50:39.66456+00:00"},{"runId":"20260412T140148_grok-realtime_aca9077e","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":65.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:29:35+00:00","createdAt":"2026-04-17T18:50:33.483741+00:00"},{"runId":"20260412T140441_gpt-realtime_c5c82bfa","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:29:20+00:00","createdAt":"2026-04-17T18:50:34.676308+00:00"},{"runId":"20260412T140520_gemini-2.5-flash-native-audio-preview-12-2025_bd0066e4","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:27:54+00:00","createdAt":"2026-04-17T18:50:35.262382+00:00"},{"runId":"20260412T135430_amazon.nova-2-sonic-v1_0_7ec38226","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":62.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:27:21+00:00","createdAt":"2026-04-17T18:50:30.403434+00:00"},{"runId":"20260412T134416_amazon.nova-2-sonic-v1_0_2e579f2b","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":66.15,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:27:17+00:00","createdAt":"2026-04-17T18:50:23.761004+00:00"},{"runId":"20260412T140536_gemini-3.1-flash-live-preview_7f7954d4","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":92.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:26:02+00:00","createdAt":"2026-04-17T18:50:35.949426+00:00"},{"runId":"20260412T135237_ultravox-v0.7_525e5799","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:25:18+00:00","createdAt":"2026-04-17T18:50:29.030523+00:00"},{"runId":"20260412T140635_gemini-2.5-flash-native-audio-preview-12-2025_64d6ee28","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":67.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:23:36+00:00","createdAt":"2026-04-17T18:50:39.095675+00:00"},{"runId":"20260412T134400_ultravox-v0.7_cc3543d7","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":85.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:21:56+00:00","createdAt":"2026-04-17T18:50:22.557279+00:00"},{"runId":"20260412T135313_grok-realtime_52e43a52","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.45,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:20:47+00:00","createdAt":"2026-04-17T18:50:29.739337+00:00"},{"runId":"20260412T135546_gpt-realtime_a606437d","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:18:57+00:00","createdAt":"2026-04-17T18:50:31.032408+00:00"},{"runId":"20260412T132626_amazon.nova-2-sonic-v1_0_124a4614","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":16,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":65.79,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:17:15+00:00","createdAt":"2026-04-17T18:50:12.659717+00:00"},{"runId":"20260412T134914_grok-realtime_24f8812d","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:16:47+00:00","createdAt":"2026-04-17T18:50:26.098486+00:00"},{"runId":"20260412T135719_gemini-3.1-flash-live-preview_234d6975","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":93.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:16:20+00:00","createdAt":"2026-04-17T18:50:32.216116+00:00"},{"runId":"20260412T134727_gemini-2.5-flash-native-audio-preview-12-2025_217a6707","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":20,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":62.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:16:07+00:00","createdAt":"2026-04-17T18:50:24.930046+00:00"},{"runId":"20260412T135622_gemini-2.5-flash-native-audio-preview-12-2025_a3de8bd8","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:14:48+00:00","createdAt":"2026-04-17T18:50:31.621619+00:00"},{"runId":"20260412T134307_gemini-2.5-flash-native-audio-preview-12-2025_5d2e21e0","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":63.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:14:12+00:00","createdAt":"2026-04-17T18:50:21.387554+00:00"},{"runId":"20260412T134955_gpt-realtime_cb38a75a","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":88.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:13:18+00:00","createdAt":"2026-04-17T18:50:27.394432+00:00"},{"runId":"20260412T134401_grok-realtime_504c48a7","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.46,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:13:00+00:00","createdAt":"2026-04-17T18:50:23.208622+00:00"},{"runId":"20260412T133647_ultravox-v0.7_d86a4709","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:12:45+00:00","createdAt":"2026-04-17T18:50:18.591594+00:00"},{"runId":"20260412T133311_amazon.nova-2-sonic-v1_0_d720a950","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":21,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":56.05,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:11:51+00:00","createdAt":"2026-04-17T18:50:16.105446+00:00"},{"runId":"20260412T134711_gpt-realtime_3c2a8114","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":86.64,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:11:19+00:00","createdAt":"2026-04-17T18:50:24.366056+00:00"},{"runId":"20260412T133900_amazon.nova-2-sonic-v1_0_818ac859","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:10:42+00:00","createdAt":"2026-04-17T18:50:19.921956+00:00"},{"runId":"20260412T135005_gemini-2.5-flash-native-audio-preview-12-2025_4b9663e5","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":65.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:06:33+00:00","createdAt":"2026-04-17T18:50:27.929907+00:00"},{"runId":"20260412T133850_grok-realtime_53384574","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":76.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:06:21+00:00","createdAt":"2026-04-17T18:50:19.335442+00:00"},{"runId":"20260412T133145_ultravox-v0.7_6daf019c","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":79.5,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:06:11+00:00","createdAt":"2026-04-17T18:50:14.865479+00:00"},{"runId":"20260412T134910_gemini-3.1-flash-live-preview_b85ebd1b","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":91.44,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:06:10+00:00","createdAt":"2026-04-17T18:50:25.463784+00:00"},{"runId":"20260412T132401_amazon.nova-2-sonic-v1_0_1e64dc4f","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":66.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:05:54+00:00","createdAt":"2026-04-17T18:50:09.181702+00:00"},{"runId":"20260412T131825_amazon.nova-2-sonic-v1_0_b4d86328","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":22,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.73,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:05:35+00:00","createdAt":"2026-04-17T18:50:06.286367+00:00"},{"runId":"20260412T134014_gpt-realtime_4f1ed8fa","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.03,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:05:19+00:00","createdAt":"2026-04-17T18:50:20.613631+00:00"},{"runId":"20260412T131204_amazon.nova-2-sonic-v1_0_f14a5ca5","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":14,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":61.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:02:20+00:00","createdAt":"2026-04-17T18:50:02.758341+00:00"},{"runId":"20260412T134348_gemini-3.1-flash-live-preview_732e4a73","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T21:01:46+00:00","createdAt":"2026-04-17T18:50:22.023936+00:00"},{"runId":"20260412T132925_gemini-2.5-flash-native-audio-preview-12-2025_dc465194","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:59:53+00:00","createdAt":"2026-04-17T18:50:13.826556+00:00"},{"runId":"20260412T132535_ultravox-v0.7_3ab4f25a","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":68.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:57:18+00:00","createdAt":"2026-04-17T18:50:11.500084+00:00"},{"runId":"20260412T130716_amazon.nova-2-sonic-v1_0_4f117d84","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":18,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":57.87,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:56:20+00:00","createdAt":"2026-04-17T18:49:59.048045+00:00"},{"runId":"20260412T133318_gpt-realtime_b610b5d3","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.98,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:55:44+00:00","createdAt":"2026-04-17T18:50:16.713092+00:00"},{"runId":"20260412T133504_gemini-2.5-flash-native-audio-preview-12-2025_451a1965","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:54:29+00:00","createdAt":"2026-04-17T18:50:17.283986+00:00"},{"runId":"20260412T133510_gemini-3.1-flash-live-preview_83c87215","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:53:12+00:00","createdAt":"2026-04-17T18:50:18.019906+00:00"},{"runId":"20260412T131646_ultravox-v0.7_0425160f","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":94.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:52:36+00:00","createdAt":"2026-04-17T18:50:05.041129+00:00"},{"runId":"20260412T132351_grok-realtime_f4eee3c7","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":82.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:50:29+00:00","createdAt":"2026-04-17T18:50:08.620365+00:00"},{"runId":"20260412T132403_gpt-realtime_7b0ce218","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:50:03+00:00","createdAt":"2026-04-17T18:50:09.825997+00:00"},{"runId":"20260412T133048_gemini-3.1-flash-live-preview_9eb62903","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":87.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:49:53+00:00","createdAt":"2026-04-17T18:50:14.356683+00:00"},{"runId":"20260412T132725_gpt-realtime_9e0a5e3f","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":84.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:49:09+00:00","createdAt":"2026-04-17T18:50:13.296469+00:00"},{"runId":"20260412T132234_gemini-3.1-flash-live-preview_59e45d59","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.97,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:49:09+00:00","createdAt":"2026-04-17T18:50:08.062016+00:00"},{"runId":"20260412T132050_gemini-2.5-flash-native-audio-preview-12-2025_fcdba1de","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":69.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:47:25+00:00","createdAt":"2026-04-17T18:50:07.518224+00:00"},{"runId":"20260412T132532_gemini-3.1-flash-live-preview_8417b161","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":87.17,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:47:10+00:00","createdAt":"2026-04-17T18:50:10.928043+00:00"},{"runId":"20260412T125803_amazon.nova-2-sonic-v1_0_be0654db","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":66.67,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:44:15+00:00","createdAt":"2026-04-17T18:49:52.646563+00:00"},{"runId":"20260412T131144_ultravox-v0.7_c459b204","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":24,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.25,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:43:58+00:00","createdAt":"2026-04-17T18:50:01.452413+00:00"},{"runId":"20260412T132019_gpt-realtime_44ae4e20","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.96,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:43:46+00:00","createdAt":"2026-04-17T18:50:06.952086+00:00"},{"runId":"20260412T130251_amazon.nova-2-sonic-v1_0_fb43bbb6","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":18,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":56.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:43:05+00:00","createdAt":"2026-04-17T18:49:56.109105+00:00"},{"runId":"20260412T131147_grok-realtime_1c5c8d26","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":88.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:40:13+00:00","createdAt":"2026-04-17T18:50:02.210712+00:00"},{"runId":"20260412T132514_gemini-2.5-flash-native-audio-preview-12-2025_52d846af","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":21,"kbGrounding":22,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":67.48,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:38:58+00:00","createdAt":"2026-04-17T18:50:10.37604+00:00"},{"runId":"20260412T131209_gpt-realtime_b691e4c6","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":84.44,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:38:48+00:00","createdAt":"2026-04-17T18:50:03.389606+00:00"},{"runId":"20260412T130620_grok-realtime_1fa57328","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.99,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:36:46+00:00","createdAt":"2026-04-17T18:49:58.452777+00:00"},{"runId":"20260412T131634_gemini-3.1-flash-live-preview_7a3ea15f","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":84.82,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:35:08+00:00","createdAt":"2026-04-17T18:50:04.504546+00:00"},{"runId":"20260412T130931_gemini-3.1-flash-live-preview_f73b96dd","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":75.47,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:35:02+00:00","createdAt":"2026-04-17T18:50:00.871585+00:00"},{"runId":"20260412T131353_gemini-2.5-flash-native-audio-preview-12-2025_5abaa4d9","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":87.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:33:16+00:00","createdAt":"2026-04-17T18:50:03.952645+00:00"},{"runId":"20260412T130731_gpt-realtime_d4042e46","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:33:09+00:00","createdAt":"2026-04-17T18:49:59.730868+00:00"},{"runId":"20260412T125236_amazon.nova-2-sonic-v1_0_fc36838c","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":62.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:31:47+00:00","createdAt":"2026-04-17T18:49:49.260197+00:00"},{"runId":"20260412T130238_grok-realtime_d8465960","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":67.42,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:31:43+00:00","createdAt":"2026-04-17T18:49:55.528322+00:00"},{"runId":"20260412T130053_ultravox-v0.7_57669710","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":26,"ambiguityHandling":2,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.22,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:30:46+00:00","createdAt":"2026-04-17T18:49:54.861781+00:00"},{"runId":"20260412T130415_gpt-realtime_ab1eaf68","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.16,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:29:23+00:00","createdAt":"2026-04-17T18:49:56.755005+00:00"},{"runId":"20260412T124850_amazon.nova-2-sonic-v1_0_31205d75","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":24,"ambiguityHandling":2,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":65.4,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:27:22+00:00","createdAt":"2026-04-17T18:49:46.609542+00:00"},{"runId":"20260412T125556_ultravox-v0.7_befe2ce1","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.29,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:26:24+00:00","createdAt":"2026-04-17T18:49:51.502591+00:00"},{"runId":"20260412T125749_grok-realtime_9814cdf6","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:25:49+00:00","createdAt":"2026-04-17T18:49:52.119896+00:00"},{"runId":"20260412T125849_gpt-realtime_5673b120","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.83,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:25:33+00:00","createdAt":"2026-04-17T18:49:53.251103+00:00"},{"runId":"20260412T125226_ultravox-v0.7_22feb6bd","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":77.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:25:30+00:00","createdAt":"2026-04-17T18:49:48.239931+00:00"},{"runId":"20260412T130530_gemini-3.1-flash-live-preview_8aa3a99b","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:25:12+00:00","createdAt":"2026-04-17T18:49:57.860123+00:00"},{"runId":"20260412T130858_gemini-2.5-flash-native-audio-preview-12-2025_0800b2a6","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":56.84,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:24:02+00:00","createdAt":"2026-04-17T18:50:00.303376+00:00"},{"runId":"20260412T130529_gemini-2.5-flash-native-audio-preview-12-2025_a0ddb25a","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:23:49+00:00","createdAt":"2026-04-17T18:49:57.305595+00:00"},{"runId":"20260412T125227_grok-realtime_bb043231","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":24,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.76,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:22:32+00:00","createdAt":"2026-04-17T18:49:48.762363+00:00"},{"runId":"20260412T130040_gemini-3.1-flash-live-preview_d2d0d5e5","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":8,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":91.18,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:20:49+00:00","createdAt":"2026-04-17T18:49:54.312605+00:00"},{"runId":"20260412T124806_gpt-realtime_601c9091","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.74,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:20:17+00:00","createdAt":"2026-04-17T18:49:44.942861+00:00"},{"runId":"20260412T125247_gpt-realtime_2322ab5f","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.71,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:18:07+00:00","createdAt":"2026-04-17T18:49:49.876608+00:00"},{"runId":"20260412T125510_gemini-2.5-flash-native-audio-preview-12-2025_5eafe046","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":76.12,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:16:45+00:00","createdAt":"2026-04-17T18:49:50.415607+00:00"},{"runId":"20260412T125902_gemini-2.5-flash-native-audio-preview-12-2025_f303cdfe","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.93,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:16:33+00:00","createdAt":"2026-04-17T18:49:53.778906+00:00"},{"runId":"20260412T124851_gpt-realtime_61ce726a","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":27,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":79.13,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:12:02+00:00","createdAt":"2026-04-17T18:49:47.199392+00:00"},{"runId":"20260412T125158_gemini-3.1-flash-live-preview_fe88f927","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":95.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:11:46+00:00","createdAt":"2026-04-17T18:49:47.744785+00:00"},{"runId":"20260412T124814_gemini-3.1-flash-live-preview_aca12af0","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":67.69,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:08:56+00:00","createdAt":"2026-04-17T18:49:45.56013+00:00"},{"runId":"20260412T124806_gemini-2.5-flash-native-audio-preview-12-2025_c5aa94ad","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":92.35,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-12T20:07:14+00:00","createdAt":"2026-04-17T18:49:44.367542+00:00"},{"runId":"20260403T174039_glm-realtime-flash_f38fb7c9","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":17,"kbGrounding":17,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":38.04,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T07:54:10+00:00","createdAt":"2026-04-05T07:08:49.312113+00:00"},{"runId":"20260403T172431_glm-realtime-flash_37572079","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":17,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":22.48,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T07:51:21+00:00","createdAt":"2026-04-05T07:08:48.246713+00:00"},{"runId":"20260403T152320_glm-realtime-flash_3371c76e","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":32.88,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:37:54+00:00","createdAt":"2026-04-05T07:08:44.952258+00:00"},{"runId":"20260403T143531_glm-realtime-flash_30689e86","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":14,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":45.57,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:35:39+00:00","createdAt":"2026-04-05T07:08:43.749706+00:00"},{"runId":"20260403T160700_glm-realtime-flash_e1e48b52","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":16,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":5,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":30.9,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:33:48+00:00","createdAt":"2026-04-05T07:08:46.965844+00:00"},{"runId":"20260403T160031_glm-realtime-flash_c3bae054","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":11,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":33.89,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:33:35+00:00","createdAt":"2026-04-05T07:08:46.255483+00:00"},{"runId":"20260403T155144_glm-realtime-flash_5673490f","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":8,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":17.3,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:32:50+00:00","createdAt":"2026-04-05T07:08:45.576341+00:00"},{"runId":"20260403T135249_glm-realtime-flash_98e8e78e","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":11,"kbGrounding":14,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":37.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:32:07+00:00","createdAt":"2026-04-05T07:08:41.28995+00:00"},{"runId":"20260403T145435_glm-realtime-flash_da066be5","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":16,"kbGrounding":21,"ambiguityHandling":1,"stateTracking":6,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":44.26,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:31:07+00:00","createdAt":"2026-04-05T07:08:44.360714+00:00"},{"runId":"20260403T162119_glm-realtime-flash_e4f80713","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":15,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":26.13,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:30:19+00:00","createdAt":"2026-04-05T07:08:47.573215+00:00"},{"runId":"20260403T141546_glm-realtime-flash_8b9c2df8","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":18,"kbGrounding":19,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":49.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:26:50+00:00","createdAt":"2026-04-05T07:08:42.402333+00:00"},{"runId":"20260403T143417_glm-realtime-flash_b7d1b402","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":28.87,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-04T01:23:46+00:00","createdAt":"2026-04-05T07:08:43.043163+00:00"},{"runId":"20260330T221839_glm-realtime-air_6eb84cc4","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":2,"kbGrounding":4,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":5.3,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:37:54+00:00","createdAt":"2026-04-01T07:16:10.755014+00:00"},{"runId":"20260330T221634_glm-realtime-flash_df26029c","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":7,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":28.71,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:36:46+00:00","createdAt":"2026-04-01T07:16:10.209195+00:00"},{"runId":"20260330T220320_glm-realtime-air_aa7dbc42","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":1,"toolUseDenom":11,"instructionFollowing":1,"kbGrounding":2,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":3.75,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:35:01+00:00","createdAt":"2026-04-01T07:16:08.640176+00:00"},{"runId":"20260330T221506_glm-realtime-air_931efb82","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":1,"kbGrounding":12,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":21.24,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:33:33+00:00","createdAt":"2026-04-01T07:16:09.573619+00:00"},{"runId":"20260330T215723_glm-realtime-flash_7686739a","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":10,"kbGrounding":16,"ambiguityHandling":2,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":36.77,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:33:23+00:00","createdAt":"2026-04-01T07:16:07.562401+00:00"},{"runId":"20260330T215918_glm-realtime-flash_c33ba650","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":7,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":28.85,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:31:41+00:00","createdAt":"2026-04-01T07:16:08.090047+00:00"},{"runId":"20260330T214644_glm-realtime-air_3d1bdea2","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":2,"kbGrounding":21,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":16.27,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:30:27+00:00","createdAt":"2026-04-01T07:16:05.337545+00:00"},{"runId":"20260330T215703_glm-realtime-air_1a362701","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":6,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":30.65,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:29:07+00:00","createdAt":"2026-04-01T07:16:07.023637+00:00"},{"runId":"20260330T214740_glm-realtime-flash_a7ef393d","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":8,"kbGrounding":9,"ambiguityHandling":1,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":23.82,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:28:22+00:00","createdAt":"2026-04-01T07:16:05.846955+00:00"},{"runId":"20260330T214536_glm-realtime-air_26c219a9","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":2,"kbGrounding":4,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":15.3,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:28:22+00:00","createdAt":"2026-04-01T07:16:04.825398+00:00"},{"runId":"20260330T215105_glm-realtime-air_3c558b44","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":7,"kbGrounding":16,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":24.84,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:27:35+00:00","createdAt":"2026-04-01T07:16:06.436185+00:00"},{"runId":"20260330T213304_glm-realtime-flash_5b59ec85","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":7,"kbGrounding":22,"ambiguityHandling":1,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":30.14,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:25:06+00:00","createdAt":"2026-04-01T07:16:03.143093+00:00"},{"runId":"20260330T214454_glm-realtime-air_99c12e1a","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":5,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":31.94,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:24:15+00:00","createdAt":"2026-04-01T07:16:04.297601+00:00"},{"runId":"20260330T212926_glm-realtime-air_cb688ef6","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":1,"kbGrounding":3,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":4.01,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:24:07+00:00","createdAt":"2026-04-01T07:16:02.067139+00:00"},{"runId":"20260330T214322_glm-realtime-flash_88e41e8e","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":2,"kbGrounding":26,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":18.06,"rehydrated":false,"isReal":true,"audioSource":null,"judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:23:11+00:00","createdAt":"2026-04-01T07:16:03.759653+00:00"},{"runId":"20260330T212929_glm-realtime-air_37ddb7b1","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":3,"kbGrounding":24,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":18.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:22:34+00:00","createdAt":"2026-04-01T07:16:02.602958+00:00"},{"runId":"20260330T212424_glm-realtime-flash_edc16aa3","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":7,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":17.56,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:20:48+00:00","createdAt":"2026-04-01T07:16:01.521725+00:00"},{"runId":"20260330T212244_glm-realtime-air_69d0df30","benchmark":"product_bench","model":"glm-realtime-air","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":5,"kbGrounding":13,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":11.61,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:19:10+00:00","createdAt":"2026-04-01T07:16:00.857398+00:00"},{"runId":"20260330T211850_glm-realtime-flash_4430bd55","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":8,"kbGrounding":18,"ambiguityHandling":0,"stateTracking":3,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":21.06,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:19:04+00:00","createdAt":"2026-04-01T07:15:59.793616+00:00"},{"runId":"20260330T210628_glm-realtime-flash_99e6d412","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":4,"kbGrounding":17,"ambiguityHandling":0,"stateTracking":0,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":13.55,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:18:39+00:00","createdAt":"2026-04-01T07:15:59.210957+00:00"},{"runId":"20260330T210620_glm-realtime-flash_a0a5a6cb","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":4,"kbGrounding":16,"ambiguityHandling":0,"stateTracking":1,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":14.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:17:41+00:00","createdAt":"2026-04-01T07:15:58.560635+00:00"},{"runId":"20260330T205923_glm-realtime-flash_fa43593b","benchmark":"product_bench","model":"glm-realtime-flash","turnsScored":31,"scores":{"toolUse":0,"toolUseDenom":11,"instructionFollowing":13,"kbGrounding":25,"ambiguityHandling":0,"stateTracking":2,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":27.37,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-04-01T04:15:38+00:00","createdAt":"2026-04-01T07:15:58.06112+00:00"},{"runId":"20260330T171257_gemini-3.1-flash-live-preview_080ef3ac","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":7,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":27,"ambiguityHandling":2,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":83.28,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T03:51:39+00:00","createdAt":"2026-03-31T02:38:36.964119+00:00"},{"runId":"20260330T170358_gemini-3.1-flash-live-preview_ce9bac85","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-31T03:45:32+00:00","createdAt":"2026-03-31T04:46:01.741754+00:00"},{"runId":"20260329T233704_gemini-3.1-flash-live-preview_a032022a","benchmark":"product_bench","model":"gemini-3.1-flash-live-preview","turnsScored":31,"scores":{"toolUse":9,"toolUseDenom":11,"instructionFollowing":26,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":91.85,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-30T07:48:03+00:00","createdAt":"2026-03-30T14:05:38.860798+00:00"},{"runId":"20260326T103752_ultravox-v0.7_20ee1c6a","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.3,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:40:34+00:00","createdAt":"2026-03-26T20:39:16.77722+00:00"},{"runId":"20260324T231550_ultravox-v0.7_82476483","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":28,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.32,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:38:51+00:00","createdAt":"2026-03-25T17:43:47.840048+00:00"},{"runId":"20260326T103752_ultravox-v0.7_84c9b5fb","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":73.91,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:38:13+00:00","createdAt":"2026-03-26T20:39:17.411498+00:00"},{"runId":"20260324T231550_ultravox-v0.7_b1b1c957","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":27,"kbGrounding":28,"ambiguityHandling":0,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":61.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:37:57+00:00","createdAt":"2026-03-26T20:39:14.428114+00:00"},{"runId":"20260323T130739_grok-realtime_4f6de9b0","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":67.3,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:34:51+00:00","createdAt":"2026-03-23T22:46:43.35796+00:00"},{"runId":"20260323T131602_grok-realtime_cf72d318","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":31,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":74.81,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:31:51+00:00","createdAt":"2026-03-23T22:46:43.894252+00:00"},{"runId":"20260324T234043_grok-realtime_5029e4e9","benchmark":"product_bench","model":"grok-realtime","turnsScored":31,"scores":{"toolUse":3,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.09,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:31:42+00:00","createdAt":"2026-03-25T17:43:48.518031+00:00"},{"runId":"20260324T231550_gpt-realtime_e34f53c0","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":31,"ambiguityHandling":1,"stateTracking":11,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":72.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:20:54+00:00","createdAt":"2026-03-25T17:43:46.662693+00:00"},{"runId":"20260324T231550_gpt-realtime_b8e377a3","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":6,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":29,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":76.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:18:30+00:00","createdAt":"2026-03-25T17:43:46.061512+00:00"},{"runId":"20260324T231550_gemini-2.5-flash-native-audio-preview-12-2025_d1952bb1","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":23,"kbGrounding":28,"ambiguityHandling":2,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":78.75,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:13:42+00:00","createdAt":"2026-03-25T17:43:45.410943+00:00"},{"runId":"20260324T231550_gemini-2.5-flash-native-audio-preview-12-2025_8c50d144","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":23,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":70.04,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:11:45+00:00","createdAt":"2026-03-25T17:43:44.75325+00:00"},{"runId":"20260326T103752_gemini-2.5-flash-native-audio-preview-12-2025_c519166c","benchmark":"product_bench","model":"gemini-2.5-flash-native-audio-preview-12-2025","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":81.07,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:08:14+00:00","createdAt":"2026-03-26T20:39:16.223157+00:00"},{"runId":"20260324T231550_amazon.nova-2-sonic-v1_0_f4d13173","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":5,"toolUseDenom":11,"instructionFollowing":16,"kbGrounding":26,"ambiguityHandling":1,"stateTracking":4,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":51.9,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T04:00:38+00:00","createdAt":"2026-03-25T17:43:44.160166+00:00"},{"runId":"20260323T215238_amazon.nova-2-sonic-v1_0_e8ec202a","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":9,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":71.33,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-28T03:57:30+00:00","createdAt":"2026-03-24T05:34:56.043462+00:00"},{"runId":"20260323T010221_gpt-realtime_d5f27e30","benchmark":"product_bench","model":"gpt-realtime","turnsScored":31,"scores":{"toolUse":2,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":25,"ambiguityHandling":1,"stateTracking":7,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":52.02,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"gpt-5.2","judgedAt":"2026-03-23T19:04:00+00:00","createdAt":"2026-03-23T15:30:30.534106+00:00"},{"runId":"20260323T004857_amazon.nova-2-sonic-v1_0_5483f979","benchmark":"product_bench","model":"amazon.nova-2-sonic-v1:0","turnsScored":31,"scores":{"toolUse":4,"toolUseDenom":11,"instructionFollowing":19,"kbGrounding":20,"ambiguityHandling":1,"stateTracking":10,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":56.72,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"gpt-5.2","judgedAt":"2026-03-23T19:03:34+00:00","createdAt":"2026-03-23T15:30:29.881435+00:00"},{"runId":"20260323T011421_ultravox-v0.7_9f173645","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":25,"kbGrounding":28,"ambiguityHandling":1,"stateTracking":13,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":80.95,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"gpt-5.2","judgedAt":"2026-03-23T19:03:16+00:00","createdAt":"2026-03-23T15:30:33.963689+00:00"},{"runId":"20260322T183522_ultravox-v0.7_6c70f53b","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":11,"toolUseDenom":11,"instructionFollowing":30,"kbGrounding":29,"ambiguityHandling":2,"stateTracking":12,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":95.21,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T02:42:16+00:00","createdAt":"2026-03-23T02:45:04.657631+00:00"},{"runId":"20260322T183521_ultravox-v0.7_709db73e","benchmark":"product_bench","model":"ultravox-v0.7","turnsScored":31,"scores":{"toolUse":10,"toolUseDenom":11,"instructionFollowing":29,"kbGrounding":30,"ambiguityHandling":1,"stateTracking":14,"ambiguityDenom":2,"stateTrackingDenom":14},"passRate":86.25,"rehydrated":false,"isReal":false,"audioSource":"tts","judgeModel":"claude-opus-4-5","judgedAt":"2026-03-23T02:40:15+00:00","createdAt":"2026-03-23T02:41:21.453794+00:00"}],"count":1837}