tb-timeout-ablation

Public
1602/1602 trials completed|159 errors|avg reward 0.70
harbor job download c6bb2353-6b49-4ed2-bd34-712f528d02df
terminal-bench/adaptive-rejection-samplerclaude-code2.1.118anthropicclaude-opus-4-7-
0.00
6
0
9m 9s
-
3,370,003
256,663
2,979,763
-
terminal-bench/adaptive-rejection-samplercodex0.122.0openaigpt-5.4-
0.67
6
0
12m 32s
-
5,374,933
131,664
4,860,800
-
terminal-bench/adaptive-rejection-samplerterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
0
5m 44s
-
202,149
191,385
69,367
$2.58
terminal-bench/bn-fit-modifyclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
2m 22s
-
2,199,153
37,978
2,056,975
-
terminal-bench/bn-fit-modifycodex0.122.0openaigpt-5.4-
1.00
6
0
10m 4s
-
1,612,782
49,441
1,494,144
-
terminal-bench/bn-fit-modifyterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
4m 26s
-
342,517
120,374
146,502
$1.87
terminal-bench/break-filter-js-from-htmlclaude-code2.1.118anthropicclaude-opus-4-7-
0.83
6
1
6m 9s
NonZeroAgentExitCodeError
4,120,208
152,618
3,853,829
-
terminal-bench/break-filter-js-from-htmlcodex0.122.0openaigpt-5.4-
0.33
6
0
8m 13s
-
852,230
21,407
795,776
-
terminal-bench/break-filter-js-from-htmlterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
5m 34s
-
661,259
141,060
289,302
$2.49
terminal-bench/build-cython-extclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
5m 36s
-
18,530,515
88,071
18,110,794
-
terminal-bench/build-cython-extcodex0.122.0openaigpt-5.4-
1.00
6
0
10m 10s
-
12,923,795
80,026
12,140,928
-
terminal-bench/build-cython-extterminus-22.0.0geminigemini-3.1-pro-preview-
0.50
6
0
3m 54s
-
851,955
52,077
508,388
$1.41
terminal-bench/build-pmarsclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
1m 44s
-
3,500,151
22,390
3,395,446
-
terminal-bench/build-pmarscodex0.122.0openaigpt-5.4-
1.00
6
0
10m 20s
-
5,277,494
47,959
4,646,784
-
terminal-bench/build-pmarsterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
2m 1s
-
376,161
36,147
171,128
$0.88
terminal-bench/build-pov-rayclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
3m 21s
-
7,790,281
39,468
7,510,830
-
terminal-bench/build-pov-raycodex0.122.0openaigpt-5.4-
0.83
6
0
9m 48s
-
10,123,686
63,224
9,266,560
-
terminal-bench/build-pov-rayterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
4m 38s
-
1,405,413
56,547
985,240
$1.72
terminal-bench/caffe-cifar-10claude-code2.1.118anthropicclaude-opus-4-7-
0.00
6
6
34m 51s
AgentTimeoutError +1 more
4,881,534
38,040
4,660,608
-
terminal-bench/caffe-cifar-10codex0.122.0openaigpt-5.4-
0.17
6
4
32m 47s
AgentTimeoutError
41,187,063
93,610
38,787,456
-
terminal-bench/caffe-cifar-10terminus-22.0.0geminigemini-3.1-pro-preview-
0.00
6
5
31m 18s
AgentTimeoutError +1 more
1,064,832
60,657
671,405
$1.65
terminal-bench/cancel-async-tasksclaude-code2.1.118anthropicclaude-opus-4-7-
0.67
6
0
3m 27s
-
2,481,564
68,977
2,365,491
-
terminal-bench/cancel-async-taskscodex0.122.0openaigpt-5.4-
0.83
6
0
9m 4s
-
413,967
19,148
390,144
-
terminal-bench/cancel-async-tasksterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
4m 9s
-
156,844
118,366
8,118
$1.72
terminal-bench/chess-best-moveclaude-code2.1.118anthropicclaude-opus-4-7-
0.50
6
0
3m 15s
-
648,186
74,826
529,749
-
terminal-bench/chess-best-movecodex0.122.0openaigpt-5.4-
0.83
6
0
7m 58s
-
1,632,010
27,128
1,436,416
-
terminal-bench/chess-best-moveterminus-22.0.0geminigemini-3.1-pro-preview-
0.50
6
2
11m 59s
AgentTimeoutError
1,698,164
366,664
1,122,790
$5.78
terminal-bench/circuit-fibsqrtclaude-code2.1.118anthropicclaude-opus-4-7-
0.67
6
2
36m 3s
NonZeroAgentExitCodeError
5,590,378
1,010,096
4,978,753
-
terminal-bench/circuit-fibsqrtcodex0.122.0openaigpt-5.4-
1.00
6
0
15m 5s
-
4,437,910
131,031
4,157,696
-
terminal-bench/circuit-fibsqrtterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
0
8m 47s
-
274,970
323,201
84,488
$4.28
terminal-bench/cobol-modernizationclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
4m 9s
-
3,888,354
106,222
3,696,290
-
terminal-bench/cobol-modernizationcodex0.122.0openaigpt-5.4-
1.00
6
0
9m 23s
-
2,161,032
71,380
2,073,984
-
terminal-bench/cobol-modernizationterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
6m 40s
-
594,492
244,695
260,287
$3.66
terminal-bench/code-from-imageclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
40s
-
523,451
3,249
472,225
-
terminal-bench/code-from-imagecodex0.122.0openaigpt-5.4-
1.00
6
1
7m 14s
AgentSetupTimeoutError
339,819
4,492
252,160
-
terminal-bench/code-from-imageterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
1m 25s
-
146,874
25,450
0
$0.60
terminal-bench/compile-compcertclaude-code2.1.118anthropicclaude-opus-4-7-
0.67
6
4
37m 46s
AgentTimeoutError
12,736,618
84,062
12,091,532
-
terminal-bench/compile-compcertcodex0.122.0openaigpt-5.4-
1.00
6
1
31m 51s
AgentTimeoutError
39,177,213
112,028
37,918,080
-
terminal-bench/compile-compcertterminus-22.0.0geminigemini-3.1-pro-preview-
0.17
6
1
27m 32s
AgentTimeoutError
4,302,469
112,855
3,674,249
$3.35
terminal-bench/configure-git-webserverclaude-code2.1.118anthropicclaude-opus-4-7-
0.50
6
0
1m 57s
-
2,627,170
22,956
2,530,503
-
terminal-bench/configure-git-webservercodex0.122.0openaigpt-5.4-
0.00
6
0
9m 55s
-
2,490,430
61,560
2,342,016
-
terminal-bench/configure-git-webserverterminus-22.0.0geminigemini-3.1-pro-preview-
0.33
6
0
2m 28s
-
211,089
58,359
32,526
$1.06
terminal-bench/constraints-schedulingclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
1m 40s
-
667,041
33,791
570,001
-
terminal-bench/constraints-schedulingcodex0.122.0openaigpt-5.4-
1.00
6
0
9m 11s
-
556,117
24,662
494,848
-
terminal-bench/constraints-schedulingterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
2m 50s
-
139,496
98,202
0
$1.46
terminal-bench/count-dataset-tokensclaude-code2.1.118anthropicclaude-opus-4-7-
0.67
6
0
2m 7s
-
1,958,936
21,857
1,827,885
-
terminal-bench/count-dataset-tokenscodex0.122.0openaigpt-5.4-
1.00
6
0
8m 26s
-
2,663,691
35,958
2,385,152
-
terminal-bench/count-dataset-tokensterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
8m 13s
-
2,086,554
165,227
1,543,540
$3.38
terminal-bench/crack-7z-hashclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
4m 11s
-
2,595,181
14,329
2,519,134
-
terminal-bench/crack-7z-hashcodex0.122.0openaigpt-5.4-
1.00
6
0
12m 5s
-
6,321,748
51,038
5,937,408
-
terminal-bench/crack-7z-hashterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
1
11m 36s
AgentTimeoutError
1,865,382
145,200
1,176,345
$3.36
terminal-bench/custom-memory-heap-crashclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
5m 34s
-
10,125,833
112,154
9,831,417
-
terminal-bench/custom-memory-heap-crashcodex0.122.0openaigpt-5.4-
1.00
6
1
9m 0s
AgentSetupTimeoutError
3,002,652
30,932
2,704,640
-
terminal-bench/custom-memory-heap-crashterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
3m 59s
-
602,414
99,938
288,287
$1.89
terminal-bench/db-wal-recoveryclaude-code2.1.118anthropicclaude-opus-4-7-
0.17
6
0
5m 25s
-
8,827,110
86,957
8,556,536
-
terminal-bench/db-wal-recoverycodex0.122.0openaigpt-5.4-
0.83
6
0
10m 44s
-
6,884,894
79,763
6,037,504
-
terminal-bench/db-wal-recoveryterminus-22.0.0geminigemini-3.1-pro-preview-
0.20
6
5
11m 22s
AgentTimeoutError +1 more
1,525,608
320,191
1,089,349
$4.93
terminal-bench/distribution-searchclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
6m 29s
-
1,428,105
199,583
1,240,919
-
terminal-bench/distribution-searchcodex0.122.0openaigpt-5.4-
1.00
6
0
9m 27s
-
1,394,902
45,096
1,304,320
-
terminal-bench/distribution-searchterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
1
1m 43s
DaytonaError
75,968
49,710
0
$0.75
terminal-bench/dna-assemblyclaude-code2.1.118anthropicclaude-opus-4-7-
0.17
6
5
28m 39s
AgentTimeoutError
7,301,138
746,985
6,153,630
-
terminal-bench/dna-assemblycodex0.122.0openaigpt-5.4-
0.20
6
1
13m 37s
AgentSetupTimeoutError
6,674,700
146,677
6,150,272
-
terminal-bench/dna-assemblyterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
0
12m 34s
-
636,833
447,701
268,965
$6.16
terminal-bench/dna-insertclaude-code2.1.118anthropicclaude-opus-4-7-
0.17
6
0
5m 25s
-
4,326,000
121,426
4,049,550
-
terminal-bench/dna-insertcodex0.122.0openaigpt-5.4-
0.17
6
0
9m 2s
-
2,251,709
71,206
2,091,648
-
terminal-bench/dna-insertterminus-22.0.0geminigemini-3.1-pro-preview-
0.17
6
0
4m 40s
-
475,089
148,288
244,268
$2.29
terminal-bench/extract-elfclaude-code2.1.118anthropicclaude-opus-4-7-
0.20
6
1
2m 19s
DaytonaError
1,708,146
52,428
1,595,676
-
terminal-bench/extract-elfcodex0.122.0openaigpt-5.4-
0.33
6
0
12m 34s
-
2,287,066
89,348
2,147,840
-
terminal-bench/extract-elfterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
0
4m 49s
-
277,347
168,935
85,603
$2.43
terminal-bench/extract-moves-from-videoclaude-code2.1.118anthropicclaude-opus-4-7-
0.00
6
3
16m 16s
AgentTimeoutError +1 more
14,552,885
74,139
14,264,914
-
terminal-bench/extract-moves-from-videocodex0.122.0openaigpt-5.4-
0.67
6
3
27m 44s
AgentTimeoutError
59,847,394
186,272
56,355,968
-
terminal-bench/extract-moves-from-videoterminus-22.0.0geminigemini-3.1-pro-preview-
0.00
6
2
24m 13s
AgentTimeoutError
5,510,541
201,253
4,537,373
$5.27
terminal-bench/feal-differential-cryptanalysisclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
1
4m 23s
DaytonaError
1,198,678
123,730
1,027,092
-
terminal-bench/feal-differential-cryptanalysiscodex0.122.0openaigpt-5.4-
1.00
6
0
14m 17s
-
8,706,927
117,048
8,460,288
-
terminal-bench/feal-differential-cryptanalysisterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
4m 42s
-
171,530
144,549
0
$2.08
terminal-bench/feal-linear-cryptanalysisclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
8m 41s
-
3,465,512
269,812
3,049,967
-
terminal-bench/feal-linear-cryptanalysiscodex0.122.0openaigpt-5.4-
1.00
6
1
9m 6s
DownloadVerifierDirError
2,126,894
66,248
2,045,312
-
terminal-bench/feal-linear-cryptanalysisterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
0
6m 13s
-
202,319
247,323
48,947
$3.28
terminal-bench/filter-js-from-htmlclaude-code2.1.118anthropicclaude-opus-4-7-
0.00
6
0
12m 41s
-
2,202,789
142,262
1,999,693
-
terminal-bench/filter-js-from-htmlcodex0.122.0openaigpt-5.4-
0.00
6
0
18m 1s
-
1,547,018
100,505
1,477,632
-
terminal-bench/filter-js-from-htmlterminus-22.0.0geminigemini-3.1-pro-preview-
0.00
6
0
14m 32s
-
173,520
258,377
16,262
$3.42
terminal-bench/financial-document-processorclaude-code2.1.118anthropicclaude-opus-4-7-
0.83
6
0
3m 46s
-
7,551,176
49,750
7,039,369
-
terminal-bench/financial-document-processorcodex0.122.0openaigpt-5.4-
1.00
6
0
11m 35s
-
6,375,370
66,778
5,609,088
-
terminal-bench/financial-document-processorterminus-22.0.0geminigemini-3.1-pro-preview-
0.83
6
0
6m 35s
-
1,220,965
173,956
839,161
$3.02
terminal-bench/fix-code-vulnerabilityclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
1m 7s
-
1,588,115
11,232
1,503,252
-
terminal-bench/fix-code-vulnerabilitycodex0.122.0openaigpt-5.4-
1.00
6
0
9m 55s
-
2,228,868
25,232
1,881,728
-
terminal-bench/fix-code-vulnerabilityterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
2m 4s
-
408,262
46,842
150,742
$1.11
terminal-bench/fix-gitclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
1m 19s
-
1,302,438
20,111
1,227,729
-
terminal-bench/fix-gitcodex0.122.0openaigpt-5.4-
1.00
6
0
8m 28s
-
1,535,407
23,927
1,368,960
-
terminal-bench/fix-gitterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
1m 28s
-
97,900
30,356
0
$0.56
terminal-bench/fix-ocaml-gcclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
20m 43s
-
11,489,069
100,261
10,571,796
-
terminal-bench/fix-ocaml-gccodex0.122.0openaigpt-5.4-
1.00
6
0
29m 23s
-
36,484,162
104,404
34,888,320
-
terminal-bench/fix-ocaml-gcterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
21m 57s
-
4,733,580
72,535
3,996,139
$3.14
terminal-bench/gcode-to-textclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
5m 19s
-
6,033,398
125,133
5,767,948
-
terminal-bench/gcode-to-textcodex0.122.0openaigpt-5.4-
0.17
6
1
15m 55s
AgentTimeoutError
16,554,758
162,763
15,355,520
-
terminal-bench/gcode-to-textterminus-22.0.0geminigemini-3.1-pro-preview-
0.17
6
0
8m 41s
-
1,586,319
325,514
1,140,995
$5.03
terminal-bench/git-leak-recoveryclaude-code2.1.118anthropicclaude-opus-4-7-
1.00
6
0
1m 5s
-
1,638,713
12,251
1,577,855
-
terminal-bench/git-leak-recoverycodex0.122.0openaigpt-5.4-
0.83
6
0
7m 21s
-
796,336
20,280
767,232
-
terminal-bench/git-leak-recoveryterminus-22.0.0geminigemini-3.1-pro-preview-
1.00
6
0
1m 35s
-
72,297
30,251
0
$0.51
terminal-bench/git-multibranchclaude-code2.1.118anthropicclaude-opus-4-7-
0.83
6
0
2m 58s
-
5,980,741
43,395
5,787,052
-
Showing 1-100 of 267 tasks