-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreferences.bib
14218 lines (13421 loc) · 591 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% AI agents solving Kaggle problems autonomously (MLE-bench, arXiv preprint)
@misc{mle_bench,
title = {{MLE}-bench: {Evaluating} {Machine} {Learning} {Agents} on {Machine} {Learning} {Engineering}},
shorttitle = {{MLE}-bench},
url = {http://arxiv.org/abs/2410.07095},
doi = {10.48550/arXiv.2410.07095},
abstract = {We introduce MLE-bench, a benchmark for measuring how well AI agents perform at machine learning engineering. To this end, we curate 75 ML engineering-related competitions from Kaggle, creating a diverse set of challenging tasks that test real-world ML engineering skills such as training models, preparing datasets, and running experiments. We establish human baselines for each competition using Kaggle's publicly available leaderboards. We use open-source agent scaffolds to evaluate several frontier language models on our benchmark, finding that the best-performing setup--OpenAI's o1-preview with AIDE scaffolding--achieves at least the level of a Kaggle bronze medal in 16.9\% of competitions. In addition to our main results, we investigate various forms of resource scaling for AI agents and the impact of contamination from pre-training. We open-source our benchmark code (github.com/openai/mle-bench/) to facilitate future research in understanding the ML engineering capabilities of AI agents.},
urldate = {2024-11-18},
publisher = {arXiv},
author = {Chan, Jun Shern and Chowdhury, Neil and Jaffe, Oliver and Aung, James and Sherburn, Dane and Mays, Evan and Starace, Giulio and Liu, Kevin and Maksin, Leon and Patwardhan, Tejal and Weng, Lilian and Mądry, Aleksander},
month = oct,
year = {2024},
eprint = {2410.07095},
archivePrefix = {arXiv},
primaryClass = {cs.CL},
keywords = {Computer Science - Computation and Language},
}
% Gemini 1.5 with a 1M-token context window (arXiv preprint).
% "Gemini Team" is a corporate author, so it is brace-protected to be kept as one name.
@misc{geminiteam2024gemini15unlockingmultimodal,
title={Gemini 1.5: Unlocking multimodal understanding across millions of tokens of context},
author={{Gemini Team} and Petko Georgiev and Ving Ian Lei and Ryan Burnell and Libin Bai and Anmol Gulati and Garrett Tanzer and Damien Vincent and Zhufeng Pan and Shibo Wang and Soroosh Mariooryad and Yifan Ding and Xinyang Geng and Fred Alcober and Roy Frostig and Mark Omernick and Lexi Walker and Cosmin Paduraru and Christina Sorokin and Andrea Tacchetti and Colin Gaffney and Samira Daruki and Olcan Sercinoglu and Zach Gleicher and Juliette Love and Paul Voigtlaender and Rohan Jain and Gabriela Surita and Kareem Mohamed and Rory Blevins and Junwhan Ahn and Tao Zhu and Kornraphop Kawintiranon and Orhan Firat and Yiming Gu and Yujing Zhang and Matthew Rahtz and Manaal Faruqui and Natalie Clay and Justin Gilmer and JD Co-Reyes and Ivo Penchev and Rui Zhu and Nobuyuki Morioka and Kevin Hui and Krishna Haridasan and Victor Campos and Mahdis Mahdieh and Mandy Guo and Samer Hassan and Kevin Kilgour and Arpi Vezer and Heng-Tze Cheng and Raoul de Liedekerke and Siddharth Goyal and Paul Barham and DJ Strouse and Seb Noury and Jonas Adler and Mukund Sundararajan and Sharad Vikram and Dmitry Lepikhin and Michela Paganini and Xavier Garcia and Fan Yang and Dasha Valter and Maja Trebacz and Kiran Vodrahalli and Chulayuth Asawaroengchai and Roman Ring and Norbert Kalb and Livio Baldini Soares and Siddhartha Brahma and David Steiner and Tianhe Yu and Fabian Mentzer and Antoine He and Lucas Gonzalez and Bibo Xu and Raphael Lopez Kaufman and Laurent El Shafey and Junhyuk Oh and Tom Hennigan and George van den Driessche and Seth Odoom and Mario Lucic and Becca Roelofs and Sid Lall and Amit Marathe and Betty Chan and Santiago Ontanon and Luheng He and Denis Teplyashin and Jonathan Lai and Phil Crone and Bogdan Damoc and Lewis Ho and Sebastian Riedel and Karel Lenc and Chih-Kuan Yeh and Aakanksha Chowdhery and Yang Xu and Mehran Kazemi and Ehsan Amid and Anastasia Petrushkina and Kevin Swersky and Ali Khodaei and Gowoon Chen and Chris Larkin and Mario Pinto and Geng Yan and Adria
Puigdomenech Badia and Piyush Patil and Steven Hansen and Dave Orr and Sebastien M. R. Arnold and Jordan Grimstad and Andrew Dai and Sholto Douglas and Rishika Sinha and Vikas Yadav and Xi Chen and Elena Gribovskaya and Jacob Austin and Jeffrey Zhao and Kaushal Patel and Paul Komarek and Sophia Austin and Sebastian Borgeaud and Linda Friso and Abhimanyu Goyal and Ben Caine and Kris Cao and Da-Woon Chung and Matthew Lamm and Gabe Barth-Maron and Thais Kagohara and Kate Olszewska and Mia Chen and Kaushik Shivakumar and Rishabh Agarwal and Harshal Godhia and Ravi Rajwar and Javier Snaider and Xerxes Dotiwalla and Yuan Liu and Aditya Barua and Victor Ungureanu and Yuan Zhang and Bat-Orgil Batsaikhan and Mateo Wirth and James Qin and Ivo Danihelka and Tulsee Doshi and Martin Chadwick and Jilin Chen and Sanil Jain and Quoc Le and Arjun Kar and Madhu Gurumurthy and Cheng Li and Ruoxin Sang and Fangyu Liu and Lampros Lamprou and Rich Munoz and Nathan Lintz and Harsh Mehta and Heidi Howard and Malcolm Reynolds and Lora Aroyo and Quan Wang and Lorenzo Blanco and Albin Cassirer and Jordan Griffith and Dipanjan Das and Stephan Lee and Jakub Sygnowski and Zach Fisher and James Besley and Richard Powell and Zafarali Ahmed and Dominik Paulus and David Reitter and Zalan Borsos and Rishabh Joshi and Aedan Pope and Steven Hand and Vittorio Selo and Vihan Jain and Nikhil Sethi and Megha Goel and Takaki Makino and Rhys May and Zhen Yang and Johan Schalkwyk and Christina Butterfield and Anja Hauth and Alex Goldin and Will Hawkins and Evan Senter and Sergey Brin and Oliver Woodman and Marvin Ritter and Eric Noland and Minh Giang and Vijay Bolina and Lisa Lee and Tim Blyth and Ian Mackinnon and Machel Reid and Obaid Sarvana and David Silver and Alexander Chen and Lily Wang and Loren Maggiore and Oscar Chang and Nithya Attaluri and Gregory Thornton and Chung-Cheng Chiu and Oskar Bunyan and Nir Levine and Timothy Chung and Evgenii Eltyshev and Xiance Si and Timothy Lillicrap and Demetra
Brady and Vaibhav Aggarwal and Boxi Wu and Yuanzhong Xu and Ross McIlroy and Kartikeya Badola and Paramjit Sandhu and Erica Moreira and Wojciech Stokowiec and Ross Hemsley and Dong Li and Alex Tudor and Pranav Shyam and Elahe Rahimtoroghi and Salem Haykal and Pablo Sprechmann and Xiang Zhou and Diana Mincu and Yujia Li and Ravi Addanki and Kalpesh Krishna and Xiao Wu and Alexandre Frechette and Matan Eyal and Allan Dafoe and Dave Lacey and Jay Whang and Thi Avrahami and Ye Zhang and Emanuel Taropa and Hanzhao Lin and Daniel Toyama and Eliza Rutherford and Motoki Sano and HyunJeong Choe and Alex Tomala and Chalence Safranek-Shrader and Nora Kassner and Mantas Pajarskas and Matt Harvey and Sean Sechrist and Meire Fortunato and Christina Lyu and Gamaleldin Elsayed and Chenkai Kuang and James Lottes and Eric Chu and Chao Jia and Chih-Wei Chen and Peter Humphreys and Kate Baumli and Connie Tao and Rajkumar Samuel and Cicero Nogueira dos Santos and Anders Andreassen and Nemanja Rakićević and Dominik Grewe and Aviral Kumar and Stephanie Winkler and Jonathan Caton and Andrew Brock and Sid Dalmia and Hannah Sheahan and Iain Barr and Yingjie Miao and Paul Natsev and Jacob Devlin and Feryal Behbahani and Flavien Prost and Yanhua Sun and Artiom Myaskovsky and Thanumalayan Sankaranarayana Pillai and Dan Hurt and Angeliki Lazaridou and Xi Xiong and Ce Zheng and Fabio Pardo and Xiaowei Li and Dan Horgan and Joe Stanton and Moran Ambar and Fei Xia and Alejandro Lince and Mingqiu Wang and Basil Mustafa and Albert Webson and Hyo Lee and Rohan Anil and Martin Wicke and Timothy Dozat and Abhishek Sinha and Enrique Piqueras and Elahe Dabir and Shyam Upadhyay and Anudhyan Boral and Lisa Anne Hendricks and Corey Fry and Josip Djolonga and Yi Su and Jake Walker and Jane Labanowski and Ronny Huang and Vedant Misra and Jeremy Chen and RJ Skerry-Ryan and Avi Singh and Shruti Rijhwani and Dian Yu and Alex Castro-Ros and Beer Changpinyo and Romina Datta and Sumit Bagri and Arnar Mar
Hrafnkelsson and Marcello Maggioni and Daniel Zheng and Yury Sulsky and Shaobo Hou and Tom Le Paine and Antoine Yang and Jason Riesa and Dominika Rogozinska and Dror Marcus and Dalia El Badawy and Qiao Zhang and Luyu Wang and Helen Miller and Jeremy Greer and Lars Lowe Sjos and Azade Nova and Heiga Zen and Rahma Chaabouni and Mihaela Rosca and Jiepu Jiang and Charlie Chen and Ruibo Liu and Tara Sainath and Maxim Krikun and Alex Polozov and Jean-Baptiste Lespiau and Josh Newlan and Zeyncep Cankara and Soo Kwak and Yunhan Xu and Phil Chen and Andy Coenen and Clemens Meyer and Katerina Tsihlas and Ada Ma and Juraj Gottweis and Jinwei Xing and Chenjie Gu and Jin Miao and Christian Frank and Zeynep Cankara and Sanjay Ganapathy and Ishita Dasgupta and Steph Hughes-Fitt and Heng Chen and David Reid and Keran Rong and Hongmin Fan and Joost van Amersfoort and Vincent Zhuang and Aaron Cohen and Shixiang Shane Gu and Anhad Mohananey and Anastasija Ilic and Taylor Tobin and John Wieting and Anna Bortsova and Phoebe Thacker and Emma Wang and Emily Caveness and Justin Chiu and Eren Sezener and Alex Kaskasoli and Steven Baker and Katie Millican and Mohamed Elhawaty and Kostas Aisopos and Carl Lebsack and Nathan Byrd and Hanjun Dai and Wenhao Jia and Matthew Wiethoff and Elnaz Davoodi and Albert Weston and Lakshman Yagati and Arun Ahuja and Isabel Gao and Golan Pundak and Susan Zhang and Michael Azzam and Khe Chai Sim and Sergi Caelles and James Keeling and Abhanshu Sharma and Andy Swing and YaGuang Li and Chenxi Liu and Carrie Grimes Bostock and Yamini Bansal and Zachary Nado and Ankesh Anand and Josh Lipschultz and Abhijit Karmarkar and Lev Proleev and Abe Ittycheriah and Soheil Hassas Yeganeh and George Polovets and Aleksandra Faust and Jiao Sun and Alban Rrustemi and Pen Li and Rakesh Shivanna and Jeremiah Liu and Chris Welty and Federico Lebron and Anirudh Baddepudi and Sebastian Krause and Emilio Parisotto and Radu Soricut and Zheng Xu and Dawn Bloxwich and Melvin Johnson
and Behnam Neyshabur and Justin Mao-Jones and Renshen Wang and Vinay Ramasesh and Zaheer Abbas and Arthur Guez and Constant Segal and Duc Dung Nguyen and James Svensson and Le Hou and Sarah York and Kieran Milan and Sophie Bridgers and Wiktor Gworek and Marco Tagliasacchi and James Lee-Thorp and Michael Chang and Alexey Guseynov and Ale Jakse Hartman and Michael Kwong and Ruizhe Zhao and Sheleem Kashem and Elizabeth Cole and Antoine Miech and Richard Tanburn and Mary Phuong and Filip Pavetic and Sebastien Cevey and Ramona Comanescu and Richard Ives and Sherry Yang and Cosmo Du and Bo Li and Zizhao Zhang and Mariko Iinuma and Clara Huiyi Hu and Aurko Roy and Shaan Bijwadia and Zhenkai Zhu and Danilo Martins and Rachel Saputro and Anita Gergely and Steven Zheng and Dawei Jia and Ioannis Antonoglou and Adam Sadovsky and Shane Gu and Yingying Bi and Alek Andreev and Sina Samangooei and Mina Khan and Tomas Kocisky and Angelos Filos and Chintu Kumar and Colton Bishop and Adams Yu and Sarah Hodkinson and Sid Mittal and Premal Shah and Alexandre Moufarek and Yong Cheng and Adam Bloniarz and Jaehoon Lee and Pedram Pejman and Paul Michel and Stephen Spencer and Vladimir Feinberg and Xuehan Xiong and Nikolay Savinov and Charlotte Smith and Siamak Shakeri and Dustin Tran and Mary Chesus and Bernd Bohnet and George Tucker and Tamara von Glehn and Carrie Muir and Yiran Mao and Hideto Kazawa and Ambrose Slone and Kedar Soparkar and Disha Shrivastava and James Cobon-Kerr and Michael Sharman and Jay Pavagadhi and Carlos Araya and Karolis Misiunas and Nimesh Ghelani and Michael Laskin and David Barker and Qiujia Li and Anton Briukhov and Neil Houlsby and Mia Glaese and Balaji Lakshminarayanan and Nathan Schucher and Yunhao Tang and Eli Collins and Hyeontaek Lim and Fangxiaoyu Feng and Adria Recasens and Guangda Lai and Alberto Magni and Nicola De Cao and Aditya Siddhant and Zoe Ashwood and Jordi Orbay and Mostafa Dehghani and Jenny Brennan and Yifan He and Kelvin Xu and Yang Gao and
Carl Saroufim and James Molloy and Xinyi Wu and Seb Arnold and Solomon Chang and Julian Schrittwieser and Elena Buchatskaya and Soroush Radpour and Martin Polacek and Skye Giordano and Ankur Bapna and Simon Tokumine and Vincent Hellendoorn and Thibault Sottiaux and Sarah Cogan and Aliaksei Severyn and Mohammad Saleh and Shantanu Thakoor and Laurent Shefey and Siyuan Qiao and Meenu Gaba and Shuo-yiin Chang and Craig Swanson and Biao Zhang and Benjamin Lee and Paul Kishan Rubenstein and Gan Song and Tom Kwiatkowski and Anna Koop and Ajay Kannan and David Kao and Parker Schuh and Axel Stjerngren and Golnaz Ghiasi and Gena Gibson and Luke Vilnis and Ye Yuan and Felipe Tiengo Ferreira and Aishwarya Kamath and Ted Klimenko and Ken Franko and Kefan Xiao and Indro Bhattacharya and Miteyan Patel and Rui Wang and Alex Morris and Robin Strudel and Vivek Sharma and Peter Choy and Sayed Hadi Hashemi and Jessica Landon and Mara Finkelstein and Priya Jhakra and Justin Frye and Megan Barnes and Matthew Mauger and Dennis Daun and Khuslen Baatarsukh and Matthew Tung and Wael Farhan and Henryk Michalewski and Fabio Viola and Felix de Chaumont Quitry and Charline Le Lan and Tom Hudson and Qingze Wang and Felix Fischer and Ivy Zheng and Elspeth White and Anca Dragan and Jean-baptiste Alayrac and Eric Ni and Alexander Pritzel and Adam Iwanicki and Michael Isard and Anna Bulanova and Lukas Zilka and Ethan Dyer and Devendra Sachan and Srivatsan Srinivasan and Hannah Muckenhirn and Honglong Cai and Amol Mandhane and Mukarram Tariq and Jack W. Rae and Gary Wang and Kareem Ayoub and Nicholas FitzGerald and Yao Zhao and Woohyun Han and Chris Alberti and Dan Garrette and Kashyap Krishnakumar and Mai Gimenez and Anselm Levskaya and Daniel Sohn and Josip Matak and Inaki Iturrate and Michael B.
Chang and Jackie Xiang and Yuan Cao and Nishant Ranka and Geoff Brown and Adrian Hutter and Vahab Mirrokni and Nanxin Chen and Kaisheng Yao and Zoltan Egyed and Francois Galilee and Tyler Liechty and Praveen Kallakuri and Evan Palmer and Sanjay Ghemawat and Jasmine Liu and David Tao and Chloe Thornton and Tim Green and Mimi Jasarevic and Sharon Lin and Victor Cotruta and Yi-Xuan Tan and Noah Fiedel and Hongkun Yu and Ed Chi and Alexander Neitz and Jens Heitkaemper and Anu Sinha and Denny Zhou and Yi Sun and Charbel Kaed and Brice Hulse and Swaroop Mishra and Maria Georgaki and Sneha Kudugunta and Clement Farabet and Izhak Shafran and Daniel Vlasic and Anton Tsitsulin and Rajagopal Ananthanarayanan and Alen Carin and Guolong Su and Pei Sun and Shashank V and Gabriel Carvajal and Josef Broder and Iulia Comsa and Alena Repina and William Wong and Warren Weilun Chen and Peter Hawkins and Egor Filonov and Lucia Loher and Christoph Hirnschall and Weiyi Wang and Jingchen Ye and Andrea Burns and Hardie Cate and Diana Gage Wright and Federico Piccinini and Lei Zhang and Chu-Cheng Lin and Ionel Gog and Yana Kulizhskaya and Ashwin Sreevatsa and Shuang Song and Luis C. Cobo and Anand Iyer and Chetan Tekur and Guillermo Garrido and Zhuyun Xiao and Rupert Kemp and Huaixiu Steven Zheng and Hui Li and Ananth Agarwal and Christel Ngani and Kati Goshvadi and Rebeca Santamaria-Fernandez and Wojciech Fica and Xinyun Chen and Chris Gorgolewski and Sean Sun and Roopal Garg and Xinyu Ye and S. M.
Ali Eslami and Nan Hua and Jon Simon and Pratik Joshi and Yelin Kim and Ian Tenney and Sahitya Potluri and Lam Nguyen Thiet and Quan Yuan and Florian Luisier and Alexandra Chronopoulou and Salvatore Scellato and Praveen Srinivasan and Minmin Chen and Vinod Koverkathu and Valentin Dalibard and Yaming Xu and Brennan Saeta and Keith Anderson and Thibault Sellam and Nick Fernando and Fantine Huot and Junehyuk Jung and Mani Varadarajan and Michael Quinn and Amit Raul and Maigo Le and Ruslan Habalov and Jon Clark and Komal Jalan and Kalesha Bullard and Achintya Singhal and Thang Luong and Boyu Wang and Sujeevan Rajayogam and Julian Eisenschlos and Johnson Jia and Daniel Finchelstein and Alex Yakubovich and Daniel Balle and Michael Fink and Sameer Agarwal and Jing Li and Dj Dvijotham and Shalini Pal and Kai Kang and Jaclyn Konzelmann and Jennifer Beattie and Olivier Dousse and Diane Wu and Remi Crocker and Chen Elkind and Siddhartha Reddy Jonnalagadda and Jong Lee and Dan Holtmann-Rice and Krystal Kallarackal and Rosanne Liu and Denis Vnukov and Neera Vats and Luca Invernizzi and Mohsen Jafari and Huanjie Zhou and Lilly Taylor and Jennifer Prendki and Marcus Wu and Tom Eccles and Tianqi Liu and Kavya Kopparapu and Francoise Beaufays and Christof Angermueller and Andreea Marzoca and Shourya Sarcar and Hilal Dib and Jeff Stanway and Frank Perbet and Nejc Trdin and Rachel Sterneck and Andrey Khorlin and Dinghua Li and Xihui Wu and Sonam Goenka and David Madras and Sasha Goldshtein and Willi Gierke and Tong Zhou and Yaxin Liu and Yannie Liang and Anais White and Yunjie Li and Shreya Singh and Sanaz Bahargam and Mark Epstein and Sujoy Basu and Li Lao and Adnan Ozturel and Carl Crous and Alex Zhai and Han Lu and Zora Tung and Neeraj Gaur and Alanna Walton and Lucas Dixon and Ming Zhang and Amir Globerson and Grant Uy and Andrew Bolt and Olivia Wiles and Milad Nasr and Ilia Shumailov and Marco Selvi and Francesco Piccinno and Ricardo Aguilar and Sara McCarthy and Misha Khalman
and Mrinal Shukla and Vlado Galic and John Carpenter and Kevin Villela and Haibin Zhang and Harry Richardson and James Martens and Matko Bosnjak and Shreyas Rammohan Belle and Jeff Seibert and Mahmoud Alnahlawi and Brian McWilliams and Sankalp Singh and Annie Louis and Wen Ding and Dan Popovici and Lenin Simicich and Laura Knight and Pulkit Mehta and Nishesh Gupta and Chongyang Shi and Saaber Fatehi and Jovana Mitrovic and Alex Grills and Joseph Pagadora and Dessie Petrova and Danielle Eisenbud and Zhishuai Zhang and Damion Yates and Bhavishya Mittal and Nilesh Tripuraneni and Yannis Assael and Thomas Brovelli and Prateek Jain and Mihajlo Velimirovic and Canfer Akbulut and Jiaqi Mu and Wolfgang Macherey and Ravin Kumar and Jun Xu and Haroon Qureshi and Gheorghe Comanici and Jeremy Wiesner and Zhitao Gong and Anton Ruddock and Matthias Bauer and Nick Felt and Anirudh GP and Anurag Arnab and Dustin Zelle and Jonas Rothfuss and Bill Rosgen and Ashish Shenoy and Bryan Seybold and Xinjian Li and Jayaram Mudigonda and Goker Erdogan and Jiawei Xia and Jiri Simsa and Andrea Michi and Yi Yao and Christopher Yew and Steven Kan and Isaac Caswell and Carey Radebaugh and Andre Elisseeff and Pedro Valenzuela and Kay McKinney and Kim Paterson and Albert Cui and Eri Latorre-Chimoto and Solomon Kim and William Zeng and Ken Durden and Priya Ponnapalli and Tiberiu Sosea and Christopher A.
Choquette-Choo and James Manyika and Brona Robenek and Harsha Vashisht and Sebastien Pereira and Hoi Lam and Marko Velic and Denese Owusu-Afriyie and Katherine Lee and Tolga Bolukbasi and Alicia Parrish and Shawn Lu and Jane Park and Balaji Venkatraman and Alice Talbert and Lambert Rosique and Yuchung Cheng and Andrei Sozanschi and Adam Paszke and Praveen Kumar and Jessica Austin and Lu Li and Khalid Salama and Wooyeol Kim and Nandita Dukkipati and Anthony Baryshnikov and Christos Kaplanis and XiangHai Sheng and Yuri Chervonyi and Caglar Unlu and Diego de Las Casas and Harry Askham and Kathryn Tunyasuvunakool and Felix Gimeno and Siim Poder and Chester Kwak and Matt Miecnikowski and Vahab Mirrokni and Alek Dimitriev and Aaron Parisi and Dangyi Liu and Tomy Tsai and Toby Shevlane and Christina Kouridi and Drew Garmon and Adrian Goedeckemeyer and Adam R. Brown and Anitha Vijayakumar and Ali Elqursh and Sadegh Jazayeri and Jin Huang and Sara Mc Carthy and Jay Hoover and Lucy Kim and Sandeep Kumar and Wei Chen and Courtney Biles and Garrett Bingham and Evan Rosen and Lisa Wang and Qijun Tan and David Engel and Francesco Pongetti and Dario de Cesare and Dongseong Hwang and Lily Yu and Jennifer Pullman and Srini Narayanan and Kyle Levin and Siddharth Gopal and Megan Li and Asaf Aharoni and Trieu Trinh and Jessica Lo and Norman Casagrande and Roopali Vij and Loic Matthey and Bramandia Ramadhana and Austin Matthews and CJ Carey and Matthew Johnson and Kremena Goranova and Rohin Shah and Shereen Ashraf and Kingshuk Dasgupta and Rasmus Larsen and Yicheng Wang and Manish Reddy Vuyyuru and Chong Jiang and Joana Ijazi and Kazuki Osawa and Celine Smith and Ramya Sree Boppana and Taylan Bilal and Yuma Koizumi and Ying Xu and Yasemin Altun and Nir Shabat and Ben Bariach and Alex Korchemniy and Kiam Choo and Olaf Ronneberger and Chimezie Iwuanyanwu and Shubin Zhao and David Soergel and Cho-Jui Hsieh and Irene Cai and Shariq Iqbal and Martin Sundermeyer and Zhe Chen and Elie
Bursztein and Chaitanya Malaviya and Fadi Biadsy and Prakash Shroff and Inderjit Dhillon and Tejasi Latkar and Chris Dyer and Hannah Forbes and Massimo Nicosia and Vitaly Nikolaev and Somer Greene and Marin Georgiev and Pidong Wang and Nina Martin and Hanie Sedghi and John Zhang and Praseem Banzal and Doug Fritz and Vikram Rao and Xuezhi Wang and Jiageng Zhang and Viorica Patraucean and Dayou Du and Igor Mordatch and Ivan Jurin and Lewis Liu and Ayush Dubey and Abhi Mohan and Janek Nowakowski and Vlad-Doru Ion and Nan Wei and Reiko Tojo and Maria Abi Raad and Drew A. Hudson and Vaishakh Keshava and Shubham Agrawal and Kevin Ramirez and Zhichun Wu and Hoang Nguyen and Ji Liu and Madhavi Sewak and Bryce Petrini and DongHyun Choi and Ivan Philips and Ziyue Wang and Ioana Bica and Ankush Garg and Jarek Wilkiewicz and Priyanka Agrawal and Xiaowei Li and Danhao Guo and Emily Xue and Naseer Shaik and Andrew Leach and Sadh MNM Khan and Julia Wiesinger and Sammy Jerome and Abhishek Chakladar and Alek Wenjiao Wang and Tina Ornduff and Folake Abu and Alireza Ghaffarkhah and Marcus Wainwright and Mario Cortes and Frederick Liu and Joshua Maynez and Andreas Terzis and Pouya Samangouei and Riham Mansour and Tomasz Kępa and François-Xavier Aubet and Anton Algymr and Dan Banica and Agoston Weisz and Andras Orban and Alexandre Senges and Ewa Andrejczuk and Mark Geller and Niccolo Dal Santo and Valentin Anklin and Majd Al Merey and Martin Baeuml and Trevor Strohman and Junwen Bai and Slav Petrov and Yonghui Wu and Demis Hassabis and Koray Kavukcuoglu and Jeffrey Dean and Oriol Vinyals},
year={2024},
eprint={2403.05530},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2403.05530},
}
% GPT-4 technical report (arXiv preprint, corporate author).
% The reading note is stored in `annote` so standard styles do not print it in the bibliography.
@misc{gpt4,
title={{GPT-4} Technical Report},
author={{OpenAI}},
year={2023},
eprint={2303.08774},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2303.08774},
annote={GPT-4 represents a significant leap in multimodal capabilities and reasoning, demonstrating human-level performance across various professional tests and academic benchmarks. The model shows remarkable improvements in factuality, steerability, and creative tasks.},
}
% Llama 2 model family (arXiv preprint; author list abbreviated via "and others").
% The reading note is stored in `annote` so standard styles do not print it in the bibliography.
@misc{llama2,
title={{Llama} 2: Open Foundation and Fine-Tuned Chat Models},
author={Touvron, Hugo and others},
year={2023},
eprint={2307.09288},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2307.09288},
publisher={Meta AI},
annote={Meta's open-source release of Llama 2 marked a pivotal moment in AI democratization, providing state-of-the-art performance in an accessible format. The model family includes versions up to 70B parameters, with specialized chat variants showing strong performance in instruction-following tasks.},
}
% Llama 3 (arXiv preprint), full author list as exported by arXiv.
% Fixed relative to the raw export: names that had been split by a spurious
% " and " ("Guangyi Zhang", "Yu Wang"), immediately repeated names, and the URL.
@misc{llama3,
  title         = {The {Llama} 3 Herd of Models},
  author        = {Abhimanyu Dubey and Abhinav Jauhri and Abhinav Pandey and Abhishek Kadian and Ahmad Al-Dahle and Aiesha Letman and Akhil Mathur and Alan Schelten and Amy Yang and Angela Fan and Anirudh Goyal and Anthony Hartshorn and Aobo Yang and Archi Mitra and Archie Sravankumar and Artem Korenev and Arthur Hinsvark and Arun Rao and Aston Zhang and Aurelien Rodriguez and Austen Gregerson and Ava Spataru and Baptiste Roziere and Bethany Biron and Binh Tang and Bobbie Chern and Charlotte Caucheteux and Chaya Nayak and Chloe Bi and Chris Marra and Chris McConnell and Christian Keller and Christophe Touret and Chunyang Wu and Corinne Wong and Cristian Canton Ferrer and Cyrus Nikolaidis and Damien Allonsius and Daniel Song and Danielle Pintz and Danny Livshits and David Esiobu and Dhruv Choudhary and Dhruv Mahajan and Diego Garcia-Olano and Diego Perino and Dieuwke Hupkes and Egor Lakomkin and Ehab AlBadawy and Elina Lobanova and Emily Dinan and Eric Michael Smith and Filip Radenovic and Frank Zhang and Gabriel Synnaeve and Gabrielle Lee and Georgia Lewis Anderson and Graeme Nail and Gregoire Mialon and Guan Pang and Guillem Cucurell and Hailey Nguyen and Hannah Korevaar and Hu Xu and Hugo Touvron and Iliyan Zarov and Imanol Arrieta Ibarra and Isabel Kloumann and Ishan Misra and Ivan Evtimov and Jade Copet and Jaewon Lee and Jan Geffert and Jana Vranes and Jason Park and Jay Mahadeokar and Jeet Shah and Jelmer van der Linde and Jennifer Billock and Jenny Hong and Jenya Lee and Jeremy Fu and Jianfeng Chi and Jianyu Huang and Jiawen Liu and Jie Wang and Jiecao Yu and Joanna Bitton and Joe Spisak and Jongsoo Park and Joseph Rocca and Joshua Johnstun and Joshua Saxe and Junteng Jia and Kalyan Vasuden Alwala and Kartikeya Upasani and Kate Plawiak and Ke Li and Kenneth Heafield and Kevin Stone and Khalid El-Arini and Krithika Iyer and Kshitiz Malik and Kuenley Chiu and Kunal Bhalla and Lauren Rantala-Yeary and Laurens van der Maaten and Lawrence Chen and Liang Tan and Liz Jenkins
                   and Louis Martin and Lovish Madaan and Lubo Malo and Lukas Blecher and Lukas Landzaat and Luke de Oliveira and Madeline Muzzi and Mahesh Pasupuleti and Mannat Singh and Manohar Paluri and Marcin Kardas and Mathew Oldham and Mathieu Rita and Maya Pavlova and Melanie Kambadur and Mike Lewis and Min Si and Mitesh Kumar Singh and Mona Hassan and Naman Goyal and Narjes Torabi and Nikolay Bashlykov and Nikolay Bogoychev and Niladri Chatterji and Olivier Duchenne and Onur Çelebi and Patrick Alrassy and Pengchuan Zhang and Pengwei Li and Petar Vasic and Peter Weng and Prajjwal Bhargava and Pratik Dubal and Praveen Krishnan and Punit Singh Koura and Puxin Xu and Qing He and Qingxiao Dong and Ragavan Srinivasan and Raj Ganapathy and Ramon Calderer and Ricardo Silveira Cabral and Robert Stojnic and Roberta Raileanu and Rohit Girdhar and Rohit Patel and Romain Sauvestre and Ronnie Polidoro and Roshan Sumbaly and Ross Taylor and Ruan Silva and Rui Hou and Rui Wang and Saghar Hosseini and Sahana Chennabasappa and Sanjay Singh and Sean Bell and Seohyun Sonia Kim and Sergey Edunov and Shaoliang Nie and Sharan Narang and Sharath Raparthy and Sheng Shen and Shengye Wan and Shruti Bhosale and Shun Zhang and Simon Vandenhende and Soumya Batra and Spencer Whitman and Sten Sootla and Stephane Collot and Suchin Gururangan and Sydney Borodinsky and Tamar Herman and Tara Fowler and Tarek Sheasha and Thomas Georgiou and Thomas Scialom and Tobias Speckbacher and Todor Mihaylov and Tong Xiao and Ujjwal Karn and Vedanuj Goswami and Vibhor Gupta and Vignesh Ramanathan and Viktor Kerkez and Vincent Gonguet and Virginie Do and Vish Vogeti and Vladan Petrovic and Weiwei Chu and Wenhan Xiong and Wenyin Fu and Whitney Meers and Xavier Martinet and Xiaodong Wang and Xiaoqing Ellen Tan and Xinfeng Xie and Xuchao Jia and Xuewei Wang and Yaelle Goldschlag and Yashesh Gaur and Yasmine Babaei and Yi Wen and Yiwen Song and Yuchen Zhang and Yue Li and Yuning Mao and Zacharie Delpierre Coudert and Zheng Yan
                   and Zhengxing Chen and Zoe Papakipos and Aaditya Singh and Aaron Grattafiori and Abha Jain and Adam Kelsey and Adam Shajnfeld and Adithya Gangidi and Adolfo Victoria and Ahuva Goldstand and Ajay Menon and Ajay Sharma and Alex Boesenberg and Alex Vaughan and Alexei Baevski and Allie Feinstein and Amanda Kallet and Amit Sangani and Anam Yunus and Andrei Lupu and Andres Alvarado and Andrew Caples and Andrew Gu and Andrew Ho and Andrew Poulton and Andrew Ryan and Ankit Ramchandani and Annie Franco and Aparajita Saraf and Arkabandhu Chowdhury and Ashley Gabriel and Ashwin Bharambe and Assaf Eisenman and Azadeh Yazdan and Beau James and Ben Maurer and Benjamin Leonhardi and Bernie Huang and Beth Loyd and Beto De Paola and Bhargavi Paranjape and Bing Liu and Bo Wu and Boyu Ni and Braden Hancock and Bram Wasti and Brandon Spence and Brani Stojkovic and Brian Gamido and Britt Montalvo and Carl Parker and Carly Burton and Catalina Mejia and Changhan Wang and Changkyu Kim and Chao Zhou and Chester Hu and Ching-Hsiang Chu and Chris Cai and Chris Tindal and Christoph Feichtenhofer and Damon Civin and Dana Beaty and Daniel Kreymer and Daniel Li and Danny Wyatt and David Adkins and David Xu and Davide Testuggine and Delia David and Devi Parikh and Diana Liskovich and Didem Foss and Dingkang Wang and Duc Le and Dustin Holland and Edward Dowling and Eissa Jamil and Elaine Montgomery and Eleonora Presani and Emily Hahn and Emily Wood and Erik Brinkman and Esteban Arcaute and Evan Dunbar and Evan Smothers and Fei Sun and Felix Kreuk and Feng Tian and Firat Ozgenel and Francesco Caggioni and Francisco Guzmán and Frank Kanayet and Frank Seide and Gabriela Medina Florez and Gabriella Schwarz and Gada Badeer and Georgia Swee and Gil Halpern and Govind Thattai and Grant Herman and Grigory Sizov and Guangyi Zhang and Guna Lakshminarayanan and Hamid Shojanazeri and Han Zou and Hannah Wang and Hanwen Zha and Haroun Habeeb and Harrison Rudolph and Helen Suk and Henry Aspegren and Hunter
                   Goldman and Ibrahim Damlaj and Igor Molybog and Igor Tufanov and Irina-Elena Veliche and Itai Gat and Jake Weissman and James Geboski and James Kohli and Japhet Asher and Jean-Baptiste Gaya and Jeff Marcus and Jeff Tang and Jennifer Chan and Jenny Zhen and Jeremy Reizenstein and Jeremy Teboul and Jessica Zhong and Jian Jin and Jingyi Yang and Joe Cummings and Jon Carvill and Jon Shepard and Jonathan McPhie and Jonathan Torres and Josh Ginsburg and Junjie Wang and Kai Wu and Kam Hou U and Karan Saxena and Karthik Prasad and Kartikay Khandelwal and Katayoun Zand and Kathy Matosich and Kaushik Veeraraghavan and Kelly Michelena and Keqian Li and Kun Huang and Kunal Chawla and Kushal Lakhotia and Kyle Huang and Lailin Chen and Lakshya Garg and Lavender A and Leandro Silva and Lee Bell and Lei Zhang and Liangpeng Guo and Licheng Yu and Liron Moshkovich and Luca Wehrstedt and Madian Khabsa and Manav Avalani and Manish Bhatt and Maria Tsimpoukelli and Martynas Mankus and Matan Hasson and Matthew Lennie and Matthias Reso and Maxim Groshev and Maxim Naumov and Maya Lathi and Meghan Keneally and Michael L.
                   Seltzer and Michal Valko and Michelle Restrepo and Mihir Patel and Mik Vyatskov and Mikayel Samvelyan and Mike Clark and Mike Macey and Mike Wang and Miquel Jubert Hermoso and Mo Metanat and Mohammad Rastegari and Munish Bansal and Nandhini Santhanam and Natascha Parks and Natasha White and Navyata Bawa and Nayan Singhal and Nick Egebo and Nicolas Usunier and Nikolay Pavlovich Laptev and Ning Dong and Ning Zhang and Norman Cheng and Oleg Chernoguz and Olivia Hart and Omkar Salpekar and Ozlem Kalinli and Parkin Kent and Parth Parekh and Paul Saab and Pavan Balaji and Pedro Rittner and Philip Bontrager and Pierre Roux and Piotr Dollar and Polina Zvyagina and Prashant Ratanchandani and Pritish Yuvraj and Qian Liang and Rachad Alao and Rachel Rodriguez and Rafi Ayub and Raghotham Murthy and Raghu Nayani and Rahul Mitra and Raymond Li and Rebekkah Hogan and Robin Battey and Rocky Wang and Rohan Maheswari and Russ Howes and Ruty Rinott and Sai Jayesh Bondu and Samyak Datta and Sara Chugh and Sara Hunt and Sargun Dhillon and Sasha Sidorov and Satadru Pan and Saurabh Verma and Seiji Yamamoto and Sharadh Ramaswamy and Shaun Lindsay and Sheng Feng and Shenghao Lin and Shengxin Cindy Zha and Shiva Shankar and Shuqiang Zhang and Sinong Wang and Sneha Agarwal and Soji Sajuyigbe and Soumith Chintala and Stephanie Max and Stephen Chen and Steve Kehoe and Steve Satterfield and Sudarshan Govindaprasad and Sumit Gupta and Sungmin Cho and Sunny Virk and Suraj Subramanian and Sy Choudhury and Sydney Goldman and Tal Remez and Tamar Glaser and Tamara Best and Thilo Kohler and Thomas Robinson and Tianhe Li and Tianjun Zhang and Tim Matthews and Timothy Chou and Tzook Shaked and Varun Vontimitta and Victoria Ajayi and Victoria Montanez and Vijai Mohan and Vinay Satish Kumar and Vishal Mangla and Vítor Albiero and Vlad Ionescu and Vlad Poenaru and Vlad Tiberiu Mihailescu and Vladimir Ivanov and Wei Li and Wenchen Wang and Wenwen Jiang and Wes Bouaziz
                   and Will Constable and Xiaocheng Tang and Xiaofang Wang and Xiaojian Wu and Xiaolan Wang and Xide Xia and Xilun Wu and Xinbo Gao and Yanjun Chen and Ye Hu and Ye Jia and Ye Qi and Yenda Li and Yilin Zhang and Ying Zhang and Yossi Adi and Youngjin Nam and Yu Wang and Yuchen Hao and Yundi Qian and Yuzi He and Zach Rait and Zachary DeVito and Zef Rosnbrick and Zhaoduo Wen and Zhenyu Yang and Zhiwei Zhao},
  year          = {2024},
  eprint        = {2407.21783},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2407.21783},
}
% QLoRA (arXiv preprint). URL scheme separator restored ("https://").
@misc{dettmers2023qloraefficientfinetuningquantized,
  title         = {{QLoRA}: Efficient Finetuning of Quantized {LLMs}},
  author        = {Tim Dettmers and Artidoro Pagnoni and Ari Holtzman and Luke Zettlemoyer},
  year          = {2023},
  eprint        = {2305.14314},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2305.14314},
}
% Mamba (arXiv preprint), stored in the same arXiv-export form as the other
% preprints in this file. Reviewer annotation is kept in `annote` (not printed).
@misc{mamba,
  title         = {{Mamba}: Linear-Time Sequence Modeling with Selective State Spaces},
  author        = {Gu, Albert and Dao, Tri},
  year          = {2023},
  eprint        = {2312.00752},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2312.00752},
  annote        = {Mamba introduces a revolutionary architecture that challenges the transformer's quadratic attention mechanism, achieving linear scaling with sequence length while maintaining or exceeding transformer performance. This breakthrough suggests new directions for efficient sequence modeling.}
}
% Survey of LLM-based autonomous agents (Frontiers of Computer Science).
% URL now points at the DOI resolver over https; the bare DOI stays in `doi`.
@article{llm_survey,
  author    = {Wang, Lei and Ma, Chen and Feng, Xueyang and Zhang, Zeyu and Yang, Hao and Zhang, Jingsen and Chen, Zhiyuan and Tang, Jiakai and Chen, Xu and Lin, Yankai and Zhao, Wayne Xin and Wei, Zhewei and Wen, Jirong},
  title     = {A Survey on Large Language Model Based Autonomous Agents},
  journal   = {Frontiers of Computer Science},
  volume    = {18},
  number    = {6},
  year      = {2024},
  month     = mar,
  publisher = {Springer Science and Business Media LLC},
  issn      = {2095-2236},
  doi       = {10.1007/s11704-024-40231-1},
  url       = {https://doi.org/10.1007/s11704-024-40231-1}
}
% Data supporting the "keeps getting better" claim
% Kaplan et al. scaling-laws paper (arXiv preprint). The reviewer remark is
% stored in `annote` so that styles do not print it in the reference list.
@misc{scaling_laws,
  title         = {Scaling Laws for Neural Language Models},
  author        = {Jared Kaplan and Sam McCandlish and Tom Henighan and Tom B. Brown and Benjamin Chess and Rewon Child and Scott Gray and Alec Radford and Jeffrey Wu and Dario Amodei},
  year          = {2020},
  eprint        = {2001.08361},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2001.08361},
  annote        = {Scaling laws for neural language models predicting gpt4 performance}
}
% MMLU benchmark paper (arXiv preprint, reference-manager export).
% The arXiv identifier is carried by the eprint fields instead of a free-text
% note; the URL scheme separator is restored.
@misc{mmlu,
  author        = {Hendrycks, Dan and Burns, Collin and Basart, Steven and Zou, Andy and Mazeika, Mantas and Song, Dawn and Steinhardt, Jacob},
  title         = {Measuring {Massive} {Multitask} {Language} {Understanding}},
  year          = {2021},
  month         = jan,
  publisher     = {arXiv},
  eprint        = {2009.03300},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CY},
  doi           = {10.48550/arXiv.2009.03300},
  url           = {https://arxiv.org/abs/2009.03300},
  urldate       = {2024-11-18},
  abstract      = {We propose a new test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more. To attain high accuracy on this test, models must possess extensive world knowledge and problem solving ability. We find that while most recent models have near random-chance accuracy, the very largest GPT-3 model improves over random chance by almost 20 percentage points on average. However, on every one of the 57 tasks, the best models still need substantial improvements before they can reach expert-level accuracy. Models also have lopsided performance and frequently do not know when they are wrong. Worse, they still have near-random accuracy on some socially important subjects such as morality and law. By comprehensively evaluating the breadth and depth of a model's academic and professional understanding, our test can be used to analyze models across many tasks and to identify important shortcomings.},
  keywords      = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language, Computer Science - Computers and Society, Computer Science - Machine Learning},
}
% OpenAI web article introducing o1. Corporate author is double-braced so it is
% never parsed as a personal name; `year` is supplied so author-year styles
% have a label; the reviewer summary lives in `annote` (not printed).
@misc{o1,
  title   = {Learning to Reason with {LLMs}},
  author  = {{OpenAI}},
  year    = {2024},
  month   = sep,
  url     = {https://openai.com/index/learning-to-reason-with-llms/},
  urldate = {2024-09-12},
  annote  = {Technical overview of OpenAI's research on chain-of-thought prompting and intermediate reasoning steps, demonstrating significant improvements in model reasoning capabilities.}
}
% Emergent abilities w/ scaling of a single LLM
% Emergent abilities paper (arXiv preprint). URL scheme separator restored.
@misc{wei2022emergentabilitieslargelanguage,
  title         = {Emergent Abilities of Large Language Models},
  author        = {Jason Wei and Yi Tay and Rishi Bommasani and Colin Raffel and Barret Zoph and Sebastian Borgeaud and Dani Yogatama and Maarten Bosma and Denny Zhou and Donald Metzler and Ed H. Chi and Tatsunori Hashimoto and Oriol Vinyals and Percy Liang and Jeff Dean and William Fedus},
  year          = {2022},
  eprint        = {2206.07682},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2206.07682},
}
% The rising costs of frontier models
% Training-cost study (arXiv preprint). URL scheme separator restored.
@misc{cottier2024risingcoststrainingfrontier,
  title         = {The Rising Costs of Training Frontier {AI} Models},
  author        = {Ben Cottier and Robi Rahman and Loredana Fattorini and Nestor Maslej and David Owen},
  year          = {2024},
  eprint        = {2405.21015},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CY},
  url           = {https://arxiv.org/abs/2405.21015},
}
% multi-agent system from microsoft
% AutoGen multi-agent framework (arXiv preprint). URL scheme separator restored.
@misc{autogen_agents_collaborating,
  title         = {{AutoGen}: Enabling Next-Gen {LLM} Applications via Multi-Agent Conversation},
  author        = {Qingyun Wu and Gagan Bansal and Jieyu Zhang and Yiran Wu and Beibin Li and Erkang Zhu and Li Jiang and Xiaoyun Zhang and Shaokun Zhang and Jiale Liu and Ahmed Hassan Awadallah and Ryen W White and Doug Burger and Chi Wang},
  year          = {2023},
  eprint        = {2308.08155},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2308.08155},
}
% emergent behavior from multi-agent competition (pre-LLM)
% Bansal et al., multi-agent competitive self-play.
% NOTE(review): the previous venue/pages (NeurIPS 2017, pp. 1306--1316) do not
% match this paper; it was posted to arXiv in 2017 (1710.03748) and published
% at ICLR 2018. The citation key is kept unchanged so existing \cite commands
% keep working.
@inproceedings{bansal2017emergence,
  title         = {Emergent Complexity via Multi-Agent Competition},
  author        = {Bansal, Trapit and Pachocki, Jakub and Sidor, Szymon and Sutskever, Ilya and Mordatch, Igor},
  booktitle     = {International Conference on Learning Representations},
  year          = {2018},
  eprint        = {1710.03748},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/1710.03748}
}
% ReAct
% ReAct paper (arXiv preprint). URL scheme separator restored.
@misc{react,
  title         = {{ReAct}: Synergizing Reasoning and Acting in Language Models},
  author        = {Shunyu Yao and Jeffrey Zhao and Dian Yu and Nan Du and Izhak Shafran and Karthik Narasimhan and Yuan Cao},
  year          = {2023},
  eprint        = {2210.03629},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2210.03629},
}
% Analysis of how the meaning of "agent" has differed between authors and over time
% Franklin & Graesser, taxonomy of autonomous agents (ATAL'96 workshop paper).
% The required `booktitle` is supplied, the page range uses an en-dash (--),
% `month` uses the standard macro, and the trailing period is dropped from the
% title because styles add their own punctuation.
% NOTE(review): the key "inproceedings" is an export default; it is kept only
% so existing \cite commands do not break.
% NOTE(review): the proceedings volume (LNCS 1193) is dated 1997 by the
% publisher; `year` is left at the originally recorded 1996 -- confirm.
@inproceedings{inproceedings,
  author    = {Franklin, Stan and Graesser, Arthur},
  title     = {Is it an Agent, or Just a Program?: A Taxonomy for Autonomous Agents},
  booktitle = {Intelligent Agents {III}: Agent Theories, Architectures, and Languages},
  series    = {Lecture Notes in Computer Science},
  volume    = {1193},
  pages     = {21--35},
  publisher = {Springer},
  year      = {1996},
  month     = jan,
  doi       = {10.1007/BFb0013570}
}
% Shows good performance on code generation by LLMs (highly cited)
% Program synthesis with LLMs (arXiv preprint). URL scheme separator restored.
@misc{austin2021programsynthesislargelanguage,
  title         = {Program Synthesis with Large Language Models},
  author        = {Jacob Austin and Augustus Odena and Maxwell Nye and Maarten Bosma and Henryk Michalewski and David Dohan and Ellen Jiang and Carrie Cai and Michael Terry and Quoc Le and Charles Sutton},
  year          = {2021},
  eprint        = {2108.07732},
  archivePrefix = {arXiv},
  primaryClass  = {cs.PL},
  url           = {https://arxiv.org/abs/2108.07732},
}
% shows improved performance on self-refinement, but not on code generation
% Self-Refine (arXiv preprint). URL scheme separator restored.
@misc{madaan2023selfrefineiterativerefinementselffeedback,
  title         = {Self-Refine: Iterative Refinement with Self-Feedback},
  author        = {Aman Madaan and Niket Tandon and Prakhar Gupta and Skyler Hallinan and Luyu Gao and Sarah Wiegreffe and Uri Alon and Nouha Dziri and Shrimai Prabhumoye and Yiming Yang and Shashank Gupta and Bodhisattwa Prasad Majumder and Katherine Hermann and Sean Welleck and Amir Yazdanbakhsh and Peter Clark},
  year          = {2023},
  eprint        = {2303.17651},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2303.17651},
}
% Demonstrates limitations of traditional RL agents vs LLM-based agents in complex environments
% Ghost in the Minecraft (arXiv preprint).
% The percent signs in the annotation are escaped: a bare percent sign copied
% into the generated .bbl starts a TeX comment and swallows the rest of the
% line. The annotation is stored in `annote` so styles do not print it, and
% the arXiv URL is added to match the other preprints in this file.
@misc{zhu2023ghost,
  title         = {Ghost in the {Minecraft}: Generally Capable Agents for Open-World Environments via Large Language Models with Text-based Knowledge and Memory},
  author        = {Zhu, Xizhou and Chen, Yuntao and Tian, Hao and Tao, Chenxin and Su, Weijie and Yang, Chenyu and Huang, Gao and Li, Bin and Lu, Lewei and Wang, Xiaogang and Qiao, Yu and Zhang, Zhaoxiang and Dai, Jifeng},
  year          = {2023},
  eprint        = {2305.17144},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2305.17144},
  annote        = {Shows how LLM-based agents overcome limitations of traditional RL approaches by dynamically generating actions instead of selecting from predefined ones. Traditional agents achieved only 30\% completion rate vs 100\% for LLM agents}
}
% LLMs enable open-ended exploration and skill acquisition without predefined goals - VOYAGER demonstrates how LLMs can drive autonomous exploration and continually learn new skills through an automatic curriculum, unlike traditional RL approaches that require fixed objectives.
% Code as action space enables compositional and interpretable behaviors - Using code/programs as actions (rather than low-level controls) allows VOYAGER to build increasingly complex skills by composing simpler ones, while maintaining interpretability.
% Iterative prompting with environment feedback enables robust skill learning - The combination of execution feedback, error traces, and self-verification allows VOYAGER to reliably learn new skills through iterative refinement, achieving 3.3x more item discoveries compared to baselines.
% Voyager embodied agent (arXiv preprint). URL scheme separator restored.
@misc{voyager,
  title         = {Voyager: An Open-Ended Embodied Agent with Large Language Models},
  author        = {Guanzhi Wang and Yuqi Xie and Yunfan Jiang and Ajay Mandlekar and Chaowei Xiao and Yuke Zhu and Linxi Fan and Anima Anandkumar},
  year          = {2023},
  eprint        = {2305.16291},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2305.16291},
}
% Survey of LLM-based agents (arXiv preprint). URL scheme separator restored.
@misc{ai_agents_survey,
  title         = {The Rise and Potential of Large Language Model Based Agents: A Survey},
  author        = {Zhiheng Xi and Wenxiang Chen and Xin Guo and Wei He and Yiwen Ding and Boyang Hong and Ming Zhang and Junzhe Wang and Senjie Jin and Enyu Zhou and Rui Zheng and Xiaoran Fan and Xiao Wang and Limao Xiong and Yuhao Zhou and Weiran Wang and Changhao Jiang and Yicheng Zou and Xiangyang Liu and Zhangyue Yin and Shihan Dou and Rongxiang Weng and Wensen Cheng and Qi Zhang and Wenjuan Qin and Yongyan Zheng and Xipeng Qiu and Xuanjing Huang and Tao Gui},
  year          = {2023},
  eprint        = {2309.07864},
  archivePrefix = {arXiv},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/2309.07864},
}
% IMPORTED REFERENCES
% Kadavath et al. 2022, CoRR preprint (DBLP export).
@article{DBLP:journals/corr/abs-2207-05221,
  author     = {Saurav Kadavath and Tom Conerly and Amanda Askell and Tom Henighan and Dawn Drain and Ethan Perez and Nicholas Schiefer and Zac Hatfield{-}Dodds and Nova DasSarma and Eli Tran{-}Johnson and Scott Johnston and Sheer El Showk and Andy Jones and Nelson Elhage and Tristan Hume and Anna Chen and Yuntao Bai and Sam Bowman and Stanislav Fort and Deep Ganguli and Danny Hernandez and Josh Jacobson and Jackson Kernion and Shauna Kravec and Liane Lovitt and Kamal Ndousse and Catherine Olsson and Sam Ringer and Dario Amodei and Tom Brown and Jack Clark and Nicholas Joseph and Ben Mann and Sam McCandlish and Chris Olah and Jared Kaplan},
  title      = {Language Models (Mostly) Know What They Know},
  journal    = {CoRR},
  volume     = {abs/2207.05221},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2207.05221},
  doi        = {10.48550/arXiv.2207.05221},
  eprinttype = {arXiv},
  eprint     = {2207.05221},
  timestamp  = {Thu, 14 Jul 2022 15:34:28 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2207-05221.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Ganguli et al. 2022, red-teaming study, CoRR preprint (DBLP export).
@article{DBLP:journals/corr/abs-2209-07858,
  author     = {Deep Ganguli and Liane Lovitt and Jackson Kernion and Amanda Askell and Yuntao Bai and Saurav Kadavath and Ben Mann and Ethan Perez and Nicholas Schiefer and Kamal Ndousse and Andy Jones and Sam Bowman and Anna Chen and Tom Conerly and Nova DasSarma and Dawn Drain and Nelson Elhage and Sheer El Showk and Stanislav Fort and Zac Hatfield{-}Dodds and Tom Henighan and Danny Hernandez and Tristan Hume and Josh Jacobson and Scott Johnston and Shauna Kravec and Catherine Olsson and Sam Ringer and Eli Tran{-}Johnson and Dario Amodei and Tom Brown and Nicholas Joseph and Sam McCandlish and Chris Olah and Jared Kaplan and Jack Clark},
  title      = {Red Teaming Language Models to Reduce Harms: Methods, Scaling Behaviors, and Lessons Learned},
  journal    = {CoRR},
  volume     = {abs/2209.07858},
  year       = {2022},
  url        = {https://doi.org/10.48550/arXiv.2209.07858},
  doi        = {10.48550/arXiv.2209.07858},
  eprinttype = {arXiv},
  eprint     = {2209.07858},
  timestamp  = {Tue, 27 Sep 2022 16:29:43 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2209-07858.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Andreas 2022, Findings of EMNLP (DBLP export).
% Stray characters that preceded the entry marker have been removed.
@inproceedings{DBLP:conf/emnlp/Andreas22,
  author    = {Jacob Andreas},
  editor    = {Yoav Goldberg and Zornitsa Kozareva and Yue Zhang},
  title     = {Language Models as Agent Models},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2022, Abu Dhabi, United Arab Emirates, December 7-11, 2022},
  pages     = {5769--5779},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  url       = {https://aclanthology.org/2022.findings-emnlp.423},
  timestamp = {Tue, 07 Feb 2023 17:10:52 +0100},
  biburl    = {https://dblp.org/rec/conf/emnlp/Andreas22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Stanford Encyclopedia of Philosophy entry "Agency" (Winter 2019 archive).
% Brace protection is applied to whole words only, and the archive URL for the
% cited edition is recorded so the entry has a locator.
@incollection{sep-agency,
  author    = {Schlosser, Markus},
  title     = {Agency},
  booktitle = {The {Stanford} Encyclopedia of Philosophy},
  editor    = {Edward N. Zalta},
  edition   = {{Winter} 2019},
  year      = {2019},
  publisher = {Metaphysics Research Lab, Stanford University},
  url       = {https://plato.stanford.edu/archives/win2019/entries/agency/}
}
% Bloom, book on word learning. Title stored in Title Case (styles can
% downcase but cannot restore capitals); publisher name capitalised.
@book{bloom2002children,
  author    = {Bloom, Paul},
  title     = {How Children Learn the Meanings of Words},
  publisher = {MIT Press},
  year      = {2002}
}
% Tomasello, usage-based theory of language acquisition. Title stored in
% Title Case; publisher name capitalised.
@book{tomasello2005constructing,
  author    = {Tomasello, Michael},
  title     = {Constructing a Language: A Usage-Based Theory of Language Acquisition},
  publisher = {Harvard University Press},
  year      = {2005}
}
% Zwaan & Madden, chapter in the edited volume "Grounding Cognition".
% The work is a book chapter, so it is typed as @incollection with the volume
% title in `booktitle`; the spurious journal-style `volume` was dropped.
% NOTE(review): editors and page range are from the publisher's record of the
% volume -- confirm against the printed book.
@incollection{zwaan2005embodied,
  author    = {Zwaan, Rolf A. and Madden, Carol J.},
  title     = {Embodied Sentence Comprehension},
  booktitle = {Grounding Cognition: The Role of Perception and Action in Memory, Language, and Thinking},
  editor    = {Pecher, Diane and Zwaan, Rolf A.},
  pages     = {224--245},
  publisher = {Cambridge University Press},
  year      = {2005}
}
% Bisk et al. 2020, EMNLP (DBLP export).
@inproceedings{DBLP:conf/emnlp/BiskHTABCLLMNPT20,
  author    = {Yonatan Bisk and Ari Holtzman and Jesse Thomason and Jacob Andreas and Yoshua Bengio and Joyce Chai and Mirella Lapata and Angeliki Lazaridou and Jonathan May and Aleksandr Nisnevich and Nicolas Pinto and Joseph P. Turian},
  editor    = {Bonnie Webber and Trevor Cohn and Yulan He and Yang Liu},
  title     = {Experience Grounds Language},
  booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, {EMNLP} 2020, Online, November 16-20, 2020},
  pages     = {8718--8735},
  publisher = {Association for Computational Linguistics},
  year      = {2020},
  url       = {https://doi.org/10.18653/v1/2020.emnlp-main.703},
  doi       = {10.18653/v1/2020.emnlp-main.703},
  timestamp = {Wed, 23 Mar 2022 10:11:55 +0100},
  biburl    = {https://dblp.org/rec/conf/emnlp/BiskHTABCLLMNPT20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Radford et al. 2017, CoRR preprint (DBLP export).
@article{DBLP:journals/corr/RadfordJS17,
  author     = {Alec Radford and Rafal J{\'{o}}zefowicz and Ilya Sutskever},
  title      = {Learning to Generate Reviews and Discovering Sentiment},
  journal    = {CoRR},
  volume     = {abs/1704.01444},
  year       = {2017},
  url        = {http://arxiv.org/abs/1704.01444},
  eprinttype = {arXiv},
  eprint     = {1704.01444},
  timestamp  = {Mon, 13 Aug 2018 16:47:25 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/RadfordJS17.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Li, Nye & Andreas 2021, ACL/IJCNLP (DBLP export).
@inproceedings{DBLP:conf/acl/LiNA20,
  author    = {Belinda Z. Li and Maxwell I. Nye and Jacob Andreas},
  editor    = {Chengqing Zong and Fei Xia and Wenjie Li and Roberto Navigli},
  title     = {Implicit Representations of Meaning in Neural Language Models},
  booktitle = {Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing, {ACL/IJCNLP} 2021, (Volume 1: Long Papers), Virtual Event, August 1-6, 2021},
  pages     = {1813--1827},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  url       = {https://doi.org/10.18653/v1/2021.acl-long.143},
  doi       = {10.18653/v1/2021.acl-long.143},
  timestamp = {Mon, 09 Aug 2021 16:25:37 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/LiNA20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Barandiaran, Di Paolo & Rohde, definition of agency (Adaptive Behavior).
% The publisher string was scraper residue and is normalised; the title is
% stored in Title Case.
% NOTE(review): DOI added from the journal's record -- confirm it resolves.
@article{barandiaran2009defining,
  author    = {Barandiaran, Xabier E. and Di Paolo, Ezequiel and Rohde, Marieke},
  title     = {Defining Agency: Individuality, Normativity, Asymmetry, and Spatio-temporality in Action},
  journal   = {Adaptive Behavior},
  volume    = {17},
  number    = {5},
  pages     = {367--386},
  year      = {2009},
  publisher = {SAGE Publications},
  doi       = {10.1177/1059712309343819}
}
% GPT-1 report. This is an OpenAI technical report, not a journal article, so
% it is typed as @techreport with OpenAI as the institution. The four listed
% names are the complete author list, so "and others" was removed.
@techreport{radford2018improving,
  author      = {Radford, Alec and Narasimhan, Karthik and Salimans, Tim and Sutskever, Ilya},
  title       = {Improving Language Understanding by Generative Pre-Training},
  institution = {OpenAI},
  year        = {2018},
  url         = {https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf}
}
% GPT-2 report. Released by OpenAI as a technical report; the journal-style
% volume/number/pages values were scraper artefacts and are dropped. The six
% listed names are the complete author list, so "and others" was removed.
@techreport{radford2019language,
  author      = {Radford, Alec and Wu, Jeffrey and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya},
  title       = {Language Models are Unsupervised Multitask Learners},
  institution = {OpenAI},
  year        = {2019},
  url         = {https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf}
}
% Brown et al. 2020 (GPT-3), NeurIPS (DBLP export).
@inproceedings{DBLP:conf/nips/BrownMRSKDNSSAA20,
  author    = {Tom B. Brown and Benjamin Mann and Nick Ryder and Melanie Subbiah and Jared Kaplan and Prafulla Dhariwal and Arvind Neelakantan and Pranav Shyam and Girish Sastry and Amanda Askell and Sandhini Agarwal and Ariel Herbert{-}Voss and Gretchen Krueger and Tom Henighan and Rewon Child and Aditya Ramesh and Daniel M. Ziegler and Jeffrey Wu and Clemens Winter and Christopher Hesse and Mark Chen and Eric Sigler and Mateusz Litwin and Scott Gray and Benjamin Chess and Jack Clark and Christopher Berner and Sam McCandlish and Alec Radford and Ilya Sutskever and Dario Amodei},
  editor    = {Hugo Larochelle and Marc'Aurelio Ranzato and Raia Hadsell and Maria{-}Florina Balcan and Hsuan{-}Tien Lin},
  title     = {Language Models are Few-Shot Learners},
  booktitle = {Advances in Neural Information Processing Systems 33: Annual Conference on Neural Information Processing Systems 2020, NeurIPS 2020, December 6-12, 2020, virtual},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/1457c0d6bfcb4967418bfb8ac142f64a-Abstract.html},
  timestamp = {Thu, 25 May 2023 10:38:31 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/BrownMRSKDNSSAA20.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Stanford Alpaca. The project is distributed as a GitHub repository, so the
% entry records where it was published; proper nouns in the title are
% brace-protected against style downcasing.
@misc{taori2023stanford,
  author       = {Taori, Rohan and Gulrajani, Ishaan and Zhang, Tianyi and Dubois, Yann and Li, Xuechen and Guestrin, Carlos and Liang, Percy and Hashimoto, Tatsunori B.},
  title        = {{Stanford} {Alpaca}: An Instruction-following {LLaMA} Model},
  year         = {2023},
  howpublished = {GitHub repository},
  url          = {https://github.com/tatsu-lab/stanford_alpaca}
}
% Raffel et al. (T5), Journal of Machine Learning Research.
% Issue number and page range follow JMLR's own record (paper 140 of volume
% 21, pages 1--67); the scraper-generated publisher string was removed.
@article{raffel2020exploring,
  author  = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and Lee, Katherine and Narang, Sharan and Matena, Michael and Zhou, Yanqi and Li, Wei and Liu, Peter J.},
  title   = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  journal = {Journal of Machine Learning Research},
  volume  = {21},
  number  = {140},
  pages   = {1--67},
  year    = {2020},
  url     = {http://jmlr.org/papers/v21/20-074.html}
}
% Wang et al. 2023, CVPR (DBLP export).
@inproceedings{DBLP:conf/cvpr/WangWCS023,
  author    = {Xinlong Wang and Wen Wang and Yue Cao and Chunhua Shen and Tiejun Huang},
  title     = {Images Speak in Images: {A} Generalist Painter for In-Context Visual Learning},
  booktitle = {{IEEE/CVF} Conference on Computer Vision and Pattern Recognition, {CVPR} 2023, Vancouver, BC, Canada, June 17-24, 2023},
  pages     = {6830--6839},
  publisher = {{IEEE}},
  year      = {2023},
  url       = {https://doi.org/10.1109/CVPR52729.2023.00660},
  doi       = {10.1109/CVPR52729.2023.00660},
  timestamp = {Mon, 28 Aug 2023 16:14:40 +0200},
  biburl    = {https://dblp.org/rec/conf/cvpr/WangWCS023.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Wang et al. 2023, neural codec language models for TTS, CoRR preprint (DBLP export).
@article{DBLP:journals/corr/abs-2301-02111,
  author     = {Chengyi Wang and Sanyuan Chen and Yu Wu and Ziqiang Zhang and Long Zhou and Shujie Liu and Zhuo Chen and Yanqing Liu and Huaming Wang and Jinyu Li and Lei He and Sheng Zhao and Furu Wei},
  title      = {Neural Codec Language Models are Zero-Shot Text to Speech Synthesizers},
  journal    = {CoRR},
  volume     = {abs/2301.02111},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2301.02111},
  doi        = {10.48550/arXiv.2301.02111},
  eprinttype = {arXiv},
  eprint     = {2301.02111},
  timestamp  = {Mon, 28 Aug 2023 21:26:20 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-02111.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Zhang et al. 2023, cross-lingual neural codec language modeling, CoRR preprint (DBLP export).
@article{DBLP:journals/corr/abs-2303-03926,
  author     = {Ziqiang Zhang and Long Zhou and Chengyi Wang and Sanyuan Chen and Yu Wu and Shujie Liu and Zhuo Chen and Yanqing Liu and Huaming Wang and Jinyu Li and Lei He and Sheng Zhao and Furu Wei},
  title      = {Speak Foreign Languages with Your Own Voice: Cross-Lingual Neural Codec Language Modeling},
  journal    = {CoRR},
  volume     = {abs/2303.03926},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2303.03926},
  doi        = {10.48550/arXiv.2303.03926},
  eprinttype = {arXiv},
  eprint     = {2303.03926},
  timestamp  = {Thu, 11 May 2023 13:50:01 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-03926.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Zhu et al. 2023, multilingual MT with LLMs, CoRR preprint (DBLP export).
@article{DBLP:journals/corr/abs-2304-04675,
  author     = {Wenhao Zhu and Hongyi Liu and Qingxiu Dong and Jingjing Xu and Lingpeng Kong and Jiajun Chen and Lei Li and Shujian Huang},
  title      = {Multilingual Machine Translation with Large Language Models: Empirical Results and Analysis},
  journal    = {CoRR},
  volume     = {abs/2304.04675},
  year       = {2023},
  url        = {https://doi.org/10.48550/arXiv.2304.04675},
  doi        = {10.48550/arXiv.2304.04675},
  eprinttype = {arXiv},
  eprint     = {2304.04675},
  timestamp  = {Tue, 22 Aug 2023 07:47:47 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2304-04675.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Tsimpoukelli et al. 2021, NeurIPS (DBLP export).
@inproceedings{DBLP:conf/nips/TsimpoukelliMCE21,
  author    = {Maria Tsimpoukelli and Jacob Menick and Serkan Cabi and S. M. Ali Eslami and Oriol Vinyals and Felix Hill},
  editor    = {Marc'Aurelio Ranzato and Alina Beygelzimer and Yann N. Dauphin and Percy Liang and Jennifer Wortman Vaughan},
  title     = {Multimodal Few-Shot Learning with Frozen Language Models},
  booktitle = {Advances in Neural Information Processing Systems 34: Annual Conference on Neural Information Processing Systems 2021, NeurIPS 2021, December 6-14, 2021, virtual},
  pages     = {200--212},
  year      = {2021},
  url       = {https://proceedings.neurips.cc/paper/2021/hash/01b7575c38dac42f3cfb7d500438b875-Abstract.html},
  timestamp = {Tue, 03 May 2022 16:20:46 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/TsimpoukelliMCE21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Venue is spelled out in the full proceedings-title form used for the NeurIPS 2021 entry in this file.
% The url field holds the raw address, with no LaTeX escaping of the underscore.
@inproceedings{DBLP:conf/nips/BarGDGE22,
  author    = {Bar, Amir and
               Gandelsman, Yossi and
               Darrell, Trevor and
               Globerson, Amir and
               Efros, Alexei A.},
  title     = {Visual Prompting via Image Inpainting},
  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year      = {2022},
  url       = {http://papers.nips.cc/paper_files/paper/2022/hash/9f09f316a3eaf59d9ced5ffaefe97e0f-Abstract-Conference.html},
  timestamp = {Thu, 11 May 2023 17:08:21 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/BarGDGE22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Venue is spelled out in the full proceedings-title form used for the NeurIPS 2021 entry in this file.
% The url field holds the raw address, with no LaTeX escaping of the underscore.
@inproceedings{DBLP:conf/nips/Wei0SBIXCLZ22,
  author    = {Wei, Jason and
               Wang, Xuezhi and
               Schuurmans, Dale and
               Bosma, Maarten and
               Ichter, Brian and
               Xia, Fei and
               Chi, Ed H. and
               Le, Quoc V. and
               Zhou, Denny},
  title     = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year      = {2022},
  url       = {http://papers.nips.cc/paper_files/paper/2022/hash/9d5609613524ecf4f15af0f7b31abca4-Abstract-Conference.html},
  timestamp = {Thu, 11 May 2023 17:08:21 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Wei0SBIXCLZ22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/acl/LuBM0S22,
  author    = {Lu, Yao and
               Bartolo, Max and
               Moore, Alastair and
               Riedel, Sebastian and
               Stenetorp, Pontus},
  editor    = {Muresan, Smaranda and
               Nakov, Preslav and
               Villavicencio, Aline},
  title     = {Fantastically Ordered Prompts and Where to Find Them: Overcoming Few-Shot Prompt Order Sensitivity},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2022, Dublin, Ireland, May 22-27, 2022},
  pages     = {8086--8098},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  doi       = {10.18653/v1/2022.acl-long.556},
  url       = {https://doi.org/10.18653/v1/2022.acl-long.556},
  timestamp = {Mon, 01 Aug 2022 16:27:50 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/LuBM0S22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Journal name is stored in full; any abbreviation is left to the bibliography style.
@article{DBLP:journals/cacm/Winston80,
  author    = {Winston, Patrick H.},
  title     = {Learning and Reasoning by Analogy},
  journal   = {Communications of the {ACM}},
  volume    = {23},
  number    = {12},
  pages     = {689--703},
  year      = {1980},
  doi       = {10.1145/359038.359042},
  url       = {https://doi.org/10.1145/359038.359042},
  timestamp = {Tue, 06 Nov 2018 12:51:36 +0100},
  biburl    = {https://dblp.org/rec/journals/cacm/Winston80.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2301-00234,
  author     = {Dong, Qingxiu and
                Li, Lei and
                Dai, Damai and
                Zheng, Ce and
                Wu, Zhiyong and
                Chang, Baobao and
                Sun, Xu and
                Xu, Jingjing and
                Li, Lei and
                Sui, Zhifang},
  title      = {A Survey for In-context Learning},
  journal    = {CoRR},
  volume     = {abs/2301.00234},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2301.00234},
  doi        = {10.48550/arXiv.2301.00234},
  url        = {https://doi.org/10.48550/arXiv.2301.00234},
  timestamp  = {Wed, 26 Jul 2023 08:43:11 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2301-00234.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% Venue is spelled out in the full proceedings-title form used for the NeurIPS 2021 entry in this file.
% The url field holds the raw address, with no LaTeX escaping of the underscore.
@inproceedings{DBLP:conf/nips/Ouyang0JAWMZASR22,
  author    = {Ouyang, Long and
               Wu, Jeffrey and
               Jiang, Xu and
               Almeida, Diogo and
               Wainwright, Carroll L. and
               Mishkin, Pamela and
               Zhang, Chong and
               Agarwal, Sandhini and
               Slama, Katarina and
               Ray, Alex and
               Schulman, John and
               Hilton, Jacob and
               Kelton, Fraser and
               Miller, Luke and
               Simens, Maddie and
               Askell, Amanda and
               Welinder, Peter and
               Christiano, Paul F. and
               Leike, Jan and
               Lowe, Ryan},
  title     = {Training language models to follow instructions with human feedback},
  booktitle = {Advances in Neural Information Processing Systems 35: Annual Conference on Neural Information Processing Systems 2022, NeurIPS 2022, New Orleans, LA, USA, November 28 - December 9, 2022},
  year      = {2022},
  url       = {http://papers.nips.cc/paper_files/paper/2022/hash/b1efde53be364a73914f58805a001731-Abstract-Conference.html},
  timestamp = {Thu, 11 May 2023 17:08:21 +0200},
  biburl    = {https://dblp.org/rec/conf/nips/Ouyang0JAWMZASR22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2212-12017,
  author     = {Iyer, Srinivasan and
                Lin, Xi Victoria and
                Pasunuru, Ramakanth and
                Mihaylov, Todor and
                Simig, Daniel and
                Yu, Ping and
                Shuster, Kurt and
                Wang, Tianlu and
                Liu, Qing and
                Koura, Punit Singh and
                Li, Xian and
                O'Horo, Brian and
                Pereyra, Gabriel and
                Wang, Jeff and
                Dewan, Christopher and
                Celikyilmaz, Asli and
                Zettlemoyer, Luke and
                Stoyanov, Ves},
  title      = {{OPT-IML:} Scaling Language Model Instruction Meta Learning through the Lens of Generalization},
  journal    = {CoRR},
  volume     = {abs/2212.12017},
  year       = {2022},
  eprinttype = {arXiv},
  eprint     = {2212.12017},
  doi        = {10.48550/arXiv.2212.12017},
  url        = {https://doi.org/10.48550/arXiv.2212.12017},
  timestamp  = {Tue, 09 May 2023 21:48:35 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2212-12017.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
% The tool name in the title is brace-protected so sentence-casing styles keep its camelCase.
@inproceedings{DBLP:conf/acl/BachSYWRNSKBFAD22,
  author    = {Bach, Stephen H. and
               Sanh, Victor and
               Yong, Zheng Xin and
               Webson, Albert and
               Raffel, Colin and
               Nayak, Nihal V. and
               Sharma, Abheesht and
               Kim, Taewoon and
               Bari, M. Saiful and
               F{\'{e}}vry, Thibault and
               Alyafeai, Zaid and
               Dey, Manan and
               Santilli, Andrea and
               Sun, Zhiqing and
               Ben{-}David, Srulik and
               Xu, Canwen and
               Chhablani, Gunjan and
               Wang, Han and
               Fries, Jason Alan and
               AlShaibani, Maged Saeed and
               Sharma, Shanya and
               Thakker, Urmish and
               Almubarak, Khalid and
               Tang, Xiangru and
               Radev, Dragomir R. and
               Jiang, Mike Tian{-}Jian and
               Rush, Alexander M.},
  editor    = {Basile, Valerio and
               Kozareva, Zornitsa and
               Stajner, Sanja},
  title     = {{PromptSource}: An Integrated Development Environment and Repository for Natural Language Prompts},
  booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics, {ACL} 2022 - System Demonstrations, Dublin, Ireland, May 22-27, 2022},
  pages     = {93--104},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  doi       = {10.18653/v1/2022.acl-demo.9},
  url       = {https://doi.org/10.18653/v1/2022.acl-demo.9},
  timestamp = {Mon, 01 Aug 2022 16:27:48 +0200},
  biburl    = {https://dblp.org/rec/conf/acl/BachSYWRNSKBFAD22.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2303-08774,
  author     = {OpenAI},
  title      = {{GPT-4} Technical Report},
  journal    = {CoRR},
  volume     = {abs/2303.08774},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2303.08774},
  doi        = {10.48550/arXiv.2303.08774},
  url        = {https://doi.org/10.48550/arXiv.2303.08774},
  timestamp  = {Mon, 20 Mar 2023 15:23:19 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2303-08774.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{DBLP:conf/naacl/LinJLGE21,
  author    = {Lin, Chu{-}Cheng and
               Jaech, Aaron and
               Li, Xin and
               Gormley, Matthew R. and
               Eisner, Jason},
  editor    = {Toutanova, Kristina and
               Rumshisky, Anna and
               Zettlemoyer, Luke and
               Hakkani{-}T{\"{u}}r, Dilek and
               Beltagy, Iz and
               Bethard, Steven and
               Cotterell, Ryan and
               Chakraborty, Tanmoy and
               Zhou, Yichao},
  title     = {Limitations of Autoregressive Models and Their Alternatives},
  booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, {NAACL-HLT} 2021, Online, June 6-11, 2021},
  pages     = {5147--5173},
  publisher = {Association for Computational Linguistics},
  year      = {2021},
  doi       = {10.18653/v1/2021.naacl-main.405},
  url       = {https://doi.org/10.18653/v1/2021.naacl-main.405},
  timestamp = {Fri, 06 Aug 2021 00:41:31 +0200},
  biburl    = {https://dblp.org/rec/conf/naacl/LinJLGE21.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
@book{anscombe2000intention,
  author    = {Anscombe, Gertrude Elizabeth Margaret},
  title     = {Intention},
  publisher = {Harvard University Press},
  year      = {2000}
}
@article{60a9dd9a-e48a-3fcf-87dd-d5c158406fc6,
  author    = {Davidson, Donald},
  title     = {Actions, Reasons, and Causes},
  journal   = {The Journal of Philosophy},
  volume    = {60},
  number    = {23},
  pages     = {685--700},
  year      = {1963},
  publisher = {Journal of Philosophy, Inc.},
  issn      = {0022362X},
  url       = {http://www.jstor.org/stable/2023177},
  urldate   = {2023-08-08}
}
% NOTE(review): title is given without the collection's chapter numeral; confirm against the printed volume.
@incollection{Davidson1971-DAVIA-2,
  author    = {Davidson, Donald},
  title     = {Agency},
  editor    = {Marras, Ausonio and Bronaugh, R. N. and Binkley, Robert W.},
  booktitle = {Agent, Action, and Reason},
  publisher = {University of Toronto Press},
  year      = {1971},
  pages     = {1--37}
}
% NOTE(review): title omits the parenthesised subject-keyword list carried by the catalogue record; confirm against the thesis title page.
@phdthesis{DBLP:phd/us/Agha85,
  author    = {Agha, Gul A.},
  title     = {Actors: A Model of Concurrent Computation in Distributed Systems},
  school    = {University of Michigan, {USA}},
  year      = {1985},
  url       = {http://hdl.handle.net/2027.42/160629},
  timestamp = {Fri, 06 May 2022 23:08:24 +0200},
  biburl    = {https://dblp.org/rec/phd/us/Agha85.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% The doi and url fields hold the raw identifier and address, with no LaTeX escaping of the underscore.
@inproceedings{DBLP:conf/law/Shoham92,
  author    = {Shoham, Yoav},
  editor    = {Masuch, Michael and
               P{\'{o}}los, L{\'{a}}szl{\'{o}}},
  title     = {Agent Oriented Programming},
  booktitle = {Knowledge Representation and Reasoning Under Uncertainty, Logic at Work [International Conference Logic at Work, Amsterdam, The Netherlands, December 17-19, 1992]},
  series    = {Lecture Notes in Computer Science},
  volume    = {808},
  pages     = {123--129},
  publisher = {Springer},
  year      = {1992},
  doi       = {10.1007/3-540-58095-6_9},
  url       = {https://doi.org/10.1007/3-540-58095-6_9},
  timestamp = {Tue, 14 May 2019 10:00:37 +0200},
  biburl    = {https://dblp.org/rec/conf/law/Shoham92.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}