@inproceedings{yolo,
title={You only look once: Unified, real-time object detection},
author={Redmon, Joseph and Divvala, Santosh and Girshick, Ross and Farhadi, Ali},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={779--788},
year={2016}
}
@article{vit,
title={An image is worth 16x16 words: Transformers for image recognition at scale},
author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
journal={arXiv preprint arXiv:2010.11929},
year={2020}
}
@article{bert,
title={{BERT}: Pre-training of deep bidirectional transformers for language understanding},
author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
journal={arXiv preprint arXiv:1810.04805},
year={2018}
}
@article{gpt3,
title={Language models are few-shot learners},
author={Brown, Tom and Mann, Benjamin and Ryder, Nick and Subbiah, Melanie and Kaplan, Jared D and Dhariwal, Prafulla and Neelakantan, Arvind and Shyam, Pranav and Sastry, Girish and Askell, Amanda and others},
journal={Advances in neural information processing systems},
volume={33},
pages={1877--1901},
year={2020}
}
@article{mobilenet,
title={{MobileNets}: Efficient convolutional neural networks for mobile vision applications},
author={Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
journal={arXiv preprint arXiv:1704.04861},
year={2017}
}
@article{squeezenet,
title={{SqueezeNet}: {AlexNet}-level accuracy with 50x fewer parameters and $<$0.5{MB} model size},
author={Iandola, Forrest N and Han, Song and Moskewicz, Matthew W and Ashraf, Khalid and Dally, William J and Keutzer, Kurt},
journal={arXiv preprint arXiv:1602.07360},
year={2016}
}
@article{LTH,
title={The lottery ticket hypothesis: Finding sparse, trainable neural networks},
author={Frankle, Jonathan and Carbin, Michael},
journal={arXiv preprint arXiv:1803.03635},
year={2018}
}
@article{sparsitymaycry,
title={{Sparsity May Cry}: Let us fail (current) sparse neural networks together!},
author={Liu, Shiwei and Chen, Tianlong and Zhang, Zhenyu and Chen, Xuxi and Huang, Tianjin and Jaiswal, Ajay and Wang, Zhangyang},
journal={arXiv preprint arXiv:2303.02141},
year={2023}
}
@article{PIE,
title={What do compressed deep neural networks forget?},
author={Hooker, Sara and Courville, Aaron and Clark, Gregory and Dauphin, Yann and Frome, Andrea},
journal={arXiv preprint arXiv:1911.05248},
year={2019}
}
@article{liu2018rethinking,
title={Rethinking the value of network pruning},
author={Liu, Zhuang and Sun, Mingjie and Zhou, Tinghui and Huang, Gao and Darrell, Trevor},
journal={arXiv preprint arXiv:1810.05270},
year={2018}
}
@article{jaiswal2023emergence,
title={The emergence of essential sparsity in large pre-trained models: The weights that matter},
author={Jaiswal, Ajay and Liu, Shiwei and Chen, Tianlong and Wang, Zhangyang},
journal={arXiv preprint arXiv:2306.03805},
year={2023}
}
@article{GPT4,
title={{GPT-4} technical report},
author={{OpenAI}},
journal={arXiv preprint arXiv:2303.08774},
year={2023}
}
@article{hinton2022forward,
title={The forward-forward algorithm: Some preliminary investigations},
author={Hinton, Geoffrey},
journal={arXiv preprint arXiv:2212.13345},
year={2022}
}
@article{redunet,
title={{ReduNet}: A white-box deep network from the principle of maximizing rate reduction},
author={Chan, Kwan Ho Ryan and Yu, Yaodong and You, Chong and Qi, Haozhi and Wright, John and Ma, Yi},
journal={The Journal of Machine Learning Research},
volume={23},
number={1},
pages={4907--5009},
year={2022},
publisher={JMLR.org}
}
@article{morcos2019one,
title={One ticket to win them all: Generalizing lottery ticket initializations across datasets and optimizers},
author={Morcos, Ari and Yu, Haonan and Paganini, Michela and Tian, Yuandong},
journal={Advances in neural information processing systems},
volume={32},
year={2019}
}
@inproceedings{pmlr-v139-liu21aa,
title={Lottery ticket preserves weight correlation: Is it desirable or not?},
author={Liu, Ning and Yuan, Geng and Che, Zhengping and Shen, Xuan and Ma, Xiaolong and Jin, Qing and Ren, Jian and Tang, Jian and Liu, Sijia and Wang, Yanzhi},
booktitle={Proceedings of the 38th International Conference on Machine Learning},
pages={7011--7020},
year={2021},
editor={Meila, Marina and Zhang, Tong},
volume={139},
series={Proceedings of Machine Learning Research},
month={18--24 Jul},
publisher={PMLR}
}
@inproceedings{sparsedoubledescent,
title={Sparse double descent: Where network pruning aggravates overfitting},
author={He, Zheng and Xie, Zeke and Zhu, Quanzhi and Qin, Zengchang},
booktitle={International Conference on Machine Learning},
pages={8635--8659},
year={2022},
organization={PMLR}
}
@article{liebenwein2021lost,
title={Lost in pruning: The effects of pruning neural networks beyond test accuracy},
author={Liebenwein, Lucas and Baykal, Cenk and Carter, Brandon and Gifford, David and Rus, Daniela},
journal={Proceedings of Machine Learning and Systems},
volume={3},
pages={93--138},
year={2021}
}
@inproceedings{NEURIPS2020_b6af2c97,
title={The lottery ticket hypothesis for pre-trained {BERT} networks},
author={Chen, Tianlong and Frankle, Jonathan and Chang, Shiyu and Liu, Sijia and Zhang, Yang and Wang, Zhangyang and Carbin, Michael},
booktitle={Advances in Neural Information Processing Systems},
editor={H. Larochelle and M. Ranzato and R. Hadsell and M.F. Balcan and H. Lin},
volume={33},
pages={15834--15846},
publisher={Curran Associates, Inc.},
url={https://proceedings.neurips.cc/paper_files/paper/2020/file/b6af2c9703f203a2794be03d443af2e3-Paper.pdf},
year={2020}
}
@inproceedings{girish2021lottery,
title={The lottery ticket hypothesis for object recognition},
author={Girish, Sharath and Maiya, Shishira R and Gupta, Kamal and Chen, Hao and Davis, Larry S and Shrivastava, Abhinav},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={762--771},
year={2021}
}
@inproceedings{chen2021lottery,
title={The lottery tickets hypothesis for supervised and self-supervised pre-training in computer vision models},
author={Chen, Tianlong and Frankle, Jonathan and Chang, Shiyu and Liu, Sijia and Zhang, Yang and Carbin, Michael and Wang, Zhangyang},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={16306--16316},
year={2021}
}
@article{DualLTH,
title={Dual lottery ticket hypothesis},
author={Bai, Yue and Wang, Huan and Tao, Zhiqiang and Li, Kunpeng and Fu, Yun},
journal={arXiv preprint arXiv:2203.04248},
year={2022}
}
@article{yu2023white,
title={White-box transformers via sparse rate reduction},
author={Yu, Yaodong and Buchanan, Sam and Pai, Druv and Chu, Tianzhe and Wu, Ziyang and Tong, Shengbang and Haeffele, Benjamin D and Ma, Yi},
journal={arXiv preprint arXiv:2306.01129},
year={2023}
}
@article{saukh2023representing,
title={Representing input transformations by low-dimensional parameter subspaces},
author={Saukh, Olga and Wang, Dong and He, Xiaoxi and Thiele, Lothar},
journal={arXiv preprint arXiv:2305.13536},
year={2023}
}