Back
What Makes and Breaks Safety Fine-tuning? A Mechanistic Study
% Conference paper in the NeurIPS 2024 proceedings (volume 37).
% `editor` lists the editors of the proceedings volume, not of the paper.
% `slug` and `month_numeric` are non-standard fields that standard styles
% ignore; presumably they are read by the publication website -- TODO confirm
% before removing them.
@inproceedings{Jainetal24,
  author        = {Jain, S. and Lubana, E. S. and Oksuz, K. and Joy, T. and Torr, P. and Sanyal, A. and Dokania, P. K.},
  title         = {What Makes and Breaks Safety Fine-tuning? {A} Mechanistic Study},
  booktitle     = {Advances in Neural Information Processing Systems 37 ({NeurIPS} 2024)},
  editor        = {A. Globerson and L. Mackey and D. Belgrave and A. Fan and U. Paquet and J. Tomczak and C. Zhang},
  volume        = {37},
  pages         = {93406--93478},
  publisher     = {Curran Associates, Inc.},
  month         = dec,
  year          = {2024},
  url           = {https://proceedings.neurips.cc/paper_files/paper/2024/file/a9bef53eb7b0e5950d4f2d9c74a16006-Paper-Conference.pdf},
  slug          = {jainetal24-721b15d0-65bd-4334-a544-01904e7d1787},
  month_numeric = {12},
}