% Journal article in Transactions on Machine Learning Research.
% The url field points to the paper's OpenReview forum page.
@article{xu2026iterative,
  author  = {Xu, Siyuan and Zhang, Hangfan and Guo, Zhimeng and Zhu, Huaisheng and Mao, Yue and Liu, Shicheng},
  title   = {Iterative Preference Optimization with Proximal Policy Regularization for Large Language Model Alignment},
  journal = {Transactions on Machine Learning Research},
  year    = {2026},
  issn    = {2835-8856},
  url     = {https://openreview.net/forum?id=xoxO5Tr4Vh},
}