% TMLR article. {Pass@K} is brace-protected so that sentence-casing styles
% keep its capitalisation; authors are stored in "Last, First" form.
@article{thrampoulidis2026advantage,
  author  = {Thrampoulidis, Christos and Mahdavi, Sadegh and Deng, Wenlong},
  title   = {Advantage Shaping as Surrogate Reward Maximization: Unifying {Pass@K} Policy Gradients},
  journal = {Transactions on Machine Learning Research},
  issn    = {2835-8856},
  year    = {2026},
  url     = {https://openreview.net/forum?id=R1RhBFUk8t},
}