% Journal article in Transactions on Machine Learning Research; the url field
% points at the paper's OpenReview forum page.
@article{chen2026stepwise,
  author  = {Chen, Peter and Li, Xiaopeng and Li, Ziniu and Chen, Xi and Lin, Tianyi},
  title   = {Stepwise Guided Policy Optimization: Coloring Your Incorrect Reasoning in {GRPO}},
  journal = {Transactions on Machine Learning Research},
  year    = {2026},
  issn    = {2835-8856},
  url     = {https://openreview.net/forum?id=ALnVAqtshR},
}