% Journal article record. The acronym at the end of the title is wrapped in
% braces so that bibliography styles which sentence-case titles keep it in
% upper case; author names use the unambiguous "Last, First" form.
@article{JMLR:v26:24-0720,
  author  = {Shen, Han and Yang, Zhuoran and Chen, Tianyi},
  title   = {Principled Penalty-based Methods for Bilevel Reinforcement Learning and {RLHF}},
  journal = {Journal of Machine Learning Research},
  year    = {2025},
  volume  = {26},
  number  = {114},
  pages   = {1--49},
  url     = {http://jmlr.org/papers/v26/24-0720.html}
}