% Agarwal, Kakade, Lee, Mahajan (2021): journal article on the theory of
% policy gradient methods; JMLR volume 22, number 98, pages 1--76.
% Author names are stored in "Last, First" form so the surname boundary
% is explicit and never has to be inferred from word position.
@article{JMLR:v22:19-736,
  author  = {Agarwal, Alekh and Kakade, Sham M. and Lee, Jason D. and Mahajan, Gaurav},
  title   = {On the Theory of Policy Gradient Methods: Optimality, Approximation, and Distribution Shift},
  journal = {Journal of Machine Learning Research},
  year    = {2021},
  volume  = {22},
  number  = {98},
  pages   = {1--76},
  url     = {http://jmlr.org/papers/v22/19-736.html},
}