update RL intro from Felix
This commit is contained in:
@@ -859,7 +859,7 @@
|
||||
% Adam optimizer paper, cited as an arXiv preprint. The arXiv identifier lives
% in the eprint fields (not in `journal`) so any style can format it.
@misc{kingma2014adam,
  title         = {{Adam}: A Method for Stochastic Optimization},
  author        = {Kingma, Diederik P. and Ba, Jimmy},
  year          = {2014},
  eprint        = {1412.6980},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1412.6980},
}
|
||||
|
||||
@@ -879,5 +879,12 @@
|
||||
publisher={MIT press}
|
||||
}
|
||||
|
||||
% Proximal Policy Optimization (PPO) paper, cited as an arXiv preprint. The
% arXiv identifier lives in the eprint fields rather than in `journal`.
@misc{schulman2017proximal,
  title         = {Proximal Policy Optimization Algorithms},
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  year          = {2017},
  eprint        = {1707.06347},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1707.06347},
}
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user