Skip to content

Instantly share code, notes, and snippets.

@Pit-Storm
Created August 31, 2020 20:02
Show Gist options
  • Save Pit-Storm/0bd25b1f09b7f35f7085226bd5206142 to your computer and use it in GitHub Desktop.
Save Pit-Storm/0bd25b1f09b7f35f7085226bd5206142 to your computer and use it in GitHub Desktop.
Bibliography (BibTeX) used for https://github.com/Pit-Storm/rl-pong
% This file was created with Citavi 6.4.0.35
@article{Arulkumaran.2017,
  author   = {Arulkumaran, K. and Deisenroth, M. P. and Brundage, M. and Bharath, A. A.},
  title    = {Deep Reinforcement Learning: A Brief Survey},
  journal  = {IEEE Signal Processing Magazine},
  year     = {2017},
  volume   = {34},
  number   = {6},
  pages    = {26--38},
  issn     = {1053-5888},
  doi      = {10.1109/MSP.2017.2743240},
  keywords = {Deep;Einführung;Survey},
}
@article{Barto.1983,
  author   = {Barto, A. G. and Sutton, R. S. and Anderson, C. W.},
  title    = {Neuronlike adaptive elements that can solve difficult learning control problems},
  journal  = {IEEE Transactions on Systems, Man, and Cybernetics},
  year     = {1983},
  volume   = {SMC-13},
  number   = {5},
  pages    = {834--846},
  keywords = {Grundlagen},
}
@article{Bellemare.2013,
  author  = {Bellemare, M. G. and Naddaf, Y. and Veness, J. and Bowling, M.},
  title   = {The {Arcade Learning Environment}: An Evaluation Platform for General Agents},
  journal = {Journal of Artificial Intelligence Research},
  year    = {2013},
  volume  = {47},
  pages   = {253--279},
}
@article{Bellman.1957,
  author   = {Bellman, R.},
  title    = {A {Markovian} Decision Process},
  journal  = {Journal of Mathematics and Mechanics},
  year     = {1957},
  volume   = {6},
  number   = {5},
  pages    = {679--684},
  issn     = {0095-9057},
  url      = {http://www.jstor.org/stable/24900506},
  keywords = {Grundlagen},
}
@inproceedings{Campos.2003,
  author    = {Campos, P. and Langlois, T.},
  title     = {{Abalearn}: A Risk-Sensitive Approach to Self-play Learning in {Abalone}},
  booktitle = {Machine Learning: {ECML} 2003},
  editor    = {Lavra{\v{c}}, N. and Gamberger, D. and Blockeel, H. and Todorovski, L.},
  year      = {2003},
  pages     = {35--46},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-39857-8},
}
@book{Cohen.2013,
  author        = {Cohen, J.},
  title         = {Statistical Power Analysis for the Behavioral Sciences},
  year          = {1988},
  edition       = {Second},
  publisher     = {Erlbaum},
  address       = {Hillsdale, NJ (u.a.)},
  isbn          = {0-8058-0283-5},
  url           = {http://www.utstat.toronto.edu/~brunner/oldclass/378f16/readings/CohenPower.pdf},
  internal-note = {Key year (2013) does not match publication year (1988); key kept unchanged to avoid breaking existing citations},
}
@inproceedings{Degris.2012,
  author    = {Degris, T. and Pilarski, P. M. and Sutton, R. S.},
  title     = {Model-Free reinforcement learning with continuous action in practice},
  booktitle = {2012 American Control Conference (ACC)},
  year      = {2012},
  pages     = {2177--2182},
  publisher = {IEEE},
  isbn      = {978-1-4577-1096-4},
  doi       = {10.1109/ACC.2012.6315022},
  url       = {http://ieeexplore.ieee.org/document/6315022/},
  keywords  = {Actor-Critic;Algorithm},
}
@inproceedings{Fujita.2019,
  author    = {Fujita, Y. and Kataoka, T. and Nagarajan, P. and Ishikawa, T.},
  title     = {{ChainerRL}: A Deep Reinforcement Learning Library},
  booktitle = {Workshop on Deep Reinforcement Learning at the 33rd Conference on Neural Information Processing Systems},
  year      = {2019},
  keywords  = {Implementierung},
}
@article{Grondman.2012,
  author   = {Grondman, I. and Busoniu, L. and Lopes, G. A. D. and Babuska, R.},
  title    = {A Survey of Actor-Critic Reinforcement Learning: Standard and Natural Policy Gradients},
  journal  = {IEEE Transactions on Systems, Man, and Cybernetics, Part C (Applications and Reviews)},
  year     = {2012},
  volume   = {42},
  number   = {6},
  pages    = {1291--1307},
  issn     = {1094-6977},
  doi      = {10.1109/TSMCC.2012.2218595},
  keywords = {Actor-Critic;Survey},
}
@inproceedings{Henderson.2018,
  author    = {Henderson, P. and Islam, R. and Bachman, P. and Pineau, J. and Precup, D. and Meger, D.},
  title     = {Deep Reinforcement Learning That Matters},
  booktitle = {Proceedings of the Thirty-Second {AAAI} Conference on Artificial Intelligence},
  year      = {2018},
  url       = {https://www.aaai.org/ocs/index.php/AAAI/AAAI18/paper/view/16669},
}
@article{Justesen.2020,
  author   = {Justesen, N. and Bontrager, P. and Togelius, J. and Risi, S.},
  title    = {Deep Learning for Video Game Playing},
  journal  = {IEEE Transactions on Games},
  year     = {2020},
  volume   = {12},
  number   = {1},
  pages    = {1--20},
  issn     = {2475-1502},
  doi      = {10.1109/TG.2019.2896986},
  keywords = {Einführung;Video-Games},
}
@book{Kohn.2005,
  author    = {Kohn, W.},
  title     = {Statistik: Datenanalyse und Wahrscheinlichkeitsrechnung},
  year      = {2005},
  publisher = {Springer-Verlag},
  address   = {Berlin/Heidelberg},
  isbn      = {3-540-21677-4},
  doi       = {10.1007/b138042},
}
@article{Konda.2003,
  author   = {Konda, V. R. and Tsitsiklis, J. N.},
  title    = {On Actor-Critic Algorithms},
  journal  = {SIAM Journal on Control and Optimization},
  year     = {2003},
  volume   = {42},
  number   = {4},
  pages    = {1143--1166},
  issn     = {0363-0129},
  doi      = {10.1137/S0363012901385691},
  keywords = {Actor-Critic;Algorithm},
}
@inproceedings{Lagoudakis.2003,
  author    = {Lagoudakis, M. G. and Parr, R.},
  title     = {Reinforcement Learning as Classification: Leveraging Modern Classifiers},
  booktitle = {Proceedings of the Twentieth International Conference on International Conference on Machine Learning},
  series    = {ICML'03},
  year      = {2003},
  pages     = {424--431},
  publisher = {AAAI Press},
  isbn      = {1577351894},
  keywords  = {Grundlagen;Klassifikation},
}
@article{Lavet.2018,
  author   = {Fran{\c{c}}ois-Lavet, V. and Henderson, P. and Islam, R. and Bellemare, M. G. and Pineau, J.},
  title    = {An Introduction to Deep Reinforcement Learning},
  journal  = {Foundations and Trends{\textregistered} in Machine Learning},
  year     = {2018},
  volume   = {11},
  number   = {3-4},
  pages    = {219--354},
  issn     = {1935-8237},
  doi      = {10.1561/2200000071},
  keywords = {Grundlagen},
}
@proceedings{Lavrac.2003,
  title     = {Machine Learning: {ECML} 2003},
  booktitle = {Machine Learning: {ECML} 2003},
  editor    = {Lavra{\v{c}}, N. and Gamberger, D. and Blockeel, H. and Todorovski, L.},
  year      = {2003},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  isbn      = {978-3-540-39857-8},
}
@book{Lehmann.1999,
  author    = {Lehmann, E. L.},
  title     = {Elements of Large-Sample Theory},
  year      = {1999},
  publisher = {Springer-Verlag},
  address   = {New York},
  isbn      = {0-387-98595-6},
  doi       = {10.1007/b98855},
  keywords  = {Grundlagen},
}
@article{Mnih.2015,
  author   = {Mnih, V. and Kavukcuoglu, K. and Silver, D. and Rusu, A. A. and Veness, J. and Bellemare, M. G. and Graves, A. and Riedmiller, M. and Fidjeland, A. K. and Ostrovski, G. and Petersen, S. and Beattie, C. and Sadik, A. and Antonoglou, I. and King, H. and Kumaran, D. and Wierstra, D. and Legg, S. and Hassabis, D.},
  title    = {Human-level control through deep reinforcement learning},
  journal  = {Nature},
  year     = {2015},
  volume   = {518},
  number   = {7540},
  pages    = {529--533},
  doi      = {10.1038/nature14236},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/25719670},
  keywords = {Algorithm;Deep;Q-Learning},
}
@inproceedings{Mnih.2016,
  author    = {Mnih, V. and Badia, A. P. and Mirza, M. and Graves, A. and Harley, T. and Lillicrap, T. P. and Silver, D. and Kavukcuoglu, K.},
  title     = {Asynchronous Methods for Deep Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on International Conference on Machine Learning - Volume 48},
  series    = {ICML'16},
  year      = {2016},
  pages     = {1928--1937},
  publisher = {JMLR.org},
  keywords  = {Actor-Critic;Algorithm;Deep},
}
@article{Silver.2017,
  author  = {Silver, D. and Schrittwieser, J. and Simonyan, K. and Antonoglou, I. and Huang, A. and Guez, A. and Hubert, T. and Baker, L. and Lai, M. and Bolton, A. and Chen, Y. and Lillicrap, T. and Hui, F. and Sifre, L. and {van den Driessche}, G. and Graepel, T. and Hassabis, D.},
  title   = {Mastering the game of {Go} without human knowledge},
  journal = {Nature},
  year    = {2017},
  volume  = {550},
  number  = {7676},
  pages   = {354--359},
  doi     = {10.1038/nature24270},
  url     = {http://www.ncbi.nlm.nih.gov/pubmed/29052630},
}
@article{Sutton.1988,
  author   = {Sutton, R. S.},
  title    = {Learning to predict by the methods of temporal differences},
  journal  = {Machine Learning},
  year     = {1988},
  volume   = {3},
  number   = {1},
  pages    = {9--44},
  issn     = {0885-6125},
  doi      = {10.1007/BF00115009},
  keywords = {Grundlagen;Temporal-Difference},
}
@inproceedings{Sutton.1999,
  author    = {Sutton, R. S. and McAllester, D. and Singh, S. and Mansour, Y.},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Proceedings of the 12th International Conference on Neural Information Processing Systems},
  series    = {NIPS'99},
  year      = {1999},
  pages     = {1057--1063},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  keywords  = {Actor-Critic;Deep},
}
@book{Sutton.2018,
  author    = {Sutton, R. S. and Barto, A.},
  title     = {Reinforcement learning: An introduction},
  year      = {2018},
  edition   = {Second},
  publisher = {The MIT Press},
  address   = {Cambridge, MA and London},
  series    = {Adaptive computation and machine learning},
  isbn      = {9780262039246},
  keywords  = {Grundlagen},
}
@misc{Wang.04.11.2016,
  author        = {Wang, Z. and Bapst, V. and Heess, N. and Mnih, V. and Munos, R. and Kavukcuoglu, K. and de Freitas, N.},
  title         = {Sample Efficient Actor-Critic with Experience Replay},
  year          = {2016},
  eprint        = {1611.01224v2},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1611.01224v2},
  keywords      = {Actor-Critic;Algorithm},
}
@phdthesis{Watkins.1989,
  author   = {Watkins, C. J. C. H.},
  title    = {Learning from Delayed Rewards},
  school   = {King's College},
  year     = {1989},
  address  = {Cambridge, UK},
  url      = {http://www.cs.rhul.ac.uk/~chrisw/new_thesis.pdf},
  keywords = {Grundlagen},
}
@article{Watkins.1992,
  author   = {Watkins, C. J. C. H. and Dayan, P.},
  title    = {Q-learning},
  journal  = {Machine Learning},
  year     = {1992},
  volume   = {8},
  number   = {3-4},
  pages    = {279--292},
  issn     = {0885-6125},
  doi      = {10.1007/BF00992698},
  keywords = {Grundlagen;Q-Learning},
}
@article{Williams.1992,
  author   = {Williams, Ronald J.},
  title    = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
  journal  = {Machine Learning},
  year     = {1992},
  volume   = {8},
  number   = {3-4},
  pages    = {229--256},
  issn     = {0885-6125},
  doi      = {10.1007/BF00992696},
  keywords = {Actor-Critic;Algorithm;Grundlagen},
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment