{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,3,25]],"date-time":"2025-03-25T14:16:28Z","timestamp":1742912188456,"version":"3.40.3"},"publisher-location":"Berlin, Heidelberg","reference-count":26,"publisher":"Springer Berlin Heidelberg","isbn-type":[{"type":"print","value":"9783662449226"},{"type":"electronic","value":"9783662449233"}],"license":[{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"tdm","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"},{"start":{"date-parts":[[2014,1,1]],"date-time":"2014-01-01T00:00:00Z","timestamp":1388534400000},"content-version":"vor","delay-in-days":0,"URL":"https:\/\/www.springernature.com\/gp\/researchers\/text-and-data-mining"}],"content-domain":{"domain":["link.springer.com"],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2014]]},"DOI":"10.1007\/978-3-662-44923-3_6","type":"book-chapter","created":{"date-parts":[[2014,9,24]],"date-time":"2014-09-24T00:04:17Z","timestamp":1411517057000},"page":"76-92","update-policy":"https:\/\/doi.org\/10.1007\/springer_crossmark_policy","source":"Crossref","is-referenced-by-count":2,"title":["A Direct Policy-Search Algorithm for Relational Reinforcement Learning"],"prefix":"10.1007","author":[{"given":"Samuel","family":"Sarjant","sequence":"first","affiliation":[]},{"given":"Bernhard","family":"Pfahringer","sequence":"additional","affiliation":[]},{"given":"Kurt","family":"Driessens","sequence":"additional","affiliation":[]},{"given":"Tony","family":"Smith","sequence":"additional","affiliation":[]}],"member":"297","published-online":{"date-parts":[[2014,9,24]]},"reference":[{"key":"6_CR1","volume-title":"Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning)","author":"RS Sutton","year":"1998","unstructured":"Sutton, R.S., Barto, A.G.: Reinforcement Learning: An Introduction (Adaptive Computation and Machine Learning). The MIT Press, Cambridge (1998)"},{"key":"6_CR2","doi-asserted-by":"publisher","first-page":"7","DOI":"10.1023\/A:1007694015589","volume":"43","author":"S D\u017eeroski","year":"2001","unstructured":"D\u017eeroski, S., De Raedt, L., Driessens, K.: Relational reinforcement learning. Mach. Learn. 43, 7\u201352 (2001)","journal-title":"Mach. Learn."},{"key":"6_CR3","volume-title":"The Logic of Adaptive Behaviour: Knowledge Representation and Algorithms for the Markov Decision Process Framework in First-Order Domains","author":"M van Otterlo","year":"2009","unstructured":"van Otterlo, M.: The Logic of Adaptive Behaviour: Knowledge Representation and Algorithms for the Markov Decision Process Framework in First-Order Domains. IOS Press, Amsterdam (2009)"},{"volume-title":"Reinforcement Learning: State-Of-The-Art","year":"2012","key":"6_CR4","unstructured":"Wiering, M., van Otterlo, M. (eds.): Reinforcement Learning: State-Of-The-Art, vol. 12. Springer-Verlag New York Incorporated, New York (2012)"},{"key":"6_CR5","unstructured":"Driessens, K.: Relational reinforcement learning. Ph.D. thesis, Department of Computer Science, Katholieke Universiteit Leuven, Belgium (2004)"},{"key":"6_CR6","doi-asserted-by":"crossref","unstructured":"Driessens, K., D\u017eeroski, S.: Combining model-based and instance-based learning for first order regression. In: Proceedings of the 22nd International Conference on Machine Learning, pp. 193\u2013200. ACM (2005)","DOI":"10.1145\/1102351.1102376"},{"key":"6_CR7","unstructured":"Croonenborghs, T., Ramon, J., Blockeel, H., Bruynooghe, M.: Online learning and exploiting relational models in reinforcement learning. In: Proceeding of the International Conference on Artificial Intelligence (IJCAI), pp. 726\u2013731 (2007)"},{"issue":"3","key":"6_CR8","doi-asserted-by":"publisher","first-page":"271","DOI":"10.1023\/B:MACH.0000039779.47329.3a","volume":"57","author":"K Driessens","year":"2004","unstructured":"Driessens, K., D\u017eeroski, S.: Integrating guidance into relational reinforcement learning. Mach. Learn. 57(3), 271\u2013304 (2004)","journal-title":"Mach. Learn."},{"key":"6_CR9","unstructured":"Muller, T., van Otterlo, M.: Evolutionary reinforcement learning in relational domains. In: Proceedings of the 7th European Workshop on Reinforcement Learning, Citeseer (2005)"},{"key":"6_CR10","unstructured":"van Otterlo, M., De Vuyst, T.: Evolving and transferring probabilistic policies for relational reinforcement learning. In: BNAIC 2009: Benelux Conference on Artificial Intelligence, October 2009"},{"issue":"1","key":"6_CR11","doi-asserted-by":"publisher","first-page":"89","DOI":"10.1016\/S0377-2217(96)00385-2","volume":"99","author":"RY Rubinstein","year":"1997","unstructured":"Rubinstein, R.Y.: Optimization of computer simulation models with rare events. Eur. J. Oper. Res. 99(1), 89\u2013112 (1997)","journal-title":"Eur. J. Oper. Res."},{"issue":"1","key":"6_CR12","first-page":"659","volume":"30","author":"I Szita","year":"2007","unstructured":"Szita, I., L\u00f6rincz, A.: Learning to play using low-complexity rule-based policies: illustrations through Ms. Pac-Man. J. Artif. Int. Res. 30(1), 659\u2013684 (2007)","journal-title":"Pac-Man. J. Artif. Int. Res."},{"key":"6_CR13","unstructured":"Kistemaker, S., Oliehoek, F., Whiteso, S.: Cross-entropy method for reinforcement learning. Bachelor thesis, University of Amsterdam, Amsterdam, The Netherlands, June 2008"},{"key":"6_CR14","unstructured":"Tak, M.: The cross-entropy method applied to SameGame. Bachelor thesis, Maastricht University, Maastricht, The Netherlands (2010)"},{"issue":"1","key":"6_CR15","doi-asserted-by":"publisher","first-page":"19","DOI":"10.1007\/s10479-005-5724-z","volume":"134","author":"P De Boer","year":"2004","unstructured":"De Boer, P., Kroese, D., Mannor, S., Rubinstein, R.: A tutorial on the cross-entropy method. Ann. Oper. Res. 134(1), 19\u201367 (2004)","journal-title":"Ann. Oper. Res."},{"key":"6_CR16","volume-title":"Genetic Algorithms in Search, Optimization and Machine Learning","author":"DE Goldberg","year":"1989","unstructured":"Goldberg, D.E.: Genetic Algorithms in Search, Optimization and Machine Learning, 1st edn. Addison-Wesley Longman Publishing Co., Inc., Boston (1989)","edition":"1"},{"issue":"2","key":"6_CR17","doi-asserted-by":"publisher","first-page":"149","DOI":"10.1162\/evco.1995.3.2.149","volume":"3","author":"SW Wilson","year":"1995","unstructured":"Wilson, S.W.: Classifier fitness based on accuracy. Evol. Comput. 3(2), 149\u2013175 (1995)","journal-title":"Evol. Comput."},{"key":"6_CR18","series-title":"Lecture Notes in Computer Science (Lecture Notes in Artificial Intelligence)","doi-asserted-by":"publisher","first-page":"169","DOI":"10.1007\/978-3-540-88138-4_10","volume-title":"Learning Classifier Systems","author":"D Mellor","year":"2008","unstructured":"Mellor, D., Mellor, D.: A learning classifier system approach to relational reinforcement learning. In: Takadama, K., et al. (eds.) IWLCS 2006 and IWLCS 2007. LNCS (LNAI), vol. 4998, pp. 169\u2013188. Springer, Heidelberg (2008)"},{"key":"6_CR19","doi-asserted-by":"crossref","unstructured":"Mellor, D.: A learning classifier system approach to relational reinforcement learning. Ph.D. thesis, School of Electrical Engineering and Computer Science, The University of Newcastle, Australia (2008)","DOI":"10.1007\/978-3-540-88138-4_10"},{"issue":"1","key":"6_CR20","first-page":"75","volume":"25","author":"A Fern","year":"2006","unstructured":"Fern, A., Yoon, S., Givan, R.: Approximate policy iteration with a policy language bias: solving relational markov decision processes. J. Artif. Int. Res. 25(1), 75\u2013118 (2006)","journal-title":"J. Artif. Int. Res."},{"key":"6_CR21","doi-asserted-by":"crossref","unstructured":"Kersting, K., Driessens, K.: Non-parametric policy gradients: a unified treatment of propositional and relational domains. In: Proceedings of the 25th International Conference on Machine Learning, ICML \u201908, pp. 456\u2013463. ACM, New York (2008)","DOI":"10.1145\/1390156.1390214"},{"key":"6_CR22","first-page":"153","volume":"5","author":"GD Plotkin","year":"1970","unstructured":"Plotkin, G.D.: A note on inductive generalization. Mach. Intell. 5, 153\u2013163 (1970)","journal-title":"Mach. Intell."},{"key":"6_CR23","unstructured":"Szita, I., L\u00f6rincz, A.: Online variants of the cross-entropy method. Technical report, arXiv:0801.1988 (2008)"},{"key":"6_CR24","unstructured":"Aslam, J.A., Popa, R.A., Rivest, R.L.: On estimating the size and confidence of a statistical audit. In: Proceedings of the USENIX Workshop on Accurate Electronic Voting Technology, EVT\u201907, pp. 8\u20138. USENIX Association, Berkeley (2007)"},{"key":"6_CR25","unstructured":"Sarjant, S.: Policy search based relational reinforcement learning using the cross-entropy method. Ph.D. thesis, The University of Waikato (2013)"},{"key":"6_CR26","unstructured":"Heyden, C.: Implementing a computer player for Carcassonne. Master\u2019s thesis, Maastricht University (2009)"}],"container-title":["Lecture Notes in Computer Science","Inductive Logic Programming"],"original-title":[],"language":"en","link":[{"URL":"https:\/\/link.springer.com\/content\/pdf\/10.1007\/978-3-662-44923-3_6","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2023,2,20]],"date-time":"2023-02-20T11:17:18Z","timestamp":1676891838000},"score":1,"resource":{"primary":{"URL":"https:\/\/link.springer.com\/10.1007\/978-3-662-44923-3_6"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2014]]},"ISBN":["9783662449226","9783662449233"],"references-count":26,"URL":"https:\/\/doi.org\/10.1007\/978-3-662-44923-3_6","relation":{},"ISSN":["0302-9743","1611-3349"],"issn-type":[{"type":"print","value":"0302-9743"},{"type":"electronic","value":"1611-3349"}],"subject":[],"published":{"date-parts":[[2014]]},"assertion":[{"value":"24 September 2014","order":1,"name":"first_online","label":"First Online","group":{"name":"ChapterHistory","label":"Chapter History"}}]}}