% Friedrich & Senn (2012), PLOS Computational Biology 8(9).
% The citation key is the article's DOI; keep it stable, since documents cite it.
% `month` uses the predefined macro (bare, unbraced) so styles can localise it.
@article{10.1371/journal.pcbi.1002691,
  author    = {Friedrich, Johannes and Senn, Walter},
  title     = {Spike-based Decision Learning of {Nash} Equilibria in Two-Player Games},
  journal   = {PLOS Computational Biology},
  publisher = {Public Library of Science},
  year      = {2012},
  month     = sep,
  volume    = {8},
  number    = {9},
  pages     = {1--12},
  doi       = {10.1371/journal.pcbi.1002691},
  url       = {https://doi.org/10.1371/journal.pcbi.1002691},
  abstract  = {Author Summary Socio-economic interactions are captured in a game theoretic framework by multiple agents acting on a pool of goods to maximize their own reward. Neuroeconomics tries to explain the agent's behavior in neuronal terms. Classical models in neuroeconomics use temporal-difference(TD)-learning. This algorithm incrementally updates values of state-action pairs, and actions are selected according to a value-based policy. In contrast, policy gradient methods do not introduce values as intermediate steps, but directly derive an action selection policy which maximizes the total expected reward. We consider a decision making network consisting of a population of neurons which, upon presentation of a spatio-temporal spike pattern, encodes binary actions by the population output spike trains and a subsequent majority vote. The action selection policy is parametrized by the strengths of synapses projecting to the population neurons. A gradient learning rule is derived which modifies these synaptic strengths and which depends on four factors, the pre- and postsynaptic activities, the action and the reward. We show that for classical game-theoretical tasks our decision making network endowed with the four-factor learning rule leads to Nash-optimal action selections. It also mimics human decision learning for these same tasks.},
}