% Conference submission (state: submitted) that is also available as an arXiv preprint.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Inproceedings{ LeibfriedKH2017,
title = {A Deep Learning Approach for Joint Video Frame and Reward Prediction in {Atari} Games},
year = {2017},
month = apr,
pages = {1--17},
abstract = {Reinforcement learning is concerned with learning to interact with environments that are initially unknown. State-of-the-art reinforcement learning approaches, such as DQN, are model-free and learn to act effectively across a wide range of environments such as Atari games, but require huge amounts of data. Model-based techniques are more data-efficient, but need to acquire explicit knowledge about the environment dynamics or the reward structure.
In this paper we take a step towards using model-based techniques in environments with high-dimensional visual state space when system dynamics and the reward structure are both unknown and need to be learned, by demonstrating that it is possible to learn both jointly. Empirical evaluation on five Atari games demonstrate accurate cumulative reward prediction of up to 200 frames. We consider these positive results as opening up important directions for model-based RL in complex, initially unknown environments.},
web_url = {https://arxiv.org/abs/1611.07078},
eprint = {1611.07078},
eprinttype = {arXiv},
event_name = {5th International Conference on Learning Representations (ICLR 2017)},
state = {submitted},
author = {Leibfried, F. and Kushman, N. and Hofmann, K.},
author_annotated = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Kushman N; Hofmann K}
}
% Journal article, Biological Cybernetics 110(2), 2016.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ GeneweinB2016,
title = {Bio-inspired feedback-circuit implementation of discrete, free energy optimizing, winner-take-all computations},
journal = {Biological Cybernetics},
year = {2016},
month = jun,
volume = {110},
number = {2},
pages = {135--150},
abstract = {Bayesian inference and bounded rational decision-making require the accumulation of evidence or utility, respectively, to transform a prior belief or strategy into a posterior probability distribution over hypotheses or actions. Crucially, this process cannot be simply realized by independent integrators, since the different hypotheses and actions also compete with each other. In continuous time, this competitive integration process can be described by a special case of the replicator equation. Here we investigate simple analog electric circuits that implement the underlying differential equation under the constraint that we only permit a limited set of building blocks that we regard as biologically interpretable, such as capacitors, resistors, voltage-dependent conductances and voltage- or current-controlled current and voltage sources. The appeal of these circuits is that they intrinsically perform normalization without requiring an explicit divisive normalization. However, even in idealized simulations, we find that these circuits are very sensitive to internal noise as they accumulate error over time. We discuss in how far neural circuits could implement these operations that might provide a generic competitive principle underlying both perception and action.},
web_url = {http://link.springer.com/content/pdf/10.1007%2Fs00422-016-0684-8.pdf},
state = {published},
DOI = {10.1007/s00422-016-0684-8},
author = {Genewein, T. and Braun, D. A.},
author_annotated = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, PLoS ONE 11(4), 2016; the electronic article ID is stored in EPUB.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ GrauMoyaOB2016,
title = {Decision-Making under Ambiguity Is Modulated by Visual Framing, but Not by Motor vs. Non-Motor Context. Experiments and an Information-Theoretic Ambiguity Model},
journal = {PLoS ONE},
year = {2016},
month = apr,
volume = {11},
number = {4},
pages = {1--21},
abstract = {A number of recent studies have investigated differences in human choice behavior depending on task framing, especially comparing economic decision-making to choice behavior in equivalent sensorimotor tasks. Here we test whether decision-making under ambiguity exhibits effects of task framing in motor vs. non-motor context. In a first experiment, we designed an experience-based urn task with varying degrees of ambiguity and an equivalent motor task where subjects chose between hitting partially occluded targets. In a second experiment, we controlled for the different stimulus design in the two tasks by introducing an urn task with bar stimuli matching those in the motor task. We found ambiguity attitudes to be mainly influenced by stimulus design. In particular, we found that the same subjects tended to be ambiguity-preferring when choosing between ambiguous bar stimuli, but ambiguity-avoiding when choosing between ambiguous urn sample stimuli. In contrast, subjects’ choice pattern was not affected by changing from a target hitting task to a non-motor context when keeping the stimulus design unchanged. In both tasks subjects’ choice behavior was continuously modulated by the degree of ambiguity. We show that this modulation of behavior can be explained by an information-theoretic model of ambiguity that generalizes Bayes-optimal decision-making by combining Bayesian inference with robust decision-making under model uncertainty. Our results demonstrate the benefits of information-theoretic models of decision-making under varying degrees of ambiguity for a given context, but also demonstrate the sensitivity of ambiguity attitudes across contexts that theoretical models struggle to explain.},
web_url = {http://journals.plos.org/plosone/article/asset?id=10.1371%2Fjournal.pone.0153179.PDF},
state = {published},
DOI = {10.1371/journal.pone.0153179},
EPUB = {e0153179},
author = {Grau-Moya, J. and Ortega, P. A. and Braun, D. A.},
author_annotated = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference paper (ECML PKDD 2016) published in a Springer proceedings volume.
% The series number that was fused into the series field is now in volume.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Inproceedings{ GrauMoyaLGB2016,
title = {Planning with Information-Processing Constraints and Model Uncertainty in {Markov} Decision Processes},
year = {2016},
month = sep,
pages = {475--491},
abstract = {Information-theoretic principles for learning and acting have been proposed to solve particular classes of Markov Decision Problems. Mathematically, such approaches are governed by a variational free energy principle and allow solving MDP planning problems with information-processing constraints expressed in terms of a Kullback-Leibler divergence with respect to a reference distribution. Here we consider a generalization of such MDP planners by taking model uncertainty into account. As model uncertainty can also be formalized as an information-processing constraint, we can derive a unified solution from a single generalized variational principle. We provide a generalized value iteration scheme together with a convergence proof. As limit cases, this generalized scheme includes standard value iteration with a known model, Bayesian MDP planning, and robust planning. We demonstrate the benefits of this approach in a grid world simulation.},
web_url = {http://link.springer.com/content/pdf/10.1007%2F978-3-319-46227-1_30.pdf},
editor = {Frasconi, P. and Landwehr, N. and Manco, G. and Vreeken, J.},
publisher = {Springer},
address = {Cham, Switzerland},
series = {Lecture Notes in Computer Science},
volume = {9852},
booktitle = {Machine Learning and Knowledge Discovery in Databases},
event_name = {European Conference on Machine Learning and Principles and Practice of Knowledge Discovery (ECML PKDD 2016)},
event_place = {Riva del Garda, Italy},
state = {published},
ISBN = {978-3-319-46226-4},
DOI = {10.1007/978-3-319-46227-1_30},
author = {Grau-Moya, J. and Leibfried, F. and Genewein, T. and Braun, D. A.},
author_annotated = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference paper (UAI 2016) with an arXiv preprint listed in web_url2.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim;
% the initials for user ID dbraun are normalised to D. A. to match the file's other entries.
@Inproceedings{ LeibfriedB2016,
title = {Bounded Rational Decision-Making in Feedforward Neural Networks},
year = {2016},
month = jun,
pages = {407--416},
abstract = {Bounded rational decision-makers transform sensory input into motor output under limited computational resources. Mathematically, such decision-makers can be modeled as information-theoretic channels with limited transmission rate. Here, we apply this formalism for the first time to multilayer feedforward neural networks. We derive synaptic weight update rules for two scenarios, where either each neuron is considered as a bounded rational decision-maker or the network as a whole. In the update rules, bounded rationality translates into information-theoretically motivated types of regularization in weight space. In experiments on the MNIST benchmark classification task for handwritten digits, we show that such information-theoretic regularization successfully prevents overfitting across different architectures and attains results that are competitive with other recent techniques like dropout, dropconnect and Bayes by backprop, for both ordinary and convolutional neural networks.},
web_url = {http://auai.org/uai2016/proceedings.php},
web_url2 = {https://arxiv.org/abs/1602.08332},
eprint = {1602.08332},
eprinttype = {arXiv},
editor = {Ihler, A. and Janzing, D.},
publisher = {AUAI Press},
address = {Corvallis, OR, USA},
booktitle = {Uncertainty in Artificial Intelligence},
event_name = {32nd Conference on Uncertainty in Artificial Intelligence (UAI 2016)},
event_place = {New York, NY, USA},
state = {published},
ISBN = {978-0-9966431-1-5},
author = {Leibfried, F. and Braun, D. A.},
author_annotated = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference contribution at a NIPS 2016 workshop (Barcelona), dated 9 December 2016.
% author_annotated keeps the exporter's original author string (user ID and affiliation) verbatim;
% the initials for user ID dbraun are normalised to D. A. to match the file's other entries.
@Conference{ Braun2016,
title = {Information-Theoretic Bounded Rationality for Learning and Decision-Making},
year = {2016},
month = dec,
day = {9},
abstract = {We study an information-theoretic framework of bounded rational decision-making that trades off utility maximization against information-processing costs. We apply the basic principle of this framework to perception-action systems and show how the formation of abstractions and decision-making hierarchies depends on information-processing costs.},
web_url = {http://www.utia.cz/imperfectDM},
event_name = {NIPS 2016 Workshop on Imperfect Decision Makers: Admitting Real-World Rationality},
event_place = {Barcelona, Spain},
state = {published},
author = {Braun, D. A.},
author_annotated = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference contribution at CogNovo (Plymouth, UK), dated 2 June 2016.
% author_annotated keeps the exporter's original author string (user ID and affiliation) verbatim;
% the initials for user ID dbraun are normalised to D. A. to match the file's other entries.
@Conference{ Braun2016_2,
title = {Information-processing principles for sensorimotor learning and decision-making},
year = {2016},
month = jun,
day = {2},
abstract = {Recent advances in movement neuroscience suggest that sensorimotor control can be considered as a continuous decision-making process in complex environments in which uncertainty and task variability play a key role. Leading theories of motor control assume that the motor system learns probabilistic models and that motor behavior can be explained as the optimization of payoff or cost criteria under the expectation of these models. Here we discuss how the motor system exploits task variability to build up efficient models and then discuss evidence that humans deviate from Bayes optimal behavior in their movements, because they exhibit effects of model uncertainty. Furthermore, we discuss in how far model uncertainty can be considered as a special case of a general information-processing and decision-making framework inspired by statistical physics and thermodynamics.},
web_url = {http://www.cognovo.eu/events/linked-up-2015.php},
event_name = {CogNovo},
event_place = {Plymouth, UK},
state = {published},
author = {Braun, D. A.},
author_annotated = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference contribution at an ICRA 2016 workshop (Stockholm), dated 16 May 2016.
% author_annotated keeps the exporter's original author string (user ID and affiliation) verbatim.
@Conference{ Genewein2016,
title = {Information-theoretic bounded rationality in perception-action systems},
year = {2016},
month = may,
day = {16},
abstract = {The ability to form abstractions and to generalize well from few samples are hallmarks of human and animal intelligence underlying the unrivaled flexibility of behavior in biological systems. Achieving such flexibility in artificial systems is challenging, particularly because the underlying computational principles are not fully understood. This talk introduces an information-theoretic framework for bounded rational decision-making, that is optimal decision-making under limited computational resources. One consequence of acting optimally under computational limitations is the emergence of natural abstractions which allow for more efficient processing of information. The consequent application of the theoretical framework to perception-action systems results in an interesting optimality principle that leads to a tight coupling between perception and action. As a result, the objective of bounded-optimal perception is not to represent a sensory state as faithfully as possible, but rather to extract the most relevant information for bounded-optimal acting.},
web_url = {http://task-driven-representations.mit.edu/},
event_name = {ICRA 2016 Workshop on Task-Driven Perceptual Representations: Sensing, Planning and Control under Resource Constraints},
event_place = {Stockholm, Sweden},
state = {published},
author = {Genewein, T.},
author_annotated = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference-type entry for an event at Bosch Research (Renningen), dated 29 February 2016; no abstract recorded.
% author_annotated keeps the exporter's original author string (user ID and affiliation) verbatim.
@Conference{ Genewein2016_2,
title = {Hierarchical decision-making in perception-action systems},
year = {2016},
month = feb,
day = {29},
web_url = {http://tim.inversetemperature.net/Talk-Bosch-Renningen/},
event_name = {Cognitive Systems and Machine Learning Group: Bosch Research},
event_place = {Renningen, Germany},
state = {published},
author = {Genewein, T.},
author_annotated = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, Frontiers in Psychology 6(1879), 2015.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ PengB2015_2,
title = {Entropic Movement Complexity Reflects Subjective Creativity Rankings of Visualized Hand Motion Trajectories},
journal = {Frontiers in Psychology},
year = {2015},
month = dec,
volume = {6},
number = {1879},
pages = {1--13},
abstract = {In a previous study we have shown that human motion trajectories can be characterized by translating continuous trajectories into symbol sequences with well-defined complexity measures. Here we test the hypothesis that the motion complexity individuals generate in their movements might be correlated to the degree of creativity assigned by a human observer to the visualized motion trajectories. We asked participants to generate 55 novel hand movement patterns in virtual reality, where each pattern had to be repeated 10 times in a row to ensure reproducibility. This allowed us to estimate a probability distribution over trajectories for each pattern. We assessed motion complexity not only by the previously proposed complexity measures on symbolic sequences, but we also propose two novel complexity measures that can be directly applied to the distributions over trajectories based on the frameworks of Gaussian Processes and Probabilistic Movement Primitives. In contrast to previous studies, these new methods allow computing complexities of individual motion patterns from very few sample trajectories. We compared the different complexity measures to how a group of independent jurors rank ordered the recorded motion trajectories according to their personal creativity judgment. We found three entropic complexity measures that correlate significantly with human creativity judgment and discuss differences between the measures. We also test whether these complexity measures correlate with individual creativity in divergent thinking tasks, but do not find any consistent correlation. Our results suggest that entropic complexity measures of hand motion may reveal domain-specific individual differences in kinesthetic creativity.},
web_url = {http://journal.frontiersin.org/article/10.3389/fpsyg.2015.01879/pdf},
state = {published},
DOI = {10.3389/fpsyg.2015.01879},
author = {Peng, Z. and Braun, D. A.},
author_annotated = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, Cognitive Neuroscience 6(4), 2015.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ OrtegaB2015,
title = {What is epistemic value in free energy models of learning and acting? A bounded rationality perspective},
journal = {Cognitive Neuroscience},
year = {2015},
month = dec,
volume = {6},
number = {4},
pages = {215--216},
abstract = {Free energy models of learning and acting do not only care about utility or extrinsic value, but also about intrinsic value, that is, the information value stemming from probability distributions that represent beliefs or strategies. While these intrinsic values can be interpreted as epistemic values or exploration bonuses under certain conditions, the framework of bounded rationality offers a complementary interpretation in terms of information-processing costs that we discuss here.},
web_url = {http://www.tandfonline.com/doi/abs/10.1080/17588928.2015.1051525},
state = {published},
DOI = {10.1080/17588928.2015.1051525},
author = {Ortega, P. A. and Braun, D. A.},
author_annotated = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, Frontiers in Robotics and AI 2(27), 2015.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ GeneweinLGB2015,
title = {Bounded rationality, abstraction and hierarchical decision-making: an information-theoretic optimality principle},
journal = {Frontiers in Robotics and AI},
year = {2015},
month = oct,
volume = {2},
number = {27},
pages = {1--24},
abstract = {Abstraction and hierarchical information-processing are hallmarks of human and animal intelligence underlying the unrivaled flexibility of behavior in biological systems. Achieving such a flexibility in artificial systems is challenging, even with more and more computational power. Here we investigate the hypothesis that abstraction and hierarchical information-processing might in fact be the consequence of limitations in information-processing power. In particular, we study an information-theoretic framework of bounded rational decision-making that trades off utility maximization against information-processing costs. We apply the basic principle of this framework to perception-action systems with multiple information-processing nodes and derive bounded optimal solutions. We show how the formation of abstractions and decision-making hierarchies depends on information-processing costs. We illustrate the theoretical ideas with example simulations and conclude by formalizing a mathematically unifying optimization principle that could potentially be extended to more complex systems.},
web_url = {http://journal.frontiersin.org/article/10.3389/frobt.2015.00027/pdf},
state = {published},
DOI = {10.3389/frobt.2015.00027},
author = {Genewein, T. and Leibfried, F. and Grau-Moya, J. and Braun, D. A.},
author_annotated = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, Cognition 141, 2015 (no issue number recorded).
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ LeibfriedGB2015,
title = {Signaling equilibria in sensorimotor interactions},
journal = {Cognition},
year = {2015},
month = aug,
volume = {141},
pages = {73--86},
abstract = {Although complex forms of communication like human language are often assumed to have evolved out of more simple forms of sensorimotor signaling, less attention has been devoted to investigate the latter. Here, we study communicative sensorimotor behavior of humans in a two-person joint motor task where each player controls one dimension of a planar motion. We designed this joint task as a game where one player (the sender) possesses private information about a hidden target the other player (the receiver) wants to know about, and where the sender's actions are costly signals that influence the receiver's control strategy. We developed a game-theoretic model within the framework of signaling games to investigate whether subjects' behavior could be adequately described by the corresponding equilibrium solutions. The model predicts both separating and pooling equilibria, in which signaling does and does not occur respectively. We observed both kinds of equilibria in subjects and found that, in line with model predictions, the propensity of signaling decreased with increasing signaling costs and decreasing uncertainty on the part of the receiver. Our study demonstrates that signaling games, which have previously been applied to economic decision-making and animal communication, provide a framework for human signaling behavior arising during sensorimotor interactions in continuous and dynamic environments.},
web_url = {http://www.sciencedirect.com/science/article/pii/S001002771500058X},
state = {published},
DOI = {10.1016/j.cognition.2015.03.008},
author = {Leibfried, F. and Grau-Moya, J. and Braun, D. A.},
author_annotated = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, PLoS Computational Biology 11(8), 2015; the electronic article ID is stored in EPUB.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ GeneweinHRB2015,
title = {Structure Learning in {Bayesian} Sensorimotor Integration},
journal = {PLoS Computational Biology},
year = {2015},
month = aug,
volume = {11},
number = {8},
pages = {1--27},
abstract = {Previous studies have shown that sensorimotor processing can often be described by Bayesian learning, in particular the integration of prior and feedback information depending on its degree of reliability. Here we test the hypothesis that the integration process itself can be tuned to the statistical structure of the environment. We exposed human participants to a reaching task in a three-dimensional virtual reality environment where we could displace the visual feedback of their hand position in a two dimensional plane. When introducing statistical structure between the two dimensions of the displacement, we found that over the course of several days participants adapted their feedback integration process in order to exploit this structure for performance improvement. In control experiments we found that this adaptation process critically depended on performance feedback and could not be induced by verbal instructions. Our results suggest that structural learning is an important meta-learning component of Bayesian sensorimotor integration.},
web_url = {http://www.ploscompbiol.org/article/fetchObject.action?uri=info:doi/10.1371/journal.pcbi.1004369&representation=PDF},
state = {published},
DOI = {10.1371/journal.pcbi.1004369},
EPUB = {e1004369},
author = {Genewein, T. and Hez, E. and Razzaghpanah, Z. and Braun, D. A.},
author_annotated = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Hez E{ehez}{Research Group Sensorimotor Learning and Decision-Making}; Razzaghpanah Z{zrazzaghpanah}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Journal article, Neural Computation 27(8), 2015.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Article{ LeibfriedB2015,
title = {A Reward-Maximizing Spiking Neuron as a Bounded Rational Decision Maker},
journal = {Neural Computation},
year = {2015},
month = jul,
volume = {27},
number = {8},
pages = {1686--1720},
abstract = {Rate distortion theory describes how to communicate relevant information most efficiently over a channel with limited capacity. One of the many applications of rate distortion theory is bounded rational decision making, where decision makers are modeled as information channels that transform sensory input into motor output under the constraint that their channel capacity is limited. Such a bounded rational decision maker can be thought to optimize an objective function that trades off the decision maker's utility or cumulative reward against the information processing cost measured by the mutual information between sensory input and motor output. In this study, we interpret a spiking neuron as a bounded rational decision maker that aims to maximize its expected reward under the computational constraint that the mutual information between the neuron's input and output is upper bounded. This abstract computational constraint translates into a penalization of the deviation between the neuron's instantaneous and average firing behavior. We derive a synaptic weight update rule for such a rate distortion optimizing neuron and show in simulations that the neuron efficiently extracts reward-relevant information from the input by trading off its synaptic strengths against the collected reward.},
web_url = {http://www.mitpressjournals.org/doi/10.1162/NECO_a_00758},
state = {published},
DOI = {10.1162/NECO_a_00758},
author = {Leibfried, F. and Braun, D. A.},
author_annotated = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Workshop paper (NIPS 2015 workshop, Montreal), dated 11 December 2015, with an arXiv preprint listed in web_url2.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Inproceedings{ GrauMoyaB2015,
title = {Adaptive information-theoretic bounded rational decision-making with parametric priors},
year = {2015},
month = dec,
day = {11},
pages = {1--4},
abstract = {Deviations from rational decision-making due to limited computational resources have been studied in the field of bounded rationality, originally proposed by Herbert Simon. There have been a number of different approaches to model bounded rationality ranging from optimality principles to heuristics. Here we take an information-theoretic approach to bounded rationality, where information-processing costs are measured by the relative entropy between a posterior decision strategy and a given fixed prior strategy. In the case of multiple environments, it can be shown that there is an optimal prior rendering the bounded rationality problem equivalent to the rate distortion problem for lossy compression in information theory. Accordingly, the optimal prior and posterior strategies can be computed by the well-known Blahut-Arimoto algorithm which requires the computation of partition sums over all possible outcomes and cannot be applied straightforwardly to continuous problems. Here we derive a sampling-based alternative update rule for the adaptation of prior behaviors of decision-makers and we show convergence to the optimal prior predicted by rate distortion theory. Importantly, the update rule avoids typical infeasible operations such as the computation of partition sums. We show in simulations a proof of concept for discrete action and environment domains. This approach is not only interesting as a generic computational method, but might also provide a more realistic model of human decision-making processes occurring on a fast and a slow time scale.},
web_url = {https://sites.google.com/site/boundedoptimalityworkshop/},
web_url2 = {http://arxiv.org/abs/1511.01710},
eprint = {1511.01710},
eprinttype = {arXiv},
event_name = {NIPS 2015 Workshop on Bounded Optimality and Rational Metareasoning},
event_place = {Montreal, Canada},
state = {published},
author = {Grau-Moya, J. and Braun, D. A.},
author_annotated = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference paper (IEEE ICDL-EPIROB 2015, Providence), published by IEEE.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Inproceedings{ PengB2015,
title = {Developing neural networks with neurons competing for survival},
year = {2015},
month = aug,
pages = {152--153},
abstract = {We study developmental growth in a feedforward neural network model inspired by the survival principle in nature. Each neuron has to select its incoming connections in a way that allow it to fire, as neurons that are not able to fire over a period of time degenerate and die. In order to survive, neurons have to find reoccurring patterns in the activity of the neurons in the preceding layer, because each neuron requires more than one active input at any one time to have enough activation for firing. The sensory input at the lowest layer therefore provides the maximum amount of activation that all neurons compete for. The whole network grows dynamically over time depending on how many patterns can be found and how many neurons can maintain themselves accordingly.
We show in simulations that this naturally leads to abstractions in higher layers that emerge in a unsupervised fashion. When evaluating the network in a supervised learning paradigm, it is clear that our network is not competitive. What is interesting though is that this performance was achieved by neurons that simply struggle for survival and do not know about performance error. In contrast to most studies on neural evolution that rely on a network-wide fitness function, our goal was to show that learning behaviour can appear in a system without being driven by any specific utility function or reward signal.},
web_url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7346133},
publisher = {IEEE},
address = {Piscataway, NJ, USA},
event_name = {5th Joint IEEE International Conference on Development and Learning and on Epigenetic Robotics (IEEE ICDL-EPIROB 2015)},
event_place = {Providence, RI, USA},
state = {published},
ISBN = {978-1-4673-9320-1},
DOI = {10.1109/DEVLRN.2015.7346133},
author = {Peng, Z. and Braun, D. A.},
author_annotated = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Poster at the "Luxembourg out of Equilibrium" workshop, January 2015. Poster is a non-standard entry type kept from the exporter.
% author_annotated keeps the exporter's original author string (user IDs and affiliations) verbatim.
@Poster{ GrauMoyaKB2015,
title = {Non-equilibrium behaviour of information-processing systems with computational constraints},
year = {2015},
month = jan,
pages = {68},
abstract = {Living organisms from single cells to humans need to adapt continuously to respond to changes in their environment. This process of adaptation of behaviour—from ``simple'' regulation of temperature to more complex processes of decision-making can be thought of as improvements in performance according to some fitness function. Here we consider an abstract model of organisms as decision-makers with limited information-processing resources that trade off between maximization of utility (performance) and computational costs measured by a relative entropy.
Isothermal thermodynamic systems formally undergo the same trade-off when subject to changes in their surrounding (e.g. the appearance of a magnetic field). Such systems minimize the free energy to reach equilibrium states that balance internal energy and entropic cost. When there is a fast change in the environment these systems evolve in a non-equilibrium fashion because they are unable to follow exactly the path of equilibrium distributions. In this situation the work spent to change the thermodynamic system is greater than the free energy.
Similarly, the utility of an organism in a fast changing environment is less than the optimal utility it could obtain if it could adapt instantaneously. We quantify the relation between performance losses during adaptation processes and the computational capabilities of decision-makers. We discuss how non-equilibrium equalities like the Jarzynski equation and Crooks’ fluctuation theorem hold both for physical systems and abstract decision makers.},
web_url = {https://sites.google.com/site/luxemburgoutofequilibrium/programm},
event_name = {Workshop on Recent Developments In Non-Equilibrium Physics "Luxembourg out of Equilibrium"},
event_place = {Luxembourg, Luxembourg},
state = {published},
author = {Grau-Moya, J. and Krueger, M. and Braun, D. A.},
author_annotated = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Krueger M; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
% Conference contribution at an IROS 2015 workshop (Hamburg), dated 2 October 2015.
% author_annotated keeps the exporter's original author string (user ID and affiliation) verbatim;
% the initials for user ID dbraun are normalised to D. A. to match the file's other entries.
@Conference{ Braun2015_2,
title = {Model uncertainty in sensorimotor learning and decision-making},
year = {2015},
month = oct,
day = {2},
pages = {10},
abstract = {Recent advances in movement neuroscience suggest that sensorimotor control can be considered as a continuous decision-making process in complex environments in which uncertainty and task variability play a key role. Leading theories of motor control assume that the motor system learns probabilistic models and that motor behavior can be explained as the optimization of payoff or cost criteria under the expectation of these models. Here we discuss evidence that humans deviate from Bayes optimal behavior in their movements, because they are sensitive to model uncertainty. Furthermore, we discuss in how far model uncertainty can be incorporated in optimality models of sensorimotor behavior.},
web_url = {http://www.bgu.ac.il/~nisky/Second_Motor_Control_RAMIS_workshop.htm},
event_name = {IROS 2015 2nd Workshop on The Role of Human Sensorimotor Control in Surgical Robotics},
event_place = {Hamburg, Germany},
state = {published},
author = {Braun, D. A.},
author_annotated = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ NarainSMBv2014,
title = {Structure learning and the Occam's razor principle: A new view of human function acquisition},
journal = {Frontiers in Computational Neuroscience},
year = {2014},
month = {9},
volume = {8},
number = {121},
pages = {1--13},
abstract = {We often encounter pairs of variables in the world whose mutual relationship can be described by a function. After training, human responses closely correspond to these functional relationships. Here we study how humans predict unobserved segments of a function that they have been trained on and we compare how human predictions differ to those made by various function-learning models in the literature. Participants' performance was best predicted by the polynomial functions that generated the observations. Further, participants were able to explicitly report the correct generating function in most cases upon a post-experiment survey. This suggests that humans can abstract functions. To understand how they do so, we modeled human learning using an hierarchical Bayesian framework organized at two levels of abstraction: function learning and parameter learning, and used it to understand the time course of participants' learning as we surreptitiously changed the generating function over time. This Bayesian model selection framework allowed us to analyze the time course of function learning and parameter learning in relative isolation. We found that participants acquired new functions as they changed and even when parameter learning was not completely accurate, the probability that the correct function was learned remained high. Most importantly, we found that humans selected the simplest-fitting function with the highest probability and that they acquired simpler functions faster than more complex ones. Both aspects of this behavior, extent and rate of selection, present evidence that human function learning obeys the Occam's razor principle.},
web_url = {http://journal.frontiersin.org/Journal/10.3389/fncom.2014.00121/pdf},
state = {published},
DOI = {10.3389/fncom.2014.00121},
author = {Narain D{dnarain}{Research Group Sensorimotor Learning and Decision-Making}; Smeets JB; Mamassian P{pascal}; Brenner E; van Beers RJ}
}
@Article{ BraunO2014,
title = {Information-Theoretic Bounded Rationality and Optimality},
journal = {Entropy},
year = {2014},
month = {8},
volume = {16},
number = {8},
pages = {4662--4676},
abstract = {Bounded rationality concerns the study of decision makers with limited information processing resources. Previously, the free energy difference functional has been suggested to model bounded rational decision making, as it provides a natural trade-off between an energy or utility function that is to be optimized and information processing costs that are measured by entropic search costs. The main question of this article is how the information-theoretic free energy model relates to simple ε-optimality models of bounded rational decision making, where the decision maker is satisfied with any action in an ε-neighborhood of the optimal utility. We find that the stochastic policies that optimize the free energy trade-off comply with the notion of ε-optimality. Moreover, this optimality criterion even holds when the environment is adversarial. We conclude that the study of bounded rationality based on ε-optimality criteria that abstract away from the particulars of the information processing constraints is compatible with the information-theoretic free energy model of bounded rationality.},
web_url = {http://www.mdpi.com/1099-4300/16/8/4662},
state = {published},
DOI = {10.3390/e16084662},
author = {Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}; Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ GeneweinB2014,
title = {Occam's Razor in sensorimotor learning},
journal = {Proceedings of the Royal Society of London B},
year = {2014},
month = {5},
volume = {281},
number = {1783},
pages = {1--7},
abstract = {A large number of recent studies suggest that the sensorimotor system uses probabilistic models to predict its environment and makes inferences about unobserved variables in line with Bayesian statistics. One of the important features of Bayesian statistics is Occam's Razor—an inbuilt preference for simpler models when comparing competing models that explain some observed data equally well. Here, we test directly for Occam's Razor in sensorimotor control. We designed a sensorimotor task in which participants had to draw lines through clouds of noisy samples of an unobserved curve generated by one of two possible probabilistic models—a simple model with a large length scale, leading to smooth curves, and a complex model with a short length scale, leading to more wiggly curves. In training trials, participants were informed about the model that generated the stimulus so that they could learn the statistics of each model. In probe trials, participants were then exposed to ambiguous stimuli. In probe trials where the ambiguous stimulus could be fitted equally well by both models, we found that participants showed a clear preference for the simpler model. Moreover, we found that participants’ choice behaviour was quantitatively consistent with Bayesian Occam's Razor. We also show that participants’ drawn trajectories were similar to samples from the Bayesian predictive distribution over trajectories and significantly different from two non-probabilistic heuristics. In two control experiments, we show that the preference of the simpler model cannot be simply explained by a difference in physical effort or by a preference for curve smoothness. Our results suggest that Occam's Razor is a general behavioural principle already present during sensorimotor processing.},
web_url = {http://rspb.royalsocietypublishing.org/content/281/1783/20132952.full.pdf+html},
state = {published},
DOI = {10.1098/rspb.2013.2952},
EPUB = {20132952},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ PengGB2014,
title = {Assessing randomness and complexity in human motion trajectories through analysis of symbolic sequences},
journal = {Frontiers in Human Neuroscience},
year = {2014},
month = {3},
volume = {8},
number = {168},
pages = {1--13},
abstract = {Complexity is a hallmark of intelligent behavior consisting both of regular patterns and random variation. To quantitatively assess the complexity and randomness of human motion, we designed a motor task in which we translated subjects' motion trajectories into strings of symbol sequences. In the first part of the experiment participants were asked to perform self-paced movements to create repetitive patterns, copy pre-specified letter sequences, and generate random movements. To investigate whether the degree of randomness can be manipulated, in the second part of the experiment participants were asked to perform unpredictable movements in the context of a pursuit game, where they received feedback from an online Bayesian predictor guessing their next move. We analyzed symbol sequences representing subjects' motion trajectories with five common complexity measures: predictability, compressibility, approximate entropy, Lempel-Ziv complexity, as well as effective measure complexity. We found that subjects’ self-created patterns were the most complex, followed by drawing movements of letters and self-paced random motion. We also found that participants could change the randomness of their behavior depending on context and feedback. Our results suggest that humans can adjust both complexity and regularity in different movement types and contexts and that this can be assessed with information-theoretic measures of the symbolic sequences generated from movement trajectories.},
web_url = {http://journal.frontiersin.org/Journal/10.3389/fnhum.2014.00168/abstract},
state = {published},
DOI = {10.3389/fnhum.2014.00168},
author = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}; Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ OrtegaB2014,
title = {Generalized Thompson sampling for sequential decision-making and causal inference},
journal = {Complex Adaptive Systems Modeling},
year = {2014},
month = {3},
volume = {2},
number = {2},
pages = {1--23},
abstract = {Purpose
Sampling an action according to the probability that the action is believed to be the optimal one is sometimes called Thompson sampling.
Methods
Although mostly applied to bandit problems, Thompson sampling can also be used to solve sequential adaptive control problems, when the optimal policy is known for each possible environment. The predictive distribution over actions can then be constructed by a Bayesian superposition of the policies weighted by their posterior probability of being optimal.
Results
Here we discuss two important features of this approach. First, we show in how far such generalized Thompson sampling can be regarded as an optimal strategy under limited information processing capabilities that constrain the sampling complexity of the decision-making process. Second, we show how such Thompson sampling can be extended to solve causal inference problems when interacting with an environment in a sequential fashion.
Conclusion
In summary, our results suggest that Thompson sampling might not merely be a useful heuristic, but a principled method to address problems of adaptive sequential decision-making and causal inference.},
web_url = {http://www.casmodeling.com/content/pdf/2194-3206-2-2.pdf},
web_url2 = {http://link.springer.com/content/pdf/10.1186%2Fs40294-014-0004-x.pdf},
state = {published},
DOI = {10.1186/2194-3206-2-2},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ PengB2014,
title = {Curiosity-driven learning with Context Tree Weighting},
year = {2014},
month = {10},
pages = {366--367},
abstract = {In the first simulation, the intrinsic motivation of the agent was given by measuring learning progress through reduction in informational surprise (Figure 1 A-C). This way the agent should first learn the action that is easiest to learn (a1), and then switch to other actions that still allow for learning (a2) and ignore actions that cannot be learned at all (a3). This is exactly what we found in our simple environment. Compared to the original developmental learning algorithm based on learning progress proposed by Oudeyer [2], our Context Tree Weighting approach does not require local experts to do prediction, rather it learns the conditional probability distribution over observations given action in one structure. In the second simulation, the intrinsic motivation of the agent was given by measuring compression progress through improvement in compressibility (Figure 1 D-F). The agent behaves similarly: the agent first concentrates on the action with the most predictable consequence and then switches over to the regular action where the consequence is more difficult to predict, but still learnable. Unlike the previous simulation, random actions are also interesting to some extent because the compressed symbol strings use 8-bit representations, while only 2 bits are required for our observation space. Our preliminary results suggest that Context Tree Weighting might provide a useful representation to study problems of development.},
web_url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6983008},
publisher = {IEEE},
address = {Piscataway, NJ, USA},
event_name = {4th Joint IEEE International Conference on Development and Learning and on Epigenetic Robotics (IEEE ICDL-EPIROB 2014)},
event_place = {Genova, Italy},
state = {published},
ISBN = {978-1-4799-7540-2},
DOI = {10.1109/DEVLRN.2014.6983008},
author = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaBT2014,
title = {Monte Carlo methods for exact \& efficient solution of the generalized optimality equations},
year = {2014},
month = {6},
pages = {4322--4327},
abstract = {Previous work has shown that classical sequential decision making rules, including expectimax and minimax, are limit cases of a more general class of bounded rational planning problems that trade off the value and the complexity of the solution, as measured by its information divergence from a given reference. This allows modeling a range of novel planning problems having varying degrees of control due to resource constraints, risk-sensitivity, trust and model uncertainty. However, so far it has been unclear in what sense information constraints relate to the complexity of planning. In this paper, we introduce Monte Carlo methods to solve the generalized optimality equations in an efficient \& exact way when the inverse temperatures in a generalized decision tree are of the same sign. These methods highlight a fundamental relation between inverse temperatures and the number of Monte Carlo proposals. In particular, it is seen that the number of proposals is essentially independent of the size of the decision tree.},
file_url = {fileadmin/user_upload/files/publications/2014/ICRA-2014-Ortega.pdf},
web_url = {http://www.icra2014.com/},
publisher = {IEEE},
address = {Piscataway, NJ, USA},
event_name = {IEEE International Conference on Robotics and Automation (ICRA 2014)},
event_place = {Hong Kong, China},
state = {published},
ISBN = {978-1-4799-3684-7},
DOI = {10.1109/ICRA.2014.6907488},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}; Tishby N}
}
@Poster{ LeibfriedGB2014,
title = {Sensorimotor interactions as signaling games},
journal = {Cognitive Processing},
year = {2014},
month = {9},
volume = {15},
number = {Supplement 1},
pages = {S50--S51},
abstract = {In our everyday lives, humans not only signal their intentions through verbal communication, but also through body movements (Sebanz et al. 2006; Obhi and Sebanz 2011; Pezzulo et al. 2013), for instance when doing sports to inform team mates about one’s own intended actions or to feint members of an opposing team. We study such sensorimotor signaling in order to investigate how communication emerges and on what variables it depends on. In our setup, there are two players with different aims that have partial control in a joint motor task and where one of the two players possesses private information the other player would like to know about. The question then is under what conditions this private information is shared through a signaling process. We manipulated the critical variables given by the costs of signaling and the uncertainty of the ignorant player. We found that the dependency of both players’ strategies on these variables can be modeled successfully by a game-theoretic analysis. Signaling games are typically investigated within the context of non-cooperative game theory, where each player tries to maximize their own benefit given the other player’s strategy (Cho and Kreps 1987). This allows defining equilibrium strategies where no player can improve their performance by changing their strategy unilaterally. These equilibria are called Bayesian Nash equilibria, which is a generalization of the Nash equilibrium concept in the presence of private information (Harsanyi 1968). In general, signaling games allow both for pooling equilibria, where no information is shared, and for separating equilibria with reliable signaling. In our study we translated the job market signaling game into a sensorimotor task. In the job market signaling game (Spence 1973), there is an applicant—the sender—who has private information about his true working skill, called the type. The future employer—the receiver—cannot directly know about the working skill, but only through a signal—for example, educational certificates—that are the more costly to acquire, the less working skill the applicant has. The sender can choose a costly signal that may or may not transmit information about the type to the receiver. The receiver uses this signal to make a decision by trying to match the payment—the action—to the presumed type (working skill) that she infers from the signal. The sender’s decision about the signal trades off the expected benefits from the receiver’s action against the signaling costs.
To translate this game into a sensorimotor task, we designed a dyadic reaching task that implemented a signaling game with continuous signal, type and action space. Two players sat next to each other in front of a bimanual manipulandum, such that they could not see each others’ faces. In this task, each player controlled one dimension of a two-dimensional cursor position. No other communication than the joint cursor position was allowed. The sender’s dimension encoded the signal that could be used to convey information about a target position (the type) that the receiver wanted to hit, but did not know about. The receiver’s dimension encoded her action that determined the sender’s payoff. The sender’s aim was to maximize a point score that was displayed as a two-dimensional color map. The point score increased with the reach distance of the receiver — so there was an incentive to make the receiver believe that the target is far away. However, the point score also decreased with the magnitude of the signal—so there was an incentive to signal as little as possible due to implied signaling costs. The receiver’s payoff was determined by the difference between his action and the true target position that was revealed after each trial. Each player was instructed about the setup, their aim and the possibility of signaling. The question was whether players’ behavior converged to Bayesian Nash Equilibria under different conditions where we manipulated the signaling cost and the variability of the target position. By fitting participants’ variance of their signaling, we could quantitatively predict the influence of signaling costs and target variability on the amount of signaling. In line with our game-theoretic predictions, we found that increasing signaling costs and decreasing target variability leads in most dyads to less signaling. We conclude that the theory of signaling games provides an appropriate framework to study sensorimotor interactions in the presence of private information.},
web_url = {http://link.springer.com/content/pdf/10.1007%2Fs10339-014-0632-2.pdf},
event_name = {12th Biannual Conference of the German Cognitive Science Society (KogWis 2014)},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.1007/s10339-014-0632-2},
author = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{GeneweinB2014_2,
  title       = {An information-theoretic optimality principle for the formation of abstractions},
  year        = {2014},
  month       = {12},
  day         = {16},
  web_url     = {http://ml.informatik.uni-freiburg.de/events/gso14/program},
  event_name  = {Seventh International Workshop on Guided Self-Organization (GSO 2014)},
  event_place = {Freiburg, Germany},
  state       = {published},
  author      = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Braun2014,
title = {Signaling games in sensorimotor interactions},
journal = {Cognitive Processing},
year = {2014},
month = {9},
day = {28},
volume = {15},
number = {Supplement 1},
pages = {S11},
abstract = {In our everyday lives, humans not only signal their intentions through verbal communication, but also through body movements, for instance when doing sports to inform team mates about one’s own intended actions or to feint members of an opposing team. Here, we study such sensorimotor signaling in order to investigate how communication emerges and on what variables it depends on. In our setup, there are two players with different aims that have partial control in a joint motor task and where one of the two players possesses private information the other player would like to know about. The question then is under what conditions this private information is shared through a signaling process. We manipulated the critical variables given by the costs of signaling and the uncertainty of the ignorant player. We found that the dependency of both players’ strategies on these variables can be modeled successfully by a game-theoretic analysis.},
web_url = {http://link.springer.com/content/pdf/10.1007%2Fs10339-014-0632-2.pdf},
event_name = {12th Biannual Conference of the German Cognitive Science Society (KogWis 2014)},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.1007/s10339-014-0632-2},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ BraunGO2014,
title = {Ellsberg's paradox in sensorimotor learning},
year = {2014},
month = {9},
day = {12},
pages = {84},
abstract = {Both sensorimotor and economic behavior in humans can be understood as optimal decision-making under uncertainty specified by probabilistic models. In many important everyday situations, however, such models might not be available or be ambiguous due to lack of familiarity with the environment. Deviations from optimal decision-making in the face of ambiguity have first been reported by Ellsberg in economic choices between urns of known and unknown composition. Here we designed an urn task similar to Ellsberg's task and an equivalent motor task, where subjects choose between hitting partially occluded targets with differing degree of ambiguity. In both experiments subjects had to choose between a risky and an ambiguous option in every trial. The risky option provided full information about the probabilities of the possible outcomes. The ambiguous option was always characterized by a lack of information with respect to the probabilities. We could manipulate the degree of ambiguity by varying the amount of information revealed about the ambiguous option. In the motor task, we manipulated the extent to which an ambiguous target was occluded that subjects aimed to hit, whereas in the urn task we varied the number of balls drawn from the ambiguous urn before subjects made their decision. This way, we could test the more general hypothesis that decision-makers gradually switch from ambiguity to risk when more information becomes available. Ellsberg's paradox then arises in the limit case in which the ambiguous option gives away no information. We found that subjects tended to avoid ambiguous urns in line with Ellsberg's results, however, the same subjects tended to be ambiguity-loving or neutral in the motor task. One of the most important points of Ellsberg's original experiment was to show that expected utility models—that is models that only care about maximizing expected success—cannot explain subjects' choice behavior under ambiguity. Since then a number of models for decision-making under ambiguity have been proposed. However, few of them are able to dynamically change the degree of ambiguity as new information arrives. Here we employ a multiplier preference model, that is a type of variational preference model for decision-making under ambiguity, and use it under a Bayesian update procedure to integrate novel information. We show that the deviations from optimal decision-making can be explained by such a robust Bayesian decision-making model. Our results suggest that ambiguity is a ubiquitous phenomenon, not only to understand economic choice behavior, but also sensorimotor learning and control.},
web_url = {http://www.bris.ac.uk/decisions-research/conference2014/programme/},
event_name = {Theoretical and Empirical Research in Decision-Making (DMB 2014)},
event_place = {Bristol, UK},
state = {published},
author = {Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}; Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{Braun2014_2,
  title       = {Sensorimotor learning and decision-making in complex environments},
  year        = {2014},
  month       = {6},
  pages       = {7},
  abstract    = {Recent advances in movement neuroscience suggest that sensorimotor control can be considered as a continuous decision-making process in complex environments in which uncertainty and task variability play a key role. Leading theories of motor control assume that the motor system learns probabilistic models and that motor behavior can be explained as the optimization of payoff or cost criteria under the expectation of these models. Here we discuss first how the motor system exploits task variability to build up efficient models and then discuss evidence that humans deviate from Bayes optimal behavior in their movements, because they exhibit effects of model uncertainty. Furthermore, we discuss in how far model uncertainty can be considered as a special case of a general decision-making framework inspired by statistical physics and thermodynamics.},
  web_url     = {https://www.uni-marburg.de/fb13/forschungsgruppen/neurophysik/brainact/downloads/abstracts2014.pdf},
  event_name  = {4th Joint Spring School Multisensory Perception for Action},
  event_place = {Wildbad Kreuth, Germany},
  state       = {published},
  author      = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{Braun2013,
  title    = {Structural learning},
  journal  = {Scholarpedia},
  year     = {2013},
  month    = {10},
  volume   = {8},
  number   = {10},
  pages    = {12312},
  abstract = {Structural learning in motor control refers to a metalearning process whereby an agent extracts (abstract) invariants from its sensorimotor stream when experiencing a range of environments that share similar structure. Such invariants can then be exploited for faster generalization and learning-to-learn when experiencing novel, but related task environments.},
  web_url  = {http://www.scholarpedia.org/article/Structural_learning},
  state    = {published},
  DOI      = {10.4249/scholarpedia.12312},
  author   = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ GrauMoyaHPB2013,
title = {The effect of model uncertainty on cooperation in sensorimotor interactions},
journal = {Journal of the Royal Society Interface},
year = {2013},
month = {10},
volume = {10},
number = {87},
pages = {1--11},
abstract = {Decision-makers have been shown to rely on probabilistic models for perception and action. However, these models can be incorrect or partially wrong in which case the decision-maker has to cope with model uncertainty. Model uncertainty has recently also been shown to be an important determinant of sensorimotor behaviour in humans that can lead to risk-sensitive deviations from Bayes optimal behaviour towards worst-case or best-case outcomes. Here, we investigate the effect of model uncertainty on cooperation in sensorimotor interactions similar to the stag-hunt game, where players develop models about the other player and decide between a pay-off-dominant cooperative solution and a risk-dominant, non-cooperative solution. In simulations, we show that players who allow for optimistic deviations from their opponent model are much more likely to converge to cooperative outcomes. We also implemented this agent model in a virtual reality environment, and let human subjects play against a virtual player. In this game, subjects' pay-offs were experienced as forces opposing their movements. During the experiment, we manipulated the risk sensitivity of the computer player and observed human responses. We found not only that humans adaptively changed their level of cooperation depending on the risk sensitivity of the computer player but also that their initial play exhibited characteristic risk-sensitive biases. Our results suggest that model uncertainty is an important determinant of cooperation in two-player sensorimotor interactions.},
web_url = {http://rsif.royalsocietypublishing.org/content/10/87/20130554.short},
state = {published},
DOI = {10.1098/rsif.2013.0554},
EPUB = {20130554},
author = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Hez E{ehez}{Research Group Sensorimotor Learning and Decision-Making}; Pezzulo G; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ BalduzziOB2012,
title = {Metabolic cost as an organizing principle for cooperative learning},
journal = {Advances in Complex Systems},
year = {2013},
month = {5},
volume = {16},
number = {02n03},
pages = {1--18},
abstract = {This article investigates how neurons can use metabolic cost to facilitate learning at a population level. Although decision-making by individual neurons has been extensively studied, questions regarding how neurons should behave to cooperate effectively remain largely unaddressed. Under assumptions that capture a few basic features of cortical neurons, we show that constraining reward maximization by metabolic cost aligns the information content of actions with their expected reward. Thus, metabolic cost provides a mechanism whereby neurons encode expected reward into their outputs. Further, aside from reducing energy expenditures, imposing a tight metabolic constraint also increases the accuracy of empirical estimates of rewards, increasing the robustness of distributed learning. Finally, we present two implementations of metabolically constrained learning that confirm our theoretical finding. These results suggest that metabolic cost may be an organizing principle underlying the neural code, and may also provide a useful guide to the design and analysis of other cooperating populations.},
web_url = {http://www.worldscientific.com/doi/abs/10.1142/S0219525913500124},
state = {published},
DOI = {10.1142/S0219525913500124},
author = {Balduzzi D{balduzzi}; Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Besserve M{besserve}{Department Physiology of Cognitive Processes}}
}
@Article{ OrtegaB2013,
title = {Thermodynamics as a theory of decision-making with information-processing costs},
journal = {Proceedings of the Royal Society of London A},
year = {2013},
month = {5},
volume = {469},
number = {2153},
pages = {1--18},
abstract = {Perfectly rational decision-makers maximize expected utility, but crucially ignore the resource costs incurred when determining optimal actions. Here, we propose a thermodynamically inspired formalization of bounded rational decision-making where information processing is modelled as state changes in thermodynamic systems that can be quantified by differences in free energy. By optimizing a free energy, bounded rational decision-makers trade off expected utility gains and information-processing costs measured by the relative entropy. As a result, the bounded rational decision-making problem can be rephrased in terms of well-known variational principles from statistical physics. In the limit when computational costs are ignored, the maximum expected utility principle is recovered. We discuss links to existing decision-making frameworks and applications to human decision-making experiments that are at odds with expected utility theory. Since most of the mathematical machinery can be borrowed from statistical physics, the main contribution is to re-interpret the formalism of thermodynamic free-energy differences in terms of bounded rational decision-making and to discuss its relationship to human decision-making experiments.},
web_url = {http://rspa.royalsocietypublishing.org/content/469/2153/20120683.short},
state = {published},
DOI = {10.1098/rspa.2012.0683},
EPUB = {20120683},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ GeneweinB2013_3,
title = {Abstraction in Decision-Makers with Limited Information Processing Capabilities},
year = {2013},
month = {12},
pages = {1-9},
abstract = {A distinctive property of human and animal intelligence is the ability to form abstractions by neglecting irrelevant information which allows to separate structure from noise. From an information theoretic point of view abstractions are desirable because they allow for very efficient information processing. In artificial systems abstractions are often implemented through computationally costly formations of groups or clusters. In this work we establish the relation between the free-energy framework for decision-making and rate-distortion theory and demonstrate how the application of rate-distortion for decision-making leads to the emergence of abstractions. We argue that abstractions are induced due to a limit in information processing capacity.},
file_url = {fileadmin/user_upload/files/publications/2013/NIPS-2013-Workshop-Genewein.pdf},
web_url = {http://www.seas.upenn.edu/~ope/workshop/program.html},
event_name = {NIPS 2013 Workshop Planning with Information Constraints for Control, Reinforcement Learning, Computational Neuroscience, Robotics and Games},
event_place = {Lake Tahoe, NV, USA},
state = {published},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ GrauMoyaB2013,
title = {Bounded Rational Decision-Making in Changing Environments},
year = {2013},
month = {12},
pages = {1-9},
abstract = {A perfectly rational decision-maker chooses the best action with the highest utility gain from a set of possible actions. The optimality principles that describe such decision processes do not take into account the computational costs of finding the optimal action. Bounded rational decision-making addresses this problem by specifically trading off information-processing costs and expected utility. Interestingly, a similar trade-off between energy and entropy arises when describing changes in thermodynamic systems. This similarity has been recently used to describe bounded rational agents. Crucially, this framework assumes that the environment does not change while the decision-maker is computing the optimal policy. When this requirement is not fulfilled, the decision-maker will suffer inefficiencies in utility, that arise because the current policy is optimal for an environment in the past. Here we borrow concepts from non-equilibrium thermodynamics to quantify these inefficiencies and illustrate with simulations its relationship with computational resources.},
file_url = {fileadmin/user_upload/files/publications/2013/NIPS-2013-Workshop-Grau.pdf},
web_url = {http://www.seas.upenn.edu/~ope/workshop/},
event_name = {NIPS 2013 Workshop Planning with Information Constraints for Control, Reinforcement Learning, Computational Neuroscience, Robotics and Games},
event_place = {Lake Tahoe, NV, USA},
state = {published},
author = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaGGBB2012,
title = {A Nonparametric Conjugate Prior Distribution for the Maximizing Argument of a Noisy Function},
year = {2013},
month = {4},
pages = {3014-3022},
abstract = {We propose a novel Bayesian approach to solve stochastic optimization problems that involve finding extrema of noisy, nonlinear functions. Previous work has focused on representing possible functions explicitly, which leads to a two-step procedure of first, doing inference over the function space and second, finding the extrema of these functions. Here we skip the representation step and directly model the distribution over extrema. To this end, we devise a non-parametric conjugate prior where the natural parameter corresponds to a given kernel function and the sufficient statistic is composed of the observed function values. The resulting posterior distribution directly captures the uncertainty over the maximum of the unknown function.},
file_url = {fileadmin/user_upload/files/publications/2012/NIPS-2012-Ortega.pdf},
web_url = {http://nips.cc/Conferences/2012/},
editor = {P. Bartlett, F.C.N. Pereira, L. Bottou, C.J.C. Burges, K.Q. Weinberger},
publisher = {Curran},
address = {Red Hook, NY, USA},
booktitle = {Advances in Neural Information Processing Systems 25},
event_name = {Twenty-Sixth Annual Conference on Neural Information Processing Systems (NIPS 2012)},
event_place = {Lake Tahoe, NV, USA},
state = {published},
ISBN = {978-1-627-48003-1},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Balduzzi D{balduzzi}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ PengGB2013_2,
title = {Towards assessing randomness and complexity in human motion trajectories},
year = {2013},
month = {10},
volume = {14},
pages = {31},
abstract = {Intelligence is often related to the behavioural complexity an agent can generate. For example, when studying human language one typically finds that sequences of letters or words are neither completely random nor totally determinate. This is often assessed quantitatively by studying the conditional entropy of sequences [1]. Similarly, entropy measures can also be used to assess the human ability to generate random numbers — a task that humans often find difficult [2]. Previous studies in motor control have found, for example, that humans cannot significantly increase the level of trajectory randomness in single-joint movements [3]. Here we test human randomness when generating trajectories and compare entropic measurements of random vs. non-random motion. We designed a motor task where participants controlled a cursor by moving a Phantom manipulandum in a three-dimensional virtual environment. The cursor was constrained to move inside a 10x10 grid. In the first part of the experiment participants were asked to (1) perform a rhythmic movement, (2) write pre-specified letters, and (3) perform a random movement. In the second part of the experiment participants were asked again to perform random movements, but this time they received feedback from an artificial intelligence (based on context-tree weighting) predicting their next move. We found that participants can change the randomness of their behaviour through feedback and that excess entropy can be used as a complexity measure of motion trajectories. [1] Rao, R. P. N., Yadav, N., Vahia, M. N., Joglekar, H., Adhikari, R., and Mahadevan, I. (2009). Entropic evidence for linguistic structure in the Indus script. Science, 324(5931):1165. [2] Figurska, M., Stanczyk, M., and Kulesza, K. (2008). Humans cannot consciously generate random numbers sequences: Polemic study. Medical Hypotheses, 70(1):182–185.},
web_url = {http://www.cin.uni-tuebingen.de/fileadmin/content/05_News_%26_Events/Conferences/Conference_130930_NeNa_2013.pdf},
event_name = {14th Conference of Junior Neuroscientists of Tübingen (NeNa 2013)},
event_place = {Schramberg, Germany},
state = {published},
author = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}; Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ PengGB2013,
title = {Assessing randomness in human motion trajectories},
year = {2013},
month = {9},
pages = {49-50},
abstract = {Intelligence is often related to the behavioural complexity an agent can generate. For example, when studying human language one typically finds that sequences of letters or words are neither completely random nor totally determinate. This is often assessed quantitatively by studying the conditional entropy of sequences [1]. Similarly, entropy can be used to assess the human ability to generate random numbers. Humans have often been found to be not very good at generating random numbers [2]. Here we test human randomness when generating trajectories and compare entropic measurements of random vs. non-random motion.
We designed a motor task where participants controlled a cursor by moving a Phantom manipulandum in a three-dimensional virtual environment. The cursor was constrained to move inside a 10x10 grid. In the first part of the experiment participants were asked to (1) perform a rhythmic movement, (2) write pre-specified letters, and (3) perform a random movement. In the second part of the experiment participants were asked again to perform random movements, but this time they received feedback from an artificial intelligence (based on context-tree weighting algorithm) predicting their next move. We found that the conditional entropy revealed different patterns for different motion types and that participants’ motion randomness was only weakly susceptible to feedback.},
web_url = {https://portal.g-node.org/abstracts/bc13/#/doi/nncn.bc2013.0027},
event_name = {Bernstein Conference 2013},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.12751/nncn.bc2013.0027},
author = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}; Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ GeneweinB2013,
title = {Occam's Razor in sensorimotor learning},
year = {2013},
month = {9},
number = {W25},
abstract = {Prediction is a ubiquitous phenomenon in biological systems ranging from basic motor control in animals [1] to scientific hypothesis formation in humans. A central problem in prediction systems is how to choose one’s predictions if there are multiple competing hypotheses that explain the observed data equally well. Following Occam's Razor the simpler explanation requiring fewer assumptions should be preferred. An implicit and elegant way to apply Occam’s Razor is Bayesian inference. In particular, a Bayesian Occam's Razor effect arises when comparing different hypotheses based on their marginal likelihood [2]. Here we investigate whether sensorimotor prediction systems implicitly apply Occam’s Razor in everyday movements. This question is particularly compelling, as recent studies have found evidence that the sensorimotor system makes inferences about unobserved latent variables in a way that is consistent with Bayesian statistics [3,4]. We designed a sensorimotor task, where participants had to draw regression trajectories through a number of observed data points, representing noisy samples of an underlying ideal trajectory. The ideal trajectory was generated by one of two possible Gaussian process (GP) models—a simple model with a large length-scale, leading to smooth trajectories and a complex model with a short length-scale, leading to more wiggly trajectories. Participants were trained on the two different trajectory models and then exposed to ambiguous stimuli to see whether they showed a preference for the simpler model. In case the presented stimulus could be fit equally well by both models, we found that participants showed a clear preference for the simpler model. For general stimuli, we found that participants’ behavior was quantitatively consistent with Bayesian Occam’s Razor.
We could also show that participants’ drawn trajectories were similar to samples from the posterior predictive GP and significantly different from two non-probabilistic heuristics.},
web_url = {https://portal.g-node.org/abstracts/bc13/#/doi/nncn.bc2013.0026},
event_name = {Bernstein Conference 2013},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.12751/nncn.bc2013.0026},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ LeibfriedGB2013,
title = {Signaling in sensorimotor interactions},
year = {2013},
month = {9},
pages = {48},
abstract = {Communication relies on signals that convey information. In non-cooperative game theory, signaling games [1] are used to investigate under what conditions two players may communicate with each other when their ultimate aim is to maximize their own benefit. In this case, one player (the sender) possesses private information (the type) that the other player (the receiver) would like to know. However, signaling this information is costly. At the same time the receiver has control over a variable that influences the sender’s payoff. The key question is under which circumstances so-called Perfect Bayesian Nash equilibria with reliable signaling occur. Here, we investigate whether human sensorimotor behavior conforms with optimal strategies corresponding to these equilibria [2]. We designed a sensorimotor task, where two participants controlled a two-dimensional cursor. Importantly, each player could control only one of the two dimensions. The sender’s dimension could be used to communicate a target position that the receiver had to hit without knowing its location. The sender’s aim was to maximize a point score displayed on a two-dimensional color map. The point score decreased with the magnitude of the signal and increased with the reach distance of the receiver. The sender therefore had a trade-off between communicating the real target distance with the hope that the receiver would learn to interpret this signal and give appropriate reward, and trying to avoid signaling costs. We found that participants developed strategies that resulted in separating equilibria as predicted by analytically derived game theoretic solutions.},
web_url = {https://portal.g-node.org/abstracts/bc13/#/doi/nncn.bc2013.0025},
event_name = {Bernstein Conference 2013},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.12751/nncn.bc2013.0025},
author = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}; Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ GeneweinB2013_2,
title = {Bayesian Occam’s Razor for structure selection in human motor learning},
year = {2013},
month = {6},
day = {28},
abstract = {Learning structure is a key-element for achieving flexible and adaptive control in real-world environments. However, what looks easy and natural in human motor control, remains one of the main challenges in today’s robotics. Here we investigate in a quantitative manner how humans select between several learned structures when faced with novel adaptation problems.
One very successful framework for modeling learning of statistical structures are hierarchical Bayesian models, because of their capability to capture statistical relationships on different levels of abstraction. Another important advantage is the automatic trade-off between prediction error and model complexity that is embodied by Bayesian inference. This so called Bayesian Occam’s Razor results from the marginalization over the model parameters when computing a model’s evidence and has the effect of penalizing unnecessarily complex models — see Figure 1.
Figure 1: Bayesian Occam’s razor. Evidence P(D|M) for a simple model M1 (blue, solid line) and a complex model M2 (red, dashed line). Because both models have to spread unit probability mass over all compatible observations, the simpler model M1 has a higher evidence in the overlapping region D and is thus the more probable model.
A standard paradigm to illustrate the trade-off between prediction error and model complexity is regression, where a curve has to be fitted to noisy observations with the aim of recovering an underlying functional relationship that defines a structure.
Here, we tested human behavior in a sensorimotor regression task, where participants had to draw a curve through noisy observations of an underlying trajectory generated by one of two possible Gaussian process (GP) models with different length-scales, a simple model with long length scale generating mostly smooth trajectories and a complex model with short length scale generating mostly wiggly trajectories. Participants were trained on both models, in order to be able to learn the two different structures. They then observed ambiguous stimuli that could be explained by both models and had to draw regression trajectories, which implied reporting their belief about the generating model.
In ambiguous trials where both models explained the observations equally well, we found that participants strongly preferred the simpler model. In all trials, Bayesian model selection provided a good explanation of subjects’ choice and drawing behavior.
The approach presented in this work might also lend itself for application in robotic tasks, where sensory data has to be disambiguated or a goodness-of-fit versus complexity trade-off has to be performed.},
file_url = {fileadmin/user_upload/files/publications/2013/RSS-2013-Workshop-Genewein.pdf},
web_url = {http://www.ias.tu-darmstadt.de/Workshops/RSS2013},
event_name = {RSS 2013 Workshop on Hierarchical and Structured Learning for Robotics},
event_place = {Berlin, Germany},
state = {published},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Braun2013_2,
title = {Model uncertainty and risk-sensitivity in sensorimotor learning and decision-making},
year = {2013},
month = {6},
day = {24},
abstract = {Recent advances in theoretical neuroscience suggest that sensorimotor control can be considered as a continuous decision-making process in which uncertainty plays a key role. Decision-makers can be risk-sensitive with respect to this uncertainty, first by not only considering the average payoff of an outcome, but also the variability of the payoffs, and second by taking into account model uncertainty in the presence of latent variables. Although such risk-sensitivity is a well-established phenomenon in psychology and economics, it has been much less studied in motor control. In fact, leading theories of motor control, such as optimal feedback control, assume that motor behaviors can be explained as the optimization of a given expected payoff or cost. Here we discuss evidence that humans exhibit risk-sensitivity and model uncertainty in their motor behaviors. Furthermore, we discuss how risk-sensitivity and model uncertainty can be considered as a special case of a general decision-making framework inspired by statistical physics and thermodynamics.},
web_url = {http://www.uni-tuebingen.de/print/fakultaeten/mathematisch-naturwissenschaftliche-fakultaet/fachbereiche/informatik/lehrstuehle/neuronale-informationsverarbeitung/news/newsfullview-archive/article/talk-nip-by-daniel-braun.html},
event_name = {Universität Tübingen: AG Neuronale Informationsverarbeitung},
event_place = {Tübingen, Germany},
state = {published},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Genewein2013,
title = {Bayesian model selection in sensorimotor tasks},
year = {2013},
month = {1},
day = {17},
event_name = {University of Cambridge: Computational and Biological Learning Lab},
event_place = {Cambridge, UK},
state = {published},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ GeneweinB2012,
title = {A sensorimotor paradigm for Bayesian model selection},
journal = {Frontiers in Human Neuroscience},
year = {2012},
month = {10},
volume = {6},
number = {291},
pages = {1-16},
abstract = {Sensorimotor control is thought to rely on predictive internal models in order to cope efficiently with uncertain environments. Recently, it has been shown that humans not only learn different internal models for different tasks, but that they also extract common structure between tasks. This raises the question of how the motor system selects between different structures or models, when each model can be associated with a range of different task-specific parameters. Here we design a sensorimotor task that requires subjects to compensate visuomotor shifts in a three-dimensional virtual reality setup, where one of the dimensions can be mapped to a model variable and the other dimension to the parameter variable. By introducing probe trials that are neutral in the parameter dimension, we can directly test for model selection. We found that model selection procedures based on Bayesian statistics provided a better explanation for subjects’ choice behavior than simple non-probabilistic heuristics. Our experimental design lends itself to the general study of model selection in a sensorimotor context as it allows to separately query model and parameter variables from subjects.},
web_url = {http://www.frontiersin.org/Human_Neuroscience/10.3389/fnhum.2012.00291/abstract},
state = {published},
DOI = {10.3389/fnhum.2012.00291},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Article{ GrauMoyaOB2012,
title = {Risk-Sensitivity in Bayesian Sensorimotor Integration},
journal = {PLoS Computational Biology},
year = {2012},
month = {9},
volume = {8},
number = {9},
pages = {1-7},
abstract = {Information processing in the nervous system during sensorimotor tasks with inherent uncertainty has been shown to be consistent with Bayesian integration. Bayes optimal decision-makers are, however, risk-neutral in the sense that they weigh all possibilities based on prior expectation and sensory evidence when they choose the action with highest expected value. In contrast, risk-sensitive decision-makers are sensitive to model uncertainty and bias their decision-making processes when they do inference over unobserved variables. In particular, they allow deviations from their probabilistic model in cases where this model makes imprecise predictions. Here we test for risk-sensitivity in a sensorimotor integration task where subjects exhibit Bayesian information integration when they infer the position of a target from noisy sensory feedback. When introducing a cost associated with subjects' response, we found that subjects exhibited a characteristic bias towards low cost responses when their uncertainty was high. This result is in accordance with risk-sensitive decision-making processes that allow for deviations from Bayes optimal decision-making in the face of uncertainty. Our results suggest that both Bayesian integration and risk-sensitivity are important factors to understand sensorimotor integration in a quantitative fashion.},
web_url = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1002698},
state = {published},
DOI = {10.1371/journal.pcbi.1002698},
EPUB = {e1002698},
author = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}; Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaB2012,
title = {Adaptive Coding of Actions and Observations},
year = {2012},
month = {12},
pages = {1-4},
abstract = {The application of expected utility theory to construct adaptive agents is both computationally intractable and statistically questionable. To overcome these difficulties, agents need the ability to delay the choice of the optimal policy to a later stage when they have learned more about the environment. How should agents do this optimally? An information-theoretic answer to this question is given by the Bayesian control rule—the solution to the adaptive coding problem when there are not only observations but also actions. This paper reviews the central ideas behind the Bayesian control rule.},
file_url = {fileadmin/user_upload/files/publications/2012/NIPS-Workshop-2012-Ortega.pdf},
web_url = {http://www.montefiore.ulg.ac.be/~tjung/nips12workshop},
event_name = {NIPS 2012 Workshop on Information in Perception and Action},
event_place = {Lake Tahoe, NV, USA},
state = {published},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaB2012_2,
title = {Free Energy and the Generalized Optimality Equations for Sequential Decision Making},
year = {2012},
month = {7},
pages = {1-10},
abstract = {The free energy functional has recently been proposed as a variational principle for bounded rational decision-making, since it instantiates a natural trade-off between utility gains and information processing costs that can be axiomatically derived. Here we apply the free energy principle to general decision trees that include both adversarial and stochastic environments. We derive generalized sequential optimality equations that not only include the Bellman optimality equations as a limit case, but also lead to well-known decision-rules such as Expectimax, Minimax and Expectiminimax. We show how these decision-rules can be derived from a single free energy principle that assigns a resource parameter to each node in the decision tree. These resource parameters express a concrete computational cost that can be measured as the amount of samples that are needed from the distribution that belongs to each node. The free energy principle therefore provides the normative basis for generalized optimality equations that account for both adversarial and stochastic environments.},
file_url = {fileadmin/user_upload/files/publications/2012/EWRL-2012-Ortega.pdf},
web_url = {http://ewrl.wordpress.com/ewrl10-2012/#papers},
event_name = {10th European Workshop on Reinforcement Learning (EWRL 2012)},
event_place = {Edinburgh, Scotland},
state = {published},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}; Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ Genewein2012,
title = {A Sensorimotor Paradigm for Bayesian Model Selection},
year = {2012},
month = {9},
web_url = {http://www.uni-tuebingen.de/einrichtungen/zentrale-einrichtungen/forum-scientiarum/studium/akademien/archiv/sa-2012-2-decisions.html},
event_name = {Tübingen International Summerschool 2012 (TISS 2012)},
event_place = {Heiligkreuztal, Germany},
state = {published},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ GrauMoya2012,
title = {Risk-sensitivity in Bayesian Sensorimotor Integration},
year = {2012},
month = {9},
web_url = {http://www.uni-tuebingen.de/einrichtungen/zentrale-einrichtungen/forum-scientiarum/studium/akademien/archiv/sa-2012-2-decisions.html},
event_name = {Tübingen International Summerschool 2012 (TISS 2012)},
event_place = {Heiligkreuztal, Germany},
state = {published},
author = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Braun2012,
title = {Risk-sensitivity in motor control},
year = {2012},
month = {11},
day = {6},
web_url = {https://www.bcf.uni-freiburg.de/events/bernstein-seminar/20121106-braun},
event_name = {Bernstein Center Freiburg: Bernstein Seminar},
event_place = {Freiburg i.Br., Germany},
state = {published},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ BraunO2012,
title = {Thermodynamics as a theory of bounded rational decision-making},
year = {2012},
month = {9},
day = {13},
abstract = {Perfectly rational decision-makers maximize expected utility, but crucially ignore the resource costs incurred when determining optimal actions. Here we propose an information-theoretic formalization of bounded rational decision-making where decision-makers trade off expected utility and information processing costs. As a result, the decision-making problem can be rephrased in terms of well-known concepts from thermodynamics and statistical physics, such that the same exponential family distributions that govern statistical ensembles can be used to describe the stochastic choice behavior of bounded decision-makers. This framework does not only explain some well-known experimental deviations from expected utility theory, but also reproduces psychophysical choice patterns captured by diffusion-to-bound models. Furthermore, this framework allows rederiving a number of decision-making schemes including risk-sensitive and robust (minimax) decision-making as well as more recent approximately optimal schemes that are based on the relative entropy. In the limit when resource costs are ignored, the maximum expected utility principle is recovered. Since most of the mathematical machinery can be borrowed from statistical physics, the main contribution is to show how a thermodynamic model of bounded rationality can provide a unified view of diverse decision-making phenomena and control schemes.},
web_url = {http://www.snn.ru.nl/cyberstat_granada/Abstract.html},
event_name = {Workshop on Statistical Physics of Inference and Control Theory},
event_place = {Granada, Spain},
state = {published},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}; Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Braun2012_3,
title = {Decision-making in sensorimotor control},
year = {2012},
month = {9},
day = {10},
web_url = {http://www.cin.uni-tuebingen.de/news-events/browse-all-events/detail/view/338/page/2/conference-2nd-cin-systems-neuroscience-retreat.html},
event_name = {2nd CIN Systems Neuroscience Retreat},
event_place = {Reutlingen, Germany},
state = {published},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Braun2012_2,
title = {Risk-sensitivity in sensorimotor control},
year = {2012},
month = {6},
day = {5},
web_url = {http://in.bgu.ac.il/en/engn/biomed/CMCW/Documents/CMCW_ProgramAndCallforPosters.pdf},
event_name = {Eighth Computational Motor Control Workshop at Ben-Gurion University of the Negev},
event_place = {Beer-Sheva, Israel},
state = {published},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ Ortega2011,
title = {Bayesian Causal Induction},
year = {2011},
month = {12},
pages = {1-4},
abstract = {Discovering causal relationships is a hard task, often hindered by the need for intervention, and often requiring large amounts of data to resolve statistical uncertainty. However, humans quickly arrive at useful causal relationships. One possible reason is that humans extrapolate from past experience to new, unseen situations: that is, they encode beliefs over causal invariances, allowing for sound generalization from the observations they obtain from directly acting in the world. Here we outline a Bayesian model of causal induction where beliefs over competing causal hypotheses are modeled using probability trees. Based on this model, we illustrate why, in the general case, we need interventions plus constraints on our causal hypotheses in order to extract causal information from our experience.},
file_url = {fileadmin/user_upload/files/publications/2011/NIPS-2011-Workshop-Ortega.pdf},
web_url = {http://www.dsi.unive.it/PhiMaLe2011/},
event_name = {NIPS 2011 Workshop on Philosophy and Machine Learning},
event_place = {Sierra Nevada, Spain},
state = {published},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Conference{ Braun2015,
title = {Risk-sensitivity in motor control},
year = {2011},
month = {12},
day = {10},
abstract = {Recent advances in theoretical neuroscience suggest that motor control can be considered as a continuous decision-making process in which uncertainty plays a key role. Decision-makers can be risk-sensitive with respect to this uncertainty in that they may not only consider the average payoff of an outcome, but also consider the variability of the payoffs. Although such risk-sensitivity is a well-established phenomenon in psychology and economics, it has been much less studied in motor control. In fact, leading theories of motor control, such as optimal feedback control, assume that motor behaviors can be explained as the optimization of a given expected payoff or cost. Here we discuss evidence that humans exhibit risk-sensitivity in their motor behaviors, thereby demonstrating sensitivity to the variability of “motor costs.” Furthermore, we discuss how risk-sensitivity can be incorporated into optimal feedback control models of motor control. We conclude that risk-sensitivity is an important concept in understanding individual motor behavior under uncertainty.},
web_url = {http://www.bccn-tuebingen.de/events/bernstein-symposium-series-2011/symposium-c/talks-and-abstracts.html},
event_name = {Bernstein Symposium "Bayesian Inference: From Spikes to Behaviour"},
event_place = {Tübingen, Germany},
state = {published},
author = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}