% Genewein & Braun (2014): test for Occam's Razor in sensorimotor control using a curve-drawing task.
@article{GeneweinB2014,
  author   = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {Occam's Razor in sensorimotor learning},
  journal  = {Proceedings of the Royal Society of London B},
  year     = {2014},
  month    = {5},
  volume   = {281},
  number   = {1783},
  pages    = {1-7},
  doi      = {10.1098/rspb.2013.2952},
  epub     = {20132952},
  web_url  = {http://rspb.royalsocietypublishing.org/content/281/1783/20132952.full.pdf+html},
  state    = {published},
  abstract = {A large number of recent studies suggest that the sensorimotor system uses probabilistic models to predict its environment and makes inferences about unobserved variables in line with Bayesian statistics. One of the important features of Bayesian statistics is Occam's Razor—an inbuilt preference for simpler models when comparing competing models that explain some observed data equally well. Here, we test directly for Occam's Razor in sensorimotor control. We designed a sensorimotor task in which participants had to draw lines through clouds of noisy samples of an unobserved curve generated by one of two possible probabilistic models—a simple model with a large length scale, leading to smooth curves, and a complex model with a short length scale, leading to more wiggly curves. In training trials, participants were informed about the model that generated the stimulus so that they could learn the statistics of each model. In probe trials, participants were then exposed to ambiguous stimuli. In probe trials where the ambiguous stimulus could be fitted equally well by both models, we found that participants showed a clear preference for the simpler model. Moreover, we found that participants’ choice behaviour was quantitatively consistent with Bayesian Occam's Razor. We also show that participants’ drawn trajectories were similar to samples from the Bayesian predictive distribution over trajectories and significantly different from two non-probabilistic heuristics. In two control experiments, we show that the preference of the simpler model cannot be simply explained by a difference in physical effort or by a preference for curve smoothness. Our results suggest that Occam's Razor is a general behavioural principle already present during sensorimotor processing.}
}
% Peng, Genewein & Braun (2014): complexity measures applied to symbol sequences derived from human motion trajectories.
@article{PengGB2014,
  author   = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}, Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {Assessing randomness and complexity in human motion trajectories through analysis of symbolic sequences},
  journal  = {Frontiers in Human Neuroscience},
  year     = {2014},
  month    = {3},
  volume   = {8},
  number   = {168},
  pages    = {1-13},
  doi      = {10.3389/fnhum.2014.00168},
  web_url  = {http://journal.frontiersin.org/Journal/10.3389/fnhum.2014.00168/abstract},
  state    = {published},
  abstract = {Complexity is a hallmark of intelligent behavior consisting both of regular patterns and random variation. To quantitatively assess the complexity and randomness of human motion, we designed a motor task in which we translated subjects' motion trajectories into strings of symbol sequences. In the first part of the experiment participants were asked to perform self-paced movements to create repetitive patterns, copy pre-specified letter sequences, and generate random movements. To investigate whether the degree of randomness can be manipulated, in the second part of the experiment participants were asked to perform unpredictable movements in the context of a pursuit game, where they received feedback from an online Bayesian predictor guessing their next move. We analyzed symbol sequences representing subjects' motion trajectories with five common complexity measures: predictability, compressibility, approximate entropy, Lempel-Ziv complexity, as well as effective measure complexity. We found that subjects’ self-created patterns were the most complex, followed by drawing movements of letters and self-paced random motion. We also found that participants could change the randomness of their behavior depending on context and feedback. Our results suggest that humans can adjust both complexity and regularity in different movement types and contexts and that this can be assessed with information-theoretic measures of the symbolic sequences generated from movement trajectories.}
}
% Ortega & Braun (2014): generalized Thompson sampling for sequential decision-making and causal inference.
@article{OrtegaB2014,
  author   = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {Generalized Thompson sampling for sequential decision-making and causal inference},
  journal  = {Complex Adaptive Systems Modeling},
  year     = {2014},
  month    = {3},
  volume   = {2},
  number   = {2},
  pages    = {1-23},
  doi      = {10.1186/2194-3206-2-2},
  web_url  = {http://www.casmodeling.com/content/pdf/2194-3206-2-2.pdf},
  state    = {published},
  abstract = {Purpose
Sampling an action according to the probability that the action is believed to be the optimal one is sometimes called Thompson sampling.
Methods
Although mostly applied to bandit problems, Thompson sampling can also be used to solve sequential adaptive control problems, when the optimal policy is known for each possible environment. The predictive distribution over actions can then be constructed by a Bayesian superposition of the policies weighted by their posterior probability of being optimal.
Results
Here we discuss two important features of this approach. First, we show in how far such generalized Thompson sampling can be regarded as an optimal strategy under limited information processing capabilities that constrain the sampling complexity of the decision-making process. Second, we show how such Thompson sampling can be extended to solve causal inference problems when interacting with an environment in a sequential fashion.
Conclusion
In summary, our results suggest that Thompson sampling might not merely be a useful heuristic, but a principled method to address problems of adaptive sequential decision-making and causal inference.}
}
% Braun (2013): Scholarpedia article on structural learning in motor control.
@article{Braun2013,
  author   = {Braun D{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {Structural learning},
  journal  = {Scholarpedia},
  year     = {2013},
  month    = {10},
  volume   = {8},
  number   = {10},
  pages    = {12312},
  doi      = {10.4249/scholarpedia.12312},
  web_url  = {http://www.scholarpedia.org/article/Structural_learning},
  state    = {published},
  abstract = {Structural learning in motor control refers to a metalearning process whereby an agent extracts (abstract) invariants from its sensorimotor stream when experiencing a range of environments that share similar structure. Such invariants can then be exploited for faster generalization and learning-to-learn when experiencing novel, but related task environments.}
}
% Grau-Moya, Hez, Pezzulo & Braun (2013): model uncertainty and cooperation in a stag-hunt-like sensorimotor game.
@article{GrauMoyaHPB2013,
  author   = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}, Hez E{ehez}{Research Group Sensorimotor Learning and Decision-Making}, Pezzulo G and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {The effect of model uncertainty on cooperation in sensorimotor interactions},
  journal  = {Journal of the Royal Society Interface},
  year     = {2013},
  month    = {10},
  volume   = {10},
  number   = {87},
  pages    = {1-11},
  doi      = {10.1098/rsif.2013.0554},
  epub     = {20130554},
  web_url  = {http://rsif.royalsocietypublishing.org/content/10/87/20130554.short},
  state    = {published},
  abstract = {Decision-makers have been shown to rely on probabilistic models for perception and action. However, these models can be incorrect or partially wrong in which case the decision-maker has to cope with model uncertainty. Model uncertainty has recently also been shown to be an important determinant of sensorimotor behaviour in humans that can lead to risk-sensitive deviations from Bayes optimal behaviour towards worst-case or best-case outcomes. Here, we investigate the effect of model uncertainty on cooperation in sensorimotor interactions similar to the stag-hunt game, where players develop models about the other player and decide between a pay-off-dominant cooperative solution and a risk-dominant, non-cooperative solution. In simulations, we show that players who allow for optimistic deviations from their opponent model are much more likely to converge to cooperative outcomes. We also implemented this agent model in a virtual reality environment, and let human subjects play against a virtual player. In this game, subjects' pay-offs were experienced as forces opposing their movements. During the experiment, we manipulated the risk sensitivity of the computer player and observed human responses. We found not only that humans adaptively changed their level of cooperation depending on the risk sensitivity of the computer player but also that their initial play exhibited characteristic risk-sensitive biases. Our results suggest that model uncertainty is an important determinant of cooperation in two-player sensorimotor interactions.}
}
% Ortega & Braun (2013): thermodynamic free-energy formalization of bounded rational decision-making.
@article{OrtegaB2013,
  author   = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {Thermodynamics as a theory of decision-making with information-processing costs},
  journal  = {Proceedings of the Royal Society of London A},
  year     = {2013},
  month    = {5},
  volume   = {469},
  number   = {2153},
  pages    = {1-18},
  doi      = {10.1098/rspa.2012.0683},
  epub     = {20120683},
  web_url  = {http://rspa.royalsocietypublishing.org/content/469/2153/20120683.short},
  state    = {published},
  abstract = {Perfectly rational decision-makers maximize expected utility, but crucially ignore the resource costs incurred when determining optimal actions. Here, we propose a thermodynamically inspired formalization of bounded rational decision-making where information processing is modelled as state changes in thermodynamic systems that can be quantified by differences in free energy. By optimizing a free energy, bounded rational decision-makers trade off expected utility gains and information-processing costs measured by the relative entropy. As a result, the bounded rational decision-making problem can be rephrased in terms of well-known variational principles from statistical physics. In the limit when computational costs are ignored, the maximum expected utility principle is recovered. We discuss links to existing decision-making frameworks and applications to human decision-making experiments that are at odds with expected utility theory. Since most of the mathematical machinery can be borrowed from statistical physics, the main contribution is to re-interpret the formalism of thermodynamic free-energy differences in terms of bounded rational decision-making and to discuss its relationship to human decision-making experiments.}
}
% Genewein & Braun (2012): sensorimotor task design for testing Bayesian model selection.
@article{GeneweinB2012,
  author   = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {A sensorimotor paradigm for Bayesian model selection},
  journal  = {Frontiers in Human Neuroscience},
  year     = {2012},
  month    = {10},
  volume   = {6},
  number   = {291},
  pages    = {1-16},
  doi      = {10.3389/fnhum.2012.00291},
  web_url  = {http://www.frontiersin.org/Human_Neuroscience/10.3389/fnhum.2012.00291/abstract},
  state    = {published},
  abstract = {Sensorimotor control is thought to rely on predictive internal models in order to cope efficiently with uncertain environments. Recently, it has been shown that humans not only learn different internal models for different tasks, but that they also extract common structure between tasks. This raises the question of how the motor system selects between different structures or models, when each model can be associated with a range of different task-specific parameters. Here we design a sensorimotor task that requires subjects to compensate visuomotor shifts in a three-dimensional virtual reality setup, where one of the dimensions can be mapped to a model variable and the other dimension to the parameter variable. By introducing probe trials that are neutral in the parameter dimension, we can directly test for model selection. We found that model selection procedures based on Bayesian statistics provided a better explanation for subjects’ choice behavior than simple non-probabilistic heuristics. Our experimental design lends itself to the general study of model selection in a sensorimotor context as it allows to separately query model and parameter variables from subjects.}
}
% Grau-Moya, Ortega & Braun (2012): risk-sensitivity in a Bayesian sensorimotor integration task.
@article{GrauMoyaOB2012,
  author   = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}, Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}},
  title    = {Risk-Sensitivity in Bayesian Sensorimotor Integration},
  journal  = {PLoS Computational Biology},
  year     = {2012},
  month    = {9},
  volume   = {8},
  number   = {9},
  pages    = {1-7},
  doi      = {10.1371/journal.pcbi.1002698},
  epub     = {e1002698},
  web_url  = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1002698},
  state    = {published},
  abstract = {Information processing in the nervous system during sensorimotor tasks with inherent uncertainty has been shown to be consistent with Bayesian integration. Bayes optimal decision-makers are, however, risk-neutral in the sense that they weigh all possibilities based on prior expectation and sensory evidence when they choose the action with highest expected value. In contrast, risk-sensitive decision-makers are sensitive to model uncertainty and bias their decision-making processes when they do inference over unobserved variables. In particular, they allow deviations from their probabilistic model in cases where this model makes imprecise predictions. Here we test for risk-sensitivity in a sensorimotor integration task where subjects exhibit Bayesian information integration when they infer the position of a target from noisy sensory feedback. When introducing a cost associated with subjects' response, we found that subjects exhibited a characteristic bias towards low cost responses when their uncertainty was high. This result is in accordance with risk-sensitive decision-making processes that allow for deviations from Bayes optimal decision-making in the face of uncertainty. Our results suggest that both Bayesian integration and risk-sensitivity are important factors to understand sensorimotor integration in a quantitative fashion.}
}
% Turnham, Braun & Wolpert (2012): random vs. gradual visuomotor rotation training and facilitation of learning.
@article{TurnhamBW2012,
  author   = {Turnham EJA, Braun DA{dbraun} and Wolpert DM},
  title    = {Facilitation of learning induced by both random and gradual visuomotor task variation},
  journal  = {Journal of Neurophysiology},
  year     = {2012},
  month    = {2},
  volume   = {107},
  number   = {4},
  pages    = {1111-1122},
  doi      = {10.1152/jn.00635.2011},
  web_url  = {http://jn.physiology.org/content/107/4/1111.full},
  state    = {published},
  abstract = {Motor task variation has been shown to be a key ingredient in skill transfer, retention and structural learning. However, many studies only compare training of randomly varying tasks to either blocked or null training, and it is not clear how experiencing different non-random temporal orderings of tasks might affect meta-learning processes. Here we study learning in human subjects who experience the same set of visuomotor rotations, evenly spaced between -60° and +60°, either in a random order or in an order in which the rotation angle changed gradually. We compared subsequent learning of three test blocks of +30° → -30° → +30° rotations. The groups that underwent either random or gradual training showed significant (p<0.01) facilitation of learning in the test blocks compared to a control group who had not experienced any visuomotor rotations before. We also found that movement initiation times in the random group during the test blocks were significantly (p<0.05) lower than for the gradual or the control group. When we fit a state-space model with fast and slow learning processes to our data, we found that the differences in performance in the test block were consistent with the gradual or random task variation changing the learning and retention rates of only the fast learning process. Such adaptation of learning rates may be a key feature of ongoing meta-learning processes. Our results therefore suggest that both gradual and random task variation can induce meta-learning and that random learning has an advantage in terms of shorter initiation times, suggesting less reliance on cognitive processes.}
}
% Braun, Ortega & Wolpert (2011): two-person coordination games implemented as sensorimotor tasks.
@article{BraunOW2011,
  author   = {Braun DA{dbraun}, Ortega PA{portega} and Wolpert DM},
  title    = {Motor coordination: when two have to act as one},
  journal  = {Experimental Brain Research},
  year     = {2011},
  month    = {6},
  volume   = {211},
  number   = {3-4},
  pages    = {631-641},
  doi      = {10.1007/s00221-011-2642-y},
  web_url  = {http://www.springerlink.com/content/hwr4705050w12qm8/fulltext.pdf},
  state    = {published},
  abstract = {Trying to pass someone walking toward you in a narrow corridor is a familiar example of a two-person motor game that requires coordination. In this study, we investigate coordination in sensorimotor tasks that correspond to classic coordination games with multiple Nash equilibria, such as "choosing sides," "stag hunt," "chicken," and "battle of sexes". In these tasks, subjects made reaching movements reflecting their continuously evolving "decisions" while they received a continuous payoff in the form of a resistive force counteracting their movements. Successful coordination required two subjects to "choose" the same Nash equilibrium in this force-payoff landscape within a single reach. We found that on the majority of trials coordination was achieved. Compared to the proportion of trials in which miscoordination occurred, successful coordination was characterized by several distinct features: an increased mutual information between the players' movement endpoints, an increased joint entropy during the movements, and by differences in the timing of the players' responses. Moreover, we found that the probability of successful coordination depends on the players' initial distance from the Nash equilibria. Our results suggest that two-person coordination arises naturally in motor interactions and is facilitated by favorable initial positions, stereotypical motor pattern, and differences in response times.}
}
% Braun, Aertsen, Paz, Vaadia, Rotter & Mehring (2011): online adaptation and over-trial learning in a macaque visuomotor experiment.
@article{BraunAPVRM2011,
  author   = {Braun DA{dbraun}, Aertsen A, Paz R, Vaadia E, Rotter S and Mehring C},
  title    = {Online adaptation and over-trial learning in macaque visuomotor control},
  journal  = {Frontiers in Computational Neuroscience},
  year     = {2011},
  month    = {6},
  volume   = {5},
  number   = {27},
  pages    = {1-9},
  doi      = {10.3389/fncom.2011.00027},
  web_url  = {http://www.frontiersin.org/computational_neuroscience/10.3389/fncom.2011.00027/abstract},
  state    = {published},
  abstract = {When faced with unpredictable environments, the human motor system has been shown to develop optimized adaptation strategies that allow for online adaptation during the control process. Such online adaptation is to be contrasted to slower over-trial learning that corresponds to a trial-by-trial update of the movement plan. Here we investigate the interplay of both processes, i.e., online adaptation and over-trial learning, in a visuomotor experiment performed by macaques. We show that simple non-adaptive control schemes fail to perform in this task, but that a previously suggested adaptive optimal feedback control model can explain the observed behavior. We also show that over-trial learning as seen in learning and aftereffect curves can be explained by learning in a radial basis function network. Our results suggest that both the process of over-trial learning and the process of online adaptation are crucial to understand visuomotor learning.}
}
% Nagengast, Braun & Wolpert (2011): individual risk attitudes in a motor task with a speed-accuracy trade-off.
@article{NagengastBW2011,
  author   = {Nagengast AJ, Braun DA{dbraun} and Wolpert DM},
  title    = {Risk sensitivity in a motor task with speed-accuracy trade-off},
  journal  = {Journal of Neurophysiology},
  year     = {2011},
  month    = {6},
  volume   = {105},
  number   = {6},
  pages    = {2668-2674},
  doi      = {10.1152/jn.00804.2010},
  web_url  = {http://jn.physiology.org/content/105/6/2668.full.pdf+html},
  state    = {published},
  abstract = {When a racing driver steers a car around a sharp bend, there is a trade-off between speed and accuracy, in that high speed can lead to a skid whereas a low speed increases lap time, both of which can adversely affect the driver's payoff function. While speed-accuracy trade-offs have been studied extensively, their susceptibility to risk sensitivity is much less understood, since most theories of motor control are risk neutral with respect to payoff, i.e., they only consider mean payoffs and ignore payoff variability. Here we investigate how individual risk attitudes impact a motor task that involves such a speed-accuracy trade-off. We designed an experiment where a target had to be hit and the reward (given in points) increased as a function of both subjects' endpoint accuracy and endpoint velocity. As faster movements lead to poorer endpoint accuracy, the variance of the reward increased for higher velocities. We tested subjects on two reward conditions that had the same mean reward but differed in the variance of the reward. A risk-neutral account predicts that subjects should only maximize the mean reward and hence perform identically in the two conditions. In contrast, we found that some (risk-averse) subjects chose to move with lower velocities and other (risk-seeking) subjects with higher velocities in the condition with higher reward variance (risk). This behavior is suboptimal with regard to maximizing the mean number of points but is in accordance with a risk-sensitive account of movement selection. Our study suggests that individual risk sensitivity is an important factor in motor tasks with speed-accuracy trade-offs.}
}
% Turnham, Braun & Wolpert (2011): technique for estimating the covariance structure of the prior over visuomotor transformations.
@article{TurnhamBW2011,
  author   = {Turnham EJA, Braun DA{dbraun} and Wolpert DM},
  title    = {Inferring Visuomotor Priors for Sensorimotor Learning},
  journal  = {PLoS Computational Biology},
  year     = {2011},
  month    = {3},
  volume   = {7},
  number   = {3},
  pages    = {1-13},
  doi      = {10.1371/journal.pcbi.1001112},
  epub     = {e1001112},
  web_url  = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1001112},
  state    = {published},
  abstract = {Sensorimotor learning has been shown to depend on both prior expectations and sensory evidence in a way that is consistent with Bayesian integration. Thus, prior beliefs play a key role during the learning process, especially when only ambiguous sensory information is available. Here we develop a novel technique to estimate the covariance structure of the prior over visuomotor transformations – the mapping between actual and visual location of the hand – during a learning task. Subjects performed reaching movements under multiple visuomotor transformations in which they received visual feedback of their hand position only at the end of the movement. After experiencing a particular transformation for one reach, subjects have insufficient information to determine the exact transformation, and so their second reach reflects a combination of their prior over visuomotor transformations and the sensory evidence from the first reach. We developed a Bayesian observer model in order to infer the covariance structure of the subjects' prior, which was found to give high probability to parameter settings consistent with visuomotor rotations. Therefore, although the set of visuomotor transformations experienced had little structure, the subjects had a strong tendency to interpret ambiguous sensory evidence as arising from rotation-like transformations. We then exposed the same subjects to a highly-structured set of visuomotor transformations, designed to be very different from the set of visuomotor rotations. During this exposure the prior was found to have changed significantly to have a covariance structure that no longer favored rotation-like transformations. In summary, we have developed a technique which can estimate the full covariance structure of a prior in a sensorimotor task and have shown that the prior over visuomotor transformations favor a rotation-like structure. Moreover, through experience of a novel task structure, participants can appropriately alter the covariance structure of their prior.}
}
% Nagengast, Braun & Wolpert (2011): risk-sensitivity as a mean-variance trade-off in movement effort.
@article{NagengastBW2011_2,
  author   = {Nagengast AJ, Braun DA{dbraun} and Wolpert DM},
  title    = {Risk-sensitivity and the mean-variance trade-off: decision making in sensorimotor control},
  journal  = {Proceedings of the Royal Society of London B},
  year     = {2011},
  month    = {1},
  volume   = {278},
  number   = {1716},
  pages    = {2325-2332},
  doi      = {10.1098/rspb.2010.2518},
  web_url  = {http://rspb.royalsocietypublishing.org/content/278/1716/2325.full.pdf+html},
  state    = {published},
  abstract = {Numerous psychophysical studies suggest that the sensorimotor system chooses actions that optimize the average cost associated with a movement. Recently, however, violations of this hypothesis have been reported in line with economic theories of decision-making that not only consider the mean payoff, but are also sensitive to risk, that is the variability of the payoff. Here, we examine the hypothesis that risk-sensitivity in sensorimotor control arises as a mean-variance trade-off in movement costs. We designed a motor task in which participants could choose between a sure motor action that resulted in a fixed amount of effort and a risky motor action that resulted in a variable amount of effort that could be either lower or higher than the fixed effort. By changing the mean effort of the risky action while experimentally fixing its variance, we determined indifference points at which participants chose equiprobably between the sure, fixed amount of effort option and the risky, variable effort option. Depending on whether participants accepted a variable effort with a mean that was higher, lower or equal to the fixed effort, they could be classified as risk-seeking, risk-averse or risk-neutral. Most subjects were risk-sensitive in our task consistent with a mean-variance trade-off in effort, thereby, underlining the importance of risk-sensitivity in computational models of sensorimotor control.}
}
% Braun, Nagengast & Wolpert (2011): review of evidence for risk-sensitivity in human motor behavior.
@article{BraunNW2011,
  author   = {Braun DA{dbraun}, Nagengast AJ and Wolpert DM},
  title    = {Risk-sensitivity in sensorimotor control},
  journal  = {Frontiers in Human Neuroscience},
  year     = {2011},
  month    = {1},
  volume   = {5},
  number   = {1},
  pages    = {1-10},
  doi      = {10.3389/fnhum.2011.00001},
  web_url  = {http://www.frontiersin.org/human_neuroscience/10.3389/fnhum.2011.00001/abstract},
  state    = {published},
  abstract = {Recent advances in theoretical neuroscience suggest that motor control can be considered as a continuous decision-making process in which uncertainty plays a key role. Decision-makers can be risk-sensitive with respect to this uncertainty in that they may not only consider the average payoff of an outcome, but also consider the variability of the payoffs. Although such risk-sensitivity is a well-established phenomenon in psychology and economics, it has been much less studied in motor control. In fact, leading theories of motor control, such as optimal feedback control, assume that motor behaviors can be explained as the optimization of a given expected payoff or cost. Here we review evidence that humans exhibit risk-sensitivity in their motor behaviors, thereby demonstrating sensitivity to the variability of “motor costs.” Furthermore, we discuss how risk-sensitivity can be incorporated into optimal feedback control models of motor control. We conclude that risk-sensitivity is an important concept in understanding individual motor behavior under uncertainty.}
}
% Nagengast, Braun & Wolpert (2010): risk-sensitive optimal feedback control as a model of motor behavior under uncertainty.
@article{NagengastBW2010,
  author   = {Nagengast AJ, Braun DA{dbraun} and Wolpert DM},
  title    = {Risk-Sensitive Optimal Feedback Control Accounts for Sensorimotor Behavior under Uncertainty},
  journal  = {PLoS Computational Biology},
  year     = {2010},
  month    = {7},
  volume   = {6},
  number   = {7},
  pages    = {1-15},
  doi      = {10.1371/journal.pcbi.1000857},
  epub     = {e1000857},
  web_url  = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1000857},
  state    = {published},
  abstract = {Many aspects of human motor behavior can be understood using optimality principles such as optimal feedback control. However, these proposed optimal control models are risk-neutral; that is, they are indifferent to the variability of the movement cost. Here, we propose the use of a risk-sensitive optimal controller that incorporates movement cost variance either as an added cost (risk-averse controller) or as an added value (risk-seeking controller) to model human motor behavior in the face of uncertainty. We use a sensorimotor task to test the hypothesis that subjects are risk-sensitive. Subjects controlled a virtual ball undergoing Brownian motion towards a target. Subjects were required to minimize an explicit cost, in points, that was a combination of the final positional error of the ball and the integrated control cost. By testing subjects on different levels of Brownian motion noise and relative weighting of the position and control cost, we could distinguish between risk-sensitive and risk-neutral control. We show that subjects change their movement strategy pessimistically in the face of increased uncertainty in accord with the predictions of a risk-averse optimal controller. Our results suggest that risk-sensitivity is a fundamental attribute that needs to be incorporated into optimal feedback control models.}
}
% Ortega & Braun (2010): minimum relative entropy principle leading to the Bayesian control rule.
@article{OrtegaB2010_3,
  author   = {Ortega PA{portega} and Braun DA{dbraun}},
  title    = {A Minimum Relative Entropy Principle for Learning and Acting},
  journal  = {Journal of Artificial Intelligence Research},
  year     = {2010},
  month    = {5},
  volume   = {38},
  number   = {1},
  pages    = {475-511},
  doi      = {10.1613/jair.3062},
  web_url  = {http://dl.acm.org/citation.cfm?id=1892223},
  state    = {published},
  abstract = {This paper proposes a method to construct an adaptive agent that is universal with respect to a given class of experts, where each expert is designed specifically for a particular environment. This adaptive control problem is formalized as the problem of minimizing the relative entropy of the adaptive agent from the expert that is most suitable for the unknown environment. If the agent is a passive observer, then the optimal solution is the well-known Bayesian predictor. However, if the agent is active, then its past actions need to be treated as causal interventions on the I/O stream rather than normal probability conditions. Here it is shown that the solution to this new variational problem is given by a stochastic controller called the Bayesian control rule, which implements adaptive behavior as a mixture of experts. Furthermore, it is shown that under mild assumptions, the Bayesian control rule converges to the control law of the most suitable expert.}
}
% Braun, Waldert, Aertsen, Wolpert & Mehring (2010): structure learning in a human sensorimotor association task.
@article{BraunWAWM2010,
  author   = {Braun DA{dbraun}, Waldert S, Aertsen A, Wolpert DM and Mehring C},
  title    = {Structure Learning in a Sensorimotor Association Task},
  journal  = {PLoS ONE},
  year     = {2010},
  month    = {1},
  volume   = {5},
  number   = {1},
  pages    = {1-8},
  doi      = {10.1371/journal.pone.0008973},
  epub     = {e8973},
  web_url  = {http://www.plosone.org/article/info:doi%2F10.1371%2Fjournal.pone.0008973},
  state    = {published},
  abstract = {Learning is often understood as an organism's gradual acquisition of the association between a given sensory stimulus and the correct motor response. Mathematically, this corresponds to regressing a mapping between the set of observations and the set of actions. Recently, however, it has been shown both in cognitive and motor neuroscience that humans are not only able to learn particular stimulus-response mappings, but are also able to extract abstract structural invariants that facilitate generalization to novel tasks. Here we show how such structure learning can enhance facilitation in a sensorimotor association task performed by human subjects. Using regression and reinforcement learning models we show that the observed facilitation cannot be explained by these basic models of learning stimulus-response associations. We show, however, that the observed data can be explained by a hierarchical Bayesian model that performs structure learning. In line with previous results from cognitive tasks, this suggests that hierarchical Bayesian inference might provide a common framework to explain both the learning of specific stimulus-response associations and the learning of abstract structures that are shared by different task environments.}
}
@Article{ BraunMW2010,
title = {Structure learning in action},
journal = {Behavioural Brain Research},
year = {2010},
month = {1},
volume = {206},
number = {2},
pages = {157-165},
abstract = {‘Learning to learn’ phenomena have been widely investigated in cognition, perception and more recently also in action. During concept learning tasks, for example, it has been suggested that characteristic features are abstracted from a set of examples with the consequence that learning of similar tasks is facilitated—a process termed ‘learning to learn’. From a computational point of view such an extraction of invariants can be regarded as learning of an underlying structure. Here we review the evidence for structure learning as a ‘learning to learn’ mechanism, especially in sensorimotor control where the motor system has to adapt to variable environments. We review studies demonstrating that common features of variable environments are extracted during sensorimotor learning and exploited for efficient adaptation in novel tasks. We conclude that structure learning plays a fundamental role in skill learning and may underlie the unsurpassed flexibility and adaptability of the motor system.},
web_url = {http://www.sciencedirect.com/science/article/pii/S0166432809005099},
state = {published},
DOI = {10.1016/j.bbr.2009.08.031},
author = {Braun DA{dbraun}, Mehring C and Wolpert DM}
}
@Article{ BraunOW2009,
title = {Nash Equilibria in Multi-Agent Motor Interactions},
journal = {PLoS Computational Biology},
year = {2009},
month = {8},
volume = {5},
number = {8},
pages = {1-8},
abstract = {Social interactions in classic cognitive games like the ultimatum game or the prisoner's dilemma typically lead to Nash equilibria when multiple competitive decision makers with perfect knowledge select optimal strategies. However, in evolutionary game theory it has been shown that Nash equilibria can also arise as attractors in dynamical systems that can describe, for example, the population dynamics of microorganisms. Similar to such evolutionary dynamics, we find that Nash equilibria arise naturally in motor interactions in which players vie for control and try to minimize effort. When confronted with sensorimotor interaction tasks that correspond to the classical prisoner's dilemma and the rope-pulling game, two-player motor interactions led predominantly to Nash solutions. In contrast, when a single player took both roles, playing the sensorimotor game bimanually, cooperative solutions were found. Our methodology opens up a new avenue for the study of human motor interactions within a game theoretic framework, suggesting that the coupling of motor systems can lead to game theoretic solutions.},
web_url = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1000468},
state = {published},
DOI = {10.1371/journal.pcbi.1000468},
EPUB = {e1000468},
author = {Braun DA{dbraun}, Ortega PA{portega} and Wolpert DM}
}
@Article{ NagengastBW2009,
title = {Optimal Control Predicts Human Performance on Objects with Internal Degrees of Freedom},
journal = {PLoS Computational Biology},
year = {2009},
month = {6},
volume = {5},
number = {6},
pages = {1-15},
abstract = {On a daily basis, humans interact with a vast range of objects and tools. A class of tasks, which can pose a serious challenge to our motor skills, are those that involve manipulating objects with internal degrees of freedom, such as when folding laundry or using a lasso. Here, we use the framework of optimal feedback control to make predictions of how humans should interact with such objects. We confirm the predictions experimentally in a two-dimensional object manipulation task, in which subjects learned to control six different objects with complex dynamics. We show that the non-intuitive behavior observed when controlling objects with internal degrees of freedom can be accounted for by a simple cost function representing a trade-off between effort and accuracy. In addition to using a simple linear, point-mass optimal control model, we also used an optimal control model, which considers the non-linear dynamics of the human arm. We find that the more realistic optimal control model captures aspects of the data that cannot be accounted for by the linear model or other previous theories of motor control. The results suggest that our everyday interactions with objects can be understood by optimality principles and advocate the use of more realistic optimal control models for the study of human motor neuroscience.},
web_url = {http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1000419},
state = {published},
DOI = {10.1371/journal.pcbi.1000419},
EPUB = {e1000419},
author = {Nagengast AJ, Braun DA{dbraun} and Wolpert DM}
}
@Article{ BraunAWM2009_2,
title = {Learning Optimal Adaptation Strategies in Unpredictable Motor Tasks},
journal = {Journal of Neuroscience},
year = {2009},
month = {5},
volume = {29},
number = {20},
pages = {6472-6478},
abstract = {Picking up an empty milk carton that we believe to be full is a familiar example of adaptive control, because the adaptation process of estimating the carton's weight must proceed simultaneously with the control process of moving the carton to a desired location. Here we show that the motor system initially generates highly variable behavior in such unpredictable tasks but eventually converges to stereotyped patterns of adaptive responses predicted by a simple optimality principle. These results suggest that adaptation can become specifically tuned to identify task-specific parameters in an optimal manner.},
web_url = {http://www.jneurosci.org/content/29/20/6472.full.pdf+html},
state = {published},
DOI = {10.1523/JNEUROSCI.3075-08.2009},
author = {Braun DA{dbraun}, Aertsen A, Wolpert DM and Mehring C}
}
@Article{ BraunAWM2009,
title = {Motor Task Variation Induces Structural Learning},
journal = {Current Biology},
year = {2009},
month = {2},
volume = {19},
number = {4},
pages = {352-357},
abstract = {When we have learned a motor skill, such as cycling or ice-skating, we can rapidly generalize to novel tasks, such as motorcycling or rollerblading [1,2,3,4,5,6,7,8]. Such facilitation of learning could arise through two distinct mechanisms by which the motor system might adjust its control parameters. First, fast learning could simply be a consequence of the proximity of the original and final settings of the control parameters. Second, by structural learning [9,10,11,12,13,14], the motor system could constrain the parameter adjustments to conform to the control parameters' covariance structure. Thus, facilitation of learning would rely on the novel task parameters' lying on the structure of a lower-dimensional subspace that can be explored more efficiently. To test between these two hypotheses, we exposed subjects to randomly varying visuomotor tasks of fixed structure. Although such randomly varying tasks are thought to prevent learning, we show that when subsequently presented with novel tasks, subjects exhibit three key features of structural learning: facilitated learning of tasks with the same structure, strong reduction in interference normally observed when switching between tasks that require opposite control strategies, and preferential exploration along the learned structure. These results suggest that skill generalization relies on task variation and structural learning.},
web_url = {http://www.sciencedirect.com/science/article/pii/S0960982209006083},
state = {published},
DOI = {10.1016/j.cub.2009.01.036},
author = {Braun DA{dbraun}, Aertsen A, Wolpert DM and Mehring C}
}
@Article{ BraunW2007,
title = {Optimal Control: When Redundancy Matters},
journal = {Current Biology},
year = {2007},
month = {11},
volume = {17},
number = {22},
pages = {R973-R975},
abstract = {A new experiment provides support for optimal feedback control as a theoretical basis of how the motor system responds to perturbations in a context-dependent manner.},
web_url = {http://www.sciencedirect.com/science/article/pii/S096098220701977X},
state = {published},
DOI = {10.1016/j.cub.2007.09.018},
author = {Braun DA{dbraun} and Wolpert DM}
}
@Inproceedings{ OrtegaBT2014,
title = {Monte Carlo Methods for Exact \& Efficient Solution of the Generalized Optimality Equations},
year = {2014},
month = {6},
pages = {4322-4327},
abstract = {Previous work has shown that classical sequential decision making rules, including expectimax and minimax, are limit cases of a more general class of bounded rational planning problems that trade off the value and the complexity of the solution, as measured by its information divergence from a given reference. This allows modeling a range of novel planning problems having varying degrees of control due to resource constraints, risk-sensitivity, trust and model
uncertainty. However, so far it has been unclear in what sense information constraints relate to the complexity of planning.
In this paper, we introduce Monte Carlo methods to solve the
generalized optimality equations in an efficient \& exact way
when the inverse temperatures in a generalized decision tree
are of the same sign. These methods highlight a fundamental
relation between inverse temperatures and the number of
Monte Carlo proposals. In particular, it is seen that the number of proposals is essentially independent of the size of the decision tree.},
web_url = {https://cld.pt/dl/download/f9658d95-0c61-4ebd-8d70-3cada6be2c0b/ICRA2014/media/files/1560.pdf},
publisher = {IEEE},
address = {Piscataway, NJ, USA},
event_name = {IEEE International Conference on Robotics and Automation},
event_place = {Hong Kong, China},
state = {published},
ISBN = {978-1-4799-3684-7},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}, Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making} and Tishby N}
}
@Inproceedings{ GeneweinB2013_3,
title = {Abstraction in Decision-Makers with Limited Information Processing Capabilities},
year = {2013},
month = {12},
pages = {1-9},
abstract = {A distinctive property of human and animal intelligence is the ability to form abstractions by neglecting irrelevant information which allows to separate structure from noise. From an information theoretic point of view abstractions are desirable because they allow for very efficient information processing. In artificial systems abstractions are often implemented through computationally costly formations of groups or clusters. In this work we establish the relation between the free-energy framework for
decision-making and rate-distortion theory and demonstrate how the application of rate-distortion for decision-making leads to the emergence of abstractions. We argue that abstractions are induced due to a limit in information processing capacity.},
file_url = {fileadmin/user_upload/files/publications/2013/NIPS-2013-Workshop-Genewein.pdf},
web_url = {http://www.seas.upenn.edu/~ope/workshop/program.html},
event_name = {NIPS 2013 Workshop Planning with Information Constraints for Control, Reinforcement Learning, Computational Neuroscience, Robotics and Games},
event_place = {Lake Tahoe, NV, USA},
state = {published},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ GrauMoyaB2013,
title = {Bounded Rational Decision-Making in Changing Environments},
year = {2013},
month = {12},
pages = {1-9},
abstract = {A perfectly rational decision-maker chooses the best action with the highest utility gain from a set of possible actions. The optimality principles that describe such decision processes do not take into account the computational costs of finding the optimal action. Bounded rational decision-making addresses this problem by specifically trading off information-processing costs and expected utility. Interestingly, a similar trade-off between energy and entropy arises when describing changes in
thermodynamic systems. This similarity has been recently used to describe bounded rational agents. Crucially, this framework assumes that the environment does not change while the decision-maker is computing the optimal policy. When this requirement is not fulfilled, the decision-maker will suffer inefficiencies in utility, that arise because the current policy is optimal for an environment in the past. Here we borrow concepts from non-equilibrium thermodynamics to quantify these inefficiencies and
illustrate with simulations its relationship with computational resources.},
file_url = {fileadmin/user_upload/files/publications/2013/NIPS-2013-Workshop-Grau.pdf},
web_url = {http://www.seas.upenn.edu/~ope/workshop/},
event_name = {NIPS 2013 Workshop Planning with Information Constraints for Control, Reinforcement Learning, Computational Neuroscience, Robotics and Games},
event_place = {Lake Tahoe, NV, USA},
state = {published},
author = {Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaGGBB2012,
title = {A Nonparametric Conjugate Prior Distribution for the Maximizing Argument of a Noisy Function},
year = {2013},
month = {4},
pages = {3014-3022},
abstract = {We propose a novel Bayesian approach to solve stochastic optimization problems that involve finding extrema of noisy, nonlinear functions. Previous work has focused on representing possible functions explicitly, which leads to a two-step procedure of first, doing inference over the function space and second, finding the extrema of these functions. Here we skip the representation step and directly model the distribution over extrema. To this end, we devise a non-parametric conjugate prior where the natural parameter corresponds to a given kernel function and the sufficient statistic is composed of the observed function values. The resulting posterior distribution directly captures the uncertainty over the maximum of the unknown function.},
file_url = {fileadmin/user_upload/files/publications/2012/NIPS-2012-Ortega.pdf},
web_url = {http://nips.cc/Conferences/2012/},
editor = {Bartlett, P. , F.C.N. Pereira, L. Bottou, C.J.C. Burges, K.Q. Weinberger},
publisher = {Curran},
address = {Red Hook, NY, USA},
booktitle = {Advances in Neural Information Processing Systems 25},
event_name = {Twenty-Sixth Annual Conference on Neural Information Processing Systems (NIPS 2012)},
event_place = {Lake Tahoe, NV, USA},
state = {published},
ISBN = {978-1-627-48003-1},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making}, Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making}, Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making}, Balduzzi D{balduzzi} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaB2012,
title = {Adaptive Coding of Actions and Observations},
year = {2012},
month = {12},
pages = {1-4},
abstract = {The application of expected utility theory to construct adaptive agents is both computationally intractable and statistically questionable. To overcome these difficulties,
agents need the ability to delay the choice of the optimal policy to a later stage when they have learned more about the environment. How should agents do this optimally? An information-theoretic answer to this question is given by the Bayesian control rule—the solution to the adaptive coding problem when there are not only observations but also actions. This paper reviews the central ideas behind the Bayesian control rule.},
file_url = {fileadmin/user_upload/files/publications/2012/NIPS-Workshop-2012-Ortega.pdf},
web_url = {http://www.montefiore.ulg.ac.be/~tjung/nips12workshop},
event_name = {NIPS Workshop on Information in Perception and Action 2012},
event_place = {Lake Tahoe, NV, USA},
state = {published},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaB2012_2,
title = {Free Energy and the Generalized Optimality Equations for Sequential Decision Making},
year = {2012},
month = {7},
pages = {1-10},
abstract = {The free energy functional has recently been proposed as a variational principle for bounded rational decision-making, since it instantiates a natural trade-off between utility gains and information processing costs that can be axiomatically derived. Here we apply the free energy principle to general decision trees that include both adversarial and stochastic environments.
We derive generalized sequential optimality equations that not only include the Bellman optimality equations as a limit case, but also lead to well-known decision-rules
such as Expectimax, Minimax and Expectiminimax. We show how these decision-rules can be derived from a single free energy principle that assigns a resource parameter to each
node in the decision tree. These resource parameters express a concrete computational cost that can be measured as the amount of samples that are needed from the distribution that belongs to each node. The free energy principle therefore provides the normative basis for generalized optimality equations that account for both adversarial and stochastic environments.},
file_url = {fileadmin/user_upload/files/publications/2012/EWRL-2012-Ortega.pdf},
web_url = {http://ewrl.wordpress.com/ewrl10-2012/#papers},
event_name = {10th European Workshop on Reinforcement Learning (EWRL 2012)},
event_place = {Edinburgh, Scotland},
state = {published},
author = {Ortega PA{portega}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Inproceedings{ OrtegaB2011,
title = {Information, utility and bounded rationality},
year = {2011},
month = {8},
pages = {269-274},
abstract = {Perfectly rational decision-makers maximize expected utility, but crucially ignore the resource costs incurred when determining optimal actions. Here we employ an axiomatic framework for bounded rational decision-making based on a thermodynamic interpretation of resource costs as information costs. This leads to a variational "free utility" principle akin to thermodynamical free energy that trades off utility and information costs. We show that bounded optimal control solutions can be derived from this variational principle, which leads in general to stochastic policies. Furthermore, we show that risk-sensitive and robust (minimax) control schemes fall out naturally from this framework if the environment is considered as a bounded rational and perfectly rational opponent, respectively. When resource costs are ignored, the maximum expected utility principle is recovered.},
web_url = {http://agi-conf.org/2011/},
editor = {Schmidhuber, J. , K.R. Thórisson, M. Looks},
publisher = {Springer},
address = {Berlin, Germany},
booktitle = {Artificial General Intelligence},
event_name = {Fourth International Conference on Artificial General Intelligence (AGI 2011)},
event_place = {Mountain View, CA, USA},
state = {published},
ISBN = {978-3-642-22886-5},
DOI = {10.1007/978-3-642-22887-2_28},
author = {Ortega PA{portega} and Braun DA{dbraun}}
}
@Inproceedings{ OrtegaBG2011,
title = {Reinforcement Learning and the Bayesian Control Rule},
year = {2011},
month = {8},
pages = {281-285},
abstract = {We present an actor-critic scheme for reinforcement learning in complex domains. The main contribution is to show that planning and I/O dynamics can be separated such that an intractable planning problem reduces to a simple multi-armed bandit problem, where each lever stands for a potentially arbitrarily complex policy. Furthermore, we use the Bayesian control rule to construct an adaptive bandit player that is universal with respect to a given class of optimal bandit players, thus indirectly constructing an adaptive agent that is universal with respect to a given class of policies.},
web_url = {http://agi-conf.org/2011/},
editor = {Schmidhuber, J. , K.R. Thórisson, M. Looks},
publisher = {Springer},
address = {Berlin, Germany},
booktitle = {Artificial General Intelligence},
event_name = {Fourth International Conference on Artificial General Intelligence (AGI 2011)},
event_place = {Mountain View, CA, USA},
state = {published},
ISBN = {978-3-642-22886-5},
DOI = {10.1007/978-3-642-22887-2_30},
author = {Ortega PA{portega}, Braun DA{dbraun} and Godsill S}
}
@Inproceedings{ BraunOTS2011,
title = {Path integral control and bounded rationality},
year = {2011},
month = {4},
pages = {202-209},
abstract = {Path integral methods have recently been shown to be applicable to a very general class of optimal control problems. Here we examine the path integral formalism from a decision-theoretic point of view, since an optimal controller can always be regarded as an instance of a perfectly rational decision-maker that chooses its actions so as to maximize its expected utility. The problem with perfect rationality is, however, that finding optimal actions is often very difficult due to prohibitive computational resource costs that are not taken into account. In contrast, a bounded rational decision-maker has only limited resources and therefore needs to strike some compromise between the desired utility and the required resource costs. In particular, we suggest an information-theoretic measure of resource costs that can be derived axiomatically. As a consequence we obtain a variational principle for choice probabilities that trades off maximizing a given utility criterion and avoiding resource costs that arise due to deviating from initially given default choice probabilities. The resulting bounded rational policies are in general probabilistic. We show that the solutions found by the path integral formalism are such bounded rational policies. Furthermore, we show that the same formalism generalizes to discrete control problems, leading to linearly solvable bounded rational control policies in the case of Markov systems. Importantly, Bellman's optimality principle is not presupposed by this variational principle, but it can be derived as a limit case. This suggests that the information-theoretic formalization of bounded rationality might serve as a general principle in control design that unifies a number of recently reported approximate optimal control methods both in the continuous and discrete domain.},
web_url = {http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5967366&tag=1},
publisher = {IEEE},
address = {Piscataway, NJ, USA},
event_name = {IEEE Symposium on Adaptive Dynamic Programming And Reinforcement Learning (ADPRL 2011)},
event_place = {Paris, France},
state = {published},
ISBN = {978-1-4244-9887-1},
DOI = {10.1109/ADPRL.2011.5967366},
author = {Braun DA{dbraun}, Ortega PA{portega}, Theodorou E and Schaal S{sschaal}}
}
@Inproceedings{ BraunO2010,
title = {A minimum relative entropy principle for adaptive control in linear quadratic regulators},
year = {2010},
month = {6},
pages = {103-108},
web_url = {http://www.icinco.org/ICINCO2010/cfp.asp},
web_url2 = {http://dl.acm.org/citation.cfm?id=1892223},
editor = {Filipe, J. , J. Andrade-Cetto, J.-L. Ferrier},
publisher = {SciTePress},
event_name = {7th International Conference on Informatics in Control, Automation and Robotics (ICINCO 2010)},
event_place = {Funchal, Madeira, Portugal},
state = {published},
ISBN = {978-989-8425-02-7},
author = {Braun DA{dbraun} and Ortega PA{portega}}
}
@Inproceedings{ OrtegaB2010,
title = {A Bayesian rule for adaptive control based on causal interventions},
year = {2010},
month = {3},
pages = {121-126},
abstract = {Explaining adaptive behavior is a central problem in artificial intelligence research. Here we formalize adaptive agents as mixture distributions over sequences of inputs and outputs (I/O). Each distribution of the mixture constitutes a `possible world', but the agent does not know which of the possible worlds it is actually facing. The problem is to adapt the I/O stream in a way that is compatible with the true world. A natural measure of adaptation can be obtained by the Kullback-Leibler (KL) divergence between the I/O distribution of the true world and the I/O distribution expected by the agent that is uncertain about possible worlds. In the case of pure input streams, the Bayesian mixture provides a well-known solution for this problem. We show, however, that in the case of I/O streams this solution breaks down, because outputs are issued by the agent itself and require a different probabilistic syntax as provided by intervention calculus. Based on this calculus, we obtain a Bayesian control rule that allows modeling adaptive behavior with mixture distributions over I/O streams. This rule might allow for a novel approach to adaptive control based on a minimum KL-principle.},
web_url = {http://agi-conf.org/2010/},
web_url2 = {http://arxiv.org/abs/0911.5104},
editor = {Hutter, M. , E. Kitzelmann},
publisher = {Atlantis Press},
address = {Amsterdam, Netherlands},
event_name = {Third Conference on Artificial General Intelligence (AGI 2010)},
event_place = {Lugano, Switzerland},
state = {published},
ISBN = {978-90-78677-36-9},
author = {Ortega PA{portega} and Braun DA{dbraun}}
}
@Inproceedings{ OrtegaB2010_2,
title = {A conversion between utility and information},
year = {2010},
month = {3},
pages = {115-120},
abstract = {Rewards typically express desirabilities or preferences over a set of alternatives. Here we propose that rewards can be defined for any probability distribution based on three desiderata, namely that rewards should be real-valued, additive and order-preserving, where the latter implies that more probable events should also be more desirable. Our main result states that rewards are then uniquely determined by the negative information content. To analyze stochastic processes, we define the utility of a realization as its reward rate. Under this interpretation, we show that the expected utility of a stochastic process is its negative entropy rate. Furthermore, we apply our results to analyze agent-environment interactions. We show that the expected utility that will actually be achieved by the agent is given by the negative cross-entropy from the input-output (I/O) distribution of the coupled interaction system and the agent's I/O distribution. Thus, our results allow for an information-theoretic interpretation of the notion of utility and the characterization of agent-environment interactions in terms of entropy dynamics.},
web_url = {http://agi-conf.org/2010/},
web_url2 = {http://arxiv.org/abs/0911.5106},
editor = {Hutter, M. , E. Kitzelmann},
publisher = {Atlantis Press},
address = {Amsterdam, Netherlands},
event_name = {Third Conference on Artificial General Intelligence (AGI 2010)},
event_place = {Lugano, Switzerland},
state = {published},
ISBN = {978-90-78677-36-9},
author = {Ortega PA{portega} and Braun DA{dbraun}}
}
@Poster{ PengGB2013_2,
title = {Towards assessing randomness and complexity in human motion trajectories},
year = {2013},
month = {10},
volume = {14},
pages = {31},
abstract = {Intelligence is often related to the behavioural complexity an agent can generate. For example, when studying human language one typically finds that sequences of letters or words are neither completely random nor totally determinate. This is often assessed quantitatively by
studying the conditional entropy of sequences [1]. Similarly, entropy measures can also be used to assess the human ability to generate random numbers — a task that humans often find difficult [2]. Previous studies in motor control have found, for example, that humans cannot
significantly increase the level of trajectory randomness in single-joint movements [3]. Here we test human randomness when generating trajectories and compare entropic measurements of random vs. non-random motion. We designed a motor task where participants controlled
a cursor by moving a Phantom manipulandum in a three-dimensional virtual environment. The cursor was constrained to move inside a 10x10 grid. In the first part of the experiment participants were asked to (1) perform a rhythmic movement, (2) write pre-specified letters,
and (3) perform a random movement. In the second part of the experiment participants were asked again to perform random movements, but this time they received feedback from an artificial intelligence (based on context-tree weighting) predicting their next move. We found that participants can change the randomness of their behaviour through feedback and that excess entropy can be used as a complexity measure of motion trajectories. [1] Rao, R. P.
N., Yadav, N., Vahia, M. N., Joglekar, H., Adhikari, R., and Mahadevan, I. (2009). Entropic evidence for linguistic structure in the Indus script. Science, 324(5931):1165. [2] Figurska, M., Stanczyk, M., and Kulesza, K. (2008). Humans cannot consciously generate random numbers sequences: Polemic study. Medical hypotheses,},
web_url = {http://www.neuroschool-tuebingen-nena.de/fileadmin/user_upload/Dokumente/behav/AbstractbookNeNa2013.pdf},
event_name = {14th Conference of Junior Neuroscientists of Tübingen (NeNa 2013)},
event_place = {Schramberg, Germany},
state = {published},
author = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}, Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ PengGB2013,
title = {Assessing randomness in human motion trajectories},
year = {2013},
month = {9},
number = {W26},
abstract = {Intelligence is often related to the behavioural complexity an agent can generate. For example, when studying human language one typically finds that sequences of letters or words are neither completely random nor totally determinate. This is often assessed quantitatively by studying the conditional entropy of sequences [1]. Similarly, entropy can be used to assess the human ability to generate random numbers. Humans have often been found to be not very good at generating random numbers [2]. Here we test human randomness when generating trajectories and compare entropic measurements of random vs. non-random motion.
We designed a motor task where participants controlled a cursor by moving a Phantom manipulandum in a three-dimensional virtual environment. The cursor was constrained to move inside a 10x10 grid. In the first part of the experiment participants were asked to (1) perform a rhythmic movement, (2) write pre-specified letters, and (3) perform a random movement. In the second part of the experiment participants were asked again to perform random movements, but this time they received feedback from an artificial intelligence (based on context-tree weighting algorithm) predicting their next move. We found that the conditional entropy revealed different patterns for different motion types and that participants’ motion randomness was only weakly susceptible to feedback.},
web_url = {https://portal.g-node.org/abstracts/bc13/#/doi/nncn.bc2013.0027},
event_name = {Bernstein Conference 2013},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.12751/nncn.bc2013.0027},
author = {Peng Z{zpeng}{Research Group Sensorimotor Learning and Decision-Making}, Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ GeneweinB2013,
title = {Occam's Razor in sensorimotor learning},
year = {2013},
month = {9},
number = {W25},
abstract = {Prediction is a ubiquitous phenomenon in biological systems ranging from basic motor control in animals [1] to scientific hypothesis formation in humans. A central problem in prediction systems is how to choose one’s predictions if there are multiple competing hypotheses that explain the observed data equally well. Following Occam's Razor the simpler explanation requiring fewer assumptions should be preferred. An implicit and elegant way to apply Occam’s Razor is Bayesian inference. In particular, a Bayesian Occam's Razor effect arises when comparing different hypotheses based on their marginal likelihood [2]. Here we investigate whether sensorimotor prediction systems implicitly apply Occam’s Razor in everyday movements. This question is particularly compelling, as recent studies have found evidence that the sensorimotor system makes inferences about unobserved latent variables in a way that is consistent with Bayesian statistics [3,4]. We designed a sensorimotor task, where participants had to draw regression trajectories through a number of observed data points, representing noisy samples of an underlying ideal trajectory. The ideal trajectory was generated by one of two possible Gaussian process (GP) models—a simple model with a large length-scale, leading to smooth trajectories and a complex model with a short length-scale, leading to more wiggly trajectories. Participants were trained on the two different trajectory models and then exposed to ambiguous stimuli to see whether they showed a preference for the simpler model. In case the presented stimulus could be fit equally well by both models, we found that participants showed a clear preference for the simpler model. For general stimuli, we found that participants’ behavior was quantitatively consistent with Bayesian Occam’s Razor.
We could also show that participants’ drawn trajectories were similar to samples from the posterior predictive GP and significantly different from two non-probabilistic heuristics.},
web_url = {https://portal.g-node.org/abstracts/bc13/#/doi/nncn.bc2013.0026},
event_name = {Bernstein Conference 2013},
event_place = {Tübingen, Germany},
state = {published},
DOI = {10.12751/nncn.bc2013.0026},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ LeibfriedGB2013,
  title       = {Signaling in sensorimotor interactions},
  year        = {2013},
  month       = {9},
  number      = {W24},
  abstract    = {Communication relies on signals that convey information. In non-cooperative game theory, signaling games [1] are used to investigate under what conditions two players may communicate with each other when their ultimate aim is to maximize their own benefit. In this case, one player (the sender) possesses private information (the type) that the other player (the receiver) would like to know. However, signaling this information is costly. At the same time the receiver has control over a variable that influences the sender’s payoff. The key question is under which circumstances so-called Perfect Bayesian Nash equilibria with reliable signaling occur. Here, we investigate whether human sensorimotor behavior conforms with optimal strategies corresponding to these equilibria [2]. We designed a sensorimotor task, where two participants controlled a two-dimensional cursor. Importantly, each player could control only one of the two dimensions. The sender’s dimension could be used to communicate a target position that the receiver had to hit without knowing its location. The sender’s aim was to maximize a point score displayed on a two-dimensional color map. The point score decreased with the magnitude of the signal and increased with the reach distance of the receiver. The sender therefore had a trade-off between communicating the real target distance with the hope that the receiver would learn to interpret this signal and give appropriate reward, and trying to avoid signaling costs. We found that participants developed strategies that resulted in separating equilibria as predicted by analytically derived game theoretic solutions.},
  web_url     = {https://portal.g-node.org/abstracts/bc13/#/doi/nncn.bc2013.0025},
  event_name  = {Bernstein Conference 2013},
  event_place = {Tübingen, Germany},
  state       = {published},
  DOI         = {10.12751/nncn.bc2013.0025},
  author      = {Leibfried F{fleibfried}{Research Group Sensorimotor Learning and Decision-Making}, Grau-Moya J{jgrau}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Poster{ GeneweinB2013_2,
title = {Bayesian Occam’s Razor for structure selection in human motor learning},
year = {2013},
month = {6},
day = {28},
abstract = {Learning structure is a key-element for achieving flexible and adaptive control in real-world environments. However, what looks easy and natural in human motor control, remains one of the main challenges in today’s robotics. Here we investigate in a quantitative manner how humans select between several learned structures when faced with novel adaptation problems.
One very successful framework for modeling learning of statistical structures are hierarchical Bayesian models, because of their capability to capture statistical relationships on different levels of abstraction. Another important advantage is the automatic trade-off between prediction error and model complexity that is embodied by Bayesian inference. This so called Bayesian Occam’s Razor results from the marginalization over the model parameters when computing a model’s evidence and has the effect of penalizing unnecessarily complex models — see Figure 1.
Bayesian Occam’s razor. Evidence P(D|M) for a simple model M1 (blue, solid line) and a complex model M2 (red, dashed line). Because both models have to spread unit probability mass over all compatible observations, the simpler model M1 has a higher evidence in the overlapping region D and is thus the more probable model.
A standard paradigm to illustrate the trade-off between prediction error and model complexity is regression, where a curve has to be fitted to noisy observations with the aim of recovering an underlying functional relationship that defines a structure.
Here, we tested human behavior in a sensorimotor regression task, where participants had to draw a curve through noisy observations of an underlying trajectory generated by one of two possible Gaussian process (GP) models with different length-scales, a simple model with long length scale generating mostly smooth trajectories and a complex model with short length scale generating mostly wiggly trajectories. Participants were trained on both models, in order to be able to learn the two different structures. They then observed ambiguous stimuli that could be explained by both models and had to draw regression trajectories, which implied reporting their belief about the generating model.
In ambiguous trials where both models explained the observations equally well, we found that participants strongly preferred the simpler model. In all trials, Bayesian model selection provided a good explanation of subjects’ choice and drawing behavior.
The approach presented in this work might also lend itself for application in robotic tasks, where sensory data has to be disambiguated or a goodness-of-fit versus complexity trade-off has to be performed.},
file_url = {fileadmin/user_upload/files/publications/2013/RSS-2013-Workshop-Genewein.pdf},
web_url = {http://www.ias.tu-darmstadt.de/Workshops/RSS2013},
event_name = {RSS 2013 Workshop on Hierarchical and Structured Learning for Robotics},
event_place = {Berlin, Germany},
state = {published},
author = {Genewein T{tgenewein}{Research Group Sensorimotor Learning and Decision-Making} and Braun DA{dbraun}{Research Group Sensorimotor Learning and Decision-Making}}
}
@Thesis{ Braun2011,
  title   = {Philosophische Verwicklungen der neurobiologischen Bewusstseinsforschung},
  year    = {2011},
  month   = {1},
  web_url = {http://www.freidok.uni-freiburg.de/volltexte/8088/},
  state   = {published},
  type    = {PhD},
  author  = {Braun DA{dbraun}}
}