
% Wetzels et al. (2011): comparison of p values, effect sizes, and default
% Bayes factors on 855 published t tests. Abstract cleaned of HTML export residue.
@article{ref1,
  author   = {Wetzels, Ruud and Matzke, Dora and Lee, Michael D. and Rouder, Jeffrey N. and Iverson, Geoffrey J. and Wagenmakers, Eric-Jan},
  title    = {Statistical Evidence in Experimental Psychology: An Empirical Comparison Using 855 t Tests},
  journal  = {Perspectives on Psychological Science},
  year     = {2011},
  volume   = {6},
  number   = {3},
  pages    = {291--298},
  issn     = {1745-6916},
  doi      = {10.1177/1745691611406923},
  url      = {https://doi.org/10.1177/1745691611406923},
  language = {en},
  abstract = {Statistical inference in psychology has traditionally relied heavily on p-value significance testing. This approach to drawing conclusions from data, however, has been widely criticized, and two types of remedies have been advocated. The first proposal is to supplement p values with complementary measures of evidence, such as effect sizes. The second is to replace inference with Bayesian measures of evidence, such as the Bayes factor. The authors provide a practical comparison of p values, effect sizes, and default Bayes factors as measures of statistical evidence, using 855 recently published t tests in psychology. The comparison yields two main results. First, although p values and default Bayes factors almost always agree about what hypothesis is better supported by the data, the measures often disagree about the strength of this support; for 70\% of the data sets for which the p value falls between .01 and .05, the default Bayes factor indicates that the evidence is only anecdotal. Second, effect sizes can provide additional evidence to p values and default Bayes factors. The authors conclude that the Bayesian approach is comparatively prudent, preventing researchers from overestimating the evidence in favor of an effect.},
}