
% Liu et al. (2016), BMC Bioinformatics 17(1), e47: individualized predictor
% of health and disease built from paired reference/target samples.
% The abstract is stored as plain LaTeX-safe text: no HTML markup, and
% percent signs are backslash-escaped so the field can be typeset.
@article{ref1,
  author   = {Liu, Tzu-Yu and Burke, Thomas and Park, Lawrence P. and Woods, Christopher W. and Zaas, Aimee K. and Ginsburg, Geoffrey S. and Hero, Alfred O.},
  title    = {An individualized predictor of health and disease using paired reference and target samples},
  journal  = {{BMC} Bioinformatics},
  year     = {2016},
  volume   = {17},
  number   = {1},
  pages    = {e47},
  issn     = {1471-2105},
  doi      = {10.1186/s12859-016-0889-9},
  url      = {https://doi.org/10.1186/s12859-016-0889-9},
  language = {en},
  abstract = {BACKGROUND: Consider the problem of designing a panel of complex biomarkers to predict a patient's health or disease state when one can pair his or her current test sample, called a target sample, with the patient's previously acquired healthy sample, called a reference sample. As contrasted to a population averaged reference this reference sample is individualized. Automated predictor algorithms that compare and contrast the paired samples to each other could result in a new generation of test panels that compare to a person's healthy reference to enhance predictive accuracy. This paper develops such an individualized predictor and illustrates the added value of including the healthy reference for design of predictive gene expression panels.
RESULTS: The objective is to predict each subject's state of infection, e.g., neither exposed nor infected, exposed but not infected, pre-acute phase of infection, acute phase of infection, post-acute phase of infection. Using gene microarray data collected in a large scale serially sampled respiratory virus challenge study we quantify the diagnostic advantage of pairing a person's baseline reference with his or her target sample. The full study consists of 2886 microarray chips assaying 12,023 genes of 151 human volunteer subjects under 4 different inoculation regimes (HRV, RSV, H1N1, H3N2). We train (with cross-validation) reference-aided sparse multi-class classifier algorithms on this data to show that inclusion of a subject's reference sample can improve prediction accuracy by as much as 14 \%, for the H3N2 cohort, and by at least 6 \%, for the H1N1 cohort. Remarkably, these gains in accuracy are achieved by using smaller panels of genes, e.g., 39 \% fewer for H3N2 and 31 \% fewer for H1N1.
The biomarkers selected by the predictors fall into two categories: 1) contrasting genes that tend to differentially express between target and reference samples over the population; 2) reinforcement genes that remain constant over the two samples, which function as housekeeping normalization genes. Many of these genes are common to all 4 viruses and their roles in the predictor elucidate the function that they play in differentiating the different states of host immune response.
CONCLUSIONS: If one uses a suitable mathematical prediction algorithm, inclusion of a healthy reference in biomarker diagnostic testing can potentially improve accuracy of disease prediction with fewer biomarkers.},
}