
@comment{ref1: journal article on how spatial perturbation (geomasking) methods
  trade off privacy against the validity of descriptive spatial statistics.
  Cleaned from an automated export: HTML markup stripped from the abstract,
  LaTeX-special characters escaped, journal name casing restored.}
@article{ref1,
  author        = {Zelner, Jon and Trangucci, Rob and Broen, Kelly},
  title         = {Measuring the impact of spatial perturbations on the relationship between data privacy and validity of descriptive statistics},
  journal       = {International Journal of Health Geographics},
  year          = {2021},
  volume        = {20},
  number        = {1},
  pages         = {e3},
  issn          = {1476-072X},
  doi           = {10.1186/s12942-020-00256-8},
  url           = {https://doi.org/10.1186/s12942-020-00256-8},
  language      = {en},
  abstract      = {BACKGROUND: Like many scientific fields, epidemiology is addressing issues of research reproducibility. Spatial epidemiology, which often uses the inherently identifiable variable of participant address, must balance reproducibility with participant privacy. In this study, we assess the impact of several different data perturbation methods on key spatial statistics and patient privacy.
    METHODS: We analyzed the impact of perturbation on spatial patterns in the full set of address-level mortality data from Lawrence, MA during the period from 1911 to 1913. The original death locations were perturbed using seven different published approaches to stochastic and deterministic spatial data anonymization. Key spatial descriptive statistics were calculated for each perturbation, including changes in spatial pattern center, Global Moran's I, Local Moran's I, distance to the k-th nearest neighbors, and the L-function (a normalized form of Ripley's K). A spatially adapted form of k-anonymity was used to measure the privacy protection conferred by each method, and its compliance with HIPAA and GDPR privacy standards.
    RESULTS: Random perturbation at 50 m, donut masking between 5 and 50 m, and Voronoi masking maintain the validity of descriptive spatial statistics better than other perturbations. Grid center masking with both 100 $\times$ 100 and 250 $\times$ 250 m cells led to large changes in descriptive spatial statistics. None of the perturbation methods adhered to the HIPAA standard that all points have a k-anonymity $>$ 10. All other perturbation methods employed had at least 265 points, or over 6\%, not adhering to the HIPAA standard.
    CONCLUSIONS: Using the set of published perturbation methods applied in this analysis, HIPAA and GDPR compliant de-identification was not compatible with maintaining key spatial patterns as measured by our chosen summary statistics. Further research should investigate alternate methods to balancing tradeoffs between spatial data privacy and preservation of key patterns in public health data that are of scientific and medical importance.},
  internal-note = {NOTE(review): author order and the e-prefixed page locator are kept as exported. The published byline may list these authors in the reverse order and the article number may be plain 3 -- TODO confirm against the DOI record before citing.},
}