% Journal article by Franzoni, Biondi and Milani (2020), identified by its DOI.
% NOTE(review): the exported record carried literal "ePub" placeholders for
% volume, number and pages (presumably exported before an issue was assigned).
% They are omitted here so that no style prints them verbatim.
% TODO: fill in the final volume/number/pages from the publisher's DOI record.
@article{ref1,
  author   = {Franzoni, Valentina and Biondi, Giulio and Milani, Alfredo},
  title    = {Emotional sounds of crowds: spectrogram-based analysis using deep learning},
  journal  = {Multimedia Tools and Applications},
  year     = {2020},
  abstract = {Crowds express emotions as a collective individual, which is evident from the sounds that a crowd produces in particular events, e.g., collective booing, laughing or cheering in sports matches, movies, theaters, concerts, political demonstrations, and riots. A critical question concerning the innovative concept of crowd emotions is whether the emotional content of crowd sounds can be characterized by frequency-amplitude features, using analysis techniques similar to those applied on individual voices, where deep learning classification is applied to spectrogram images derived by sound transformations. In this work, we present a technique based on the generation of sound spectrograms from fragments of fixed length, extracted from original audio clips recorded in high-attendance events, where the crowd acts as a collective individual. Transfer learning techniques are used on a convolutional neural network, pre-trained on low-level features using the well-known ImageNet extensive dataset of visual knowledge. The original sound clips are filtered and normalized in amplitude for a correct spectrogram generation, on which we fine-tune the domain-specific features. Experiments held on the finally trained Convolutional Neural Network show promising performances of the proposed model to classify the emotions of the crowd.},
  language = {en},
  issn     = {1380-7501},
  doi      = {10.1007/s11042-020-09428-x},
  url      = {https://doi.org/10.1007/s11042-020-09428-x},
}