
% Journal article: sentiment-analysis-based traffic safety evaluation
% (Transportation Research Procedia, vol. 50, 2020).
@article{ref1,
  author   = {Seliverstov, Yaroslav and Seliverstov, Svyatoslav and Malygin, Igor and Korolev, Oleg},
  title    = {Traffic safety evaluation in {Northwestern} {Federal} {District} using sentiment analysis of {Internet} users' reviews},
  journal  = {Transportation Research Procedia},
  year     = {2020},
  volume   = {50},
  pages    = {626--635},
  issn     = {2352-1465},
  doi      = {10.1016/j.trpro.2020.10.074},
  url      = {https://doi.org/10.1016/j.trpro.2020.10.074},
  language = {en},
  abstract = {The paper addresses the task of analyzing traffic safety in the Northwestern Federal District according to the reviews published in the Web. To accomplish the task, the authors developed a system of automatic review classification based on a sentiment classifier. They analyzed open source libraries for data mining, developed a web crawler using Scrapy framework, written in Python 3, and collected reviews. They also considered the methods of text vectorization and lemmatization and their application in the Scikit-Learn library: Bag-of-Words, N-gram, CountVectorizer, and TF-IDF Vectorizer. For the purpose of classification, the authors used the na{\"\i}ve Bayes algorithm and a linear classifier model with stochastic gradient descent optimization. A base of tagged Twitter reviews was used as a training set. The classifier was trained using cross-validation and ShuffleSplit strategies. The authors also tested and compared the classification results for different classifiers. As a result of validation, the best model was determined. The developed system was applied to analyze the quality of roads in the Northwestern Federal District. Based on the outcome, the roads were marked-up in color to illustrate the results of the research.},
}