@comment{Entry 879: Fong et al. (2018), journal article on classifying health-IT-related patient safety event reports. The numeric citation key is kept unchanged because it may be cited from other documents. The abstract text is reproduced as exported.}
@article{879,
  author   = {Fong, Allan and Adams, Katharine T. and Gaunt, Michael J. and Howe, Jessica L. and Kellogg, Kathryn M. and Ratwani, Raj M.},
  title    = {Identifying health information technology related safety event reports from patient safety event report databases},
  journal  = {Journal of Biomedical Informatics},
  volume   = {86},
  pages    = {135--142},
  year     = {2018},
  month    = dec,
  issn     = {1532-0480},
  doi      = {10.1016/j.jbi.2018.09.007},
  language = {eng},
  keywords = {Health information technology, Incident reports, Machine learning, Patient safety event reports, Text classification},
  abstract = {

OBJECTIVE: The objective of this paper was to identify health information technology (HIT) related events from patient safety event (PSE) report free-text descriptions. A difference-based scoring approach was used to prioritize and select model features. A feature-constraint model was developed and evaluated to support the analysis of PSE reports.

METHODS: 5287 PSE reports manually coded as likely or unlikely related to HIT were used to train unigram, bigram, and combined unigram-bigram logistic regression and support vector machine models using five-fold cross validation. A difference-based scoring approach was used to prioritize and select unigram and bigram features by their relative importance to likely and unlikely HIT reports. A held-out set of 2000 manually coded reports were used for testing.

RESULTS: Unigram models tended to perform better than bigram and combined models. A 300-unigram logistic regression had comparable classification performance to a 4030-unigram SVM model but with a faster relative run-time. The 300-unigram logistic regression model evaluated with the testing data had an AUC of 0.931 and a F1-score of 0.765.

DISCUSSION: A difference-based scoring, prioritization, and feature selection approach can be used to generate simplified models with high performance. A feature-constraint model may be more easily shared across healthcare organizations seeking to analyze their respective datasets and customized for local variations in PSE reporting practices.

CONCLUSION: The feature-constraint model provides a method to identify HIT-related patient safety hazards using a method that is applicable across healthcare systems with variability in their PSE report structures.

},
}