From 365b41c9434daec86608d011349b46e118b47ea8 Mon Sep 17 00:00:00 2001 From: Allen Schmaltz Date: Wed, 31 Jul 2019 20:48:55 -0400 Subject: [PATCH] documentation updates --- .DS_Store | Bin 8196 -> 8196 bytes .gitignore | 21 +++++++++++++++++++-- README.md | 16 +++++++++++++++- vignettes/.DS_Store | Bin 6148 -> 6148 bytes vignettes/cui2vec.Rmd | 21 +++++++++++---------- vignettes/cui2vec.bib | 37 ++++++++++++++++++++----------------- vignettes/prev_cui2vec.bib | 35 +++++++++++++++++++++++++++++++++++ 7 files changed, 100 insertions(+), 30 deletions(-) create mode 100755 vignettes/prev_cui2vec.bib diff --git a/.DS_Store b/.DS_Store index c14e483d293841bece72f76e001aa021197517cc..66fc5024cce66e22fdb856a237ef2acd4bc6149d 100644 GIT binary patch delta 125 zcmZp1XmOa}&nU7nU^hRb$YvgaRZPOn47m)640&b2MR_^-dFc!c42+vM3q50!=VT~j m$Ye-o$YV%lC;`G$hGJwz8w)L&HnU57W10M1WD{Y Kompa, B., Schmaltz, A., Fried, I., Griffin, W, Palmer, N.P., Shi, X., Cai, T., Kohane, I.S., and Beam, A.L., 2019. Clinical Concept Embeddings Learned from Massive Sources of Multimodal Medical Data. arXiv preprint arXiv:1804.01486. + +# Overview + +This repo contains the R package `cui2vec`, which provides code for fitting embeddings to your own co-occurrence data in the manner presented in the above paper. The package can be installed locally from source. An overview of usage is provided in the following HTML vignette, which can be viewed in your browser: + +[vignettes/rendered/2019_07_31/cui2vecWorkflow.html](vignettes/rendered/2019_07_31/cui2vecWorkflow.html). + +Additional information on each of the public functions can be accessed in the standard way (e.g., ```?cui2vec::construct_word2vec_embedding```). + +Data agreements prevent us from releasing all of our original source data, but upon acceptance, we will release our embeddings at the following URL: TBD. 
diff --git a/vignettes/.DS_Store b/vignettes/.DS_Store index ca62346f256a65d3e3d3321cbb7294b0e9dd5ef0..a11c6101b0f70f6254c61e548f3e22d924d0dbbc 100755 GIT binary patch delta 57 zcmZoMXfc@J&&WJ6U^gT4WFE$~vK$OW45 BibTeX. +%% This BibTeX bibliography file was created using BibDesk. +%% http://bibdesk.sourceforge.net/ -@ARTICLE{Beam2018-vl, - title = "Clinical Concept Embeddings Learned from Massive Sources of - Multimodal Medical Data", - author = "Beam, Andrew L and Kompa, Benjamin and Fried, Inbar and - Palmer, Nathan P and Shi, Xu and Cai, Tianxi and Kohane, - Isaac S", - abstract = "Word embeddings are a popular approach to unsupervised +%% Created for Allen Schmaltz at 2019-07-31 20:15:58 -0400 + + +%% Saved with string encoding Unicode (UTF-8) + + + +@article{Beam2018-vl, + Abstract = {Word embeddings are a popular approach to unsupervised learning of word relationships that are widely used in natural language processing. In this article, we present a new set of embeddings for medical concepts learned using an @@ -25,11 +27,12 @@ @ARTICLE{Beam2018-vl previous methods in most instances. 
Finally, we provide a downloadable set of pre-trained embeddings for other researchers to use, as well as an online tool for - interactive exploration of the cui2vec embeddings.", - month = apr, - year = 2018, - keywords = "cui2vec", - archivePrefix = "arXiv", - primaryClass = "cs.CL", - eprint = "1804.01486" -} + interactive exploration of the cui2vec embeddings.}, + Archiveprefix = {arXiv}, + Author = {Kompa, Benjamin and Schmaltz, Allen and Fried, Inbar and Palmer, Nathan P and Shi, Xu and Cai, Tianxi and Kohane, Isaac S and Beam, Andrew L}, + Date-Modified = {2019-08-01 00:15:41 +0000}, + Eprint = {1804.01486}, + Keywords = {cui2vec}, + Primaryclass = {cs.CL}, + Title = {Clinical Concept Embeddings Learned from Massive Sources of Multimodal Medical Data}, + Year = 2019} diff --git a/vignettes/prev_cui2vec.bib b/vignettes/prev_cui2vec.bib new file mode 100755 index 0000000..da20112 --- /dev/null +++ b/vignettes/prev_cui2vec.bib @@ -0,0 +1,35 @@ +% Generated by Paperpile. Check out http://paperpile.com for more information. +% BibTeX export options can be customized via Settings -> BibTeX. + +@ARTICLE{Beam2018-vl, + title = "Clinical Concept Embeddings Learned from Massive Sources of + Multimodal Medical Data", + author = "Beam, Andrew L and Kompa, Benjamin and Fried, Inbar and + Palmer, Nathan P and Shi, Xu and Cai, Tianxi and Kohane, + Isaac S", + abstract = "Word embeddings are a popular approach to unsupervised + learning of word relationships that are widely used in + natural language processing. In this article, we present a + new set of embeddings for medical concepts learned using an + extremely large collection of multimodal medical data. 
+ Leaning on recent theoretical insights, we demonstrate how + an insurance claims database of 60 million members, a + collection of 20 million clinical notes, and 1.7 million + full text biomedical journal articles can be combined to + embed concepts into a common space, resulting in the largest + ever set of embeddings for 108,477 medical concepts. To + evaluate our approach, we present a new benchmark + methodology based on statistical power specifically designed + to test embeddings of medical concepts. Our approach, called + cui2vec, attains state of the art performance relative to + previous methods in most instances. Finally, we provide a + downloadable set of pre-trained embeddings for other + researchers to use, as well as an online tool for + interactive exploration of the cui2vec embeddings.", + month = apr, + year = 2018, + keywords = "cui2vec", + archivePrefix = "arXiv", + primaryClass = "cs.CL", + eprint = "1804.01486" +}