Skip to content

Commit

Permalink
fixed construct pmi
Browse files Browse the repository at this point in the history
  • Loading branch information
bkompa committed Jul 25, 2019
1 parent 66943c3 commit 87c060f
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 2 deletions.
Binary file modified .DS_Store
Binary file not shown.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
^.*\.Rproj$
^\.Rproj\.user$
^doc$
^Meta$
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
.Rproj.user
.Rhistory
.RData
.Ruserdata
doc
Meta
6 changes: 4 additions & 2 deletions R/word2vec_fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#' @export
construct_pmi <- function(cooccur,singletons,N,smooth=0.75) {
cooccur_matrix <- as.matrix(cooccur)
# Zero out the strict lower triangle (the diagonal is kept): cooccur is always
# symmetric, so this avoids double counting the pair CUI1-CUI2 as CUI2-CUI1.
cooccur_matrix[lower.tri(cooccur_matrix, diag = FALSE)] <- 0
singletons$Count <- singletons$Count^smooth/N^smooth
concept_list <- row.names(cooccur_matrix)
nz <- which(cooccur_matrix != 0, arr.ind = TRUE)
Expand All @@ -19,8 +22,7 @@ construct_pmi <- function(cooccur,singletons,N,smooth=0.75) {
dplyr::inner_join(singletons,by=c("Concept_2" = "CUI")) %>%
dplyr::rename(Concept_2_Prob=.data$Count) %>%
dplyr::mutate(PMI = log(.data$JointProb/(.data$Concept_1_Prob * .data$Concept_2_Prob))) %>%
dplyr::select(.data$Concept_1, .data$Concept_2, .data$PMI) %>%
dplyr::top_frac(.5) #symmetric top and bottom half
dplyr::select(.data$Concept_1, .data$Concept_2, .data$PMI)
return(pmi_df)
}

Expand Down
Binary file modified inst/.DS_Store
Binary file not shown.

0 comments on commit 87c060f

Please sign in to comment.