From 5c3af68df465a1a3cca6004cd13e9368cdbf11cc Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Fri, 18 Oct 2024 11:43:06 +0100 Subject: [PATCH 1/6] include optic lobe cell types in banc meta * they are handled differently in Jasper's annotation hierarchy * see https://github.com/jasper-tms/the-BANC-fly-connectome/wiki/Annotations-(cell-types,-etc.) --- R/meta.R | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/R/meta.R b/R/meta.R index 14e7bf1..821f61e 100644 --- a/R/meta.R +++ b/R/meta.R @@ -167,7 +167,18 @@ banc_meta <- function(ids=NULL, ...) { } fancorbanc_meta <- function(table, ids=NULL, ...) { - fid=list(tag2=c('primary class',"anterior-posterior projection pattern", "neuron identity")) + ol_classes=c("centrifugal", "distal medulla", "distal medulla dorsal rim area", + "lamina intrinsic", "lamina monopolar", "lamina tangential", + "lamina wide field", "lobula intrinsic", "lobula lobula plate tangential", + "lobula medulla amacrine", "lobula medulla tangential", + "lobula plate intrinsic", "medulla intrinsic", + "medulla lobula lobula plate amacrine", "medulla lobula tangential", + "photoreceptors", "proximal distal medulla tangential", + "proximal medulla", "serpentine medulla", "T neuron", + "translobula plate", "transmedullary", "transmedullary Y", + "Y neuron") + fid=list(tag2=c('primary class',"anterior-posterior projection pattern", + "neuron identity", ol_classes)) fid=list(fid) names(fid)=table selc=list(c("id", "tag", "tag2", "pt_root_id", 'pt_supervoxel_id')) @@ -180,7 +191,13 @@ fancorbanc_meta <- function(table, ids=NULL, ...) 
{ } else { cell_infosw <- cell_infos %>% mutate(tag=sub("\n\n\n*banc-bot*","", fixed = T, tag)) %>% - tidyr::pivot_wider(id_cols = pt_root_id, + mutate( + class2=case_when(tag2 %in% ol_classes ~ 'optic', T ~ NA_character_), + tag2=case_when( tag2 %in% ol_classes ~ 'neuron identity', + T ~ tag2) + + ) %>% + tidyr::pivot_wider(id_cols = c(pt_root_id, class2), names_from = tag2, values_from = tag, values_fn = function(x) { @@ -193,6 +210,7 @@ fancorbanc_meta <- function(table, ids=NULL, ...) { cell_infosw %>% rename(id=pt_root_id, class=`primary class`, apc=`anterior-posterior projection pattern`,type=`neuron identity`) %>% mutate(class=case_when( + !is.na(class2) ~ class2, class=='sensory neuron' & grepl('scending', apc) ~ paste('sensory', apc), (is.na(class) | class=='central neuron') & apc=='ascending' ~ 'ascending', (is.na(class) | class=='central neuron') & apc=='descending' ~ 'descending', From 710b7dd254187db39b2a20045665f9c13b4e401a Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 19 Oct 2024 18:42:41 +0100 Subject: [PATCH 2/6] handle side info (when it exists) --- R/meta.R | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/R/meta.R b/R/meta.R index 821f61e..2a8aa82 100644 --- a/R/meta.R +++ b/R/meta.R @@ -178,7 +178,7 @@ fancorbanc_meta <- function(table, ids=NULL, ...) { "translobula plate", "transmedullary", "transmedullary Y", "Y neuron") fid=list(tag2=c('primary class',"anterior-posterior projection pattern", - "neuron identity", ol_classes)) + "neuron identity", "soma side", ol_classes)) fid=list(fid) names(fid)=table selc=list(c("id", "tag", "tag2", "pt_root_id", 'pt_supervoxel_id')) @@ -208,7 +208,8 @@ fancorbanc_meta <- function(table, ids=NULL, ...) 
{ paste(sux, collapse = ';') }) cell_infosw %>% - rename(id=pt_root_id, class=`primary class`, apc=`anterior-posterior projection pattern`,type=`neuron identity`) %>% + rename(id=pt_root_id, class=`primary class`, apc=`anterior-posterior projection pattern`, + type=`neuron identity`, side=`soma side`) %>% mutate(class=case_when( !is.na(class2) ~ class2, class=='sensory neuron' & grepl('scending', apc) ~ paste('sensory', apc), @@ -219,8 +220,13 @@ fancorbanc_meta <- function(table, ids=NULL, ...) { T ~ paste(class, apc) )) %>% mutate(class=sub(" neuron", '', class)) %>% - select(id, class, type) %>% - mutate(id=as.character(id), side=NA) + mutate(side=sub('soma on ', '', side)) |> + mutate(side=case_when( + is.na(side) ~ side, + T ~ toupper(substr(side,1,1)) + )) %>% + select(id, class, type, side) %>% + mutate(id=as.character(id)) } if(length(ids)) left_join(data.frame(id=ids), metadf, by='id') From 9ea80f14c340ce4db9ca4496f5455191549c714a Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sat, 19 Oct 2024 18:43:14 +0100 Subject: [PATCH 3/6] remove duplicate central/optic entries * this works but is very slow due to grouped mutate it seems --- R/meta.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/meta.R b/R/meta.R index 2a8aa82..0358f9f 100644 --- a/R/meta.R +++ b/R/meta.R @@ -197,6 +197,13 @@ fancorbanc_meta <- function(table, ids=NULL, ...) 
{ T ~ tag2) ) %>% + arrange(pt_root_id) |> + group_by(pt_root_id) |> + mutate(class2=case_when( + any(!is.na(class2)) ~ na.omit(class2)[1], + T ~ class2 + )) |> + ungroup() |> tidyr::pivot_wider(id_cols = c(pt_root_id, class2), names_from = tag2, values_from = tag, From b9143eaa8ee500a6da3964155c977b45010680cc Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sun, 20 Oct 2024 14:53:02 +0100 Subject: [PATCH 4/6] dplyr speed up (summarise vs case_when) * it seems that case_when can be quite slow when there are many small groups --- R/meta.R | 54 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/R/meta.R b/R/meta.R index 0358f9f..afe04b0 100644 --- a/R/meta.R +++ b/R/meta.R @@ -189,36 +189,42 @@ fancorbanc_meta <- function(table, ids=NULL, ...) { metadf <- if(nrow(cell_infos)<1) { df=data.frame(id=character(), class=character(), type=character(), side=character()) } else { - cell_infosw <- cell_infos %>% - mutate(tag=sub("\n\n\n*banc-bot*","", fixed = T, tag)) %>% + cell_infos2 <- cell_infos %>% mutate( - class2=case_when(tag2 %in% ol_classes ~ 'optic', T ~ NA_character_), - tag2=case_when( tag2 %in% ol_classes ~ 'neuron identity', - T ~ tag2) + tag=sub("\n\n\n*banc-bot*","", fixed = T, tag), + pt_root_id=as.character(pt_root_id)) + cell_infos3 <- cell_infos2 |> + mutate( + tag2=case_when( + tag2 %in% ol_classes ~ 'neuron identity', + T ~ tag2) + ) |> + arrange(pt_root_id, tag) |> + distinct(pt_root_id, tag2, tag, .keep_all = T) |> + group_by(pt_root_id, tag2) |> + # summarise(tag=paste0(tag, collapse=";"), .groups = 'drop') + summarise(tag={ + if(length(tag)>1 && any(grepl("?", tag, fixed = T))) { + # we would like to remove duplicate tags + # that would otherwise give: DNg75;DNg75? 
 + usx=unique(sub("?", "", tag, fixed = T)) + if(length(usx) filter(tag2 %in% ol_classes) - ) %>% - arrange(pt_root_id) |> - group_by(pt_root_id) |> - mutate(class2=case_when( - any(!is.na(class2)) ~ na.omit(class2)[1], - T ~ class2 - )) |> - ungroup() |> - tidyr::pivot_wider(id_cols = c(pt_root_id, class2), + cell_infos4 <- cell_infos3 |> + tidyr::pivot_wider(id_cols = pt_root_id, names_from = tag2, - values_from = tag, - values_fn = function(x) { - sux=sort(unique(x)) - # try removing ? - sux2=sort(unique(sub("?","", x, fixed = T))) - if(length(sux2)% + values_from = tag + ) %>% rename(id=pt_root_id, class=`primary class`, apc=`anterior-posterior projection pattern`, type=`neuron identity`, side=`soma side`) %>% mutate(class=case_when( - !is.na(class2) ~ class2, + id %in% cell_infos2.ol$pt_root_id ~ "optic", class=='sensory neuron' & grepl('scending', apc) ~ paste('sensory', apc), (is.na(class) | class=='central neuron') & apc=='ascending' ~ 'ascending', (is.na(class) | class=='central neuron') & apc=='descending' ~ 'descending', From d4400317e7e57f27a698824f6d28b49c44d28d0c Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Sun, 20 Oct 2024 15:37:42 +0100 Subject: [PATCH 5/6] actions: try running on macOS * mystery build failure for python ImportError: Can't connect to HTTPS URL because the SSL module is not available --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 226d1ce..766c622 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -10,7 +10,7 @@ name: R-CMD-check jobs: R-CMD-check: - runs-on: ubuntu-latest + runs-on: macOS-latest env: GITHUB_PAT: ${{ secrets.GJPAT2 }} # GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} From 46875388ec02cd8fd52ba4fdf781c24f24244227 Mon Sep 17 00:00:00 2001 From: Gregory Jefferis Date: Mon, 21 Oct 2024 18:40:15 +0100 Subject: [PATCH 6/6] fill empty values in 
banc/fanc meta with "" not NA * this means that you can get a result for a query like "/type:.*" which may sometimes be helpful --- R/meta.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/meta.R b/R/meta.R index afe04b0..96f0687 100644 --- a/R/meta.R +++ b/R/meta.R @@ -219,7 +219,8 @@ fancorbanc_meta <- function(table, ids=NULL, ...) { cell_infos4 <- cell_infos3 |> tidyr::pivot_wider(id_cols = pt_root_id, names_from = tag2, - values_from = tag + values_from = tag, + values_fill = "" ) %>% rename(id=pt_root_id, class=`primary class`, apc=`anterior-posterior projection pattern`, type=`neuron identity`, side=`soma side`) %>%