Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

Assess BRAF fusion breakpoint distribution by ancestry #43

Merged
merged 26 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
719bfc7
Merge branch 'rjcorb/41-1-annotate-breakpoints' into rjcorb/41-2-brea…
rjcorb Jan 23, 2024
62ee867
add breakpoint by ancestry analysis script
rjcorb Jan 23, 2024
59ee497
add survival by breakpoint analysis script and results
rjcorb Jan 23, 2024
7cffa59
update script annotation
rjcorb Jan 24, 2024
9f9df8f
add README, wrapper script
rjcorb Jan 24, 2024
e3c4e0e
Merge branch 'rjcorb/34-update-add-histologies' into rjcorb/41-2-brea…
rjcorb Jan 24, 2024
a099b00
Merge branch 'rjcorb/41-2-breakpoint-by-ancestry' into rjcorb/41-3-su…
rjcorb Jan 24, 2024
af51494
filter out partial domain retention breakpoints
rjcorb Jan 24, 2024
75e6d33
Merge branch 'rjcorb/41-2-breakpoint-by-ancestry' into rjcorb/41-3-su…
rjcorb Jan 24, 2024
2bc15e7
rerun removing partial breakpoints
rjcorb Jan 24, 2024
035f08a
rm input files
rjcorb Jan 24, 2024
67f7ccd
update README, wrapper script
rjcorb Jan 24, 2024
d3baa76
rm old results file
rjcorb Jan 24, 2024
d52c922
relevel breakpoint groups
rjcorb Jan 25, 2024
51e251a
update README
rjcorb Jan 25, 2024
ed693c1
rm uncommitted files from README tree
rjcorb Jan 25, 2024
406e6a2
rm low-confidence breakpoint
rjcorb Jan 25, 2024
ce3fc19
Merge branch 'rjcorb/41-2-breakpoint-by-ancestry' into rjcorb/41-3-su…
rjcorb Jan 25, 2024
1062736
rerun with low conf breakpoint rm
rjcorb Jan 25, 2024
d80e83a
generate new df of breakpoints by pt
rjcorb Jan 25, 2024
0daab51
rm fusion filter for ancestry cohort
rjcorb Jan 25, 2024
658df1c
add BS_063ERW0R low confidence fusion exclusion
jharenza Jan 25, 2024
291c4ae
Merge branch 'main' into 43
jharenza Jan 25, 2024
56ed028
Merge branch '43' into 44
jharenza Jan 25, 2024
38c7b64
rerun with sample dup removed
rjcorb Jan 26, 2024
c02b781
Merge pull request #44 from d3b-center/rjcorb/41-3-survival-by-breakp…
rjcorb Jan 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
250 changes: 250 additions & 0 deletions analyses/braf-fusions/02-fusion-breakpoints-by-ancestry.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
---
title: 'Assess BRAF fusion breakpoints in PBTA ancestry cohort'
output:
html_document:
toc: TRUE
toc_float: TRUE
author: Ryan Corbett
date: "2024"
---

This script calculates frequency of BRAF fusion breakpoint types by predicted ancestry in pLGG, BRAF fusion subtype patients in the PBTA ancestry cohort

Load libraries and set directories

```{r load libraries and set directories}
library(data.table)
library(tidyverse)

root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))

data_dir <- file.path(root_dir, "data")
analysis_dir <- file.path(root_dir, "analyses", "braf-fusions")
results_dir <- file.path(analysis_dir, "results")
input_dir <- file.path(analysis_dir, "input")
plot_dir <- file.path(analysis_dir, "plots")

```

Set file paths

```{r}
fusion_file <- file.path(results_dir, "braf-fusions-exon-annotation.tsv")

fusion_dgd_file <- file.path(data_dir, "fusion-dgd.tsv.gz")

cohort_hist_file <- file.path(root_dir, "analyses", "survival", "results", "merged_ancestry_histology_survival.tsv")

hist_file <- file.path(data_dir, "histologies.tsv")

```

Load data

```{r}
braf_hist <- read_tsv(cohort_hist_file) %>%
dplyr::filter(plot_group == "Low-grade glioma" & grepl("-BRAF", molecular_subtype)) %>%
dplyr::mutate(predicted_ancestry = factor(predicted_ancestry))

hist <- read_tsv(hist_file)

# filter fusion file for in-frame fusions and duplications, and determine whether BRAF kinase domain is retained
braf_fusions <- read_tsv(fusion_file) %>%
dplyr::filter(Fusion_Type == "in-frame",
grepl("duplication", annots)) %>%
mutate(keep = case_when(FusionName == "KIAA1549--BRAF" & DomainRetainedGene1B == "Yes" ~ "yes",
FusionName == "BRAF--KIAA1549" & DomainRetainedGene1A == "Yes" ~ "yes",
TRUE ~ "no")) %>%
# The below samples have two different common breakpoints and one is low conf in arriba, so we will remove that one
mutate(keep = case_when(Sample %in% c("BS_SDZY6RZ3", "BS_063ERW0R") & LeftBreakpoint_position == 138867975 ~ "no",
TRUE ~ keep))

braf_fusions_dgd <- read_tsv(fusion_dgd_file) %>%
dplyr::filter(FusionName == "KIAA1549--BRAF" & !grepl("Tier 3|Tier 4", variant_tier)) %>%
dplyr::rename(Kids_First_Biospecimen_ID = Sample) %>%
dplyr::select(Kids_First_Biospecimen_ID, FusionName, `5_prime_region`, `3_prime_region`) %>%
dplyr::mutate(`5_prime_region` = gsub("^Exon |^exon ", "", `5_prime_region`),
`3_prime_region` = gsub("^Exon |^exon ", "", `3_prime_region`),
breakpoint_exons = paste(`5_prime_region`, `3_prime_region`, sep = ":"),
breakpoint_type = case_when(breakpoint_exons %in% c("16:9", "15:9", "16:11", "18:10") ~ "common",
TRUE ~ "rare/novel"),
keep = "yes") %>%
left_join(hist[,c("Kids_First_Participant_ID", "Kids_First_Biospecimen_ID")])

```

Consolidate all common and rare exon breakpoints per patient

```{r}
common_fusions_pbta <- braf_fusions %>%
dplyr::filter(breakpoint_exons %in% c("16:9", "15:9", "16:11", "18:10") & keep == "yes") %>%
distinct(Kids_First_Participant_ID, breakpoint_exons, .keep_all = T) %>%
group_by(Kids_First_Participant_ID) %>%
summarise(breakpoint_exons_common = str_c(breakpoint_exons, collapse = ";"))

common_fusions <- braf_fusions_dgd %>%
dplyr::filter(breakpoint_exons %in% c("16:9", "15:9", "16:11", "18:10") & keep == "yes") %>%
distinct(Kids_First_Participant_ID, breakpoint_exons, .keep_all = T) %>%
group_by(Kids_First_Participant_ID) %>%
summarise(breakpoint_exons_common = str_c(breakpoint_exons, collapse = ";")) %>%
bind_rows(common_fusions_pbta) %>%
distinct(Kids_First_Participant_ID, breakpoint_exons_common, .keep_all = T) %>%
group_by(Kids_First_Participant_ID) %>%
summarise(breakpoint_exons_common = str_c(breakpoint_exons_common, collapse = ";"))

rare_novel_fusions_pbta <- braf_fusions %>%
dplyr::filter(!breakpoint_exons %in% c("16:9", "15:9", "16:11", "18:10") & keep == "yes") %>%
distinct(Kids_First_Participant_ID, breakpoint_exons, .keep_all = T) %>%
group_by(Kids_First_Participant_ID) %>%
summarise(breakpoint_exons_rare = str_c(breakpoint_exons, collapse = ";"))

rare_novel_fusions <- braf_fusions_dgd %>%
dplyr::filter(!breakpoint_exons %in% c("16:9", "15:9", "16:11", "18:10") & keep == "yes") %>%
distinct(Kids_First_Participant_ID, breakpoint_exons, .keep_all = T) %>%
group_by(Kids_First_Participant_ID) %>%
summarise(breakpoint_exons_rare = str_c(breakpoint_exons, collapse = ";")) %>%
bind_rows(rare_novel_fusions_pbta) %>%
distinct(Kids_First_Participant_ID, breakpoint_exons_rare, .keep_all = T) %>%
group_by(Kids_First_Participant_ID) %>%
summarise(breakpoint_exons_rare = str_c(breakpoint_exons_rare, collapse = ";"))

fusions_all <- common_fusions %>%
bind_rows(rare_novel_fusions) %>%
dplyr::mutate(breakpoint_type = case_when(
!is.na(breakpoint_exons_common) & !is.na(breakpoint_exons_rare) ~ NA_character_,
!is.na(breakpoint_exons_rare) ~ "rare",
!is.na(breakpoint_exons_common) ~ "common",
TRUE ~ NA_character_
)) %>%
# Assign breakpoint group (unique common breakpoint or broad rare group)
dplyr::mutate(breakpoint_group = case_when(
!is.na(breakpoint_exons_common) & !is.na(breakpoint_exons_rare) ~ NA_character_,
breakpoint_exons_common == "16:9" ~ "16:9",
breakpoint_exons_common == "15:9" ~ "15:9",
breakpoint_exons_common == "16:11" ~ "16:11",
breakpoint_exons_common == "18:10" ~ "18:10",
!is.na(breakpoint_exons_rare) ~ "rare",
TRUE ~ NA_character_
)) %>%
write_tsv(file.path(results_dir, "braf-fusion-breakpoints-by-patient.tsv"))



```

Merge breakpoint data to braf hist

```{r}
braf_hist <- braf_hist %>%
left_join(fusions_all) %>%
dplyr::filter(!is.na(breakpoint_type))

```

How many patients do we have breakpoint data for?

```{r}
nrow(braf_hist)

```

## Common breakpoint summary

What is the distribution of common breakpoints by ancestry?

```{r}
table(braf_hist$breakpoint_exons_common, braf_hist$predicted_ancestry)

```

Calculate fusion breakpoint frequency by predicted ancestry and Fisher's exact test p-value

```{r}
common_breakpoints <- c("16:9", "15:9", "16:11", "18:10")

common_df <- braf_hist %>%
dplyr::filter(breakpoint_exons_common %in% common_breakpoints) %>%
group_by(breakpoint_exons_common, predicted_ancestry) %>%
summarise(count = n()) %>%
group_by(predicted_ancestry) %>%
mutate(total_count = sum(count),
percentage = round(count / total_count * 100, 1)) %>%
dplyr::mutate(ct_perc = glue::glue("{count} ({percentage}%)")) %>%
dplyr::select(breakpoint_exons_common, predicted_ancestry, ct_perc) %>%
spread(predicted_ancestry, ct_perc, fill = 0)

common_df <- common_df %>%
mutate(
p = map_dbl(breakpoint_exons_common, ~round(fisher.test(table(grepl(.x, braf_hist$breakpoint_exons_common), braf_hist$predicted_ancestry))$p.value, 4))
)

write_tsv(common_df,
file.path(results_dir, "lgg-braf-fusion-common-breakpoint-freq.tsv"))

```

Print rare/novel fusion breakpoint count by predicted ancestry

```{r}
table(braf_hist$breakpoint_exons_rare, braf_hist$predicted_ancestry)

```

Assess distribution of resection level in patients by breakpoint type (common vs. rare)

```{r}
round(table(braf_hist$breakpoint_type, braf_hist$extent_of_tumor_resection)/as.vector(table(braf_hist$breakpoint_type)), 2)

fisher.test(table(braf_hist$breakpoint_type, braf_hist$extent_of_tumor_resection))

```

Assess distribution of resection level in patients by breakpoint group

```{r}
round(table(braf_hist$breakpoint_group, braf_hist$extent_of_tumor_resection)/as.vector(table(braf_hist$breakpoint_group)), 2)

fisher.test(table(braf_hist$breakpoint_group, braf_hist$extent_of_tumor_resection), simulate.p.value = T)

```

Assess distribution of tumor CNS region in patients by breakpoint type and group

```{r}
round(table(braf_hist$breakpoint_type, braf_hist$CNS_region)/as.vector(table(braf_hist$breakpoint_type)), 3) * 100

fisher.test(table(braf_hist$breakpoint_type, braf_hist$CNS_region))

```


```{r}
round(table(braf_hist$breakpoint_group, braf_hist$CNS_region)/as.vector(table(braf_hist$breakpoint_group)), 3) * 100

fisher.test(table(braf_hist$breakpoint_group, braf_hist$CNS_region), simulate.p.value = T)

```

Confirm that CNS region and degree of tumor resection are significantly associated:

```{r}
round(table(braf_hist$CNS_region, braf_hist$extent_of_tumor_resection)/as.vector(table(braf_hist$CNS_region)), 3) * 100

fisher.test(table(braf_hist$CNS_region, braf_hist$extent_of_tumor_resection), simulate.p.value= T)

```

Save braf-specific hist file with breakpoint annotation

```{r}
write_tsv(braf_hist,
file.path(results_dir, "lgg-braf-fusion-breakpoint-annotation.tsv"))

```

Print session info

```{r}
sessionInfo()

```
868 changes: 868 additions & 0 deletions analyses/braf-fusions/02-fusion-breakpoints-by-ancestry.html

Large diffs are not rendered by default.

Loading
Loading