Skip to content

Commit

Permalink
fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
matteodelucchi authored Mar 20, 2020
1 parent c42c50d commit 42f58f4
Showing 1 changed file with 0 additions and 65 deletions.
65 changes: 0 additions & 65 deletions results/swissprot_virus-virushost_parasite-parasitehost.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,7 @@ rm(list = ls(all = TRUE))
gc()
source("helpers.R")
<<<<<<< HEAD
# colour setup:
=======
# colour setup:
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
#library(RColorBrewer); display.brewer.all() # to display available colour palettes
colour_count = 13 # alternative: length(unique(sp_gathered$Kingdom))
getPalette = colorRampPalette(brewer.pal(9, "Dark2"))
Expand Down Expand Up @@ -61,7 +57,6 @@ tr_all_sp <- tr_all_sp %>%
mutate(TR_id = row_number())
```

<<<<<<< HEAD
```{r general overview of viral proteins}
# no. of viral proteins in Swissprot
length(unique(sp_all$ID[which(sp_all$Superkingdom == "Viruses")]))
Expand All @@ -83,25 +78,13 @@ sum(table(table(sp_all$Species[which(sp_all$Superkingdom == "Viruses")]))[1:31])
```


=======
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
From all Tandem Repeats, select only those which are viral (Superkingdom = Viruses) and have a known Virushost.
```{r "subset virushost == TRUE"}
# Keep only the tandem-repeat rows from Swissprot that have an annotated
# virus host (non-empty `virus_hosts`).
# which() is used instead of a bare logical mask: `!(x == "")` evaluates to NA
# for missing values, and indexing rows with NA inserts all-NA rows into the
# result. which() drops those positions, so rows with a missing annotation are
# excluded just like rows with an empty one.
tr_all_sp_virus <- tr_all_sp[which(tr_all_sp$virus_hosts != ""), ]
```

<<<<<<< HEAD
```{r Viral Proteins with TR in general: summary statistics}
=======
```{r Viral Proteins in general: summary statistics}
# no. of viral proteins in Swissprot
length(unique(sp_all$ID[which(sp_all$Superkingdom == "Viruses")]))
nrow(sp_all[which(sp_all$Superkingdom == "Viruses"),])
# %of viral proteins in swissprot
length(unique(sp_all$ID[which(sp_all$Superkingdom == "Viruses")]))/ length(unique(sp_all$ID))
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# no. of viral proteins in Swissprot containing TRs
length(unique(tr_all_sp$ID[which(tr_all_sp$Superkingdom == "Viruses")]))
length(unique(tr_all_sp$ID[which(tr_all_sp$Superkingdom == "Viruses")])) / length(unique(sp_all$ID[which(sp_all$Superkingdom == "Viruses")]))
Expand All @@ -113,7 +96,6 @@ nrow(tr_all_sp[which(tr_all_sp$Superkingdom == "Viruses"),])
tr_all_sp_virus$ID <- factor(tr_all_sp_virus$ID, levels = unique(tr_all_sp_virus$ID)) # drop unused levels
table(table(tr_all_sp_virus$ID))
table(table(tr_all_sp_virus$ID)) / sum(table(table(tr_all_sp_virus$ID)))
<<<<<<< HEAD
# Viral proteins function
head(tr_all_sp_virus)
Expand All @@ -136,10 +118,6 @@ tr_all_sp_virus[grepl(pattern = gene_names[9], x = tr_all_sp_virus$protein_name)
tr_all_sp_virus[grepl(pattern = gene_names[10], x = tr_all_sp_virus$protein_name),]
tr_all_sp_virus[grepl(pattern = gene_names[11], x = tr_all_sp_virus$protein_name),]
```
=======
```
Of the 16605 viral proteins in swissprot 44% contain at least one TR. Most (59%) of the viral proteins have a single TR.
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a

```{r Viral Proteins with annotated host species: summary statistics}
# no. of viral proteins in Swissprot with annotated virus host
Expand Down Expand Up @@ -412,10 +390,7 @@ table(sp_all.long$Superkingdom_virushost[which(sp_all.long$has_tr == TRUE)])[[3]
From all viral proteins (which have a virushost), most of them have a eukaryotic virushost (25044, 92%), followed by a bacterial virushost (6%) and an archaeal virushost (2%). Finally, there were 3 proteins from Staphylococcus phage Twort which have the phage itself as virushost. -> self-produced viral proteins? Is the host organism annotation not reliable?
Most of the viral proteins (10528, 72%) which can be associated with a host species are found in only a single host species. Interestingly, some proteins can be found in up to 23 different host species. Those are capsid proteins and some replication-associated proteins.
43% of all viral proteins contain TRs. Of all TR containing viral proteins, 44% have a eukaryotic virushost.
<<<<<<< HEAD
95.1\% of viral TR-containing proteins had a eukaryotic host organism, but only a few had a bacterial (3\%) or archaeal (1\%) host.
=======
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a

# Combine Virus Protein information with Host-species Protein information
### Problematic:
Expand Down Expand Up @@ -602,12 +577,8 @@ p2a <- ggplot(data=df_archaea, aes(x=factor(1), y=fraction_nTR, fill=nTR)) +
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
# scale_fill_manual(values = c(cols1.4, "#5C7881"))+
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Archaea")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -759,11 +730,7 @@ p2b <- ggplot(data=df_bacteria, aes(x=factor(1), y=fraction_nTR, fill=nTR)) +
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Bacteria")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -1018,11 +985,7 @@ p1e <- ggplot(data=df_eukaryota, aes(x=factor(1), y=fraction_nTR, fill=nTR)) +
# color="white", size=3.5)+
# geom_label_repel(aes(label = nTR), size=4, show.legend = F, nudge_x = 1,
# segment.size = .5, direction = 'x')+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
ggtitle("Eukaryota")+
labs(x="", y="Fraction", fill= "TR count")+
theme_minimal()
Expand All @@ -1046,11 +1009,7 @@ p2e <- ggplot(data=df_eukaryota, aes(x=factor(1), y=fraction_nTR, fill=nTR)) + #
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Eukaryota")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -1208,11 +1167,7 @@ p <- ggplot(data=df_eukaryota, aes(x=factor(1), y=fraction_nTR, fill=nTR)) + # T
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Eukaryota")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -1354,11 +1309,7 @@ pe <- ggplot(data=df_eukaryota, aes(x=factor(1), y=fraction_nTR, fill=nTR)) +
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Eukaryota")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -1544,11 +1495,7 @@ p2h <- ggplot(data=df_human, aes(x=factor(1), y=fraction_nTR, fill=nTR)) + # TOD
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Homo sapiens")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -1692,11 +1639,7 @@ p2h <- ggplot(data=df_human, aes(x=factor(1), y=fraction_nTR, fill=nTR)) + # TOD
facet_grid(~Superkingdom)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Homo sapiens")+
labs(fill= "TR count")+
theme_minimal()
Expand Down Expand Up @@ -1863,11 +1806,7 @@ p1p <- ggplot(data=df_parasite, aes(x=factor(1), y=df_parasite$fraction_nTR, fil
# geom_label_repel(aes(label = nTR), size=4, show.legend = F, nudge_x = 1,
# segment.size = .5, direction = 'x')+
# geom_text(data = ann_text, label=paste0(as.numeric(as.character(ann_text$fraction_nTR))*100,"%"))+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
ggtitle("Homo sapiens")+
labs(x="", y="Fraction", fill= "TR count")+
theme_minimal()
Expand All @@ -1891,11 +1830,7 @@ p2p <- ggplot(data=df_parasite, aes(x=factor(1), y=fraction_nTR, fill=nTR)) + #
facet_grid(~organism)+
# geom_text(aes(y=fraction_nTR, label=nTR), vjust=1.6,
# color="white", size=3.5)+
<<<<<<< HEAD
scale_fill_manual(values = cols1.4.bright)+
=======
scale_fill_manual(values = c(cols1.4, "#5C7881"))+
>>>>>>> 78c72c2d39b33614f7a0c7a9d48137ed1e97323a
# ggtitle("Homo sapiens")+
labs(fill= "TR count")+
theme_minimal()
Expand Down

0 comments on commit 42f58f4

Please sign in to comment.