Skip to content

Commit

Permalink
fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
matteodelucchi authored Mar 20, 2020
1 parent 97ff5af commit 949c42a
Showing 1 changed file with 44 additions and 21 deletions.
65 changes: 44 additions & 21 deletions results/swissprot_general_overview.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ p = ggplot(d_summary, aes(x=l_effective, y=n_effective_rounded)) +
scale_x_discrete(breaks=c(1,seq(0,80,5),80)) +
scale_y_discrete(breaks=c(2,seq(0,40,5),40)) +
guides(size = FALSE) # remove size legend
# Mean_Prot_length_vs_Frac_TR
p = beautifier(p, x.axis.text.angle = 0)
p <- paper.figure(p, x.axis.text.angle = 0)
p
Expand All @@ -186,9 +186,11 @@ p = ggplot(d_summary, aes(x=l_effective, y=n_effective_rounded)) +
p = beautifier(p, x.axis.text.angle = 0) +
theme(legend.position="bottom", legend.box = "horizontal")
plot_gg(p, width = 5, height = 4, scale = 300, multicore = TRUE, windowsize = c(1000,800))
render_camera(theta = 320, phi = 60)
# plot_gg(p, width = 3.5, multicore = TRUE, windowsize = c(800, 800),
# zoom = 0.5, phi = 35, theta = 30, sunangle = 225, soliddepth = -100)
render_camera(theta = 320, phi = 60, zoom = 0.62)
# rgl.postscript(paste0(pathImages, "fig1_3D"), fmt="svg")
render_snapshot(paste0(pathImages, "fig1_3D", figureFormat))
render_snapshot(paste0(pathImages, "fig1_3D", figureFormat), clear = TRUE)
```
Fig. 1a: Distribution (heatmap) of all tandem repeats (TRs) in Swiss-Prot as a function of their repeat unit length $l_{effective} \leq 80$ (x-axis, x1) and their number of repeat units $n_{effective} \leq 40$ (y-axis, x2). Darker colour indicates a larger number of TRs with a specific unit length and number of repeats. The majority of TRs have short TR units. Yet, there is a blob of domain TRs ($25 < l_{effective} < 50$), with certain TR unit lengths clearly enriched (e.g., $l_{effective} = 28$, mostly Zn finger TRs).

Expand Down Expand Up @@ -459,10 +461,11 @@ p = ggplot(sp, aes(x=mean_sequence_length, y=has_tr_fraction, colour=Superkingdo
hjust = 0.7) +
scale_color_manual(values=cols1) +
scale_size_continuous(range=c(2,7),
name="log10(Prot. count)")+ #scale_colour_brewer(type=2, palette="RdYlBu")
name=expression(log[10](Prot.count)))+ #scale_colour_brewer(type=2, palette="RdYlBu")
# ggtitle('All tandem repeats')
labs(x="Mean Protein Length",
y="Fraction of TR")
y="Proportion of TR",
size= expression(log[10](count)))
p = beautifier(p, x.axis.text.angle = 0)
p <- paper.figure(p, x.axis.text.angle = 0)
p
Expand Down Expand Up @@ -538,7 +541,7 @@ p <- ggplot(TR_frac_Spe_highlight, aes(x = TR_frac_Spe_highlight$mean_sequence_l
segment.size = 0.8,
label.size = 0) +
labs(x="Mean Protein Length",
y="Fraction of TR")
y="Proportion of TR")
p = beautifier(p, x.axis.text.angle = 0)
p <- paper.figure(p, x.axis.text.angle = 0)
p
Expand Down Expand Up @@ -595,7 +598,7 @@ p = ggplot(sp, aes(x=sequence_length_bin, y=has_tr_fraction, colour=Superkingdom
axis.text.x = element_text(family = "sans", angle = 90, margin=margin(1,1,2,1,"pt")),
axis.text.y = element_text(family = "sans", margin=margin(1,1,2,1,"pt"))) +
labs(x="Protein Length",
y="Fraction of TR")
y="Proportion of TR")
p = beautifier(p, x.axis.text.angle = 45, x.axis.text.hjust = 1)
p <- paper.figure(p, x.axis.text.angle = 45, x.axis.text.hjust = 1, x.axis.text.size = 22)+
guides(colour=guide_legend(override.aes = list(size = 1.5)),
Expand Down Expand Up @@ -628,6 +631,23 @@ cor.test(bins, sp$has_tr_fraction[which(sp$Superkingdom == "Viruses")], method =
print(paste("rho^2:", round(cor.test(bins, sp$has_tr_fraction[which(sp$Superkingdom == "Viruses")], method = "spearman")$estimate[1]^2, 3)))
```

## TR length by superkingdom
```{r TR length by superkingdom}
# Average l_effective per superkingdom, using only rows flagged has_tr.
#   Mean_by_TR   : total l_effective divided by the number of rows in the group
#                  (presumably one row per TR -- confirm against tr_all_sp).
#   Mean_by_prot : total l_effective divided by the number of distinct IDs
#                  (presumably one ID per protein -- confirm against tr_all_sp).
tr_all_sp %>%
  dplyr::filter(has_tr == TRUE) %>%
  group_by(Superkingdom) %>%
  summarise(
    Mean_by_TR = sum(l_effective) / n(),
    Mean_by_prot = sum(l_effective) / n_distinct(ID)
  )
```
Eukaryotes have, on average, the longest TRs.

```{r protein length by superkingdom}
# Average protein length per superkingdom: summed Length divided by the number
# of rows in each group (presumably one row per protein -- confirm against sp_all).
sp_all %>%
  group_by(Superkingdom) %>%
  summarise(
    prot_length = sum(Length) / n()
  )
```
Eukaryotes have the second-longest proteins. Viruses have the longest proteins.

## Micro Tandem Repeats -> Supplementary (incl. small and domain)
```{r, echo=FALSE}
p = ggplot(sp, aes(x=sequence_length_bin, y=has_homo_tr_fraction, colour=Superkingdom))
Expand All @@ -647,9 +667,12 @@ p = p +
axis.text.x = element_text(family = "sans", angle = 90, margin=margin(1,1,2,1,"pt")),
axis.text.y = element_text(family = "sans", margin=margin(1,1,2,1,"pt"))) +
labs(x="Protein Length",
y="Fraction with homo TR")
y="Proportion with homo TR")
p = beautifier(p, x.axis.text.angle = 45, x.axis.text.hjust = 1)
p <- paper.figure(p, x.axis.text.angle = 45, x.axis.text.hjust = 1, x.axis.text.size = 22)
p <- paper.figure(p, x.axis.text.angle = 45, x.axis.text.hjust = 1, x.axis.text.size = 22)+
guides(colour=guide_legend(override.aes = list(size = 1.5)),
size=FALSE)+
theme(legend.position = "bottom", legend.box = "vertical")
p
if( save) {
ggsave(paste0(pathImages, "fig2a-3", figureFormat), width=12, height=8, dpi = 300)
Expand All @@ -672,7 +695,7 @@ p = p +
axis.text.x = element_text(family = "sans", angle = 90, margin=margin(1,1,2,1,"pt")),
axis.text.y = element_text(family = "sans", margin=margin(1,1,2,1,"pt"))) +
labs(x="Protein Length",
y="Fraction with micro TR")
y="Proportion with micro TR")
p = beautifier(p, x.axis.text.angle = 45, x.axis.text.hjust = 1)
p <- paper.figure(p, x.axis.text.angle = 45, x.axis.text.hjust = 1, x.axis.text.size = 22)+
guides(colour=guide_legend(override.aes = list(size = 1.5)),
Expand Down Expand Up @@ -701,7 +724,7 @@ p = p +
axis.text.x = element_text(family = "sans", angle = 90, margin=margin(1,1,2,1,"pt")),
axis.text.y = element_text(family = "sans", margin=margin(1,1,2,1,"pt"))) +
labs(x="Protein Length",
y="Fraction with small TR")
y="Proportion with small TR")
p = beautifier(p, x.axis.text.angle = 45, x.axis.text.hjust = 1)
p <- paper.figure(p, x.axis.text.angle = 45, x.axis.text.hjust = 1, x.axis.text.size = 22)+
guides(colour=guide_legend(override.aes = list(size = 1.5)),
Expand Down Expand Up @@ -730,7 +753,7 @@ p = p +
axis.text.x = element_text(family = "sans", angle = 90, margin=margin(1,1,2,1,"pt")),
axis.text.y = element_text(family = "sans", margin=margin(1,1,2,1,"pt"))) +
labs(x="Protein Length",
y="Fraction with domain TR")
y="Proportion with domain TR")
p = beautifier(p, x.axis.text.angle = 45, x.axis.text.hjust = 1)
p <- paper.figure(p, x.axis.text.angle = 45, x.axis.text.hjust = 1, x.axis.text.size = 22)+
guides(colour=guide_legend(override.aes = list(size = 1.5)),
Expand Down Expand Up @@ -919,7 +942,7 @@ In proteins, which have >4 TRs, The distribution of the TRs is in Archaea: 77.2%
```{r}
df <- gather(many_distinct_regions_TR_fractions,
key = "TR_type",
value = "Fraction",
value = "Proportion",
c("has_homo_tr_fraction", "has_micro_tr_fraction", "has_short_tr_fraction", "has_domain_tr_fraction"))
df$TR_type[which(df$TR_type=="has_homo_tr_fraction")] <- "homo TRs"
df$TR_type[which(df$TR_type=="has_micro_tr_fraction")] <- "micro TRs"
Expand All @@ -930,7 +953,7 @@ df$TR_type[which(df$TR_type=="has_domain_tr_fraction")] <- "domain TRs"
df$TR_type <- as.factor(df$TR_type)
df$TR_type <- relevel(df$TR_type, ref = "homo TRs")
cols1 <- c("#AA3939", "#AA7939", "#29506D", "#2D882D") #http://paletton.com/#uid=7000I0kllllaFw0g0qFqFg0w0aF
p <- ggplot(df, aes(x = Superkingdom, y = Fraction, facet = TR_type, fill = Superkingdom))+
p <- ggplot(df, aes(x = Superkingdom, y = Proportion, facet = TR_type, fill = Superkingdom))+
facet_wrap(facets="TR_type",
nrow = 1,
strip.position = "bottom") +
Expand Down Expand Up @@ -1079,7 +1102,7 @@ p = sp_gathered %>%
axis.ticks.length = unit(0.05, "cm")) +
geom_point(size=2) +
labs(x="Mean Protein Length",
y="Fraction of TR")+
y="Proportion of TR")+
coord_cartesian(ylim = c(min(sp_gathered$Fraction),max(sp_gathered$Fraction)))+
scale_fill_manual(values=getPalette(colour_count)) +
scale_colour_brewer(type=2, palette="Dark2")+
Expand Down Expand Up @@ -1165,7 +1188,7 @@ p = ggplot(sp, aes(x=mean_sequence_length, y=has_tr_fraction, shape=Superkingdom
geom_abline(intercept=0, slope=mean(sp$has_tr_fraction)/mean(sp$mean_sequence_length), colour="grey")+
# ggtitle('All tandem repeats')+
labs(x="Mean Protein Length",
y="Fraction of TR")+
y="Proportion of TR")+
# geom_label_repel(aes(label = origin), # textbox label
# direction = c("both"),
# label.size = NA,
Expand Down Expand Up @@ -1274,7 +1297,7 @@ p = ggplot(sp, aes(x=mean_sequence_length, y=has_homo_tr_fraction, shape=Superki
axis.text.y = element_text(family = "sans",margin=margin(1,1,2,1,"pt")),
axis.ticks.length = unit(0.05, "cm")) +
labs(x="Mean Protein Length",
y="Fraction of homo TR")+
y="Proportion of homo TR")+
coord_cartesian(ylim = c(min(sp_gathered$Fraction),max(sp_gathered$Fraction)))+
scale_fill_manual(values=getPalette(colour_count)) +
scale_size_continuous(range=c(2,7)) +
Expand Down Expand Up @@ -1302,7 +1325,7 @@ p = ggplot(sp, aes(x=mean_sequence_length, y=has_micro_tr_fraction, shape=Superk
axis.text.y = element_text(family = "sans",margin=margin(1,1,2,1,"pt")),
axis.ticks.length = unit(0.05, "cm")) +
labs(x="Mean Protein Length",
y="Fraction of micro TR")+
y="Proportion of micro TR")+
coord_cartesian(ylim = c(min(sp_gathered$Fraction),max(sp_gathered$Fraction)))+
scale_fill_manual(values=getPalette(colour_count)) +
scale_size_continuous(range=c(2,7)) +
Expand Down Expand Up @@ -1357,7 +1380,7 @@ p = ggplot(sp, aes(x=mean_sequence_length, y=has_short_tr_fraction, shape=Superk
axis.ticks.length = unit(0.05, "cm")) +
geom_point(size=2) +
labs(x="Mean Protein Length",
y="Fraction of small TR")+
y="Proportion of small TR")+
coord_cartesian(ylim = c(min(sp_gathered$Fraction),max(sp_gathered$Fraction)))+
scale_fill_manual(values=getPalette(colour_count)) +
scale_size_continuous(range=c(2,7)) +
Expand Down Expand Up @@ -1385,7 +1408,7 @@ p = ggplot(sp, aes(x=mean_sequence_length, y=has_domain_tr_fraction, shape=Super
axis.ticks.length = unit(0.05, "cm")) +
geom_point(size=2) +
labs(x="Mean Protein Length",
y="Fraction of domain TR")+
y="Proportion of domain TR")+
coord_cartesian(ylim = c(min(sp_gathered$Fraction),max(sp_gathered$Fraction)))+
scale_fill_manual(values=getPalette(colour_count)) +
scale_size_continuous(range=c(2,7)) +
Expand Down Expand Up @@ -1687,7 +1710,7 @@ p <- ggplot(tr_all_sp, aes(x=fraction_disordered_chars, colour=Superkingdom, fil
facet_wrap(~ factor(l_type, levels=c("homo", "micro","small","domain")), scales = "free") +
scale_fill_manual(values = c("#2D882D", "#AA3939", "#AA7939", "#29506D"))+
scale_color_manual(values = c("#2D882D", "#AA3939", "#AA7939", "#29506D"))+
labs(x="Fraction of disordered AA in TRs")
labs(x="Proportion of disordered AA in TRs")
p = beautifier(p, x.axis.text.angle = 0)
p <- paper.figure(p, x.axis.text.angle = 0)
p
Expand Down

0 comments on commit 949c42a

Please sign in to comment.