-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstructural_mapping.Rmd
158 lines (126 loc) · 6.04 KB
/
structural_mapping.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
---
title: "Map DMS to the chIgY-Fab sturcture"
author: "Tyler Starr"
date: "4/29/2022"
output:
github_document:
toc: true
html_preview: false
editor_options:
chunk_output_type: inline
---
This notebook analyzes the structure of the CGGnaive Fab bound to the chicken IgY dimer. It generates a list of inter-residue distances and maps sitewise DMS properties to the B-factor column for visualization in PyMol.
```{r setup, message=FALSE, warning=FALSE, error=FALSE}
require("knitr")
knitr::opts_chunk$set(echo = T)
knitr::opts_chunk$set(dev.args = list(png = list(type = "cairo")))
options(repos = c(CRAN = "https://cran.r-project.org"))
#list of packages to install/load
packages = c("yaml","data.table","tidyverse","gridExtra","bio3d","ggrepel")
#install any packages not already installed
installed_packages <- packages %in% rownames(installed.packages())
if(any(installed_packages == F)){
install.packages(packages[!installed_packages])
}
#load packages
invisible(lapply(packages, library, character.only=T))
#read in config file
config <- read_yaml("config.yaml")
#make output directory
if(!file.exists(config$structural_mapping_dir)){
dir.create(file.path(config$structural_mapping_dir))
}
```
Session info for reproducing environment:
```{r print_sessionInfo}
sessionInfo()
```
## Data input
Read in tables of mutant measurements.
```{r input_data}
dt <- data.table(read.csv(file=config$final_variant_scores_mut_file,stringsAsFactors=F))
```
Read in the structure pdb
```{r input_pdb}
sites <- read.csv(file=config$CGGnaive_site_info,stringsAsFactors=F)
```
Read in the table giving annotations and information on CGGnaive sites
```{r input_sites}
pdb <- read.pdb(file=config$pdb)
```
## Compute site-wise metrics from DMS data for mapping to structure
Want to map the mean effect of mutations on each of the three properties to structure. Also plot the *max* effect of any mutation at a site on binding as a metric of evolvability. Only include mutations with n_bc > 3 for these properties.
```{r compute_site_properties}
sites$mean_CGG_bind <- NA
sites$max_CGG_bind <- NA
sites$quartile75_CGG_bind <- NA
sites$mean_expression <- NA
# sites$mean_polyspecificity <- NA
for(i in 1:nrow(sites)){
if(sites[i,"chain"] != "link"){
sites$mean_CGG_bind[i] <- mean(dt[position_IMGT==sites[i,"site"] & chain==sites[i,"chain"] & wildtype != mutant & n_bc_bind_CGG>3,delta_bind_CGG],na.rm=T)
sites$max_CGG_bind[i] <- max(dt[position_IMGT==sites[i,"site"] & chain==sites[i,"chain"] & wildtype != mutant & n_bc_bind_CGG>3,delta_bind_CGG],na.rm=T)
sites$quartile75_CGG_bind[i] <- quantile(dt[position_IMGT==sites[i,"site"] & chain==sites[i,"chain"] & wildtype != mutant & n_bc_bind_CGG>3,delta_bind_CGG],0.75,na.rm=T)
sites$mean_expression[i] <- mean(dt[position_IMGT==sites[i,"site"] & chain==sites[i,"chain"] & wildtype != mutant & n_bc_expr>3,delta_expr],na.rm=T)
# sites$mean_polyspecificity[i] <- mean(dt[position_IMGT==sites[i,"site"] & chain==sites[i,"chain"] & wildtype != mutant & n_bc_psr>3,delta_psr],na.rm=T)
}
}
```
## Map metrics to the structure
```{r map_properties_to_pdb}
b_mean_CGG <- rep(0, length(pdb$atom$b))
b_max_CGG <- rep(0, length(pdb$atom$b))
b_75th_CGG <- rep(0, length(pdb$atom$b))
b_mean_expr <- rep(0, length(pdb$atom$b))
# b_mean_psr <- rep(0, length(pdb$atom$b))
for(i in 1:nrow(pdb$atom)){
res <- pdb$atom$resno[i]
if(pdb$atom$chain[i] %in% c("B","H") & res %in% sites[sites$chain=="H","site"]){
b_mean_CGG[i] <- sites[sites$site==res & sites$chain=="H","mean_CGG_bind"]
b_max_CGG[i] <- sites[sites$site==res & sites$chain=="H","max_CGG_bind"]
b_75th_CGG[i] <- sites[sites$site==res & sites$chain=="H","quartile75_CGG_bind"]
b_mean_expr[i] <- sites[sites$site==res & sites$chain=="H","mean_expression"]
# b_mean_psr[i] <- sites[sites$site==res & sites$chain=="H","mean_polyspecificity"]
}else if(pdb$atom$chain[i] %in% c("C","L") & res %in% sites[sites$chain=="L","site"]){
b_mean_CGG[i] <- sites[sites$site==res & sites$chain=="L","mean_CGG_bind"]
b_max_CGG[i] <- sites[sites$site==res & sites$chain=="L","max_CGG_bind"]
b_75th_CGG[i] <- sites[sites$site==res & sites$chain=="L","quartile75_CGG_bind"]
b_mean_expr[i] <- sites[sites$site==res & sites$chain=="L","mean_expression"]
# b_mean_psr[i] <- sites[sites$site==res & sites$chain=="L","mean_polyspecificity"]
}
}
write.pdb(pdb=pdb, file=paste(config$structural_mapping_dir,"/CGG_mean.pdb",sep=""), b=b_mean_CGG)
write.pdb(pdb=pdb, file=paste(config$structural_mapping_dir,"/CGG_max.pdb",sep=""), b=b_max_CGG)
write.pdb(pdb=pdb, file=paste(config$structural_mapping_dir,"/CGG_75th.pdb",sep=""), b=b_75th_CGG)
write.pdb(pdb=pdb, file=paste(config$structural_mapping_dir,"/expression_mean.pdb",sep=""), b=b_mean_expr)
# write.pdb(pdb=pdb, file=paste(config$structural_mapping_dir,"/psr_mean.pdb",sep=""), b=b_mean_psr)
```
Generate a list of contact residues (5A or closer contacts)
Below are heavy chain residues that are 5A or closer to CGG ligand in each of the two protomers:
```{r heavy_contacts}
binding.site(pdb,
a.inds=atom.select(pdb,chain=c("H","B")),
b.inds=atom.select(pdb,chain=c("A","D")),
cutoff=5,hydrogens=F)$resnames
```
Below are light chain residues that are 5A or closer to CGG ligand in each of the two protomers:
```{r light_contacts}
binding.site(pdb,
a.inds=atom.select(pdb,chain=c("L","C")),
b.inds=atom.select(pdb,chain=c("A","D")),
cutoff=5,hydrogens=F)$resnames
```
Below are heavy chain residues that are 5A or closer to light chain in each of the two protomers
```{r heavy_contacts_with_light}
binding.site(pdb,
a.inds=atom.select(pdb,chain=c("H","B")),
b.inds=atom.select(pdb,chain=c("L","C")),
cutoff=5,hydrogens=F)$resnames
```
Below are light chain residues that are 5A or closer to heavy chain in each of the two protomers
```{r light_contacts_with_heavy}
binding.site(pdb,
a.inds=atom.select(pdb,chain=c("L","C")),
b.inds=atom.select(pdb,chain=c("H","B")),
cutoff=5,hydrogens=F)$resnames
```