-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdefine_interfaces.py
150 lines (95 loc) · 5.38 KB
/
define_interfaces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import os
import sys
from src.module.confidance_contact_matrix import CCM_AF3
from src.module.alingment_utils import compare_protein_seq
from src.module.domain_clustering import domain_clustering
from src.module.parsers import MMCIFPARSER, HSSPPARSER, alphafold_msa
#from src.module.conservation_score import CONSERVATION_SCORE
from src.module.interface_identification import interface_identification
from src.module.ribbon_diagram import RIBBON_DIAGRAM
import argparse
working_dir = os.path.dirname(os.path.realpath(__file__))
def parse_args():
#####################
# START CODING HERE #
#####################
# Implement a simple argument parser (WITH help documentation!) that parses
# the information needed by main() from commandline.
parser = argparse.ArgumentParser()
if len(sys.argv)==1:
parser.print_help()
# parser.print_usage() # for just the usage line
parser.exit()
parser.add_argument('-i', dest='in_dir', default='',
help='path to a directory where input folder are stored')
parser.add_argument('-c', dest='config_dir', default='',
help='path to a directory where input folder are stored')
parser.add_argument('-m','--mode', dest='mode', choices=['AF3', 'AF2', 'ColabFold'] , default='AF3',
help='output from different AlphaFold Version. Options: AF3, AF2, ColabFold')
parser.add_argument('-t','--threshold', dest='contact_threshold' , default=0.7, type=restricted_float,
help='contact threshold to detect a contact-link in the contact_proability matrix')
# parser.add_argument(?)
# parser.add_argument(?)
# parser.add_argument(?)
args = parser.parse_args()
return args
def restricted_float(x):
try:
x = float(x)
except ValueError:
raise argparse.ArgumentTypeError("%r not a floating-point literal" % (x,))
if x < 0.0 or x > 1.0:
raise argparse.ArgumentTypeError("%r not in range [0.0, 1.0]"%(x,))
return x
def write_dataframe(df, filename, outdir_path):
filepath= os.path.join(outdir_path, filename)
df.to_csv(f'{filepath}.csv', index = False)
def define_interfaces(in_dir, mode, contact_threshold):
outdir = os.path.join(in_dir, 'AlphaBridge')
if not os.path.isdir(outdir):
os.makedirs(outdir)
if mode == 'AF3':
FEATURE_OBJECT = CCM_AF3(in_dir)
feature_path, structure_path, job_request_path = FEATURE_OBJECT.extract_feature_filepath()
list_sequence_info, rec_sequence_list, structure_sequence_list, polymer_chain_dict = FEATURE_OBJECT.extract_sequence_info()
else:
raise NotImplementedError("Output from AF2 or ColabFold not implemented yet")
chain_dict = compare_protein_seq(structure_sequence_list, rec_sequence_list).extract_chain_dict()
matrix_dict = FEATURE_OBJECT.extract_matrix_dict()
plddt_dict = FEATURE_OBJECT.get_scores_dict(matrix_dict['plddt'], list_sequence_info)
contact_matrix = matrix_dict['contact_matrix']
#FEATURE_OBJECT.print_matrix_dict(matrix_dict)
coevolutionary_domains, interacting_coevolutionary_domains, entity_region_dict = domain_clustering(matrix_dict,
list_sequence_info,
alphafold_version=mode,
outdir = outdir,
plotting=True).run_domain_clustering()
INTERFACE_IDENTIFICATION = interface_identification(interacting_coevolutionary_domains,
entity_region_dict,
plddt_dict,
rec_sequence_list,
list_sequence_info,
contact_matrix,
contact_threshold,
chain_dict,
polymer_chain_dict)
interface_dict, protein_interface_dict, interaction_link_dict = INTERFACE_IDENTIFICATION.extract_interface()
interface_df_per_token, interface_df = INTERFACE_IDENTIFICATION.get_interface_info_dataframes(interface_dict,interaction_link_dict)
ribbon_diagram = RIBBON_DIAGRAM(list_sequence_info,
interface_dict,
protein_interface_dict,
plddt_dict,
outdir=outdir)
ribbon_diagram.plot_ribbon_diagram()
write_dataframe(interface_df_per_token, 'interface_df_per_token', outdir )
write_dataframe(interface_df, 'binding_interfaces', outdir )
return interface_df_per_token, interface_df
def main():
args = parse_args()
in_dir = args.in_dir
mode = args.mode
contact_threshold = args.contact_threshold
interface_df_per_token, interface_df = define_interfaces(in_dir, mode, contact_threshold)
print('finished')
if __name__ == '__main__':
main()