-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsubmit_2DClass.py
160 lines (143 loc) · 6.44 KB
/
submit_2DClass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
import json
import argparse
import os
import sys
import subprocess
from check_if_done import check_state_comet
import time
import shutil
from write_submit_script_comet import write_submit_comet
import re
'''
Submit relion 2D classification job.
Inputs: 1. Path of the *particles.star file from particle extraction,
2. Name of the 2D classification directory,
3. Diameter of the mask,
4. Number of classes.
Output: 2D classification results, saved in the output directory.
'''
def setupParserOptions():
    '''
    Build the command-line parser for a 2D classification submission and
    parse sys.argv with it.

    Returns a dict mapping option names to their values: input, output,
    program, diameter, numclass, template, cluster, jobname, user_email,
    walltime, nodes. Values are kept as strings (user_email is None when
    it is not given).

    --input and --diameter are required: the submission code calls
    os.path.dirname() on the input and str.replace() with the diameter, both
    of which raise TypeError on None, so a missing value is rejected here
    with a usage message instead.
    '''
    ap = argparse.ArgumentParser()
    ## General inputs
    ap.add_argument('-i', '--input', required=True,
                    help="Path of the *particles.star file produced by particle extraction.")
    ap.add_argument('-o', '--output', default='2DClass',
                    help="Name of the directory where the outputs of 2d classification are stored.")
    ap.add_argument('-p', '--program', default='relion_2DClass',
                    help='The program to use to do 2D classification. Currently only supports relion_2DClass.')
    ## Program specific parameters
    ap.add_argument('-d', '--diameter', required=True,
                    help="Diameter of the particle to be used in 2D classification (in Angstrom).")
    ap.add_argument('-K', '--numclass', default='200',
                    help="Number of classes to be used in 2D classification. Default is 200 (the max allowed).")
    ## Cluster submission needed
    ap.add_argument('--template', default='comet_submit_template.sh',
                    help="Name of the submission template. Currently only supports comet_submit_template.sh")
    ap.add_argument('--cluster', default='comet-cpu',
                    help='The computer cluster the job will run on. Currently only supports comet-cpu.')
    ap.add_argument('--jobname', default='2DClassification', help='Jobname on the submission script.')
    ap.add_argument('--user_email', help='User email address to send the notification to.')
    ap.add_argument('--walltime', default='48:00:00', help='Expected max run time of the job.')
    ap.add_argument('--nodes', default='10', help='Number of nodes used in the computer cluster.')
    return vars(ap.parse_args())
def editparameters(s, diameter, k):
    '''
    Fill in the placeholders of a parameter template string.

    Every '$$diameter' in s is replaced by diameter first, then every '$$K'
    by k. Both replacement values must be strings. The filled-in copy is
    returned.
    '''
    filled = s
    ## Order matters: '$$diameter' is substituted before '$$K'.
    for placeholder, value in (('$$diameter', diameter), ('$$K', k)):
        filled = filled.replace(placeholder, value)
    return filled
def check_good(class_dir, n_iter=25):
    '''
    Currently only supports relion 2D classification.
    Check if the model star file of iteration n_iter exists in class_dir.

    class_dir: directory holding the classification run files.
    n_iter: iteration whose model file is looked for; the number is
            zero-padded to three digits, so the default 25 checks for
            'run_it025_model.star'.

    Returns True if that file exists and is a regular file, else False.
    '''
    model_file = 'run_it%03d_model.star' % int(n_iter)
    return os.path.isfile(os.path.join(class_dir, model_file))
def submit(**args):
    '''
    Write the submission script for a relion 2D classification job and queue
    it with sbatch.

    args is the option dict; the keys read here are input, output, program,
    diameter, numclass, template, cluster, jobname, user_email, walltime and
    nodes.

    Returns (job_id, query_cmd, keyarg): the job ID parsed from the sbatch
    output, plus the 'query_cmd' and 'keyarg' entries of the selected cluster
    in cluster_config.json.

    Raises RuntimeError when no job ID can be found in the sbatch output and
    subprocess.CalledProcessError when sbatch exits with a non-zero status.

    Side effects: creates <output>/diam<diameter>k<numclass> under the working
    directory (emptying it first if it already exists), appends a line to
    '<program>_diam<diameter>k<numclass>_log.txt' there, and leaves the
    process's current directory set to that working directory.
    '''
    cluster = args['cluster']
    program = args['program']
    nodes = args['nodes']

    ## codedir: directory containing this script.
    ## wkdir: parent of the directory that contains the input star file.
    codedir = os.path.dirname(os.path.realpath(sys.argv[0]))
    wkdir = os.path.abspath(os.path.join(os.path.dirname(args['input']), os.pardir))
    cluster_config_file = 'cluster_config.json'
    job_config_file = '2DClass_config.json'

    ## Make sure the top-level output directory exists in wkdir. Only an
    ## already-existing directory is tolerated; other failures are raised.
    os.chdir(wkdir)
    os.makedirs(args['output'], exist_ok=True)

    ## The config files are opened by bare name, so cwd must be codedir.
    os.chdir(codedir)
    with open(cluster_config_file, 'r') as f:
        cluster_config = json.load(f)
    with open(job_config_file, 'r') as f:
        job_config = json.load(f)

    ## 24 MPI ranks per requested node.
    ## NOTE(review): presumably the core count of a comet node - confirm.
    n_procs = str(24 * int(nodes))

    specs = 'diam%sk%s' % (args['diameter'], args['numclass'])
    submit_name = 'submit_%s_%s.sh' % (program, specs)
    output_dir = os.path.join(args['output'], specs)

    ## Command-line fragments handed to the script writer. The trailing
    ## spaces are part of the fragments.
    input_arg = '--i %s ' % args['input']
    output_arg = '--o %s/run ' % output_dir
    stdout = '> %s ' % os.path.join(output_dir, 'run_%s.out' % program)
    stderr = '2> %s ' % os.path.join(output_dir, 'run_%s.err' % program)
    module = 'module load relion/3.0.8_cpu'
    conda_env = 'conda activate /projects/cosmic2/conda/pipeline'
    command = 'mpirun -np %s relion_refine_mpi ' % n_procs
    parameters = editparameters(job_config[program]['parameters'],
                                args['diameter'], args['numclass'])

    ## Called with cwd still at codedir. The cluster chosen on the command
    ## line is passed through so it matches the cluster_config lookup below.
    write_submit_comet(codedir, wkdir, submit_name,
                       args['jobname'], args['user_email'], args['walltime'], nodes,
                       job_config_file, program,
                       input_arg, output_arg, stdout, stderr,
                       module, conda_env, command, parameters,
                       template_file=args['template'],
                       cluster=cluster)

    os.chdir(wkdir)
    ## Start from an empty "diamxxxkxxx" directory under the output directory.
    if os.path.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    ## Argument list instead of a shell string: no quoting problems if the
    ## script name contains spaces or shell metacharacters.
    sbatch_out = subprocess.check_output(['sbatch', submit_name]).decode('utf-8')
    match = re.search(r'job (\d+)', sbatch_out)
    if match is None:
        raise RuntimeError('Could not find a job ID in sbatch output: %r' % sbatch_out)
    job_id = match.group(1)

    with open('%s_%s_log.txt' % (program, specs), 'a+') as f:
        f.write('Job submitted. Job ID is %s.\n' % job_id)

    query_cmd = cluster_config[cluster]['query_cmd']
    keyarg = cluster_config[cluster]['keyarg']
    return job_id, query_cmd, keyarg
def check_complete(job_id, query_cmd, keyarg, interval=2):
    '''
    Block until the job is reported as completed.

    job_id, query_cmd and keyarg are handed to check_state_comet(); the state
    is queried once immediately and then again after every `interval` seconds
    (default 2) until it equals 'completed'. Returns None.

    NOTE(review): the loop ends only on 'completed'. If check_state_comet()
    can report a failed or cancelled job under another state name, this
    never returns - confirm against check_if_done.
    '''
    while check_state_comet(query_cmd, job_id, keyarg) != 'completed':
        time.sleep(interval)
def check_output_good(**args):
    '''
    Record in the job log whether the 2D classification output looks complete.

    Changes into the working directory (the parent of the directory holding
    args['input']), appends 'Checking outputs....' to
    '<program>_diam<diameter>k<numclass>_log.txt', runs check_good() on
    <output>/diam<diameter>k<numclass> and appends the verdict to the same
    log. Returns None.

    NOTE(review): the working directory is resolved against the current
    directory at call time, so a relative args['input'] only resolves to the
    intended place if the current directory is the one it is relative to.
    '''
    input_dir = os.path.dirname(args['input'])
    os.chdir(os.path.abspath(os.path.join(input_dir, os.pardir)))

    specs = 'diam%sk%s' % (args['diameter'], args['numclass'])
    class_dir = os.path.join(args['output'], specs)
    log_name = '%s_%s_log.txt' % (args['program'], specs)

    ## Below: check if the 2D classification output is there.
    with open(log_name, 'a+') as log:
        log.write('Checking outputs....\n')

    if check_good(class_dir):
        verdict = '2D classification for %s has finished.\n' % specs
    else:
        verdict = 'Submission job %s is done but the output may not be right. Please check.\n' % specs

    with open(log_name, 'a+') as log:
        log.write(verdict)
if __name__ == '__main__':
    ## Parse the options, submit the job, wait for it, then verify the output.
    options = setupParserOptions()
    submission = submit(**options)  # (job_id, query_cmd, keyarg)
    check_complete(*submission)
    check_output_good(**options)