update to 0.2.0 (#11)
* Update README.md

* debug

* debug for K3 mic

* update fft

* compatible with K3 data

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* bugfix
add threads option

* bugfix

* bugfix

* debug

* bugfix

* bugfix

* limit to use only 1 gpu

* specify gpu

* add doc

* bugfix

* test

* bugfix

* debug

* re-organize

* Relion 3.1 star file supported

* update 0.2.0

* update

* preprocess for clsavg std plus a 1e-5

* prepclsavg std plus 1e-7

* update

* Update README.md
yilaili authored Dec 11, 2020
1 parent df9ffe7 commit 4f84a99
Showing 14 changed files with 421 additions and 385 deletions.
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -6,4 +6,3 @@

# Include conf file
include cryosparc2/protocols.conf

6 changes: 5 additions & 1 deletion README.md
@@ -5,6 +5,10 @@ Tools to run user-free preprocessing of cryo-EM datasets: https://www.biorxiv.or

MicAssess and 2DAssess are incorporated into the COSMIC2 science gateway, which is freely available for academic research: https://cosmic2.sdsc.edu:8443/gateway/. Just upload your input files and you can run the jobs on the cloud!

**Updates (12/1/2020, v0.2.0)**
1. MicAssess now supports Relion 3.1 star files as input.
2. Fixed dependency issues in the requirements.

**Note (5/8/2020)**
2DAssess gave a syntax error for some users. We have fixed the bug and it should run correctly now.

@@ -21,7 +25,7 @@ Both MicAssess and 2DAssess are python based and need anaconda installed to run.

1. Create an anaconda environment
```
conda create -n cryoassess -c anaconda python=3.6 pyqt=5 cudnn=7.1.2 numpy=1.14.5 intel-openmp=2019.4
conda create -n cryoassess -c anaconda python=3.6 pyqt=5 cudnn=7.1.2 intel-openmp=2019.4
```
2. Activate this conda environment by
```
24 changes: 12 additions & 12 deletions cryoassess/assess2d.py
@@ -19,9 +19,12 @@
import glob
from functools import partial, update_wrapper
from itertools import product
from cryoassess.check_center_p import check_center
from cryoassess.classavg2jpg_p import save_mrcs
import re
from cryoassess.lib.check_center import checkCenter
from cryoassess.mrcs2jpg import mrcs2jpg
from cryoassess.lib import imgprep
from cryoassess.lib import utils


def setupParserOptions():
    ap = argparse.ArgumentParser()
@@ -53,8 +56,8 @@ def w_categorical_crossentropy(y_true, y_pred, weights):
        final_mask += (weights[c_t, c_p] * y_pred_max_mat[:, c_p] * y_true[:, c_t])
    return K.categorical_crossentropy(y_true, y_pred) * final_mask

def predict(**args):
    print('Assessing 2D class averages with 2DAssess....')
def predict(args):
    print('Assessing 2D class averages....')
    test_data_dir = os.path.abspath(args['output'])
    batch_size = args['batch_size']
    labels = ['Clip', 'Edge', 'Good', 'Noise']
@@ -83,14 +86,14 @@ def predict(**args):
        class_mode=None,
        interpolation='lanczos')
    prob = model.predict_generator(test_generator)
    print('Assessment finished. Copying files to corresponding directories....')
    print('Assessment finished.')

    for l in labels:
        os.mkdir(l)
    i = 0
    for file in sorted(glob.glob('data/*.jpg')):
        if labels[np.argmax(prob[i])] == 'good':
            if check_center(file) == True:
            if checkCenter(file) == True:
                copy2(file, 'Good')
            else:
                copy2(file, 'Clipping')
@@ -103,20 +106,17 @@ def predict(**args):
    for fname in os.listdir('Good'):
        good_idx.append(re.findall((args['name']+'_'+'(\d+)'), fname[:-4])[0])

    print('All finished! Outputs are stored in', test_data_dir)
    print('Outputs are stored in', test_data_dir)
    print('Good class averages indices are (starting from 1): ', end='')
    print(', '.join(good_idx))

def main():

    start_dir = os.getcwd()
    args = setupParserOptions()
    args['model'] = os.path.abspath(args['model'])
    os.chdir(start_dir)
    save_mrcs(**args)
    predict(**args)

    mrcs2jpg(args)
    predict(args)

if __name__ == '__main__':

    main()
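
The diff above keeps the weighted categorical cross-entropy loss; the code that binds the weight matrix to it is elided here. Below is a minimal sketch of the usual `functools.partial` pattern (an assumption, not necessarily the exact wiring in assess2d.py; `w_array` and `wrapped_partial` are illustrative names):

```
import numpy as np
from functools import partial, update_wrapper

def wrapped_partial(func, *args, **kwargs):
    # partial() drops the wrapped function's name, which Keras uses for bookkeeping,
    # so copy the metadata back onto the partial object.
    partial_func = partial(func, *args, **kwargs)
    update_wrapper(partial_func, func)
    return partial_func

# Hypothetical 4x4 penalty matrix for the labels ['Clip', 'Edge', 'Good', 'Noise'].
w_array = np.ones((4, 4))
custom_loss = wrapped_partial(w_categorical_crossentropy, weights=w_array)
# model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])
```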
32 changes: 0 additions & 32 deletions cryoassess/classavg_preprocessing_p.py

This file was deleted.

Empty file added cryoassess/lib/__init__.py
Empty file.
cryoassess/lib/check_center.py (renamed from cryoassess/check_center_p.py)
@@ -10,9 +10,9 @@
from scipy import ndimage
from skimage import measure
import cv2
import matplotlib.pyplot as plt
# import matplotlib.pyplot as plt

def check_center(img_name):
def checkCenter(img_name):
    img = Image.open(img_name)
    saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
    (success, saliencyMap) = saliency.computeSaliency(np.asarray(img))
133 changes: 133 additions & 0 deletions cryoassess/lib/imgprep.py
@@ -0,0 +1,133 @@
'''
Helper functions for simple image preprocessing.
'''
import numpy as np
from PIL import Image
from scipy import ndimage
import pandas as pd
# from PIL import ImageOps


def createCircularMask(h, w, center=None, radius=None):
    if center is None:  # use the middle of the image
        center = [int(w/2), int(h/2)]
    if radius is None:  # use the smallest distance between the center and image walls
        radius = min(center[0], center[1], w-center[0], h-center[1])
    Y, X = np.ogrid[:h, :w]
    dist_from_center = np.sqrt((X - center[0])**2 + (Y - center[1])**2)
    mask = dist_from_center <= radius
    return mask

def maskImg(img):
    mask = createCircularMask(img.shape[0], img.shape[1])
    masked_img = img.copy()
    masked_img[~mask] = 0
    return masked_img

#### BELOW: for micrographs
def downsample(img, height=494):
    '''
    Downsample a 2D array using the Fourier transform.
    height is the target height in pixels; the width is scaled by the same factor.
    '''
    m, n = img.shape[-2:]
    ds_factor = m/height
    # height = round(m/ds_factor/2)*2
    width = round(n/ds_factor/2)*2
    F = np.fft.rfft2(img)
    # keep only the low-frequency corners of the half-spectrum (Fourier cropping)
    A = F[..., 0:height//2, 0:width//2+1]
    B = F[..., -height//2:, 0:width//2+1]
    F = np.concatenate([A, B], axis=0)
    f = np.fft.irfft2(F, s=(height, width))
    return f

def scaleImage(img, height=494):
    '''
    Downsample the image, scale the pixel values to 0-255 and return it as a PIL Image object.
    '''
    new_img = downsample(img, height)
    # scale the downsampled image (not the original array) to the 0-255 range
    new_img = ((new_img - new_img.min())/((new_img.max() - new_img.min()) + 1e-7)*255).astype('uint8')
    new_img = Image.fromarray(new_img)
    new_img = new_img.convert("L")
    return new_img

def cropLeft(img, cropx, cropy):
    y = img.shape[0]
    startx = 0
    starty = y//2 - (cropy//2)
    new_img_left = img[starty:starty+cropy, startx:startx+cropx]
    new_img_left = Image.fromarray(new_img_left)
    new_img_left = new_img_left.convert("L")
    return new_img_left

def cropRight(img, cropx, cropy):
    y = img.shape[0]
    x = img.shape[1]
    startx = x - cropx
    starty = y//2 - (cropy//2)
    new_img_right = img[starty:starty+cropy, startx:startx+cropx]
    new_img_right = Image.fromarray(new_img_right)
    new_img_right = new_img_right.convert("L")
    return new_img_right

def cropCenter(img, cropx, cropy):
    y = img.shape[0]
    x = img.shape[1]
    startx = x//2 - (cropx//2)
    starty = y//2 - (cropy//2)
    return img[starty:starty+cropy, startx:startx+cropx]

def preprocessMics(img):
    '''
    Crop the image to make it square.
    Center to 0 and divide by std to normalize.
    Then apply a circular mask to make it rotatable.
    '''
    short_edge = min(img.shape[0], img.shape[1])
    square_img = cropCenter(img, short_edge, short_edge)
    norm_img = (square_img - np.mean(square_img))/np.std(square_img)
    masked_img = maskImg(norm_img)
    return masked_img

#### BELOW: for class averages
def cutByRadius(img):
    '''
    Crop the images (2d class averages) by the radius of the mask.
    Will find the radius from the image and crop the image.
    '''
    h = img.shape[0]
    w = img.shape[1]
    # empty_val = img[0,0] # because the image is already masked (2d class avg), the [0,0] point must be empty
    edge_l = 0
    for i in range(w):
        if np.sum(img[i,:]) > 1e-7 or np.sum(img[:,i]) < -1e-7:
            edge_l = i
            break
    edge_r = 0
    for ii in range(w):
        if np.sum(img[-ii,:]) > 1e-7 or np.sum(img[:,-ii]) < -1e-7:
            edge_r = ii
            break
    edge_t = 0
    for j in range(h):
        if np.sum(img[:,j]) > 1e-7 or np.sum(img[:,j]) < -1e-7:
            edge_t = j
            break
    edge_b = 0
    for jj in range(h):
        if np.sum(img[:,-jj]) > 1e-7 or np.sum(img[:,-jj]) < -1e-7:
            edge_b = jj
            break
    edge = min(edge_l, edge_r, edge_t, edge_b)
    new_img = img[edge:h-edge+1, edge:w-edge+1]
    return new_img


def preprocessClsavg(img):
    '''
    Center to 0 and divide by std to normalize.
    And then apply a circular mask to make it rotatable.
    '''
    norm_img = (img - np.mean(img)) / (np.std(img) + 1e-7)
    masked_img = maskImg(norm_img)
    return masked_img
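
For orientation, here is a minimal usage sketch of the helpers above on a synthetic array (not part of the commit; the array size is arbitrary, and the import path follows the `from cryoassess.lib import imgprep` line in assess2d.py):

```
import numpy as np
from cryoassess.lib import imgprep

# Synthetic "micrograph": a random 2D float array standing in for MRC data.
mic = np.random.randn(1024, 1000).astype(np.float32)

# Fourier-crop to ~494 px tall, rescale to 0-255, and return a PIL image.
small = imgprep.scaleImage(mic, height=494)
print(small.size)  # (width, height) of the downsampled image

# Square-crop, normalize, and apply the circular mask to the raw array.
masked = imgprep.preprocessMics(mic)
print(masked.shape, masked.dtype)
```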
94 changes: 94 additions & 0 deletions cryoassess/lib/utils.py
@@ -0,0 +1,94 @@
'''
Conversion between dataframes and star files in Relion.
ABOUT star_df:
star_df is a dictionary:
    Keys: blockcodes ('data_xx');
    Values: lists of pd dataframes:
        Each element in the value/list corresponds to a "data block" that starts
        with "loop_" and is converted to a pd dataframe.
        Each column name of the dataframe is a "data name" (e.g. _rlnMicrographName).
        All data in the dataframe is stored as strings.
Comment lines starting with "#" are deleted during the conversion.
'''

# import numpy as np
import pandas as pd

def loop2df(loop):
    keys_idx = [i for i, x in enumerate(loop) if x.startswith('_')]
    keys = [loop[i].split('#', 1)[0].strip() for i in keys_idx]  # remove everything after the first "#" on the keys

    df = loop[keys_idx[-1]+1:]
    df = [x.split() for x in df]
    df = pd.DataFrame(df).dropna()
    df.columns = keys

    return df

def block2df(block):
    loop_idx = [i for i, x in enumerate(block) if x == 'loop_']
    loop_idx.append(len(block))
    loops = [block[loop_idx[i]:loop_idx[i+1]] for i in range(len(loop_idx)-1)]

    df_list = []
    for loop in loops:
        df_list.append(loop2df(loop))

    return df_list


def star2df(starfile):
    with open(starfile) as f:
        # read only non-blank lines that do not start with "#", stripped of whitespace
        star = [l for l in (line.strip() for line in f) if l and not l.startswith('#')]

    blockcode_idx = [i for i, x in enumerate(star) if x.startswith('data_')]
    blockcodes = [star[i] for i in blockcode_idx]
    blockcode_idx.append(len(star))
    blocks = [star[blockcode_idx[i]:blockcode_idx[i+1]] for i in range(len(blockcode_idx)-1)]

    block_list = []
    for block in blocks:
        block_list.append(block2df(block))

    star_df = dict(zip(blockcodes, block_list))
    return star_df


def df2loop(df, file):
    file.write('loop_ \n')

    keys = df.columns.tolist()
    for l in keys:
        file.write(l + ' \n')

    for i in range(len(df)):
        s = ' '.join(df.iloc[i].tolist())
        file.write(s + ' \n')

    file.write('\n')


def df2star(star_df, star_name):
    blockcodes = list(star_df.keys())
    block_list = list(star_df.values())

    with open(star_name, 'w') as f:
        for i in range(len(blockcodes)):
            f.write(blockcodes[i] + ' \n\n')
            df_list = block_list[i]
            for j in range(len(df_list)):
                df = df_list[j]
                df2loop(df, f)


def micBlockcode(star_df):
    if len(list(star_df.keys())) == 1:
        return list(star_df.keys())[0]
    else:
        return 'data_micrographs'

def star2miclist(starfile):
    star_df = star2df(starfile)
    mic_blockcode = micBlockcode(star_df)
    micList = star_df[mic_blockcode][0]['_rlnMicrographName'].tolist()
    return micList
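
To illustrate the star_df layout described in the module docstring, here is a small sketch that parses a made-up Relion-style star file with these helpers (the file contents and path are invented for the example; the import path follows the `from cryoassess.lib import utils` line in assess2d.py):

```
from cryoassess.lib import utils

# A minimal, made-up micrograph star file in the Relion loop_ format.
star_text = """\
data_micrographs

loop_
_rlnMicrographName #1
_rlnDefocusU #2
Micrographs/mic_0001.mrc 15000.0
Micrographs/mic_0002.mrc 18000.0
"""
with open('tiny.star', 'w') as f:
    f.write(star_text)

star_df = utils.star2df('tiny.star')
print(list(star_df.keys()))            # ['data_micrographs']
print(star_df['data_micrographs'][0])  # pandas DataFrame with string values

print(utils.star2miclist('tiny.star')) # ['Micrographs/mic_0001.mrc', 'Micrographs/mic_0002.mrc']
```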