update to 0.2.0 (#11)
* Update README.md

* debug

* debug for K3 mic

* update fft

* compatible with K3 data

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* debug

* bugfix
add threads option

* bugfix

* bugfix

* debug

* bugfix

* bugfix

* limit to use only 1 gpu

* specify gpu

* add doc

* bugfix

* test

* bugfix

* debug

* re-organize

* Relion 3.1 star file supported

* update 0.2.0

* update

* preprocess for clsavg std plus a 1e-5

* prepclsavg std plus 1e-7

* update

* Update README.md
yilaili authored Dec 11, 2020
1 parent df9ffe7 commit 4f84a99
Showing 14 changed files with 421 additions and 385 deletions.
1 change: 0 additions & 1 deletion MANIFEST.in
@@ -6,4 +6,3 @@

# Include conf file
include cryosparc2/protocols.conf

6 changes: 5 additions & 1 deletion README.md
@@ -5,6 +5,10 @@ Tools to run user-free preprocessing of cryo-EM datasets: https://www.biorxiv.or

MicAssess and 2DAssess are incorporated into the COSMIC2 science gateway, which is freely available for academic research: https://cosmic2.sdsc.edu:8443/gateway/. Just upload your input files and you can run the jobs on the cloud!

**Updates (12/1/2020, v0.2.0)**
1. MicAssess now supports Relion 3.1 star files as input.
2. Fixed dependency issues in the requirements.

**Note (5/8/2020)**
2DAssess gave a syntax error for some users. We have fixed the bug and it should run correctly now.

@@ -21,7 +25,7 @@ Both MicAssess and 2DAssess are python based and need anaconda installed to run.

1. Create an anaconda environment
```
conda create -n cryoassess -c anaconda python=3.6 pyqt=5 cudnn=7.1.2 numpy=1.14.5 intel-openmp=2019.4
conda create -n cryoassess -c anaconda python=3.6 pyqt=5 cudnn=7.1.2 intel-openmp=2019.4
```
2. Activate this conda environment by
```
24 changes: 12 additions & 12 deletions cryoassess/assess2d.py
@@ -19,9 +19,12 @@
import glob
from functools import partial, update_wrapper
from itertools import product
from cryoassess.check_center_p import check_center
from cryoassess.classavg2jpg_p import save_mrcs
import re
from cryoassess.lib.check_center import checkCenter
from cryoassess.mrcs2jpg import mrcs2jpg
from cryoassess.lib import imgprep
from cryoassess.lib import utils


def setupParserOptions():
    ap = argparse.ArgumentParser()
@@ -53,8 +56,8 @@ def w_categorical_crossentropy(y_true, y_pred, weights):
        final_mask += (weights[c_t, c_p] * y_pred_max_mat[:, c_p] * y_true[:, c_t])
    return K.categorical_crossentropy(y_true, y_pred) * final_mask

def predict(**args):
    print('Assessing 2D class averages with 2DAssess....')
def predict(args):
    print('Assessing 2D class averages....')
    test_data_dir = os.path.abspath(args['output'])
    batch_size = args['batch_size']
    labels = ['Clip', 'Edge', 'Good', 'Noise']
@@ -83,14 +86,14 @@ def predict(**args):
        class_mode=None,
        interpolation='lanczos')
    prob = model.predict_generator(test_generator)
    print('Assessment finished. Copying files to corresponding directories....')
    print('Assessment finished.')

    for l in labels:
        os.mkdir(l)
    i = 0
    for file in sorted(glob.glob('data/*.jpg')):
        if labels[np.argmax(prob[i])] == 'good':
            if check_center(file) == True:
            if checkCenter(file) == True:
                copy2(file, 'Good')
            else:
                copy2(file, 'Clipping')
@@ -103,20 +106,17 @@ def predict(**args):
    for fname in os.listdir('Good'):
        good_idx.append(re.findall((args['name']+'_'+'(\d+)'), fname[:-4])[0])

    print('All finished! Outputs are stored in', test_data_dir)
    print('Outputs are stored in', test_data_dir)
    print('Good class averages indices are (starting from 1): ', end='')
    print(', '.join(good_idx))

def main():

    start_dir = os.getcwd()
    args = setupParserOptions()
    args['model'] = os.path.abspath(args['model'])
    os.chdir(start_dir)
    save_mrcs(**args)
    predict(**args)

    mrcs2jpg(args)
    predict(args)

if __name__ == '__main__':

    main()
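
The diff above keeps the weighted categorical cross-entropy loss; the code that binds the weight matrix to it is elided here. Below is a minimal sketch of the usual `functools.partial` pattern (an assumption, not necessarily the exact wiring in assess2d.py; `w_array` and `wrapped_partial` are illustrative names):

```
import numpy as np
from functools import partial, update_wrapper

def wrapped_partial(func, *args, **kwargs):
    # partial() drops the wrapped function's name, which Keras uses for bookkeeping,
    # so copy the metadata back onto the partial object.
    partial_func = partial(func, *args, **kwargs)
    update_wrapper(partial_func, func)
    return partial_func

# Hypothetical 4x4 penalty matrix for the labels ['Clip', 'Edge', 'Good', 'Noise'].
w_array = np.ones((4, 4))
custom_loss = wrapped_partial(w_categorical_crossentropy, weights=w_array)
# model.compile(optimizer='adam', loss=custom_loss, metrics=['accuracy'])
```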
32 changes: 0 additions & 32 deletions cryoassess/classavg_preprocessing_p.py

This file was deleted.

Empty file added cryoassess/lib/__init__.py
Empty file.
cryoassess/lib/check_center.py (renamed from cryoassess/check_center_p.py)
@@ -10,9 +10,9 @@
from scipy import ndimage
from skimage import measure
import cv2
import matplotlib.pyplot as plt
# import matplotlib.pyplot as plt

def check_center(img_name):
def checkCenter(img_name):
    img = Image.open(img_name)
    saliency = cv2.saliency.StaticSaliencySpectralResidual_create()
    (success, saliencyMap) = saliency.computeSaliency(np.asarray(img))
133 changes: 133 additions & 0 deletions cryoassess/lib/imgprep.py
@@ -0,0 +1,133 @@
'''
Helper functions for simple image preprocessing.
'''
import numpy as np
from PIL import Image
from scipy import ndimage
import pandas as pd
# from PIL import ImageOps


def createCircularMask(h, w, center=None, radius=None):
    if center is None:  # use the middle of the image
        center = [int(w/2), int(h/2)]
    if radius is None:  # use the smallest distance between the center and image walls
        radius = min(center[0], center[1], w-center[0], h-center[1])
    Y, X = np.ogrid[:h, :w]
    dist_from_center = np.sqrt((X - center[0])**2 + (Y - center[1])**2)
    mask = dist_from_center <= radius
    return mask

def maskImg(img):
    mask = createCircularMask(img.shape[0], img.shape[1])
    masked_img = img.copy()
    masked_img[~mask] = 0
    return masked_img

#### BELOW: for micrographs
def downsample(img, height=494):
    '''
    Downsample a 2D array using the Fourier transform.
    height is the target height in pixels; the width is scaled by the same factor.
    '''
    m, n = img.shape[-2:]
    ds_factor = m/height
    # height = round(m/ds_factor/2)*2
    width = round(n/ds_factor/2)*2
    F = np.fft.rfft2(img)
    # keep only the low-frequency corners of the half-spectrum (Fourier cropping)
    A = F[..., 0:height//2, 0:width//2+1]
    B = F[..., -height//2:, 0:width//2+1]
    F = np.concatenate([A, B], axis=0)
    f = np.fft.irfft2(F, s=(height, width))
    return f

def scaleImage(img, height=494):
    '''
    Downsample the image, scale the pixel values to 0-255 and return it as a PIL Image object.
    '''
    new_img = downsample(img, height)
    # scale the downsampled image (not the original array) to the 0-255 range
    new_img = ((new_img - new_img.min())/((new_img.max() - new_img.min()) + 1e-7)*255).astype('uint8')
    new_img = Image.fromarray(new_img)
    new_img = new_img.convert("L")
    return new_img

def cropLeft(img, cropx, cropy):
    y = img.shape[0]
    startx = 0
    starty = y//2 - (cropy//2)
    new_img_left = img[starty:starty+cropy, startx:startx+cropx]
    new_img_left = Image.fromarray(new_img_left)
    new_img_left = new_img_left.convert("L")
    return new_img_left

def cropRight(img, cropx, cropy):
    y = img.shape[0]
    x = img.shape[1]
    startx = x - cropx
    starty = y//2 - (cropy//2)
    new_img_right = img[starty:starty+cropy, startx:startx+cropx]
    new_img_right = Image.fromarray(new_img_right)
    new_img_right = new_img_right.convert("L")
    return new_img_right

def cropCenter(img, cropx, cropy):
    y = img.shape[0]
    x = img.shape[1]
    startx = x//2 - (cropx//2)
    starty = y//2 - (cropy//2)
    return img[starty:starty+cropy, startx:startx+cropx]

def preprocessMics(img):
    '''
    Crop the image to make it square.
    Center to 0 and divide by std to normalize.
    Then apply a circular mask to make it rotatable.
    '''
    short_edge = min(img.shape[0], img.shape[1])
    square_img = cropCenter(img, short_edge, short_edge)
    norm_img = (square_img - np.mean(square_img))/np.std(square_img)
    masked_img = maskImg(norm_img)
    return masked_img

#### BELOW: for class averages
def cutByRadius(img):
    '''
    Crop the images (2d class averages) by the radius of the mask.
    Will find the radius from the image and crop the image.
    '''
    h = img.shape[0]
    w = img.shape[1]
    # empty_val = img[0,0] # because the image is already masked (2d class avg), the [0,0] point must be empty
    edge_l = 0
    for i in range(w):
        if np.sum(img[i,:]) > 1e-7 or np.sum(img[:,i]) < -1e-7:
            edge_l = i
            break
    edge_r = 0
    for ii in range(w):
        if np.sum(img[-ii,:]) > 1e-7 or np.sum(img[:,-ii]) < -1e-7:
            edge_r = ii
            break
    edge_t = 0
    for j in range(h):
        if np.sum(img[:,j]) > 1e-7 or np.sum(img[:,j]) < -1e-7:
            edge_t = j
            break
    edge_b = 0
    for jj in range(h):
        if np.sum(img[:,-jj]) > 1e-7 or np.sum(img[:,-jj]) < -1e-7:
            edge_b = jj
            break
    edge = min(edge_l, edge_r, edge_t, edge_b)
    new_img = img[edge:h-edge+1, edge:w-edge+1]
    return new_img


def preprocessClsavg(img):
    '''
    Center to 0 and divide by std to normalize.
    And then apply a circular mask to make it rotatable.
    '''
    norm_img = (img - np.mean(img)) / (np.std(img) + 1e-7)
    masked_img = maskImg(norm_img)
    return masked_img
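
For orientation, here is a minimal usage sketch of the helpers above on a synthetic array (not part of the commit; the array size is arbitrary, and the import path follows the `from cryoassess.lib import imgprep` line in assess2d.py):

```
import numpy as np
from cryoassess.lib import imgprep

# Synthetic "micrograph": a random 2D float array standing in for MRC data.
mic = np.random.randn(1024, 1000).astype(np.float32)

# Fourier-crop to ~494 px tall, rescale to 0-255, and return a PIL image.
small = imgprep.scaleImage(mic, height=494)
print(small.size)  # (width, height) of the downsampled image

# Square-crop, normalize, and apply the circular mask to the raw array.
masked = imgprep.preprocessMics(mic)
print(masked.shape, masked.dtype)
```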
94 changes: 94 additions & 0 deletions cryoassess/lib/utils.py
@@ -0,0 +1,94 @@
'''
Conversion between dataframes and star files in Relion.
ABOUT star_df:
star_df is a dictionary:
    Keys: blockcodes ('data_xx');
    Values: lists of pd dataframes:
        Each element in the value/list corresponds to a "data block" that starts
        with "loop_" and is converted to a pd dataframe.
        Each column name of the dataframe is a "data name" (e.g. _rlnMicrographName).
        All data in the dataframe is stored as strings.
Comment lines starting with "#" are deleted during the conversion.
'''

# import numpy as np
import pandas as pd

def loop2df(loop):
    keys_idx = [i for i, x in enumerate(loop) if x.startswith('_')]
    keys = [loop[i].split('#', 1)[0].strip() for i in keys_idx]  # remove everything after the first "#" on the keys

    df = loop[keys_idx[-1]+1:]
    df = [x.split() for x in df]
    df = pd.DataFrame(df).dropna()
    df.columns = keys

    return df

def block2df(block):
    loop_idx = [i for i, x in enumerate(block) if x == 'loop_']
    loop_idx.append(len(block))
    loops = [block[loop_idx[i]:loop_idx[i+1]] for i in range(len(loop_idx)-1)]

    df_list = []
    for loop in loops:
        df_list.append(loop2df(loop))

    return df_list


def star2df(starfile):
    with open(starfile) as f:
        # read only non-blank lines that do not start with "#", stripped of whitespace
        star = [l for l in (line.strip() for line in f) if l and not l.startswith('#')]

    blockcode_idx = [i for i, x in enumerate(star) if x.startswith('data_')]
    blockcodes = [star[i] for i in blockcode_idx]
    blockcode_idx.append(len(star))
    blocks = [star[blockcode_idx[i]:blockcode_idx[i+1]] for i in range(len(blockcode_idx)-1)]

    block_list = []
    for block in blocks:
        block_list.append(block2df(block))

    star_df = dict(zip(blockcodes, block_list))
    return star_df


def df2loop(df, file):
    file.write('loop_ \n')

    keys = df.columns.tolist()
    for l in keys:
        file.write(l + ' \n')

    for i in range(len(df)):
        s = ' '.join(df.iloc[i].tolist())
        file.write(s + ' \n')

    file.write('\n')


def df2star(star_df, star_name):
    blockcodes = list(star_df.keys())
    block_list = list(star_df.values())

    with open(star_name, 'w') as f:
        for i in range(len(blockcodes)):
            f.write(blockcodes[i] + ' \n\n')
            df_list = block_list[i]
            for j in range(len(df_list)):
                df = df_list[j]
                df2loop(df, f)


def micBlockcode(star_df):
    if len(list(star_df.keys())) == 1:
        return list(star_df.keys())[0]
    else:
        return 'data_micrographs'

def star2miclist(starfile):
    star_df = star2df(starfile)
    mic_blockcode = micBlockcode(star_df)
    micList = star_df[mic_blockcode][0]['_rlnMicrographName'].tolist()
    return micList
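
To illustrate the star_df layout described in the module docstring, here is a small sketch that parses a made-up Relion-style star file with these helpers (the file contents and path are invented for the example; the import path follows the `from cryoassess.lib import utils` line in assess2d.py):

```
from cryoassess.lib import utils

# A minimal, made-up micrograph star file in the Relion loop_ format.
star_text = """\
data_micrographs

loop_
_rlnMicrographName #1
_rlnDefocusU #2
Micrographs/mic_0001.mrc 15000.0
Micrographs/mic_0002.mrc 18000.0
"""
with open('tiny.star', 'w') as f:
    f.write(star_text)

star_df = utils.star2df('tiny.star')
print(list(star_df.keys()))            # ['data_micrographs']
print(star_df['data_micrographs'][0])  # pandas DataFrame with string values

print(utils.star2miclist('tiny.star')) # ['Micrographs/mic_0001.mrc', 'Micrographs/mic_0002.mrc']
```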