Commit 085f376
updated example data and documentation
gusqgm committed Mar 7, 2022
1 parent 0330110
Showing 81 changed files with 10,499 additions and 49,396 deletions.
@@ -1,40 +1,56 @@
#########################################################################################################################################
# This is the configuration file used for performing luigi bound tasks with the LSTree processing tools. #
# It is based on the tools present in https://github.com/fmi-basel/LSTree created by Raphael Ortiz and Gustavo Q. G. de Medeiros, #
# and is part of the "Multiscale light-sheet organoid imaging framework" publication, de Medeiros et al. (bioRxiv 2021). #
# This file is updated on demand while issues are being solved and functions improved. #
# For any issues, please contact [email protected] #
#########################################################################################################################################


### This configuration file is quite extensive, so before you go ahead here are some important general points:

# 0) Note that some tasks use multiprocessing when internal operations do not release the global interpreter lock (if you are curious and want to know more: https://wiki.python.org/moin/GlobalInterpreterLock).
# 1) Although the point above might sound a bit cryptic, in practice it means: when processing a single dataset, use 'pool_workers ~= number of cores' so that the machine is used to its full capacity.
# 2) For all instances of 'movie_dirs' please provide a list of full paths to the corresponding channel folders containing the datasets, following the folder structure described in the main README of the repository (https://github.com/fmi-basel/LSTree) and illustrated in the sketch below.
# 3) During training, an initial folder with the name of the model is created, and after training finishes a second folder with the suffix '_inference' is created. While the first one holds the parameters 'weights_best.h5' and 'weights_latest.h5', the latter holds the fully trained model which should be used for prediction. For convenience, we have added the two intermediate weights files to the '_inference' folders of all trained models in order to facilitate retraining if needed.
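# For orientation, a minimal sketch of the expected layout. The movie folder name '20220101-organoidA' and the file base name 'organoidA'
# are hypothetical examples; the channel subfolders and per-timepoint file names follow the 'pattern' option below and the example data in the repository:
#
#   example/data/
#   └── 20220101-organoidA/              # one folder per movie, matched e.g. by movie_dirs=["example/data/*-*"]
#       ├── Channel0/
#       │   ├── organoidA-T0000.tif
#       │   └── organoidA-T0001.tif
#       ├── Channel1/
#       └── Channel2/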

########################################################################
# General ##############################################################
[DEFAULT]
pattern={subdir}/{fname}-T{time:04d}.{ext}
# General luigi configuration parameters - should usually not be changed for an initial run unless you want to tweak things at your own risk:

### Important points:
# 1) Set the same default number of threads for all multithreaded tasks
# 2) When processing a single dataset use n_threads ~= number of cores
# 3) Note that some tasks actually use multiprocessing when internal operations do not release the GIL.
# 4) For all instances of 'movie_dirs' please provide a list of full paths to the corresponding channel folders containing the datasets, following the folder structure described in the main README of the repository (https://github.com/fmi-basel/LSTree).
# 5) Please note that all values given
[DEFAULT]
pattern={subdir}/{fname}-T{time:04d}.{ext} # expected folder structure for input/output. Follows the folder structure present in the repository.
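# For illustration (hypothetical names): with subdir=Channel1, fname=organoidA, time=42 and ext=tif,
# the pattern above resolves to 'Channel1/organoidA-T0042.tif'.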

[resources]
gpu=1
pool_workers=16
memory=64000
gpu=1 # needs to be set for luigi. LSTree has not been tested with multiple GPUs (as of yet!), so it is best left at 1.
pool_workers=16 # sets the maximum number of threads for parallel processing (e.g. while saving compressed .tiffs)
memory=64000 # sets the maximum RAM availability

[core]
workers=16
log_level=INFO
outdir = ./
workers=16 # sets the maximum number of parallel tasks to run
log_level=INFO # level of luigi logging of the running jobs. For more information please see: https://luigi.readthedocs.io/en/stable/logging.html
outdir = ./ # directory where the luigi.log file is saved
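# Hypothetical invocation sketch, assuming this file is saved as 'luigi.cfg' and that the LSTree task module is on your PYTHONPATH
# (the module path below is a placeholder; the task names correspond to the section names in this file):
#   LUIGI_CONFIG_PATH=luigi.cfg luigi --module <your_lstree_task_module> DenoiseTrainingTask --local-scheduler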

########################################################################
# Denoising/Deconvolution ##############################################
[BuildDenoiseTrainingRecordTask]
training_base_dir=models/denoise
base_dir=example/data
# Performs the first pre-processing steps. Denoising and deconvolution can also help somewhat in curating predicted trees via e.g. Mastodon, as the images in the BigDataViewer windows are then of higher contrast.
# IMPORTANT: this is NOT a requirement! All other steps can currently be run if the raw data is copied into a folder with the same name as the original one, adding the suffix '-Deconv'.

[BuildDenoiseTrainingRecordTask] # creates all necessary TensorFlow record (.tfrec) files containing the annotation/raw image pairs with the correct patch size used later for training. Such a record-building task is also present for all other tasks that can perform training.
training_base_dir=models/denoise # base directory where corresponding models can be found / saved
base_dir=example/data # base directory for the image data
n_images=30
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(512,512)
train_fraction=0.9 # fraction of images used for model training
valid_fraction=0.1 # fraction of images used for model validation
min_patch_size=(512,512) # minimum image size used for training

[DenoiseTrainingTask]
[DenoiseTrainingTask] # sets all network parameters for the actual training. This section is also present in all other tasks that can perform training.
training_base_dir=models/denoise
base_dir=example/data
images_dirs=["*/Channel0", "*/Channel1", "*/Channel2"]
images_dirs=["*/Channel0", "*/Channel1", "*/Channel2"] # directories where the data for training can be found
# Below are the network parameters, which reflect the base parameters of an RDCNet network. For more detailed information please refer to the RDCNet publication: https://arxiv.org/abs/2010.00991. These parameters are also present in all other tasks that can perform training.
downsampling_factor=(4,)
n_downsampling_channels=16
n_groups=8

@@ -43,17 +59,18 @@ channels_per_group=32
n_steps=5
dropout=0.1

train_batch_size=16
valid_batch_size=32
# Training-specific parameters. These parameters are also present in all other tasks that can perform training.
train_batch_size=16
valid_batch_size=32
epochs=200
n_restarts=5
n_restarts=5 # a restart means that after a certain number of epochs the values of the loss will show a sudden change
learning_rate=0.0001
patch_size=(128,128,1)
suffix=20200311
suffix=20200311 # suffix that is appended to the model folder both during training and after training finishes. If a model with the same name already exists, no training is performed, to avoid overwriting the model '.pb' file.
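# For example, with the default denoise settings from the repository, the model folder is named
# 'models/denoise/out/RDCNet-F4-DC16-OC1-G8-DR1-2-4-GC32-S5-D0.1_20200311' during training and
# 'models/denoise/out/RDCNet-F4-DC16-OC1-G8-DR1-2-4-GC32-S5-D0.1_20200311_inference' once training has finished
# (see the resume_weights path below); the trailing '20200311' is the suffix set above.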

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below.
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below. For example, for the default denoise model from the repository, retraining from the same baseline can be done by uncommenting:

#resume_weights=PATH_TO_CURRENT_MODEL/weights_latest.h5
#resume_weights = models/denoise/out/RDCNet-F4-DC16-OC1-G8-DR1-2-4-GC32-S5-D0.1_20200311_inference/weights_latest.h5

intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

@@ -65,24 +82,26 @@ intensity_scaling_bounds=(0.1, 10.)
out_suffix=-Denoised

[DeconvolutionTask]
psf_dir=models/deconv/20190830
psf_dir=models/deconv/20190830 # directory where the point-spread functions are located
out_suffix=-Deconv
niter=128
#~max_patch_size=(512,512,512)
niter=128 # number of iterations of the deconvolution process
max_patch_size=(9999,9999,9999)
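# Presumably set this large so that whole stacks are deconvolved in a single patch; if memory becomes limiting,
# a smaller patch size such as the previously used (512,512,512) can be set instead (assumption, not verified).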


[MultiDeconvolutionTask]
ch_subdirs=["Channel0", "Channel1", "Channel2"]
movie_dirs=["example/data/*-*"]
movie_dirs=["example/data/*-*"] # paths of the movie folders to process. Inside each of these folders should be the channel subdirectories
ch_subdirs=["Channel0", "Channel1", "Channel2"] # names of the channel subdirectories to be processed


########################################################################
# Nuclei segmentation ##################################################
[BuildNucleiTrainingRecordTask]
# Nuclei segmentation is divided into training and prediction tasks

[BuildNucleiTrainingRecordTask] # creates all necessary TensorFlow record (.tfrec) files containing the annotation/raw image pairs with the correct patch size used later for training
training_base_dir=models/nuclei_seg
ch_subdir=Channel1
annot_subdir=nuclei_annot
spacing=(2,0.26,0.26)
spacing=(2,0.26,0.26) # tfrecord image spacing (Z,Y,X). Should reflect the spacing of the images used during training, as well as the model spacing.
train_fraction=0.9
valid_fraction=0.1
min_patch_size=(32,256,256)

@@ -129,9 +148,9 @@ inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=0.1

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below.
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below. For example, for the default nuclei prediction model from the repository, retraining from the same baseline can be done by uncommenting:

#resume_weights=PATH_TO_CURRENT_MODEL/weights_latest.h5
#resume_weights = models/nuclei_seg/out/RDCNet-F1-8-8-DC32-OC5-G4-DR1-2-4-8-GC32-S5-D0.1_20210227_inference/weights_latest.h5

intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

@@ -145,6 +164,8 @@ movie_dirs=["example/data/*-*"]

########################################################################
# Cell segmentation ####################################################
# Cell and lumen segmentation are divided into training and prediction tasks

[BuildLumenTrainingRecordTask]
training_base_dir=models/cell_seg
ch_subdir=Channel0

@@ -193,9 +214,9 @@ inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=1.0

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below.
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below. For example, for the default cell prediction model from the repository, retraining from the same baseline can be done by uncommenting:

#resume_weights=PATH_TO_CURRENT_MODEL/weights_latest.h5
#resume_weights = models/cell_seg/out/RDCNet-F1-8-8-DC32-OC6-G4-DR1-2-4-8-GC32-S5-D0.1_20210227_inference/weights_latest.h5

intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)

@@ -210,12 +231,16 @@ movie_dirs=["example/data/*-*"]

########################################################################
# Lineage ##############################################################
# calculates all basic properties from the existing lineage tree. Included properties are: distance to parent, time since last division, etc.

[TreePropsTask]
out_subdir=tree_props
xml_tree=mamut_deconv.xml
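# (presumably the lineage tree exported from Mastodon in MaMuT .xml format, here matching the '-Deconv' images; adapt the file name to your own export)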

########################################################################
# Features #############################################################
# Extracts all features from the segmentation results

[ExtractFeaturesTask]
out_subdir=features
nuclei_subdir=Channel1

@@ -291,9 +316,9 @@ inter_margin=6.0
jaccard_hinge=0.3
jaccard_eps=0.1

# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below.
# If a specific model needs to be retrained, or if a training stopped due to an error and needs to be continued, one can resume the weights based on "weights_latest.h5" or "weights_best.h5" from the previous model by uncommenting below. For example, for the default tracking prediction model from the repository, retraining from the same baseline can be done by uncommenting:

#resume_weights=PATH_TO_CURRENT_MODEL/weights_latest.h5
#resume_weights=models/tracking/out/RDCNet-F1-8-8-DC64-OC10-G4-DR1-2-4-8-GC64-S6-D0.1_20210306_inference/weights_latest.h5

intensity_offset_sigma=0.5
intensity_scaling_bounds=(0.1, 10.)