Skip to content

Commit

Permalink
fixed #13, update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
dawnmy committed Apr 21, 2022
1 parent 3f26df3 commit e2207e5
Show file tree
Hide file tree
Showing 4 changed files with 271 additions and 89 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,12 @@ optional arguments:
-t THREADS, --threads THREADS
number of threads to use. (default: 10)
--chunk_size CHUNK_SIZE
chunk_size * threads reads to process per thread.(default:
1024)
When chunk_size=1024 and threads=20, each process will load
1024 reads, in total consumming ~20G memory.
chunk_size * 1024 reads to load each time.
                        When chunk_size=1000 and threads=20, about 20 GB of memory is consumed; chunk_size is best set to a multiple of the number of threads.
-v, --version show program's version number and exit
```
**Note**: RiboDetector uses multiprocessing with shared memory, thus the memory use of a single process indicated in `htop` or `top` is actually the total memory used by RiboDetector. Some job submission systems, such as SGE, miscalculate the total memory use by adding up the memory use of all processes. If you see this, do not worry: it will not cause an out-of-memory issue.
<!-- ### Benchmarks
Expand Down
2 changes: 1 addition & 1 deletion ribodetector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
Accurate and rapid RiboRNA sequences Detector based on deep learning.
"""

__version__ = "0.2.4"
__version__ = "0.2.5"
__author__ = 'ZL Deng'
32 changes: 0 additions & 32 deletions ribodetector/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,6 @@ def run(self):

num_batches = math.ceil(num_seqs / self.batch_size)

# Output probability to files
# prob_out_fh = open(
# self.output[0].replace('.r1.fq', '') + '.softmax.probability.txt', 'w')

# prob_out_fh.write(
# '\t'.join(['read', 'r1_0', 'r1_1', 'r2_0', 'r2_1']) + '\n')

data_loader = tqdm(DataLoader(paired_reads_data,
num_workers=self.args.threads,
pin_memory=self.has_cuda,
Expand All @@ -184,28 +177,6 @@ def run(self):
r2_output = self.model(r2_data.to(
self.device, non_blocking=self.has_cuda))

# output the predicted probability of two classes
# for read_r1, r1_probs, r2_probs in zip(r1,
# torch.nn.functional.softmax(
# r1_output, dim=1).tolist(),
# torch.nn.functional.softmax(r2_output, dim=1).tolist()):
# read = read_r1.split('\n')[0].lstrip(
# '@').rsplit('-', 1)[0]
# read_probs = [
# read] + list(map(str, r1_probs)) + list(map(str, r2_probs))
# prob_out_fh.write('\t'.join(read_probs) + '\n')

# for read_r2, r2_probs in zip(r2, torch.nn.functional.softmax(r2_output, dim=1).tolist()):
# read_r2_probs = [read_r2.split(
# '\n')[0]] + list(map(str, r2_probs))
# prob_out2_fh.write('\t'.join(read_r2_probs) + '\n')

# r1_batch_labels = torch.argmax(r1_output, dim=1).tolist()
# r2_batch_labels = torch.argmax(r2_output, dim=1).tolist()

# r1_dict, r2_dict = self.separate_paired_reads(
# r1, r1_batch_labels, r2, r2_batch_labels)

r1_dict, r2_dict = self.separate_paired_reads(
r1, r1_output, r2, r2_output)
if r1_dict[0]:
Expand Down Expand Up @@ -248,9 +219,6 @@ def run(self):
norrna1_fh.close()
norrna2_fh.close()

# close prob out file handle
# prob_out_fh.close()

# Single end reads
else:
reads_data = SeqData(SeqEncoder.load_reads(*self.input))
Expand Down
Loading

0 comments on commit e2207e5

Please sign in to comment.