Skip to content

Commit

Permalink
fixed #13, update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
dawnmy committed Apr 21, 2022
1 parent 3f26df3 commit e2207e5
Show file tree
Hide file tree
Showing 4 changed files with 271 additions and 89 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,13 +154,12 @@ optional arguments:
-t THREADS, --threads THREADS
number of threads to use. (default: 10)
--chunk_size CHUNK_SIZE
chunk_size * threads reads to process per thread.(default:
1024)
When chunk_size=1024 and threads=20, each process will load
1024 reads, in total consumming ~20G memory.
chunk_size * 1024 reads to load each time.
                        When chunk_size=1000 and threads=20, about 20 GB of memory is consumed; chunk_size is best set to a multiple of the number of threads.
-v, --version show program's version number and exit
```
**Note**: RiboDetector uses multiprocessing with shared memory, thus the memory use of a single process indicated in `htop` or `top` is actually the total memory used by RiboDetector. Some job submission systems, such as SGE, miscalculate the total memory use by adding up the memory use of all processes. If you see this, do not worry: it will not cause an out-of-memory issue.
<!-- ### Benchmarks
Expand Down
2 changes: 1 addition & 1 deletion ribodetector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
Accurate and rapid RiboRNA sequences Detector based on deep learning.
"""

__version__ = "0.2.4"
__version__ = "0.2.5"
__author__ = 'ZL Deng'
32 changes: 0 additions & 32 deletions ribodetector/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,13 +163,6 @@ def run(self):

num_batches = math.ceil(num_seqs / self.batch_size)

# Output probability to files
# prob_out_fh = open(
# self.output[0].replace('.r1.fq', '') + '.softmax.probability.txt', 'w')

# prob_out_fh.write(
# '\t'.join(['read', 'r1_0', 'r1_1', 'r2_0', 'r2_1']) + '\n')

data_loader = tqdm(DataLoader(paired_reads_data,
num_workers=self.args.threads,
pin_memory=self.has_cuda,
Expand All @@ -184,28 +177,6 @@ def run(self):
r2_output = self.model(r2_data.to(
self.device, non_blocking=self.has_cuda))

# output the predicted probability of two classes
# for read_r1, r1_probs, r2_probs in zip(r1,
# torch.nn.functional.softmax(
# r1_output, dim=1).tolist(),
# torch.nn.functional.softmax(r2_output, dim=1).tolist()):
# read = read_r1.split('\n')[0].lstrip(
# '@').rsplit('-', 1)[0]
# read_probs = [
# read] + list(map(str, r1_probs)) + list(map(str, r2_probs))
# prob_out_fh.write('\t'.join(read_probs) + '\n')

# for read_r2, r2_probs in zip(r2, torch.nn.functional.softmax(r2_output, dim=1).tolist()):
# read_r2_probs = [read_r2.split(
# '\n')[0]] + list(map(str, r2_probs))
# prob_out2_fh.write('\t'.join(read_r2_probs) + '\n')

# r1_batch_labels = torch.argmax(r1_output, dim=1).tolist()
# r2_batch_labels = torch.argmax(r2_output, dim=1).tolist()

# r1_dict, r2_dict = self.separate_paired_reads(
# r1, r1_batch_labels, r2, r2_batch_labels)

r1_dict, r2_dict = self.separate_paired_reads(
r1, r1_output, r2, r2_output)
if r1_dict[0]:
Expand Down Expand Up @@ -248,9 +219,6 @@ def run(self):
norrna1_fh.close()
norrna2_fh.close()

# close prob out file handle
# prob_out_fh.close()

# Single end reads
else:
reads_data = SeqData(SeqEncoder.load_reads(*self.input))
Expand Down
Loading

0 comments on commit e2207e5

Please sign in to comment.