Skip to content

Commit

Permalink
Support paired reads ("_r[1|2]" or "_R[1|2]") in k-mer counter
Browse files Browse the repository at this point in the history
  • Loading branch information
ivartb committed Mar 25, 2022
1 parent de718c6 commit d3bb960
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 19 deletions.
55 changes: 41 additions & 14 deletions src/tools/KmersCounterForManyFilesMain.java
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package tools;

import io.IOUtils;
import ru.ifmo.genetics.io.ReadersUtils;
import ru.ifmo.genetics.statistics.Timer;
import ru.ifmo.genetics.utils.TextUtils;
import ru.ifmo.genetics.utils.tool.ExecutionFailedException;
import ru.ifmo.genetics.utils.tool.Parameter;
import ru.ifmo.genetics.utils.tool.Tool;
Expand All @@ -12,8 +12,10 @@
import ru.ifmo.genetics.utils.tool.values.InMemoryValue;
import ru.ifmo.genetics.utils.tool.values.InValue;

import java.io.*;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class KmersCounterForManyFilesMain extends Tool {
Expand Down Expand Up @@ -64,22 +66,47 @@ public class KmersCounterForManyFilesMain extends Tool {
private Timer t;

@Override
protected void runImpl() throws ExecutionFailedException {
protected void runImpl() throws ExecutionFailedException, IOException {
t = new Timer();
counters.clear();

for (File f : inputFiles.get()) {
KmersCounterMain counter = new KmersCounterMain();
counter.workDir.set(workDir.append("sub-counter"));
counter.k.set(k);
counter.inputFiles.set(new File[]{f});
counter.maximalBadFrequency.set(maximalBadFrequency);
counter.outputDir.set(outputDir);
counter.statsDir.set(statsDir);

addStep(counter);
counters.add(counter);
File[] files = inputFiles.get();
Arrays.sort(files);
List<String> names = new ArrayList<String>();
for (File f : files) {
names.add(ReadersUtils.readDnaLazy(f).name());
}

int i=0;
while (i < files.length) {
if ((names.get(i).endsWith("_r1") && i+1<files.length && names.get(i+1).endsWith("_r2")) ||
(names.get(i).endsWith("_R1") && i+1<files.length && names.get(i+1).endsWith("_R2"))) {
KmersCounterMain counter = new KmersCounterMain();
counter.workDir.set(workDir.append("sub-counter"));
counter.k.set(k);
counter.inputFiles.set(new File[]{files[i], files[i+1]});
counter.maximalBadFrequency.set(maximalBadFrequency);
counter.outputDir.set(outputDir);
counter.statsDir.set(statsDir);

addStep(counter);
counters.add(counter);
i += 2;
} else {
KmersCounterMain counter = new KmersCounterMain();
counter.workDir.set(workDir.append("sub-counter"));
counter.k.set(k);
counter.inputFiles.set(new File[]{files[i]});
counter.maximalBadFrequency.set(maximalBadFrequency);
counter.outputDir.set(outputDir);
counter.statsDir.set(statsDir);

addStep(counter);
counters.add(counter);
i += 1;
}
}

}

@Override
Expand Down
25 changes: 20 additions & 5 deletions src/tools/KmersCounterMain.java
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
package tools;

import io.IOUtils;
import ru.ifmo.genetics.dna.kmers.ShortKmerIteratorFactory;
import ru.ifmo.genetics.io.ReadersUtils;
import ru.ifmo.genetics.statistics.Timer;
import ru.ifmo.genetics.structures.map.BigLong2ShortHashMap;
import ru.ifmo.genetics.utils.Misc;
import ru.ifmo.genetics.utils.tool.*;
import ru.ifmo.genetics.utils.NumUtils;
import ru.ifmo.genetics.utils.tool.ExecutionFailedException;
import ru.ifmo.genetics.utils.tool.Parameter;
import ru.ifmo.genetics.utils.tool.Tool;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileMVParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.FileParameterBuilder;
import ru.ifmo.genetics.utils.tool.inputParameterBuilder.IntParameterBuilder;
import ru.ifmo.genetics.utils.tool.values.InMemoryValue;
import ru.ifmo.genetics.utils.tool.values.InValue;
import ru.ifmo.genetics.utils.NumUtils;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -89,8 +88,7 @@ protected void runImpl() throws ExecutionFailedException, IOException {
stDir.mkdirs();
}

String name = ReadersUtils.readDnaLazy(inputFiles.get()[0]).name()
+ (inputFiles.get().length > 1 ? "+" : "");
String name = getName();
File outFile = new File(outDir, name + ".kmers.bin");
File stFile = new File(stDir, name + ".stat.txt");

Expand Down Expand Up @@ -121,6 +119,23 @@ protected void runImpl() throws ExecutionFailedException, IOException {
resultingKmerFilesPr.set(outFile);
}

/**
 * Builds the base name used for the output and statistics files.
 *
 * <ul>
 *   <li>Exactly two inputs whose names end with {@code _r1}/{@code _r2} or
 *       {@code _R1}/{@code _R2}: the first name without its 3-character
 *       suffix.</li>
 *   <li>Any other multi-file input: the first name followed by {@code "+"}.</li>
 *   <li>A single input: its name unchanged.</li>
 * </ul>
 *
 * @return the base name for the result files
 * @throws IOException if a name cannot be obtained from an input file
 */
private String getName() throws IOException {
    File[] inputs = inputFiles.get();
    String firstName = ReadersUtils.readDnaLazy(inputs[0]).name();

    if (inputs.length != 2) {
        return inputs.length > 1 ? firstName + "+" : firstName;
    }

    String secondName = ReadersUtils.readDnaLazy(inputs[1]).name();
    boolean lowerCasePair = firstName.endsWith("_r1") && secondName.endsWith("_r2");
    boolean upperCasePair = firstName.endsWith("_R1") && secondName.endsWith("_R2");
    if (!lowerCasePair && !upperCasePair) {
        return firstName + "+";
    }

    // Strip the "_r1" / "_R1" mate suffix so both mates share one name.
    return firstName.substring(0, firstName.length() - 3);
}

@Override
protected void cleanImpl() {
    // Intentionally empty: this override performs no cleanup.
}
Expand Down

0 comments on commit d3bb960

Please sign in to comment.