Skip to content

Commit

Permalink
Merge pull request #7 from prophyle/camel-files
Browse files Browse the repository at this point in the history
  • Loading branch information
karel-brinda authored Jan 17, 2024
2 parents e6fbd04 + 4bd466c commit ccb3655
Show file tree
Hide file tree
Showing 18 changed files with 1,468 additions and 0 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: C++ CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  # Build the binary on Linux and macOS and run the C++ unit tests.
  build_with_unittests:

    runs-on: ${{ matrix.os }}

    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v3
      - name: make
        run: make
      # The unit tests compile GoogleTest from the tests/googletest submodule,
      # so it has to be fetched before `make cpptest`.
      - name: submodule
        run: git submodule update --init
      - name: cpptests
        run: make cpptest

  # Build the binary and compare its output against the input with jellyfish.
  verify_ubuntu:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: make
        run: make
      # Refresh the package index first and install non-interactively so the
      # step does not depend on the runner image's cached lists or apt defaults.
      - name: jellyfish
        run: sudo apt-get update && sudo apt-get install -y jellyfish
      - name: verify
        run: make quick-verify
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Binaries (prophasm2 and prophasmtest are produced by the Makefile)
prophasm
prophasm2
prophasmtest

# Generated test files (scripts/verify.py writes its intermediate files to ./bin)
bin/

# Generated version file (written by create-version.sh)
src/version.h

# IDE files
.idea/

# Prerequisites
*.d

Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# GoogleTest sources; the Makefile compiles src/gtest-all.cc from this checkout for the unit tests.
[submodule "tests/googletest"]
path = tests/googletest
url = https://github.com/google/googletest
46 changes: 46 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Build, test and verification rules for ProphAsm2.
#
#   make               build the prophasm2 binary
#   make cpptest       build and run the GoogleTest unit tests
#                      (requires the tests/googletest submodule to be checked out)
#   make verify        run scripts/verify.py on data/spneumoniae.fa
#   make quick-verify  same as verify, but passes --quick to the script
#   make test          cpptest + verify
#   make clean         remove everything generated by the rules above

.PHONY: all clean test cpptest verify quick-verify

# Remove a half-written target when its recipe fails.
.DELETE_ON_ERROR:

CXX=		g++
CXXFLAGS=	-g -Wall -Wno-unused-function -std=c++17 -O2
LDFLAGS=	-lz
# scripts/verify.py is a Python 3 script (see its shebang); override with
# `make PYTHON=python` if the interpreter is named differently.
PYTHON?=	python3
SRC=		src
SCRIPTS=	scripts
DATA=		data
TESTS=		tests
GTEST=		$(TESTS)/googletest/googletest
PROG=		prophasm2

# Source lists. The glob patterns must carry the directory themselves:
# `$(SRC)/$(wildcard *.cpp)` would glob the current directory and prefix only
# the first match. The generated version header is excluded here and listed
# explicitly where it is needed, because the $(PROG) recipe rewrites it.
SRC_FILES:=	$(filter-out $(SRC)/version.h,$(wildcard $(SRC)/*.cpp $(SRC)/*.h $(SRC)/*.hpp))
TEST_FILES:=	$(wildcard $(TESTS)/*.cpp $(TESTS)/*.h $(TESTS)/*.hpp)


all: $(PROG)

test: cpptest verify

cpptest: prophasmtest
	./prophasmtest

verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa
	$(PYTHON) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa

quick-verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa
	$(PYTHON) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa --quick

# The version header is regenerated on every (re)build of the binary so that
# the embedded version follows the current `git describe` output.
$(PROG): $(SRC)/main.cpp $(SRC_FILES) $(SRC)/version.h
	./create-version.sh
	$(CXX) $(CXXFLAGS) $(SRC)/main.cpp -o $@ $(LDFLAGS)


prophasmtest: $(TESTS)/unittest.cpp gtest-all.o $(SRC_FILES) $(TEST_FILES)
	$(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include $(TESTS)/unittest.cpp gtest-all.o -pthread -o $@ $(LDFLAGS)

gtest-all.o: $(GTEST)/src/gtest-all.cc
	$(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include -I $(GTEST) -DGTEST_CREATE_SHARED_LIBRARY=1 -c -pthread $< -o $@

$(SRC)/version.h: $(SRC)/version
	./create-version.sh

clean:
	$(RM) $(PROG)
	$(RM) prophasmtest
	$(RM) -r ./bin
	$(RM) gtest-all.o
	$(RM) $(SRC)/version.h
2 changes: 2 additions & 0 deletions create-version.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Generate src/version.h containing a single `#define VERSION "<version>"` line.
#
# The version is taken from `git describe`; when that fails (e.g. outside of a
# git checkout) the content of src/version is used instead. All paths are
# relative to the current directory.

# Newlines are stripped from the version so it fits into one string literal.
version="$( { git describe --abbrev=4 --dirty --always --tags 2> /dev/null || cat src/version; } | tr -d '\n' )"

# Terminate the header with a newline so it is a well-formed text file.
printf '#define VERSION "%s"\n' "${version}" > src/version.h
2 changes: 2 additions & 0 deletions data/spneumoniae.fa

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions scripts/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import subprocess
import sys
import os
import argparse


def verify_instance(fasta_path: str, k: int, complements: bool) -> bool:
    """
    Check if running ProphAsm2 on given fasta file produces the same set of k-mers as the original one.

    ProphAsm2 is run with `-p fasta_path -k k` (plus `-c` if `complements` is set) and its output is
    stored in ./bin/simplitigs.fa. Jellyfish then counts k-mers in the output, in the original file
    and in the merge of both; the three distinct counts must be equal.

    :param fasta_path: path to the input fasta file
    :param k: k-mer length passed to both ProphAsm2 and jellyfish
    :param complements: if set, pass `-c` to ProphAsm2 and `-C` to `jellyfish count`
    :return: False if ProphAsm2 fails or the distinct counts differ, True otherwise
    :raises subprocess.CalledProcessError: if a jellyfish invocation exits with non-zero status
    """
    simplitigs_path = "./bin/simplitigs.fa"
    with open(simplitigs_path, "w") as k_mers:
        args = ["./prophasm2", "-p", fasta_path, "-k", f"{k}"]
        if complements:
            args.append("-c")
        prophasm = subprocess.run(args, stdout=k_mers)
    # A crashed run must count as a failure instead of being judged by its partial output.
    if prophasm.returncode != 0:
        print("F")
        print(f"Failed: k={k}: prophasm2 exited with status {prophasm.returncode}.")
        return False

    # Statistics of: [0] the result, [1] the original sequence, [2] the merged file.
    stats = [{}, {}, {}]
    runs = [
        (0, simplitigs_path, "simplitigs"),
        (1, fasta_path, "original"),
    ]
    for i, path, result in runs:
        args = ["jellyfish", "count", "-m", f"{k}", "-s", "100M", "-o", f"./bin/{result}.jf", path]
        if complements:
            args.insert(2, "-C")
        # check=True: ./bin keeps the .jf files of the previous call, so a failed count
        # would otherwise be silently replaced by stale data.
        subprocess.run(args, check=True)
        stats[i] = _read_jellyfish_stats(result)

    # Count k-mers on merged file.
    subprocess.run(["jellyfish", "merge", "-o", "./bin/merged.jf", "./bin/simplitigs.jf", "./bin/original.jf"],
                   check=True)
    stats[2] = _read_jellyfish_stats("merged")

    distinct_key = "Distinct:"
    total_key = "Total:"
    if stats[0][distinct_key] != stats[1][distinct_key] or stats[0][distinct_key] != stats[2][distinct_key]:
        print("F")
        print(f"Failed: k={k}: expected orginal_distinct_count={stats[1][distinct_key]}, result_distinct_count={stats[0][distinct_key]} and merged_distinct_count={stats[2][distinct_key]} to be equal.")
        return False
    elif complements and stats[0][distinct_key] != stats[0][total_key]:
        print("W")
        print(f"Warning: k={k}: number of masked k-mers={stats[0][total_key]} is not minimal possible (minimum is {stats[0][distinct_key]}).")
    else:
        print(".", end="")
    sys.stdout.flush()
    return True


def _read_jellyfish_stats(name: str) -> dict:
    """
    Run `jellyfish stats` on ./bin/<name>.jf and return the first four lines of its output as a dict.

    Each line is split on whitespace into a key and a value; keys keep their trailing colon
    (e.g. "Distinct:"). The raw output is kept in ./bin/<name>_stats.txt.
    """
    stats_path = f"./bin/{name}_stats.txt"
    with open(stats_path, "w") as f:
        subprocess.run(["jellyfish", "stats", f"./bin/{name}.jf"], stdout=f, check=True)
    parsed = {}
    with open(stats_path, "r") as f:
        for _ in range(4):
            key, value = f.readline().split()
            parsed[key] = value
    return parsed


def main():
    """
    Command-line entry point: verify ProphAsm2 on the given fasta file for a range of k.

    Runs verify_instance for k from 2 to 32 (every third k with --quick), once with and once
    without complements, and exits with status 1 if any instance fails.
    """
    # The text is the description; passed positionally it would become the program name.
    parser = argparse.ArgumentParser(
        description="check if ProphAsm2 outputs simplitigs which contain the same set of k-mers "
                    "as the original sequence")
    parser.add_argument("--quick", help="if set do not check for full range of k", action="store_true")
    parser.add_argument("path", help="path to the fasta file on which ProphAsm2 is verified")
    args = parser.parse_args()

    # Initialize the directory for intermediate files (only after the arguments were accepted).
    os.makedirs("bin", exist_ok=True)

    success = True
    print("Testing ProphAsm2 outputs valid simplitigs on file " + args.path)
    for complements in [True, False]:
        for k in range(2, 33, 3 if args.quick else 1):
            success &= verify_instance(args.path, k, complements)
        # Terminate the row of progress dots.
        print("")

    # Print status.
    if not success:
        print("Tests failed")
        sys.exit(1)
    print("OK")


if __name__ == "__main__":
    main()
Loading

0 comments on commit ccb3655

Please sign in to comment.