Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Step 1: Tailored camel's local for simplitigs #7

Merged
merged 8 commits into from
Jan 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Continuous integration: build the project, run the C++ unit tests and
# run the quick k-mer verification on every push / pull request to main.
name: C++ CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  # Builds the binary and runs `make cpptest` on both Linux and macOS.
  build_with_unittests:

    runs-on: ${{ matrix.os }}

    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest]

    steps:
      - uses: actions/checkout@v3
      - name: make
        run: make
      # The unit tests need the googletest submodule checked out.
      - name: submodule
        run: git submodule init; git submodule update
      - name: cpptests
        run: make cpptest

  # Runs `make quick-verify`, which needs jellyfish installed.
  verify_ubuntu:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3
      - name: make
        run: make
      # Refresh the package index first and install non-interactively, so the
      # step neither hits stale mirrors nor depends on a confirmation prompt.
      - name: jellyfish
        run: sudo apt-get update && sudo apt-get install -y jellyfish
      - name: verify
        run: make quick-verify
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Binaries
prophasm
prophasm2
prophasmtest

# Generated test files
bin/

# Generated version file
src/version.h

# IDE files
.idea/

# Prerequisites
*.d

Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[submodule "tests/googletest"]
path = tests/googletest
url = https://github.com/google/googletest
46 changes: 46 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
.PHONY: all clean test cpptest verify quick-verify

CXX= g++
CXXFLAGS= -g -Wall -Wno-unused-function -std=c++17 -O2
LDFLAGS= -lz
SRC= src
SCRIPTS= scripts
DATA= data
TESTS= tests
GTEST= $(TESTS)/googletest/googletest
PROG= prophasm2
# Interpreter used for the verification script; override on the command line,
# e.g. `make PYTHON=python3 verify`.
PYTHON= python

# All sources and headers the binaries are built from. The directory has to be
# part of the glob pattern itself: `$(SRC)/$(wildcard *.cpp)` would glob the
# current directory and prefix only the first word of the result.
SRC_FILES= $(wildcard $(SRC)/*.cpp $(SRC)/*.h $(SRC)/*.hpp)
TEST_FILES= $(wildcard $(TESTS)/*.cpp $(TESTS)/*.h $(TESTS)/*.hpp)


all: $(PROG)

# Run both the C++ unit tests and the full k-mer verification.
test: cpptest verify

cpptest: prophasmtest
	./prophasmtest

# Verify the output for the full range of k.
verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa
	$(PYTHON) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa

# Same as verify, but passes --quick to the script.
quick-verify: $(PROG) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa
	$(PYTHON) $(SCRIPTS)/verify.py $(DATA)/spneumoniae.fa --quick

# The version header is regenerated on every build of the binary.
$(PROG): $(SRC)/main.cpp $(SRC_FILES) src/version.h
	./create-version.sh
	$(CXX) $(CXXFLAGS) $(SRC)/main.cpp -o $@ $(LDFLAGS)


prophasmtest: $(TESTS)/unittest.cpp gtest-all.o $(SRC_FILES) $(TEST_FILES)
	$(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include $(TESTS)/unittest.cpp gtest-all.o -pthread -o $@ $(LDFLAGS)

# Only the googletest source is compiled here, so it is the only prerequisite.
gtest-all.o: $(GTEST)/src/gtest-all.cc
	$(CXX) $(CXXFLAGS) -isystem $(GTEST)/include -I $(GTEST)/include -I $(GTEST) -DGTEST_CREATE_SHARED_LIBRARY=1 -c -pthread $(GTEST)/src/gtest-all.cc -o $@

src/version.h: src/version
	./create-version.sh

clean:
	rm -f $(PROG)
	rm -f prophasmtest
	rm -r -f ./bin
	rm -f gtest-all.o
	rm -f src/version.h
2 changes: 2 additions & 0 deletions create-version.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Write src/version.h as a single `#define VERSION "<version>"` line without
# any newline characters. The version is the output of `git describe`; if that
# command fails, the contents of src/version are used instead.
version="$(git describe --abbrev=4 --dirty --always --tags 2> /dev/null || cat src/version)"
printf '#define VERSION "%s"' "${version}" | tr -d '\n' > src/version.h
2 changes: 2 additions & 0 deletions data/spneumoniae.fa

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions scripts/verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import subprocess
import sys
import os
import argparse


def verify_instance(fasta_path: str, k: int, complements: bool) -> bool:
    """
    Check if running ProphAsm2 on given fasta file produces the same set of k-mers as the original one.

    ProphAsm2 output is written to ./bin/simplitigs.fa. Jellyfish then counts the k-mers of that
    file, of the input file and of the merge of both databases; the instance passes when all
    three distinct k-mer counts agree. Progress is reported on stdout as "." (ok), "W" (warning)
    or "F" (failure).

    :param fasta_path: path to the input fasta file
    :param k: k-mer length passed to both ./prophasm2 and jellyfish
    :param complements: if set, "-c" is passed to ProphAsm2 and "-C" to jellyfish count
    :return: False if the distinct counts differ or an external command exits with a non-zero
        status; True otherwise (including when only a warning is printed)
    :raises ValueError: if a jellyfish stats report lacks the "Distinct:" or "Total:" entry
    """
    # Make the function usable on its own, without relying on the caller to create ./bin.
    os.makedirs("bin", exist_ok=True)

    simplitigs_path = "./bin/simplitigs.fa"
    prophasm_args = ["./prophasm2", "-p", fasta_path, "-k", f"{k}"]
    if complements:
        prophasm_args.append("-c")

    try:
        with open(simplitigs_path, "w") as simplitigs:
            subprocess.run(prophasm_args, stdout=simplitigs, check=True)
        result_stats = _count_kmers(simplitigs_path, "simplitigs", k, complements)
        original_stats = _count_kmers(fasta_path, "original", k, complements)
        # Count k-mers on merged file.
        subprocess.run(
            ["jellyfish", "merge", "-o", "./bin/merged.jf", "./bin/simplitigs.jf", "./bin/original.jf"],
            check=True,
        )
        merged_stats = _jellyfish_stats("merged")
    except subprocess.CalledProcessError as err:
        # A failed tool would otherwise leave stale databases from a previous k behind,
        # and comparing those could hide the failure.
        print("F")
        print(f"Failed: k={k}: command {' '.join(err.cmd)} exited with status {err.returncode}.")
        return False

    distinct_key = "Distinct:"
    total_key = "Total:"
    result_distinct = result_stats[distinct_key]
    if result_distinct != original_stats[distinct_key] or result_distinct != merged_stats[distinct_key]:
        print("F")
        print(f"Failed: k={k}: expected orginal_distinct_count={original_stats[distinct_key]}, result_distinct_count={result_distinct} and merged_distinct_count={merged_stats[distinct_key]} to be equal.")
        return False
    if complements and result_distinct != result_stats[total_key]:
        print("W")
        print(f"Warning: k={k}: number of masked k-mers={result_stats[total_key]} is not minimal possible (minimum is {result_distinct}).")
    else:
        print(".", end="")
    # Progress markers are printed without a newline, so push them out immediately.
    sys.stdout.flush()
    return True


def _count_kmers(path: str, name: str, k: int, complements: bool) -> dict:
    """Run ``jellyfish count`` on *path* into ./bin/<name>.jf and return the stats of that database."""
    args = ["jellyfish", "count", "-m", f"{k}", "-s", "100M", "-o", f"./bin/{name}.jf", path]
    if complements:
        args.insert(2, "-C")
    subprocess.run(args, check=True)
    return _jellyfish_stats(name)


def _jellyfish_stats(name: str) -> dict:
    """Run ``jellyfish stats`` on ./bin/<name>.jf and return its "Key: value" lines as a dict of strings."""
    completed = subprocess.run(
        ["jellyfish", "stats", f"./bin/{name}.jf"],
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )
    # Keep the raw report on disk as well, so it can be inspected after a failed run.
    with open(f"./bin/{name}_stats.txt", "w") as report:
        report.write(completed.stdout)
    stats = {}
    for line in completed.stdout.splitlines():
        fields = line.split()
        if len(fields) == 2:
            stats[fields[0]] = fields[1]
    missing = {"Distinct:", "Total:"} - stats.keys()
    if missing:
        raise ValueError(f"jellyfish stats for ./bin/{name}.jf is missing {sorted(missing)}")
    return stats


def main():
    """
    Verify ProphAsm2 on the fasta file given on the command line.

    Runs verify_instance for k in range(2, 33) (every third k with --quick), once with and once
    without complements, and prints a summary. Exits with status 1 if any instance failed.
    """
    # The text must be passed as `description`; the first positional parameter of
    # ArgumentParser is `prog`, which would put the whole sentence into the usage line.
    parser = argparse.ArgumentParser(
        description="check if ProphAsm2 outputs simplitigs which contain the same set of k-mers "
                    "as the original sequence")
    parser.add_argument("--quick", help="if set do not check for full range of k", action="store_true")
    parser.add_argument("path", help="path to the fasta file on which ProphAsm2 is verified")
    args = parser.parse_args()

    # Initialize the scratch directory only once the arguments are known to be valid,
    # so that e.g. --help leaves no directory behind.
    os.makedirs("bin", exist_ok=True)

    success = True
    print("Testing ProphAsm2 outputs valid simplitigs on file " + args.path)
    step = 3 if args.quick else 1
    for complements in [True, False]:
        for k in range(2, 33, step):
            # `&=` rather than `and`: every instance is run even after a failure.
            success &= verify_instance(args.path, k, complements)
        print("")

    # Print status.
    if not success:
        print("Tests failed")
        sys.exit(1)
    print("OK")


if __name__ == "__main__":
    main()
Loading